github.com/cilium/cilium@v1.16.2/pkg/service/service.go

// SPDX-License-Identifier: Apache-2.0
// Copyright Authors of Cilium

package service

import (
	"errors"
	"fmt"
	"net"
	"net/netip"
	"sync/atomic"

	"github.com/sirupsen/logrus"
	"k8s.io/apimachinery/pkg/util/sets"

	"github.com/cilium/cilium/pkg/bpf"
	"github.com/cilium/cilium/pkg/cidr"
	cmtypes "github.com/cilium/cilium/pkg/clustermesh/types"
	"github.com/cilium/cilium/pkg/counter"
	datapathOpt "github.com/cilium/cilium/pkg/datapath/option"
	"github.com/cilium/cilium/pkg/datapath/sockets"
	datapathTypes "github.com/cilium/cilium/pkg/datapath/types"
	"github.com/cilium/cilium/pkg/k8s"
	lb "github.com/cilium/cilium/pkg/loadbalancer"
	"github.com/cilium/cilium/pkg/lock"
	"github.com/cilium/cilium/pkg/logging"
	"github.com/cilium/cilium/pkg/logging/logfields"
	"github.com/cilium/cilium/pkg/maps/lbmap"
	"github.com/cilium/cilium/pkg/metrics"
	monitorAgent "github.com/cilium/cilium/pkg/monitor/agent"
	monitorAPI "github.com/cilium/cilium/pkg/monitor/api"
	"github.com/cilium/cilium/pkg/node"
	"github.com/cilium/cilium/pkg/node/addressing"
	nodeTypes "github.com/cilium/cilium/pkg/node/types"
	"github.com/cilium/cilium/pkg/option"
	"github.com/cilium/cilium/pkg/service/healthserver"
	"github.com/cilium/cilium/pkg/time"
)

// ErrLocalRedirectServiceExists represents an error when a Local redirect
// service exists with the same Frontend.
type ErrLocalRedirectServiceExists struct {
	frontend lb.L3n4AddrID
	name     lb.ServiceName
}

// NewErrLocalRedirectServiceExists returns a new ErrLocalRedirectServiceExists
func NewErrLocalRedirectServiceExists(frontend lb.L3n4AddrID, name lb.ServiceName) error {
	return &ErrLocalRedirectServiceExists{
		frontend: frontend,
		name:     name,
	}
}

func (e ErrLocalRedirectServiceExists) Error() string {
	return fmt.Sprintf("local-redirect service exists for "+
		"frontend %v, skip update for svc %v", e.frontend, e.name)
}

func (e *ErrLocalRedirectServiceExists) Is(target error) bool {
	t, ok := target.(*ErrLocalRedirectServiceExists)
	if !ok {
		return false
	}
	return e.frontend.DeepEqual(&t.frontend) && e.name == t.name
}

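// Illustrative usage sketch (not part of the upstream file): because the type
// implements Is(), a caller upserting a ClusterIP service over a frontend that
// is already owned by a local-redirect service can match the error with
// errors.Is. The names svcManager and params are assumed for the sketch.
//
//	_, _, err := svcManager.UpsertService(params)
//	if errors.Is(err, NewErrLocalRedirectServiceExists(params.Frontend, params.Name)) {
//		// a local-redirect service already owns this frontend; skip the update
//	}
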
// healthServer is used to manage HealthCheckNodePort listeners
type healthServer interface {
	UpsertService(svcID lb.ID, svcNS, svcName string, localEndpoints int, port uint16)
	DeleteService(svcID lb.ID)
}

type svcInfo struct {
	hash     string
	frontend lb.L3n4AddrID
	backends []*lb.Backend
	// Hashed `backends`; pointing to the same objects.
	backendByHash map[string]*lb.Backend

	svcType                   lb.SVCType
	svcExtTrafficPolicy       lb.SVCTrafficPolicy
	svcIntTrafficPolicy       lb.SVCTrafficPolicy
	svcNatPolicy              lb.SVCNatPolicy
	sessionAffinity           bool
	sessionAffinityTimeoutSec uint32
	svcHealthCheckNodePort    uint16
	healthcheckFrontendHash   string
	svcName                   lb.ServiceName
	loadBalancerSourceRanges  []*cidr.CIDR
	l7LBProxyPort             uint16 // Non-zero for egress L7 LB services
	LoopbackHostport          bool

	restoredFromDatapath bool
	// The hashes of the backends restored from the datapath and
	// not yet heard about from the service cache.
	restoredBackendHashes sets.Set[string]
}

func (svc *svcInfo) isL7LBService() bool {
	return svc.l7LBProxyPort != 0
}

func (svc *svcInfo) deepCopyToLBSVC() *lb.SVC {
	backends := make([]*lb.Backend, len(svc.backends))
	for i, backend := range svc.backends {
		backends[i] = backend.DeepCopy()
	}
	return &lb.SVC{
		Frontend:            *svc.frontend.DeepCopy(),
		Backends:            backends,
		Type:                svc.svcType,
		ExtTrafficPolicy:    svc.svcExtTrafficPolicy,
		IntTrafficPolicy:    svc.svcIntTrafficPolicy,
		NatPolicy:           svc.svcNatPolicy,
		HealthCheckNodePort: svc.svcHealthCheckNodePort,
		Name:                svc.svcName,
		L7LBProxyPort:       svc.l7LBProxyPort,
		LoopbackHostport:    svc.LoopbackHostport,
	}
}

func (svc *svcInfo) isExtLocal() bool {
	switch svc.svcType {
	case lb.SVCTypeNodePort, lb.SVCTypeLoadBalancer, lb.SVCTypeExternalIPs:
		return svc.svcExtTrafficPolicy == lb.SVCTrafficPolicyLocal
	default:
		return false
	}
}

func (svc *svcInfo) isIntLocal() bool {
	switch svc.svcType {
	case lb.SVCTypeClusterIP, lb.SVCTypeNodePort, lb.SVCTypeLoadBalancer, lb.SVCTypeExternalIPs:
		return svc.svcIntTrafficPolicy == lb.SVCTrafficPolicyLocal
	default:
		return false
	}
}

func (svc *svcInfo) filterBackends(frontend lb.L3n4AddrID) bool {
	switch svc.svcType {
	case lb.SVCTypeLocalRedirect:
		return true
	default:
		// When both traffic policies are Local, there is only the external scope, which
		// should contain node-local backends only. Checking isExtLocal is still enough.
		switch frontend.Scope {
		case lb.ScopeExternal:
			if svc.svcType == lb.SVCTypeClusterIP {
				// ClusterIP doesn't support externalTrafficPolicy and has only the
				// external scope, which contains only node-local backends when
				// internalTrafficPolicy=Local.
				return svc.isIntLocal()
			}
			return svc.isExtLocal()
		case lb.ScopeInternal:
			return svc.isIntLocal()
		default:
			return false
		}
	}
}

func (svc *svcInfo) useMaglev() bool {
	if option.Config.NodePortAlg != option.NodePortAlgMaglev {
		return false
	}
	// Provision the Maglev LUT for ClusterIP only if ExternalClusterIP is
	// enabled because ClusterIP can also be accessed from outside with this
	// setting. We don't do it unconditionally to avoid increasing memory
	// footprint.
	if svc.svcType == lb.SVCTypeClusterIP && !option.Config.ExternalClusterIP {
		return false
	}
	// Wildcarded frontend is not exposed for external traffic.
	if svc.svcType == lb.SVCTypeNodePort && isWildcardAddr(svc.frontend) {
		return false
	}
	// Only provision the Maglev LUT for service types which are reachable
	// from outside the node.
	switch svc.svcType {
	case lb.SVCTypeClusterIP,
		lb.SVCTypeNodePort,
		lb.SVCTypeLoadBalancer,
		lb.SVCTypeHostPort,
		lb.SVCTypeExternalIPs:
		return true
	}
	return false
}

type L7LBInfo struct {
	// Backend Sync registrations that are interested in Service backend changes
	// to reflect this in an L7 loadbalancer (e.g. Envoy)
	backendSyncRegistrations map[BackendSyncer]struct{}

	// Name of the L7 LB resource (e.g. CEC) that needs this service to be redirected to an
	// L7 Loadbalancer specified in that resource.
	// Only one resource may do this for any given service.
	ownerRef L7LBResourceName

	// port number for L7 LB redirection. Can be zero if only backend sync
	// has been requested.
	proxyPort uint16

	// (sub)set of service's frontend ports to be redirected. If empty, all frontend ports will be redirected.
	ports []uint16
}

// isProtoAndPortMatch returns true if frontend has protocol TCP and its Port is in i.ports, or if
// i.ports is empty.
// 'ports' is typically short, so there is no point in optimizing the search.
func (i *L7LBInfo) isProtoAndPortMatch(fe *lb.L4Addr) bool {
	// L7 LB redirect is only supported for TCP frontends.
	// The below is to make sure that UDP and SCTP are not allowed, instead of comparing with lb.TCP.
	// The reason is to avoid extra dependencies with ongoing work to differentiate protocols in datapath,
	// which might add more values such as lb.Any, lb.None, etc.
	if fe.Protocol == lb.UDP || fe.Protocol == lb.SCTP {
		return false
	}

	// Empty 'ports' matches all ports
	if len(i.ports) == 0 {
		return true
	}

	for _, p := range i.ports {
		if p == fe.Port {
			return true
		}
	}
	return false
}

type L7LBResourceName struct {
	Namespace string
	Name      string
}

func (svc *svcInfo) checkLBSourceRange() bool {
	if option.Config.EnableSVCSourceRangeCheck {
		return len(svc.loadBalancerSourceRanges) != 0
	}

	return false
}

// Service is a service handler. Its main responsibility is to reflect
// service-related changes into BPF maps used by datapath BPF programs.
// The changes can be triggered either by k8s_watcher or directly by
// API calls to the /services endpoint.
type Service struct {
	lock.RWMutex

	svcByHash map[string]*svcInfo
	svcByID   map[lb.ID]*svcInfo

	backendRefCount counter.Counter[string]
	// only used to keep track of the existing hash->ID mapping,
	// not for loadbalancing decisions.
	backendByHash map[string]*lb.Backend

	healthServer healthServer
	monitorAgent monitorAgent.Agent

	lbmap         datapathTypes.LBMap
	lastUpdatedTs atomic.Value

	l7lbSvcs map[lb.ServiceName]*L7LBInfo

	backendConnectionHandler sockets.SocketDestroyer

	backendDiscovery datapathTypes.NodeNeighbors
}

// newService creates a new instance of the service handler.
func newService(monitorAgent monitorAgent.Agent, lbmap datapathTypes.LBMap, backendDiscoveryHandler datapathTypes.NodeNeighbors) *Service {
	var localHealthServer healthServer
	if option.Config.EnableHealthCheckNodePort {
		localHealthServer = healthserver.New()
	}

	svc := &Service{
		svcByHash:                map[string]*svcInfo{},
		svcByID:                  map[lb.ID]*svcInfo{},
		backendRefCount:          counter.Counter[string]{},
		backendByHash:            map[string]*lb.Backend{},
		monitorAgent:             monitorAgent,
		healthServer:             localHealthServer,
		lbmap:                    lbmap,
		l7lbSvcs:                 map[lb.ServiceName]*L7LBInfo{},
		backendConnectionHandler: backendConnectionHandler{},
		backendDiscovery:         backendDiscoveryHandler,
	}
	svc.lastUpdatedTs.Store(time.Now())

	return svc
}

// RegisterL7LBServiceRedirect makes the given service be locally redirected to the
// given proxy port.
func (s *Service) RegisterL7LBServiceRedirect(serviceName lb.ServiceName, resourceName L7LBResourceName, proxyPort uint16, frontendPorts []uint16) error {
	if proxyPort == 0 {
		return errors.New("proxy port for L7 LB redirection must be nonzero")
	}

	if logging.CanLogAt(log.Logger, logrus.DebugLevel) {
		log.WithFields(logrus.Fields{
			logfields.ServiceName:       serviceName.Name,
			logfields.ServiceNamespace:  serviceName.Namespace,
			logfields.L7LBProxyPort:     proxyPort,
			logfields.L7LBFrontendPorts: frontendPorts,
		}).Debug("Registering service for L7 proxy port redirection")
	}

	s.Lock()
	defer s.Unlock()

	err := s.registerL7LBServiceRedirect(serviceName, resourceName, proxyPort, frontendPorts)
	if err != nil {
		return err
	}

	return s.reUpsertServicesByName(serviceName.Name, serviceName.Namespace)
}

// 's' must be locked
func (s *Service) registerL7LBServiceRedirect(serviceName lb.ServiceName, resourceName L7LBResourceName, proxyPort uint16, frontendPorts []uint16) error {
	info := s.l7lbSvcs[serviceName]
	if info == nil {
		info = &L7LBInfo{}
	}

	// Only one CEC resource for a given service may request L7 LB redirection at a time.
	empty := L7LBResourceName{}
	if info.ownerRef != empty && info.ownerRef != resourceName {
		return fmt.Errorf("Service %q already registered for L7 LB redirection via a proxy resource %q", serviceName, info.ownerRef)
	}

	info.ownerRef = resourceName
	info.proxyPort = proxyPort

	if len(frontendPorts) == 0 {
		info.ports = nil
	} else {
		info.ports = make([]uint16, len(frontendPorts))
		copy(info.ports, frontendPorts)
	}

	s.l7lbSvcs[serviceName] = info

	return nil
}

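// Illustrative usage sketch (not part of the upstream file): an owner such as
// a CEC registers L7 redirection for one service and removes it again when the
// resource goes away. svcManager and the port value are assumed; nil
// frontendPorts means all frontend ports are redirected.
//
//	name := lb.ServiceName{Namespace: "default", Name: "my-svc"}
//	owner := L7LBResourceName{Namespace: "default", Name: "my-cec"}
//	if err := svcManager.RegisterL7LBServiceRedirect(name, owner, 15001, nil); err != nil {
//		// another resource already owns the redirect for this service
//	}
//	defer svcManager.DeregisterL7LBServiceRedirect(name, owner)
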
// RegisterL7LBServiceBackendSync registers a BackendSync to be informed when the backends of a Service change.
func (s *Service) RegisterL7LBServiceBackendSync(serviceName lb.ServiceName, backendSyncRegistration BackendSyncer) error {
	if backendSyncRegistration == nil {
		return nil
	}

	if logging.CanLogAt(log.Logger, logrus.DebugLevel) {
		log.WithFields(logrus.Fields{
			logfields.ServiceName:      serviceName.Name,
			logfields.ServiceNamespace: serviceName.Namespace,
			logfields.ProxyName:        backendSyncRegistration.ProxyName(),
		}).Debug("Registering service backend sync for L7 loadbalancer")
	}

	s.Lock()
	defer s.Unlock()
	s.registerL7LBServiceBackendSync(serviceName, backendSyncRegistration)

	return s.reUpsertServicesByName(serviceName.Name, serviceName.Namespace)
}

// 's' must be locked
func (s *Service) registerL7LBServiceBackendSync(serviceName lb.ServiceName, backendSyncRegistration BackendSyncer) {
	info := s.l7lbSvcs[serviceName]
	if info == nil {
		info = &L7LBInfo{}
	}

	if info.backendSyncRegistrations == nil {
		info.backendSyncRegistrations = make(map[BackendSyncer]struct{}, 1)
	}
	info.backendSyncRegistrations[backendSyncRegistration] = struct{}{}

	s.l7lbSvcs[serviceName] = info
}

func (s *Service) DeregisterL7LBServiceRedirect(serviceName lb.ServiceName, resourceName L7LBResourceName) error {
	if logging.CanLogAt(log.Logger, logrus.DebugLevel) {
		log.WithFields(logrus.Fields{
			logfields.ServiceName:      serviceName.Name,
			logfields.ServiceNamespace: serviceName.Namespace,
		}).Debug("Deregistering service from L7 load balancing")
	}

	s.Lock()
	defer s.Unlock()

	changed := s.deregisterL7LBServiceRedirect(serviceName, resourceName)

	if !changed {
		return nil
	}

	return s.reUpsertServicesByName(serviceName.Name, serviceName.Namespace)
}

func (s *Service) deregisterL7LBServiceRedirect(serviceName lb.ServiceName, resourceName L7LBResourceName) bool {
	info, found := s.l7lbSvcs[serviceName]
	if !found {
		return false
	}

	empty := L7LBResourceName{}

	changed := false

	if info.ownerRef == resourceName {
		info.ownerRef = empty
		info.proxyPort = 0
		changed = true
	}

	if len(info.backendSyncRegistrations) == 0 && info.ownerRef == empty {
		delete(s.l7lbSvcs, serviceName)
		changed = true
	}

	return changed
}

func (s *Service) DeregisterL7LBServiceBackendSync(serviceName lb.ServiceName, backendSyncRegistration BackendSyncer) error {
	if backendSyncRegistration == nil {
		return nil
	}

	if logging.CanLogAt(log.Logger, logrus.DebugLevel) {
		log.WithFields(logrus.Fields{
			logfields.ServiceName:      serviceName.Name,
			logfields.ServiceNamespace: serviceName.Namespace,
			logfields.ProxyName:        backendSyncRegistration.ProxyName(),
		}).Debug("Deregistering service backend sync for L7 loadbalancer")
	}

	s.Lock()
	defer s.Unlock()
	changed := s.deregisterL7LBServiceBackendSync(serviceName, backendSyncRegistration)

	if !changed {
		return nil
	}

	return s.reUpsertServicesByName(serviceName.Name, serviceName.Namespace)
}

func (s *Service) deregisterL7LBServiceBackendSync(serviceName lb.ServiceName, backendSyncRegistration BackendSyncer) bool {
	info, found := s.l7lbSvcs[serviceName]
	if !found {
		return false
	}

	if info.backendSyncRegistrations == nil {
		return false
	}

	if _, registered := info.backendSyncRegistrations[backendSyncRegistration]; !registered {
		return false
	}

	delete(info.backendSyncRegistrations, backendSyncRegistration)

	empty := L7LBResourceName{}
	if len(info.backendSyncRegistrations) == 0 && info.ownerRef == empty {
		delete(s.l7lbSvcs, serviceName)
	}

	return true
}

// BackendSyncer performs a synchronization of service backends to an
// external loadbalancer (e.g. Envoy L7 Loadbalancer).
type BackendSyncer interface {
	// ProxyName returns a human readable name of the L7 Proxy that acts as
	// L7 loadbalancer.
	ProxyName() string

	// Sync triggers the actual synchronization and passes the information
	// about the service that should be synchronized.
	Sync(svc *lb.SVC) error
}

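// Illustrative sketch (not part of the upstream file): a minimal BackendSyncer
// that only records how many backends were last synced. countingSyncer is a
// hypothetical name used for the example, not an upstream type.
//
//	type countingSyncer struct {
//		synced int
//	}
//
//	func (c *countingSyncer) ProxyName() string { return "counting-proxy" }
//
//	func (c *countingSyncer) Sync(svc *lb.SVC) error {
//		c.synced = len(svc.Backends)
//		return nil
//	}
//
//	// Registration happens per service name:
//	//   svcManager.RegisterL7LBServiceBackendSync(name, &countingSyncer{})
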
func (s *Service) GetLastUpdatedTs() time.Time {
	if val := s.lastUpdatedTs.Load(); val != nil {
		ts, ok := val.(time.Time)
		if ok {
			return ts
		}
	}
	return time.Now()
}

func (s *Service) GetCurrentTs() time.Time {
	return time.Now()
}

func (s *Service) populateBackendMapV3FromV2(ipv4, ipv6 bool) error {
	const (
		v4 = "ipv4"
		v6 = "ipv6"
	)

	enabled := map[string]bool{v4: ipv4, v6: ipv6}

	for v, e := range enabled {
		if !e {
			continue
		}

		var (
			err          error
			v2Map        *bpf.Map
			v3Map        *bpf.Map
			v3BackendVal lbmap.BackendValue
		)

		copyBackendEntries := func(key bpf.MapKey, value bpf.MapValue) {
			if v == v4 {
				v3Map = lbmap.Backend4MapV3
				v1BackendVal := value.(*lbmap.Backend4Value)
				addrCluster := cmtypes.AddrClusterFrom(v1BackendVal.Address.Addr(), 0)
				v3BackendVal, err = lbmap.NewBackend4ValueV3(
					addrCluster,
					v1BackendVal.Port,
					v1BackendVal.Proto,
					lb.GetBackendStateFromFlags(v1BackendVal.Flags),
					0,
				)
				if err != nil {
					log.WithError(err).WithField(logfields.BPFMapName, v3Map.Name()).Debug("Error creating map value")
					return
				}
			} else {
				v3Map = lbmap.Backend6MapV3
				v1BackendVal := value.(*lbmap.Backend6Value)
				addrCluster := cmtypes.AddrClusterFrom(v1BackendVal.Address.Addr(), 0)
				v3BackendVal, err = lbmap.NewBackend6ValueV3(
					addrCluster,
					v1BackendVal.Port,
					v1BackendVal.Proto,
					lb.GetBackendStateFromFlags(v1BackendVal.Flags),
					0,
				)
				if err != nil {
					log.WithError(err).WithField(logfields.BPFMapName, v3Map.Name()).Debug("Error creating map value")
					return
				}
			}

			err := v3Map.Update(key, v3BackendVal)
			if err != nil {
				log.WithError(err).WithField(logfields.BPFMapName, v3Map.Name()).Warn("Error updating map")
			}
		}

		if v == v4 {
			v2Map = lbmap.Backend4MapV2
		} else {
			v2Map = lbmap.Backend6MapV2
		}

		err = v2Map.DumpWithCallback(copyBackendEntries)
		if err != nil {
			return fmt.Errorf("unable to populate %s: %w", v2Map.Name(), err)
		}

		// V2 backend map will be removed from bpffs at this point; the map
		// will actually be removed once the last program referencing it has
		// been removed.
		err = v2Map.Close()
		if err != nil {
			log.WithError(err).WithField(logfields.BPFMapName, v2Map.Name()).Warn("Error closing map")
		}

		err = v2Map.Unpin()
		if err != nil {
			log.WithError(err).WithField(logfields.BPFMapName, v2Map.Name()).Warn("Error unpinning map")
		}

	}
	return nil
}

// InitMaps opens or creates BPF maps used by services.
//
// If restore is set to false, entries of the maps are removed.
func (s *Service) InitMaps(ipv6, ipv4, sockMaps, restore bool) error {
	s.Lock()
	defer s.Unlock()

	var (
		v2BackendMapExistsV4 bool
		v2BackendMapExistsV6 bool
	)

	toOpen := []*bpf.Map{}
	toDelete := []*bpf.Map{}
	if ipv6 {
		toOpen = append(toOpen, lbmap.Service6MapV2, lbmap.Backend6MapV3, lbmap.RevNat6Map)
		if !restore {
			toDelete = append(toDelete, lbmap.Service6MapV2, lbmap.Backend6MapV3, lbmap.RevNat6Map)
		}
		if sockMaps {
			if err := lbmap.CreateSockRevNat6Map(); err != nil {
				return err
			}
		}
		v2BackendMapExistsV6 = lbmap.Backend6MapV2.Open() == nil
	}
	if ipv4 {
		toOpen = append(toOpen, lbmap.Service4MapV2, lbmap.Backend4MapV3, lbmap.RevNat4Map)
		if !restore {
			toDelete = append(toDelete, lbmap.Service4MapV2, lbmap.Backend4MapV3, lbmap.RevNat4Map)
		}
		if sockMaps {
			if err := lbmap.CreateSockRevNat4Map(); err != nil {
				return err
			}
		}
		v2BackendMapExistsV4 = lbmap.Backend4MapV2.Open() == nil
	}

	for _, m := range toOpen {
		if err := m.OpenOrCreate(); err != nil {
			return err
		}
	}
	for _, m := range toDelete {
		if err := m.DeleteAll(); err != nil {
			return err
		}
	}

	if v2BackendMapExistsV4 || v2BackendMapExistsV6 {
		log.Info("Backend map v2 exists. Migrating entries to backend map v3.")
		if err := s.populateBackendMapV3FromV2(v2BackendMapExistsV4, v2BackendMapExistsV6); err != nil {
			log.WithError(err).Warn("Error populating V3 map from V2 map, might interrupt existing connections during upgrade")
		}
	}

	return nil
}

// UpsertService inserts or updates the given service.
//
// The first return value is true if the service hasn't existed before.
func (s *Service) UpsertService(params *lb.SVC) (bool, lb.ID, error) {
	s.Lock()
	defer s.Unlock()
	return s.upsertService(params)
}

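// Illustrative usage sketch (not part of the upstream file): upserting a
// minimal ClusterIP service with one backend. The addresses, ports, and the
// svcManager variable are assumed values; the constructors mirror those used
// elsewhere in this file.
//
//	feAddr := cmtypes.AddrClusterFrom(netip.MustParseAddr("10.0.0.1"), 0)
//	beAddr := cmtypes.AddrClusterFrom(netip.MustParseAddr("10.0.1.5"), 0)
//	created, id, err := svcManager.UpsertService(&lb.SVC{
//		Name:     lb.ServiceName{Namespace: "default", Name: "my-svc"},
//		Type:     lb.SVCTypeClusterIP,
//		Frontend: *lb.NewL3n4AddrID(lb.TCP, feAddr, 80, lb.ScopeExternal, 0),
//		Backends: []*lb.Backend{
//			{L3n4Addr: *lb.NewL3n4Addr(lb.TCP, beAddr, 8080, lb.ScopeExternal), State: lb.BackendStateActive},
//		},
//	})
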
// reUpsertServicesByName upserts a service again to update its internal state after
// changes for L7 service redirection.
// Write lock on 's' must be held.
func (s *Service) reUpsertServicesByName(name, namespace string) error {
	for _, svc := range s.svcByHash {
		if svc.svcName.Name == name && svc.svcName.Namespace == namespace {
			svcCopy := svc.deepCopyToLBSVC()
			if _, _, err := s.upsertService(svcCopy); err != nil {
				return fmt.Errorf("error while updating service in LB map: %w", err)
			}
		}
	}
	return nil
}

func (s *Service) upsertService(params *lb.SVC) (bool, lb.ID, error) {
	empty := L7LBResourceName{}

	// Set L7 LB for this service if registered.
	l7lbInfo, exists := s.l7lbSvcs[params.Name]
	if exists && l7lbInfo.ownerRef != empty && l7lbInfo.isProtoAndPortMatch(&params.Frontend.L4Addr) {
		params.L7LBProxyPort = l7lbInfo.proxyPort
	} else {
		params.L7LBProxyPort = 0
	}

	// L7 LB is sharing a C union in the datapath, disable session
	// affinity if L7 LB is configured for this service.
	if params.L7LBProxyPort != 0 {
		params.SessionAffinity = false
		params.SessionAffinityTimeoutSec = 0
	}

	// Implement a "lazy load" function for the scoped logger, so the expensive
	// call to 'WithFields' is only done if needed.
	debugLogsEnabled := logging.CanLogAt(log.Logger, logrus.DebugLevel)
	scopedLog := log
	scopedLogPopulated := false
	getScopedLog := func() *logrus.Entry {
		if !scopedLogPopulated {
			scopedLog = scopedLog.WithFields(logrus.Fields{
				logfields.ServiceIP: params.Frontend.L3n4Addr,
				logfields.Backends:  params.Backends,

				logfields.ServiceType:                params.Type,
				logfields.ServiceExtTrafficPolicy:    params.ExtTrafficPolicy,
				logfields.ServiceIntTrafficPolicy:    params.IntTrafficPolicy,
				logfields.ServiceHealthCheckNodePort: params.HealthCheckNodePort,
				logfields.ServiceName:                params.Name.Name,
				logfields.ServiceNamespace:           params.Name.Namespace,

				logfields.SessionAffinity:        params.SessionAffinity,
				logfields.SessionAffinityTimeout: params.SessionAffinityTimeoutSec,

				logfields.LoadBalancerSourceRanges: params.LoadBalancerSourceRanges,

				logfields.L7LBProxyPort: params.L7LBProxyPort,
			})

			scopedLogPopulated = true
		}
		return scopedLog
	}

	if debugLogsEnabled {
		getScopedLog().Debug("Upserting service")
	}

	if !option.Config.EnableSVCSourceRangeCheck &&
		len(params.LoadBalancerSourceRanges) != 0 {
		getScopedLog().Warnf("--%s is disabled, ignoring loadBalancerSourceRanges",
			option.EnableSVCSourceRangeCheck)
	}

	// Backends must either be the same IP proto as the frontend, or can be of
	// a different proto for NAT46/64. However, backends must be consistently
	// either v4 or v6, but not a mix.
	v4Seen := 0
	v6Seen := 0
	for _, b := range params.Backends {
		if b.L3n4Addr.IsIPv6() {
			v6Seen++
		} else {
			v4Seen++
		}
	}
	if v4Seen > 0 && v6Seen > 0 {
		err := fmt.Errorf("Unable to upsert service %s with a mixed set of IPv4 and IPv6 backends", params.Frontend.L3n4Addr.String())
		return false, lb.ID(0), err
	}
	v6Svc := params.Frontend.IsIPv6()
	if (v6Svc || v6Seen > 0) && !option.Config.EnableIPv6 {
		err := fmt.Errorf("Unable to upsert service %s as IPv6 is disabled", params.Frontend.L3n4Addr.String())
		return false, lb.ID(0), err
	}
	if (!v6Svc || v4Seen > 0) && !option.Config.EnableIPv4 {
		err := fmt.Errorf("Unable to upsert service %s as IPv4 is disabled", params.Frontend.L3n4Addr.String())
		return false, lb.ID(0), err
	}
	params.NatPolicy = lb.SVCNatPolicyNone
	if v6Svc && v4Seen > 0 {
		params.NatPolicy = lb.SVCNatPolicyNat64
	} else if !v6Svc && v6Seen > 0 {
		params.NatPolicy = lb.SVCNatPolicyNat46
	}
	if params.NatPolicy != lb.SVCNatPolicyNone && !option.Config.NodePortNat46X64 {
		err := fmt.Errorf("Unable to upsert service %s as NAT46/64 is disabled", params.Frontend.L3n4Addr.String())
		return false, lb.ID(0), err
	}

	// If needed, create svcInfo and allocate service ID
	svc, new, prevSessionAffinity, prevLoadBalancerSourceRanges, err := s.createSVCInfoIfNotExist(params)
	if err != nil {
		return false, lb.ID(0), err
	}

	// TODO(brb) defer ServiceID release after we have a lbmap "rollback"
	// If getScopedLog() has not been called, this field will still be included
	// from this point on in the function.
	scopedLog = scopedLog.WithField(logfields.ServiceID, svc.frontend.ID)
	if debugLogsEnabled {
		getScopedLog().Debug("Acquired service ID")
	}

	filterBackends := svc.filterBackends(params.Frontend)
	prevBackendCount := len(svc.backends)

	backendsCopy := []*lb.Backend{}
	for _, b := range params.Backends {
		// Local redirect services or services with trafficPolicy=Local may
		// only use node-local backends for external scope. We implement this by
		// filtering out all backend IPs which are not a local endpoint.
		if filterBackends && len(b.NodeName) > 0 && b.NodeName != nodeTypes.GetName() {
			continue
		}
		backendsCopy = append(backendsCopy, b.DeepCopy())
	}

	// Update backends cache and allocate/release backend IDs
	newBackends, obsoleteBackends, obsoleteSVCBackendIDs, err := s.updateBackendsCacheLocked(svc, backendsCopy)
	if err != nil {
		return false, lb.ID(0), err
	}

	if l7lbInfo != nil {
		for bs := range l7lbInfo.backendSyncRegistrations {
			svcCopy := svc.deepCopyToLBSVC()
			if err := bs.Sync(svcCopy); err != nil {
				return false, lb.ID(0), fmt.Errorf("failed to sync L7 LB backends (proxy: %s): %w", bs.ProxyName(), err)
			}
		}
	}

	// Update lbmaps (BPF service maps)
	if err = s.upsertServiceIntoLBMaps(svc, svc.isExtLocal(), svc.isIntLocal(), prevBackendCount,
		newBackends, obsoleteBackends, prevSessionAffinity, prevLoadBalancerSourceRanges,
		obsoleteSVCBackendIDs, getScopedLog, debugLogsEnabled); err != nil {
		return false, lb.ID(0), err
	}

	// Update managed neighbor entries of the LB
	if option.Config.DatapathMode == datapathOpt.DatapathModeLBOnly {
		s.upsertBackendNeighbors(newBackends, obsoleteBackends)
	}

	// Only add a HealthCheckNodePort server if this is a service which may
	// only contain local backends (i.e. it has externalTrafficPolicy=Local)
	if option.Config.EnableHealthCheckNodePort {
		if svc.isExtLocal() && filterBackends && svc.svcHealthCheckNodePort > 0 {
			// HealthCheckNodePort is used by external systems to poll the state of the Service;
			// it should never take into consideration Terminating backends, even when there are only
			// Terminating backends.
			//
			// There is one special case: an L7 proxy service, which never has any
			// backends because the traffic will be redirected.
			activeBackends := 0
			if l7lbInfo != nil {
				// Set this to 1 because Envoy will be running in this case.
				getScopedLog().WithField(logfields.ServiceHealthCheckNodePort, svc.svcHealthCheckNodePort).
					Debug("L7 service with HealthcheckNodePort enabled")
				activeBackends = 1
			} else {
				for _, b := range backendsCopy {
					if b.State == lb.BackendStateActive {
						activeBackends++
					}
				}
			}
			s.healthServer.UpsertService(svc.frontend.ID, svc.svcName.Namespace, svc.svcName.Name,
				activeBackends, svc.svcHealthCheckNodePort)

			if err = s.upsertNodePortHealthService(svc, &nodeMetaCollector{}); err != nil {
				return false, lb.ID(0), fmt.Errorf("upserting NodePort health service failed: %w", err)
			}

		} else if svc.svcHealthCheckNodePort == 0 {
			// Remove the health check server in case this service used to have
			// externalTrafficPolicy=Local with HealthCheckNodePort in the previous
			// version, but not anymore.
			s.healthServer.DeleteService(lb.ID(svc.frontend.ID))

			if svc.healthcheckFrontendHash != "" {
				healthSvc := s.svcByHash[svc.healthcheckFrontendHash]
				if healthSvc != nil {
					s.deleteServiceLocked(healthSvc)
				}
				svc.healthcheckFrontendHash = ""
			}
		}
	}

	if new {
		metrics.ServicesEventsCount.WithLabelValues("add").Inc()
	} else {
		metrics.ServicesEventsCount.WithLabelValues("update").Inc()
	}

	s.notifyMonitorServiceUpsert(svc.frontend, svc.backends,
		svc.svcType, svc.svcExtTrafficPolicy, svc.svcIntTrafficPolicy, svc.svcName.Name, svc.svcName.Namespace)
	return new, lb.ID(svc.frontend.ID), nil
}

type NodeMetaCollector interface {
	GetIPv4() net.IP
	GetIPv6() net.IP
}

type nodeMetaCollector struct{}

func (n *nodeMetaCollector) GetIPv4() net.IP {
	return node.GetIPv4()
}

func (n *nodeMetaCollector) GetIPv6() net.IP {
	return node.GetIPv6()
}

// upsertNodePortHealthService makes the HealthCheckNodePort available to the external IP of the service
func (s *Service) upsertNodePortHealthService(svc *svcInfo, nodeMeta NodeMetaCollector) error {
	// For any service that has a healthCheckNodePort, we create a healthCheck service.
	// The created service does not itself need another healthCheck service; the
	// easiest way to end that loop is to check for the HealthCheckNodePort.
	// Also, without a healthCheckNodePort, we don't need to create a healthCheck service.
	if !option.Config.EnableHealthCheckLoadBalancerIP || svc.svcType != lb.SVCTypeLoadBalancer || svc.svcHealthCheckNodePort == 0 {
		if svc.healthcheckFrontendHash == "" {
			return nil
		}

		healthSvc := s.svcByHash[svc.healthcheckFrontendHash]
		if healthSvc != nil {
			s.deleteServiceLocked(healthSvc)
		}
		svc.healthcheckFrontendHash = ""

		return nil
	}

	healthCheckSvcName := svc.svcName
	healthCheckSvcName.Name = svc.svcName.Name + "-healthCheck"

	healthCheckFrontend := *lb.NewL3n4AddrID(
		lb.TCP,
		svc.frontend.AddrCluster,
		svc.svcHealthCheckNodePort,
		lb.ScopeExternal,
		0,
	)

	if svc.healthcheckFrontendHash != "" && svc.healthcheckFrontendHash != healthCheckFrontend.Hash() {
		healthSvc := s.svcByHash[svc.healthcheckFrontendHash]
		if healthSvc != nil {
			s.deleteServiceLocked(healthSvc)
		}
	}

	var ip netip.Addr
	var ok bool
	if svc.frontend.AddrCluster.Is4() {
		ip, ok = netip.AddrFromSlice(nodeMeta.GetIPv4().To4())
	} else {
		ip, ok = netip.AddrFromSlice(nodeMeta.GetIPv6())
	}

	if !ok {
		return fmt.Errorf("failed to parse node IP")
	}

	clusterAddr := cmtypes.AddrClusterFrom(ip, option.Config.ClusterID)

	healthCheckBackends := []*lb.Backend{
		{
			L3n4Addr: *lb.NewL3n4Addr(lb.TCP, clusterAddr, svc.svcHealthCheckNodePort, lb.ScopeInternal),
			State:    lb.BackendStateActive,
			NodeName: nodeTypes.GetName(),
		},
	}
	// Create a new service with the healthcheck frontend and healthcheck backend
	healthCheckSvc := &lb.SVC{
		Name:             healthCheckSvcName,
		Type:             svc.svcType,
		Frontend:         healthCheckFrontend,
		ExtTrafficPolicy: lb.SVCTrafficPolicyLocal,
		IntTrafficPolicy: lb.SVCTrafficPolicyLocal,
		Backends:         healthCheckBackends,
		LoopbackHostport: true,
	}

	_, _, err := s.upsertService(healthCheckSvc)
	if err != nil {
		return err
	}
	svc.healthcheckFrontendHash = healthCheckFrontend.Hash()

	log.WithFields(logrus.Fields{
		logfields.ServiceName:      svc.svcName.Name,
		logfields.ServiceNamespace: svc.svcName.Namespace,
	}).Debug("Created healthcheck service for frontend")

	return nil
}

// UpdateBackendsState updates all the service(s) with the updated state of
// the given backends. It also persists the updated backend states to the BPF maps.
//
// Backend state transitions are validated before processing.
//
// In case of duplicated backends in the list, the state will be updated to the
// last duplicate entry.
func (s *Service) UpdateBackendsState(backends []*lb.Backend) error {
	if len(backends) == 0 {
		return nil
	}

	if logging.CanLogAt(log.Logger, logrus.DebugLevel) {
		for _, b := range backends {
			log.WithFields(logrus.Fields{
				logfields.L3n4Addr:         b.L3n4Addr.String(),
				logfields.BackendState:     b.State,
				logfields.BackendPreferred: b.Preferred,
			}).Debug("Update backend states")
		}
	}

	var (
		errs            error
		updatedBackends []*lb.Backend
	)
	updateSvcs := make(map[lb.ID]*datapathTypes.UpsertServiceParams)

	s.Lock()
	defer s.Unlock()
	for _, updatedB := range backends {
		hash := updatedB.L3n4Addr.Hash()

		be, exists := s.backendByHash[hash]
		if !exists {
			// Cilium service API and Kubernetes events are asynchronous, so it's
			// possible to receive an API call for a backend that's already deleted.
			continue
		}
		if !lb.IsValidStateTransition(be.State, updatedB.State) {
			currentState, _ := be.State.String()
			newState, _ := updatedB.State.String()
			errs = errors.Join(errs,
				fmt.Errorf("invalid state transition for backend[%s] (%s) -> (%s)",
					updatedB.String(), currentState, newState),
			)
			continue
		}
		be.State = updatedB.State
		be.Preferred = updatedB.Preferred

		for id, info := range s.svcByID {
			var p *datapathTypes.UpsertServiceParams
			for i, b := range info.backends {
				if b.L3n4Addr.String() != updatedB.L3n4Addr.String() {
					continue
				}
				if b.State == updatedB.State {
					break
				}
				info.backends[i].State = updatedB.State
				info.backends[i].Preferred = updatedB.Preferred
				found := false

				if p, found = updateSvcs[id]; !found {
					p = &datapathTypes.UpsertServiceParams{
						ID:                        uint16(id),
						IP:                        info.frontend.L3n4Addr.AddrCluster.AsNetIP(),
						Port:                      info.frontend.L3n4Addr.L4Addr.Port,
						PrevBackendsCount:         len(info.backends),
						IPv6:                      info.frontend.IsIPv6(),
						Type:                      info.svcType,
						ExtLocal:                  info.isExtLocal(),
						IntLocal:                  info.isIntLocal(),
						Scope:                     info.frontend.L3n4Addr.Scope,
						SessionAffinity:           info.sessionAffinity,
						SessionAffinityTimeoutSec: info.sessionAffinityTimeoutSec,
						CheckSourceRange:          info.checkLBSourceRange(),
						UseMaglev:                 info.useMaglev(),
						Name:                      info.svcName,
						LoopbackHostport:          info.LoopbackHostport,
					}
				}
				p.PreferredBackends, p.ActiveBackends, p.NonActiveBackends = segregateBackends(info.backends)
				updateSvcs[id] = p
				log.WithFields(logrus.Fields{
					logfields.ServiceID:        p.ID,
					logfields.BackendID:        b.ID,
					logfields.L3n4Addr:         b.L3n4Addr.String(),
					logfields.BackendState:     b.State,
					logfields.BackendPreferred: b.Preferred,
				}).Info("Persisting service with backend state update")
			}
			s.svcByID[id] = info
			s.svcByHash[info.frontend.Hash()] = info
		}
		updatedBackends = append(updatedBackends, be)
	}

	// Update the persisted backend state in BPF maps.
	for _, b := range updatedBackends {
		log.WithFields(logrus.Fields{
			logfields.BackendID:        b.ID,
			logfields.L3n4Addr:         b.L3n4Addr.String(),
			logfields.BackendState:     b.State,
			logfields.BackendPreferred: b.Preferred,
		}).Info("Persisting updated backend state for backend")
		if err := s.lbmap.UpdateBackendWithState(b); err != nil {
			errs = errors.Join(errs, fmt.Errorf("failed to update backend %+v: %w", b, err))
		}
	}

	for i := range updateSvcs {
		errs = errors.Join(errs, s.lbmap.UpsertService(updateSvcs[i]))
	}
	return errs
}

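// Illustrative usage sketch (not part of the upstream file): moving a single
// backend into a non-active state. The address and the quarantined state are
// assumed values; only the L3n4Addr and the new State/Preferred fields are
// consulted when looking up and transitioning the backend.
//
//	addr := cmtypes.AddrClusterFrom(netip.MustParseAddr("10.0.1.5"), 0)
//	err := svcManager.UpdateBackendsState([]*lb.Backend{
//		{
//			L3n4Addr: *lb.NewL3n4Addr(lb.TCP, addr, 8080, lb.ScopeExternal),
//			State:    lb.BackendStateQuarantined,
//		},
//	})
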
// DeleteServiceByID removes a service identified by the given ID.
func (s *Service) DeleteServiceByID(id lb.ServiceID) (bool, error) {
	s.Lock()
	defer s.Unlock()

	if svc, found := s.svcByID[lb.ID(id)]; found {
		return true, s.deleteServiceLocked(svc)
	}

	return false, nil
}

// DeleteService removes the given service.
func (s *Service) DeleteService(frontend lb.L3n4Addr) (bool, error) {
	s.Lock()
	defer s.Unlock()

	if svc, found := s.svcByHash[frontend.Hash()]; found {
		return true, s.deleteServiceLocked(svc)
	}

	return false, nil
}

// GetDeepCopyServiceByID returns a deep-copy of a service identified with
// the given ID.
//
// If a service cannot be found, returns false.
func (s *Service) GetDeepCopyServiceByID(id lb.ServiceID) (*lb.SVC, bool) {
	s.RLock()
	defer s.RUnlock()

	svc, found := s.svcByID[lb.ID(id)]
	if !found {
		return nil, false
	}

	return svc.deepCopyToLBSVC(), true
}

// GetDeepCopyServices returns a deep-copy of all installed services.
func (s *Service) GetDeepCopyServices() []*lb.SVC {
	s.RLock()
	defer s.RUnlock()

	svcs := make([]*lb.SVC, 0, len(s.svcByHash))
	for _, svc := range s.svcByHash {
		svcs = append(svcs, svc.deepCopyToLBSVC())
	}

	return svcs
}

// GetDeepCopyServiceByFrontend returns a deep-copy of the service that matches the Frontend address.
func (s *Service) GetDeepCopyServiceByFrontend(frontend lb.L3n4Addr) (*lb.SVC, bool) {
	s.RLock()
	defer s.RUnlock()

	if svc, found := s.svcByHash[frontend.Hash()]; found {
		return svc.deepCopyToLBSVC(), true
	}

	return nil, false
}

// RestoreServices restores services from BPF maps.
//
// It first restores all the service entries, followed by backend entries.
// In the process, it deletes any duplicate backend entries that were leaked, and
// are not referenced by any service entries.
//
// The method should be called once before establishing connectivity
// to kube-apiserver.
func (s *Service) RestoreServices() error {
	s.Lock()
	defer s.Unlock()
	backendsById := make(map[lb.BackendID]struct{})

	var errs error
	// Restore service cache from BPF maps
	if err := s.restoreServicesLocked(backendsById); err != nil {
		errs = errors.Join(errs, fmt.Errorf("error while restoring services: %w", err))
	}

	// Restore backend IDs
	if err := s.restoreBackendsLocked(backendsById); err != nil {
		errs = errors.Join(errs, fmt.Errorf("error while restoring backends: %w", err))
	}

	// Remove LB source ranges for no longer existing services
	if option.Config.EnableSVCSourceRangeCheck {
		errs = errors.Join(errs, s.restoreAndDeleteOrphanSourceRanges())
	}
	return errs
}

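// Illustrative agent-startup flow (not part of the upstream file): services are
// restored from the BPF maps before the k8s service cache connects, and
// SyncWithK8sFinished later prunes whatever k8s never confirmed. svcManager is
// an assumed variable and error handling is elided.
//
//	_ = svcManager.RestoreServices()
//	// ... the k8s service cache synchronizes and calls UpsertService() ...
//	stale, _ := svcManager.SyncWithK8sFinished(false, nil)
//	// 'stale' lists services still pointing at restored-only backends;
//	// the caller is expected to refresh them.
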
// deleteOrphanAffinityMatchesLocked removes affinity matches which point to
// non-existent svc ID and backend ID tuples.
func (s *Service) deleteOrphanAffinityMatchesLocked() error {
	matches, err := s.lbmap.DumpAffinityMatches()
	if err != nil {
		return err
	}

	toRemove := map[lb.ID][]lb.BackendID{}

	local := make(map[lb.ID]map[lb.BackendID]struct{}, len(s.svcByID))
	for id, svc := range s.svcByID {
		if !svc.sessionAffinity {
			continue
		}
		local[id] = make(map[lb.BackendID]struct{}, len(svc.backends))
		for _, backend := range svc.backends {
			local[id][backend.ID] = struct{}{}
		}
	}

	for svcID, backendIDs := range matches {
		for bID := range backendIDs {
			found := false
			if _, ok := local[lb.ID(svcID)]; ok {
				if _, ok := local[lb.ID(svcID)][lb.BackendID(bID)]; ok {
					found = true
				}
			}
			if !found {
				toRemove[lb.ID(svcID)] = append(toRemove[lb.ID(svcID)], lb.BackendID(bID))
			}
		}
	}

	for svcID, backendIDs := range toRemove {
		s.deleteBackendsFromAffinityMatchMap(svcID, backendIDs)
	}

	return nil
}

func (s *Service) restoreAndDeleteOrphanSourceRanges() error {
	opts := []bool{}
	if option.Config.EnableIPv4 {
		opts = append(opts, false)
	}
	if option.Config.EnableIPv6 {
		opts = append(opts, true)
	}

	for _, ipv6 := range opts {
		srcRangesBySvcID, err := s.lbmap.DumpSourceRanges(ipv6)
		if err != nil {
			return err
		}
		for svcID, srcRanges := range srcRangesBySvcID {
			svc, found := s.svcByID[lb.ID(svcID)]
			if !found {
				// Delete ranges
				if err := s.lbmap.UpdateSourceRanges(svcID, srcRanges, nil, ipv6); err != nil {
					return err
				}
			} else {
				svc.loadBalancerSourceRanges = srcRanges
			}
		}
	}

	return nil
}

// SyncWithK8sFinished removes services which we haven't heard about during
// a sync period of cilium-agent's k8s service cache.
//
// The removal is based on an assumption that during the sync period
// UpsertService() is going to be called for each alive service.
//
// Additionally, it returns a list of services which are associated with
// stale backends, and which shall be refreshed. Stale services shall be
// refreshed regardless of whether an error is also returned or not.
//
// The localOnly flag allows performing a two-pass removal, handling local
// services first, and processing global ones only after full synchronization
// with all remote clusters.
func (s *Service) SyncWithK8sFinished(localOnly bool, localServices sets.Set[k8s.ServiceID]) (stale []k8s.ServiceID, err error) {
	s.Lock()
	defer s.Unlock()

	for _, svc := range s.svcByHash {
		svcID := k8s.ServiceID{
			Cluster:   svc.svcName.Cluster,
			Namespace: svc.svcName.Namespace,
			Name:      svc.svcName.Name,
		}

		// Skip processing global services when the localOnly flag is set.
		if localOnly && !localServices.Has(svcID) {
			continue
		}

		if svc.restoredFromDatapath {
			log.WithFields(logrus.Fields{
				logfields.ServiceID: svc.frontend.ID,
				logfields.L3n4Addr:  logfields.Repr(svc.frontend.L3n4Addr),
			}).
				Warn("Deleting no longer present service")

			if err := s.deleteServiceLocked(svc); err != nil {
				return stale, fmt.Errorf("Unable to remove service %+v: %w", svc, err)
			}
		} else if svc.restoredBackendHashes.Len() > 0 {
			// The service is still associated with stale backends
			stale = append(stale, svcID)
			log.WithFields(logrus.Fields{
				logfields.ServiceID:      svc.frontend.ID,
				logfields.ServiceName:    svc.svcName.String(),
				logfields.L3n4Addr:       logfields.Repr(svc.frontend.L3n4Addr),
				logfields.OrphanBackends: svc.restoredBackendHashes.Len(),
			}).Info("Service has stale backends: triggering refresh")
		}

		svc.restoredBackendHashes = nil
	}

	if localOnly {
		// Wait for full clustermesh synchronization before finalizing the
		// removal of orphan backends and affinity matches.
		return stale, nil
	}

	// Remove no longer existing affinity matches
	if option.Config.EnableSessionAffinity {
		if err := s.deleteOrphanAffinityMatchesLocked(); err != nil {
			return stale, err
		}
	}

	// Remove obsolete backends and release their IDs
	if err := s.deleteOrphanBackends(); err != nil {
		log.WithError(err).Warn("Failed to remove orphan backends")
	}

	return stale, nil
}

func (s *Service) createSVCInfoIfNotExist(p *lb.SVC) (*svcInfo, bool, bool,
	[]*cidr.CIDR, error,
) {
	prevSessionAffinity := false
	prevLoadBalancerSourceRanges := []*cidr.CIDR{}

	hash := p.Frontend.Hash()
	svc, found := s.svcByHash[hash]
	if !found {
		// Allocate service ID for the new service
		addrID, err := AcquireID(p.Frontend.L3n4Addr, uint32(p.Frontend.ID))
		if err != nil {
			return nil, false, false, nil,
				fmt.Errorf("Unable to allocate service ID %d for %v: %w",
					p.Frontend.ID, p.Frontend, err)
		}
		p.Frontend.ID = addrID.ID

		svc = &svcInfo{
			hash:          hash,
			frontend:      p.Frontend,
			backendByHash: map[string]*lb.Backend{},

			svcType: p.Type,
			svcName: p.Name,

			sessionAffinity:           p.SessionAffinity,
			sessionAffinityTimeoutSec: p.SessionAffinityTimeoutSec,

			svcExtTrafficPolicy:      p.ExtTrafficPolicy,
			svcIntTrafficPolicy:      p.IntTrafficPolicy,
			svcNatPolicy:             p.NatPolicy,
			svcHealthCheckNodePort:   p.HealthCheckNodePort,
			loadBalancerSourceRanges: p.LoadBalancerSourceRanges,
			l7LBProxyPort:            p.L7LBProxyPort,
			LoopbackHostport:         p.LoopbackHostport,
		}
		s.svcByID[p.Frontend.ID] = svc
		s.svcByHash[hash] = svc
	} else {
		// Local Redirect Policies with a service matcher would have the same frontend
		// as the clusterIP service type. In such cases, if a Local redirect service
		// exists, we shouldn't override it with the clusterIP type (e.g., k8s event/sync, etc).
		if svc.svcType == lb.SVCTypeLocalRedirect && p.Type == lb.SVCTypeClusterIP {
			err := NewErrLocalRedirectServiceExists(p.Frontend, p.Name)
			return svc, !found, prevSessionAffinity, prevLoadBalancerSourceRanges, err
		}
		// Local-redirect service can only override clusterIP service type or itself.
		if p.Type == lb.SVCTypeLocalRedirect &&
			(svc.svcType != lb.SVCTypeClusterIP && svc.svcType != lb.SVCTypeLocalRedirect) {
			err := fmt.Errorf("skip local-redirect service for "+
				"frontend %v as it overlaps with svc %v of type %v",
				p.Frontend, svc.svcName, svc.svcType)
			return svc, !found, prevSessionAffinity, prevLoadBalancerSourceRanges, err
		}
		prevSessionAffinity = svc.sessionAffinity
		prevLoadBalancerSourceRanges = svc.loadBalancerSourceRanges
		svc.svcType = p.Type
		svc.svcExtTrafficPolicy = p.ExtTrafficPolicy
		svc.svcIntTrafficPolicy = p.IntTrafficPolicy
		svc.svcNatPolicy = p.NatPolicy
		svc.svcHealthCheckNodePort = p.HealthCheckNodePort
		svc.sessionAffinity = p.SessionAffinity
		svc.sessionAffinityTimeoutSec = p.SessionAffinityTimeoutSec
		svc.loadBalancerSourceRanges = p.LoadBalancerSourceRanges
		// Name, namespace and cluster are optional and intended for exposure via
		// API. They are not part of any BPF maps and cannot be restored
		// from datapath.
		if p.Name.Name != "" {
			svc.svcName.Name = p.Name.Name
		}
		if p.Name.Namespace != "" {
			svc.svcName.Namespace = p.Name.Namespace
		}
		if p.Name.Cluster != "" {
			svc.svcName.Cluster = p.Name.Cluster
		}
		// We have heard about the service from k8s, so unset the flag so that
		// SyncWithK8sFinished() won't consider the service obsolete, and thus
		// won't remove it.
		svc.restoredFromDatapath = false

		// Update L7 load balancer proxy port
		svc.l7LBProxyPort = p.L7LBProxyPort
	}

	return svc, !found, prevSessionAffinity, prevLoadBalancerSourceRanges, nil
}

func (s *Service) deleteBackendsFromAffinityMatchMap(svcID lb.ID, backendIDs []lb.BackendID) {
	log.WithFields(logrus.Fields{
		logfields.Backends:  backendIDs,
		logfields.ServiceID: svcID,
	}).Debug("Deleting backends from session affinity match")

	for _, bID := range backendIDs {
		if err := s.lbmap.DeleteAffinityMatch(uint16(svcID), bID); err != nil {
			log.WithFields(logrus.Fields{
				logfields.BackendID: bID,
				logfields.ServiceID: svcID,
			}).WithError(err).Warn("Unable to remove entry from affinity match map")
		}
	}
}

func (s *Service) addBackendsToAffinityMatchMap(svcID lb.ID, backendIDs []lb.BackendID) {
	log.WithFields(logrus.Fields{
		logfields.Backends:  backendIDs,
		logfields.ServiceID: svcID,
	}).Debug("Adding backends to affinity match map")

	for _, bID := range backendIDs {
		if err := s.lbmap.AddAffinityMatch(uint16(svcID), bID); err != nil {
			log.WithFields(logrus.Fields{
				logfields.BackendID: bID,
				logfields.ServiceID: svcID,
			}).WithError(err).Warn("Unable to add entry to affinity match map")
		}
	}
}

func (s *Service) upsertServiceIntoLBMaps(svc *svcInfo, isExtLocal, isIntLocal bool,
	prevBackendCount int, newBackends []*lb.Backend, obsoleteBackends []*lb.Backend,
	prevSessionAffinity bool, prevLoadBalancerSourceRanges []*cidr.CIDR,
	obsoleteSVCBackendIDs []lb.BackendID, getScopedLog func() *logrus.Entry,
	debugLogsEnabled bool,
) error {
	v6FE := svc.frontend.IsIPv6()

	var (
		toDeleteAffinity, toAddAffinity []lb.BackendID
		checkLBSrcRange                 bool
	)

	// Update sessionAffinity
	//
	// If L7 LB is configured for this service then BPF level session affinity is not used so
	// that the L7 proxy port may be passed in a shared union in the service entry.
	if option.Config.EnableSessionAffinity && !svc.isL7LBService() {
		if prevSessionAffinity && !svc.sessionAffinity {
			// Remove backends from the affinity match because the svc's sessionAffinity
			// has been disabled
			toDeleteAffinity = make([]lb.BackendID, 0, len(obsoleteSVCBackendIDs)+len(svc.backends))
			toDeleteAffinity = append(toDeleteAffinity, obsoleteSVCBackendIDs...)
			for _, b := range svc.backends {
				toDeleteAffinity = append(toDeleteAffinity, b.ID)
			}
		} else if svc.sessionAffinity {
			toAddAffinity = make([]lb.BackendID, 0, len(svc.backends))
			for _, b := range svc.backends {
				toAddAffinity = append(toAddAffinity, b.ID)
			}
			if prevSessionAffinity {
				// Remove obsolete svc backends if previously the svc had the affinity enabled
				toDeleteAffinity = make([]lb.BackendID, 0, len(obsoleteSVCBackendIDs))
				toDeleteAffinity = append(toDeleteAffinity, obsoleteSVCBackendIDs...)
			}
		}

		s.deleteBackendsFromAffinityMatchMap(svc.frontend.ID, toDeleteAffinity)
		// New affinity matches (toAddAffinity) will be added after the new
		// backends have been added.
	}

	// Update LB source range check cidrs
	if checkLBSrcRange = svc.checkLBSourceRange() || len(prevLoadBalancerSourceRanges) != 0; checkLBSrcRange {
		if err := s.lbmap.UpdateSourceRanges(uint16(svc.frontend.ID),
			prevLoadBalancerSourceRanges, svc.loadBalancerSourceRanges,
			v6FE); err != nil {
			return err
		}
	}

	// Add new backends into BPF maps
	for _, b := range newBackends {
		if debugLogsEnabled {
			getScopedLog().WithFields(logrus.Fields{
				logfields.BackendID:     b.ID,
				logfields.BackendWeight: b.Weight,
				logfields.L3n4Addr:      b.L3n4Addr,
			}).Debug("Adding new backend")
		}

		if err := s.lbmap.AddBackend(b, b.L3n4Addr.IsIPv6()); err != nil {
			return err
		}
	}

	// Upsert service entries into BPF maps
	preferredBackends, activeBackends, nonActiveBackends := segregateBackends(svc.backends)

	natPolicy := lb.SVCNatPolicyNone
	natPolicySet := false
	for _, b := range svc.backends {
		// All backends have been previously checked to be either v4 or v6.
		if !natPolicySet {
			natPolicySet = true
			v6BE := b.L3n4Addr.IsIPv6()
			if v6FE && !v6BE {
				natPolicy = lb.SVCNatPolicyNat64
			} else if !v6FE && v6BE {
				natPolicy = lb.SVCNatPolicyNat46
			}
		}
	}
	if natPolicy == lb.SVCNatPolicyNat64 {
		// Backends have been added to the v4 backend map, but we now also need
		// to add them to the v6 backend map as v4-in-v6 address. The reason is
		// that backends could be used by multiple services, so a v4->v4 service
		// expects them in the v4 map, but v6->v4 service enters the v6 datapath
		// and looks them up in the v6 backend map (v4-in-v6), and only later on
		// after DNAT transforms the packet into a v4 one.
		for _, b := range newBackends {
			if err := s.lbmap.AddBackend(b, true); err != nil {
				return err
			}
		}
	}
	svc.svcNatPolicy = natPolicy

	p := &datapathTypes.UpsertServiceParams{
		ID:                        uint16(svc.frontend.ID),
		IP:                        svc.frontend.L3n4Addr.AddrCluster.AsNetIP(),
		Port:                      svc.frontend.L3n4Addr.L4Addr.Port,
		PreferredBackends:         preferredBackends,
		ActiveBackends:            activeBackends,
		NonActiveBackends:         nonActiveBackends,
		PrevBackendsCount:         prevBackendCount,
		IPv6:                      v6FE,
		NatPolicy:                 natPolicy,
		Type:                      svc.svcType,
		ExtLocal:                  isExtLocal,
		IntLocal:                  isIntLocal,
		Scope:                     svc.frontend.L3n4Addr.Scope,
		SessionAffinity:           svc.sessionAffinity,
		SessionAffinityTimeoutSec: svc.sessionAffinityTimeoutSec,
		CheckSourceRange:          checkLBSrcRange,
		UseMaglev:                 svc.useMaglev(),
		L7LBProxyPort:             svc.l7LBProxyPort,
		Name:                      svc.svcName,
		LoopbackHostport:          svc.LoopbackHostport,
	}
	if err := s.lbmap.UpsertService(p); err != nil {
		return err
	}

	// If L7 LB is configured for this service then BPF level session affinity is not used.
	if option.Config.EnableSessionAffinity && !svc.isL7LBService() {
		s.addBackendsToAffinityMatchMap(svc.frontend.ID, toAddAffinity)
	}

	// Remove backends not used by any service from BPF maps
	for _, be := range obsoleteBackends {
		id := be.ID
		if debugLogsEnabled {
			getScopedLog().WithField(logfields.BackendID, id).
				Debug("Removing obsolete backend")
		}
		s.lbmap.DeleteBackendByID(id)
		s.TerminateUDPConnectionsToBackend(&be.L3n4Addr)
	}

	return nil
}

func (s *Service) restoreBackendsLocked(svcBackendsById map[lb.BackendID]struct{}) error {
	failed, restored, skipped := 0, 0, 0
	backends, err := s.lbmap.DumpBackendMaps()
	if err != nil {
		return fmt.Errorf("Unable to dump backend maps: %w", err)
	}

	debugLogsEnabled := logging.CanLogAt(log.Logger, logrus.DebugLevel)

	svcBackendsCount := len(svcBackendsById)
	for _, b := range backends {
		if debugLogsEnabled {
			log.WithFields(logrus.Fields{
				logfields.BackendID:        b.ID,
				logfields.L3n4Addr:         b.L3n4Addr.String(),
				logfields.BackendState:     b.State,
				logfields.BackendPreferred: b.Preferred,
			}).Debug("Restoring backend")
		}

		if _, ok := svcBackendsById[b.ID]; !ok && (svcBackendsCount != 0) {
			// If a backend by ID isn't referenced by any of the services, it's
			// likely a leaked backend. In case of duplicate leaked backends,
			// there would be multiple IDs allocated for the same backend resource
			// identified by its L3nL4Addr hash. The second check for service
			// backends count is added for unusual cases where there might've been
			// a problem with reading entries from the services map. In such cases,
			// the agent should not wipe out the backends map, as this can disrupt
			// existing connections. SyncWithK8sFinished will later sync the backends
			// map with the latest state.
			// Leaked backend scenarios:
			// 1) Backend entries leaked, no duplicates
			// 2) Backend entries leaked with duplicates:
			//    a) backend with overlapping L3nL4Addr hash is associated with service(s)
			//       Sequence of events:
			//       Backends were leaked prior to agent restart, but there was at least
			//       one service that the backend by hash is associated with.
			//       s.backendByHash will have a non-zero reference count for the
			//       overlapping L3nL4Addr hash.
			//    b) none of the backends are associated with services
			//       Sequence of events:
			//       All the services these backends were associated with were deleted
			//       prior to agent restart.
			//       s.backendByHash will not have an entry for the backends hash.
			//       As none of the service entries have a reference to these backends
			//       in the services map, the backends were likely not available for
			//       load-balancing new traffic. There is a slim chance that the
			//       backends had previously established active connections, and
			//       those connections can get disrupted. However, the leaks likely
			//       happened when service entries were deleted, so those connections
			//       were also expected to be terminated.
			// Regardless, delete the duplicates as this can affect restoration of current
			// active backends, and may prevent new backends getting added as map
			// size is limited, which can lead to connectivity disruptions.
			id := b.ID
			DeleteBackendID(id)
			if err := s.lbmap.DeleteBackendByID(id); err != nil {
				// As the backends map is not expected to be updated during restore,
				// the deletion call shouldn't fail. But log the error, just
				// in case...
1672 log.Errorf("unable to delete leaked backend: %v", id) 1673 } 1674 if debugLogsEnabled { 1675 log.WithFields(logrus.Fields{ 1676 logfields.BackendID: b.ID, 1677 logfields.L3n4Addr: b.L3n4Addr, 1678 logfields.BackendState: b.State, 1679 logfields.BackendPreferred: b.Preferred, 1680 }).Debug("Leaked backend entry not restored") 1681 } 1682 skipped++ 1683 continue 1684 } 1685 if err := RestoreBackendID(b.L3n4Addr, b.ID); err != nil { 1686 log.WithError(err).WithFields(logrus.Fields{ 1687 logfields.BackendID: b.ID, 1688 logfields.L3n4Addr: b.L3n4Addr, 1689 logfields.BackendState: b.State, 1690 logfields.BackendPreferred: b.Preferred, 1691 }).Warning("Unable to restore backend") 1692 failed++ 1693 continue 1694 } 1695 restored++ 1696 hash := b.L3n4Addr.Hash() 1697 s.backendByHash[hash] = b 1698 } 1699 1700 log.WithFields(logrus.Fields{ 1701 logfields.RestoredBackends: restored, 1702 logfields.FailedBackends: failed, 1703 logfields.SkippedBackends: skipped, 1704 }).Info("Restored backends from maps") 1705 1706 return nil 1707 } 1708 1709 func (s *Service) deleteOrphanBackends() error { 1710 orphanBackends := 0 1711 1712 for hash, b := range s.backendByHash { 1713 if s.backendRefCount[hash] == 0 { 1714 log.WithField(logfields.BackendID, b.ID). 1715 Debug("Removing orphan backend") 1716 // The b.ID is unique across IPv4/6, hence attempt 1717 // to clean it from both maps, and ignore errors. 1718 DeleteBackendID(b.ID) 1719 s.lbmap.DeleteBackendByID(b.ID) 1720 delete(s.backendByHash, hash) 1721 orphanBackends++ 1722 } 1723 } 1724 log.WithFields(logrus.Fields{ 1725 logfields.OrphanBackends: orphanBackends, 1726 }).Info("Deleted orphan backends") 1727 1728 return nil 1729 } 1730 1731 func (s *Service) restoreServicesLocked(svcBackendsById map[lb.BackendID]struct{}) error { 1732 failed, restored := 0, 0 1733 1734 svcs, errors := s.lbmap.DumpServiceMaps() 1735 for _, err := range errors { 1736 log.WithError(err).Warning("Error occurred while dumping service maps") 1737 } 1738 1739 for _, svc := range svcs { 1740 scopedLog := log.WithFields(logrus.Fields{ 1741 logfields.ServiceID: svc.Frontend.ID, 1742 logfields.ServiceIP: svc.Frontend.L3n4Addr.String(), 1743 }) 1744 scopedLog.Debug("Restoring service") 1745 1746 if _, err := RestoreID(svc.Frontend.L3n4Addr, uint32(svc.Frontend.ID)); err != nil { 1747 failed++ 1748 scopedLog.WithError(err).Warning("Unable to restore service ID") 1749 } 1750 1751 newSVC := &svcInfo{ 1752 hash: svc.Frontend.Hash(), 1753 frontend: svc.Frontend, 1754 backends: svc.Backends, 1755 backendByHash: map[string]*lb.Backend{}, 1756 svcType: svc.Type, 1757 svcExtTrafficPolicy: svc.ExtTrafficPolicy, 1758 svcIntTrafficPolicy: svc.IntTrafficPolicy, 1759 svcNatPolicy: svc.NatPolicy, 1760 LoopbackHostport: svc.LoopbackHostport, 1761 1762 sessionAffinity: svc.SessionAffinity, 1763 sessionAffinityTimeoutSec: svc.SessionAffinityTimeoutSec, 1764 1765 // Indicate that the svc was restored from the BPF maps, so that 1766 // SyncWithK8sFinished() could remove services which were restored 1767 // from the maps but not present in the k8sServiceCache (e.g. a svc 1768 // was deleted while cilium-agent was down). 1769 restoredFromDatapath: true, 1770 } 1771 1772 for j, backend := range svc.Backends { 1773 // DumpServiceMaps() can return services with some empty (nil) backends. 
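// Skip them so that only real backends are reference-counted and recorded
// in backendByHash and svcBackendsById.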
1774 if backend == nil { 1775 continue 1776 } 1777 1778 hash := backend.L3n4Addr.Hash() 1779 s.backendRefCount.Add(hash) 1780 newSVC.backendByHash[hash] = svc.Backends[j] 1781 svcBackendsById[backend.ID] = struct{}{} 1782 } 1783 1784 if len(newSVC.backendByHash) > 0 { 1785 // Indicate that these backends were restored from BPF maps, 1786 // so that they are not removed until SyncWithK8sFinished() 1787 // is executed (if not observed in the meanwhile) to prevent 1788 // disrupting valid connections. 1789 newSVC.restoredBackendHashes = sets.KeySet(newSVC.backendByHash) 1790 } 1791 1792 // Recalculate Maglev lookup tables if the maps were removed due to 1793 // the changed M param. 1794 ipv6 := newSVC.frontend.IsIPv6() || (svc.NatPolicy == lb.SVCNatPolicyNat46) 1795 recreated := s.lbmap.IsMaglevLookupTableRecreated(ipv6) 1796 if option.Config.DatapathMode == datapathOpt.DatapathModeLBOnly && 1797 newSVC.useMaglev() && recreated { 1798 1799 backends := make(map[string]*lb.Backend, len(newSVC.backends)) 1800 for _, b := range newSVC.backends { 1801 // DumpServiceMaps() can return services with some empty (nil) backends. 1802 if b == nil { 1803 continue 1804 } 1805 1806 backends[b.String()] = b 1807 } 1808 if err := s.lbmap.UpsertMaglevLookupTable(uint16(newSVC.frontend.ID), backends, 1809 ipv6); err != nil { 1810 scopedLog.WithError(err).Warning("Unable to upsert into the Maglev BPF map.") 1811 continue 1812 } 1813 } 1814 1815 s.svcByHash[newSVC.hash] = newSVC 1816 s.svcByID[newSVC.frontend.ID] = newSVC 1817 restored++ 1818 } 1819 1820 log.WithFields(logrus.Fields{ 1821 logfields.RestoredSVCs: restored, 1822 logfields.FailedSVCs: failed, 1823 }).Info("Restored services from maps") 1824 1825 return nil 1826 } 1827 1828 func (s *Service) deleteServiceLocked(svc *svcInfo) error { 1829 ipv6 := svc.frontend.L3n4Addr.IsIPv6() || svc.svcNatPolicy == lb.SVCNatPolicyNat46 1830 obsoleteBackendIDs, obsoleteBackends := s.deleteBackendsFromCacheLocked(svc) 1831 scopedLog := log.WithFields(logrus.Fields{ 1832 logfields.ServiceID: svc.frontend.ID, 1833 logfields.ServiceIP: svc.frontend.L3n4Addr, 1834 logfields.Backends: svc.backends, 1835 }) 1836 scopedLog.Debug("Deleting service") 1837 1838 if err := s.lbmap.DeleteService(svc.frontend, len(svc.backends), 1839 svc.useMaglev(), svc.svcNatPolicy); err != nil { 1840 return err 1841 } 1842 1843 // Delete affinity matches 1844 if option.Config.EnableSessionAffinity && svc.sessionAffinity { 1845 backendIDs := make([]lb.BackendID, 0, len(svc.backends)) 1846 for _, b := range svc.backends { 1847 backendIDs = append(backendIDs, b.ID) 1848 } 1849 s.deleteBackendsFromAffinityMatchMap(svc.frontend.ID, backendIDs) 1850 } 1851 1852 if option.Config.EnableSVCSourceRangeCheck && 1853 svc.svcType == lb.SVCTypeLoadBalancer { 1854 if err := s.lbmap.UpdateSourceRanges(uint16(svc.frontend.ID), 1855 svc.loadBalancerSourceRanges, nil, ipv6); err != nil { 1856 return err 1857 } 1858 } 1859 1860 delete(s.svcByHash, svc.hash) 1861 delete(s.svcByID, svc.frontend.ID) 1862 1863 for _, id := range obsoleteBackendIDs { 1864 scopedLog.WithField(logfields.BackendID, id). 
1865 Debug("Deleting obsolete backend") 1866 s.lbmap.DeleteBackendByID(id) 1867 } 1868 if err := DeleteID(uint32(svc.frontend.ID)); err != nil { 1869 return fmt.Errorf("Unable to release service ID %d: %w", svc.frontend.ID, err) 1870 } 1871 1872 // Delete managed neighbor entries of the LB 1873 if option.Config.DatapathMode == datapathOpt.DatapathModeLBOnly { 1874 s.deleteBackendNeighbors(obsoleteBackends) 1875 } 1876 1877 if svc.healthcheckFrontendHash != "" { 1878 healthSvc := s.svcByHash[svc.healthcheckFrontendHash] 1879 if healthSvc != nil { 1880 s.deleteServiceLocked(healthSvc) 1881 } 1882 } 1883 1884 if option.Config.EnableHealthCheckNodePort { 1885 s.healthServer.DeleteService(lb.ID(svc.frontend.ID)) 1886 } 1887 1888 metrics.ServicesEventsCount.WithLabelValues("delete").Inc() 1889 s.notifyMonitorServiceDelete(svc.frontend.ID) 1890 1891 return nil 1892 } 1893 1894 func (s *Service) updateBackendsCacheLocked(svc *svcInfo, backends []*lb.Backend) ( 1895 []*lb.Backend, []*lb.Backend, []lb.BackendID, error, 1896 ) { 1897 obsoleteBackends := []*lb.Backend{} // not used by any svc 1898 obsoleteSVCBackendIDs := []lb.BackendID{} // removed from the svc, but might be used by other svc 1899 newBackends := []*lb.Backend{} // previously not used by any svc 1900 backendSet := map[string]struct{}{} 1901 1902 for i, backend := range backends { 1903 hash := backend.L3n4Addr.Hash() 1904 backendSet[hash] = struct{}{} 1905 1906 if b, found := svc.backendByHash[hash]; !found { 1907 if s.backendRefCount.Add(hash) { 1908 id, err := AcquireBackendID(backend.L3n4Addr) 1909 if err != nil { 1910 s.backendRefCount.Delete(hash) 1911 return nil, nil, nil, fmt.Errorf("Unable to acquire backend ID for %q: %w", 1912 backend.L3n4Addr, err) 1913 } 1914 backends[i].ID = id 1915 backends[i].Weight = backend.Weight 1916 newBackends = append(newBackends, backends[i]) 1917 s.backendByHash[hash] = backends[i].DeepCopy() 1918 } else { 1919 backends[i].ID = s.backendByHash[hash].ID 1920 } 1921 } else { 1922 // We observed this backend, hence let's remove it from the list 1923 // of the restored ones. 1924 svc.restoredBackendHashes.Delete(hash) 1925 1926 backends[i].ID = b.ID 1927 // Backend state can either be updated via kubernetes events, 1928 // or service API. If the state update is coming via kubernetes events, 1929 // then we need to update the internal state. Currently, the only state 1930 // update in this case is for the terminating state or when backend 1931 // weight has changed. All other state updates happen via the API 1932 // (UpdateBackendsState) in which case we need to set the backend state 1933 // to the saved state. 1934 switch { 1935 case backends[i].State == lb.BackendStateTerminating && 1936 b.State != lb.BackendStateTerminating: 1937 b.State = backends[i].State 1938 // Update the persisted backend state in BPF maps. 1939 if err := s.lbmap.UpdateBackendWithState(backends[i]); err != nil { 1940 return nil, nil, nil, fmt.Errorf("failed to update backend %+v: %w", 1941 backends[i], err) 1942 } 1943 case backends[i].Weight != b.Weight: 1944 // Update the cached weight as weight has changed 1945 b.Weight = backends[i].Weight 1946 // Update but do not persist the state as backend might be set as active 1947 // only temporarily for specific service 1948 b.State = backends[i].State 1949 default: 1950 // Set the backend state to the saved state. 
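// This preserves any state previously set via the service API (UpdateBackendsState).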
1951 backends[i].State = b.State 1952 } 1953 } 1954 svc.backendByHash[hash] = backends[i] 1955 } 1956 1957 for hash, backend := range svc.backendByHash { 1958 if _, found := backendSet[hash]; !found { 1959 if svc.restoredBackendHashes.Has(hash) { 1960 // Don't treat backends restored from the datapath and not yet observed as 1961 // obsolete, because that would cause connections targeting those backends 1962 // to be dropped in case we haven't fully synchronized yet. 1963 backends = append(backends, backend) 1964 continue 1965 } 1966 1967 obsoleteSVCBackendIDs = append(obsoleteSVCBackendIDs, backend.ID) 1968 if s.backendRefCount.Delete(hash) { 1969 DeleteBackendID(backend.ID) 1970 delete(s.backendByHash, hash) 1971 obsoleteBackends = append(obsoleteBackends, backend) 1972 } 1973 delete(svc.backendByHash, hash) 1974 } 1975 } 1976 1977 svc.backends = backends 1978 return newBackends, obsoleteBackends, obsoleteSVCBackendIDs, nil 1979 } 1980 1981 func (s *Service) deleteBackendsFromCacheLocked(svc *svcInfo) ([]lb.BackendID, []*lb.Backend) { 1982 obsoleteBackendIDs := []lb.BackendID{} 1983 obsoleteBackends := []*lb.Backend{} 1984 1985 for hash, backend := range svc.backendByHash { 1986 if s.backendRefCount.Delete(hash) { 1987 DeleteBackendID(backend.ID) 1988 obsoleteBackendIDs = append(obsoleteBackendIDs, backend.ID) 1989 obsoleteBackends = append(obsoleteBackends, backend.DeepCopy()) 1990 } 1991 } 1992 1993 return obsoleteBackendIDs, obsoleteBackends 1994 } 1995 1996 func (s *Service) notifyMonitorServiceUpsert(frontend lb.L3n4AddrID, backends []*lb.Backend, 1997 svcType lb.SVCType, svcExtTrafficPolicy, svcIntTrafficPolicy lb.SVCTrafficPolicy, svcName, svcNamespace string, 1998 ) { 1999 id := uint32(frontend.ID) 2000 fe := monitorAPI.ServiceUpsertNotificationAddr{ 2001 IP: frontend.AddrCluster.AsNetIP(), 2002 Port: frontend.Port, 2003 } 2004 2005 be := make([]monitorAPI.ServiceUpsertNotificationAddr, 0, len(backends)) 2006 for _, backend := range backends { 2007 b := monitorAPI.ServiceUpsertNotificationAddr{ 2008 IP: backend.AddrCluster.AsNetIP(), 2009 Port: backend.Port, 2010 } 2011 be = append(be, b) 2012 } 2013 2014 msg := monitorAPI.ServiceUpsertMessage(id, fe, be, string(svcType), string(svcExtTrafficPolicy), string(svcIntTrafficPolicy), svcName, svcNamespace) 2015 s.monitorAgent.SendEvent(monitorAPI.MessageTypeAgent, msg) 2016 } 2017 2018 func (s *Service) notifyMonitorServiceDelete(id lb.ID) { 2019 s.monitorAgent.SendEvent(monitorAPI.MessageTypeAgent, monitorAPI.ServiceDeleteMessage(uint32(id))) 2020 } 2021 2022 // GetServiceNameByAddr returns namespace and name of the service with a given L3n4Addr. The third 2023 // return value is set to true if and only if the service is found in the map. 2024 func (s *Service) GetServiceNameByAddr(addr lb.L3n4Addr) (string, string, bool) { 2025 s.RLock() 2026 defer s.RUnlock() 2027 2028 svc, found := s.svcByHash[addr.Hash()] 2029 if !found { 2030 return "", "", false 2031 } 2032 2033 return svc.svcName.Namespace, svc.svcName.Name, true 2034 } 2035 2036 // isWildcardAddr returns true if given frontend is used for wildcard svc lookups 2037 // (by bpf_sock). 2038 func isWildcardAddr(frontend lb.L3n4AddrID) bool { 2039 if frontend.IsIPv6() { 2040 return cmtypes.MustParseAddrCluster("::").Equal(frontend.AddrCluster) 2041 } 2042 return cmtypes.MustParseAddrCluster("0.0.0.0").Equal(frontend.AddrCluster) 2043 } 2044 2045 // segregateBackends returns the list of active, preferred and nonActive backends to be 2046 // added to the lbmaps. 
If EnableK8sTerminatingEndpoint and there are no active backends, 2047 // segregateBackends will return all terminating backends as active. 2048 func segregateBackends(backends []*lb.Backend) (preferredBackends map[string]*lb.Backend, 2049 activeBackends map[string]*lb.Backend, nonActiveBackends []lb.BackendID, 2050 ) { 2051 preferredBackends = make(map[string]*lb.Backend) 2052 activeBackends = make(map[string]*lb.Backend, len(backends)) 2053 2054 for _, b := range backends { 2055 // Separate active from non-active backends so that they won't be selected 2056 // to serve new requests, but can be restored after agent restart. Non-active backends 2057 // are kept in the affinity and backend maps so that existing connections 2058 // are able to terminate gracefully. Such backends are either cleaned up 2059 // when the backends are deleted, or they may transition back to the active state. 2060 if b.State == lb.BackendStateActive { 2061 activeBackends[b.String()] = b 2062 // Keep another list of preferred backends if available 2063 if b.Preferred { 2064 preferredBackends[b.String()] = b 2065 } 2066 } else { 2067 nonActiveBackends = append(nonActiveBackends, b.ID) 2068 } 2069 } 2070 // To avoid connection drops during rolling updates, Kubernetes defines a Terminating state on the EndpointSlices 2071 // that can be used to identify Pods that, despite being terminated, can still serve traffic. 2072 // If there are no active backends, use the backends in Terminating state to serve new requests 2073 // and avoid traffic disruption until new active backends are created. 2074 // https://github.com/kubernetes/enhancements/tree/master/keps/sig-network/1669-proxy-terminating-endpoints 2075 if option.Config.EnableK8sTerminatingEndpoint && len(activeBackends) == 0 { 2076 nonActiveBackends = []lb.BackendID{} 2077 for _, b := range backends { 2078 if b.State == lb.BackendStateTerminating { 2079 activeBackends[b.String()] = b 2080 } else { 2081 nonActiveBackends = append(nonActiveBackends, b.ID) 2082 } 2083 } 2084 } 2085 return preferredBackends, activeBackends, nonActiveBackends 2086 } 2087 2088 // SyncNodePortFrontends updates all NodePort services with a new set of frontend 2089 // IP addresses. 2090 func (s *Service) SyncNodePortFrontends(addrs sets.Set[netip.Addr]) error { 2091 s.Lock() 2092 defer s.Unlock() 2093 2094 existingFEs := sets.New[netip.Addr]() 2095 removedFEs := make([]*svcInfo, 0) 2096 2097 // Find all NodePort services by locating the surrogate services, and collect 2098 // the services whose frontend has been removed. 2099 v4Svcs := make([]*svcInfo, 0) 2100 v6Svcs := make([]*svcInfo, 0) 2101 for _, svc := range s.svcByID { 2102 if svc.svcType != lb.SVCTypeNodePort { 2103 continue 2104 } 2105 2106 switch svc.frontend.AddrCluster.Addr() { 2107 case netip.IPv4Unspecified(): 2108 v4Svcs = append(v4Svcs, svc) 2109 case netip.IPv6Unspecified(): 2110 v6Svcs = append(v6Svcs, svc) 2111 default: 2112 addr := svc.frontend.AddrCluster.Addr() 2113 existingFEs.Insert(addr) 2114 if _, ok := addrs[addr]; !ok { 2115 removedFEs = append(removedFEs, svc) 2116 } 2117 } 2118 } 2119 2120 // Delete the services of the removed frontends 2121 for _, svc := range removedFEs { 2122 log := log.WithField(logfields.K8sNamespace, svc.svcName.Namespace). 2123 WithField(logfields.K8sSvcName, svc.svcName.Name).
2124 WithField(logfields.L3n4Addr, svc.frontend.L3n4Addr) 2125 2126 if err := s.deleteServiceLocked(svc); err != nil { 2127 return fmt.Errorf("delete service: %w", err) 2128 } else { 2129 log.Debug("Deleted nodeport service of a removed frontend") 2130 } 2131 } 2132 2133 // Create services for the new frontends 2134 for addr := range addrs { 2135 if !existingFEs.Has(addr) { 2136 // No services for this frontend, create them. 2137 svcs := v4Svcs 2138 if addr.Is6() { 2139 svcs = v6Svcs 2140 } 2141 for _, svcInfo := range svcs { 2142 fe := lb.NewL3n4AddrID( 2143 svcInfo.frontend.Protocol, 2144 cmtypes.AddrClusterFrom(addr, svcInfo.frontend.AddrCluster.ClusterID()), 2145 svcInfo.frontend.Port, 2146 svcInfo.frontend.Scope, 2147 0, 2148 ) 2149 svc := svcInfo.deepCopyToLBSVC() 2150 svc.Frontend = *fe 2151 2152 log := log.WithField(logfields.K8sNamespace, svc.Name.Namespace). 2153 WithField(logfields.K8sSvcName, svc.Name.Name). 2154 WithField(logfields.L3n4Addr, svc.Frontend.L3n4Addr) 2155 _, _, err := s.upsertService(svc) 2156 if err != nil { 2157 return fmt.Errorf("upsert service: %w", err) 2158 } else { 2159 log.Debug("Created nodeport service for new frontend") 2160 } 2161 } 2162 } 2163 } 2164 return nil 2165 } 2166 2167 func backendToNode(b *lb.Backend) *nodeTypes.Node { 2168 return &nodeTypes.Node{ 2169 Name: fmt.Sprintf("backend-%s", b.L3n4Addr.AddrCluster.AsNetIP()), 2170 IPAddresses: []nodeTypes.Address{{ 2171 Type: addressing.NodeInternalIP, 2172 IP: b.L3n4Addr.AddrCluster.AsNetIP(), 2173 }}, 2174 } 2175 } 2176 2177 func (s *Service) upsertBackendNeighbors(newBackends, oldBackends []*lb.Backend) { 2178 for _, b := range newBackends { 2179 s.backendDiscovery.InsertMiscNeighbor(backendToNode(b)) 2180 } 2181 s.deleteBackendNeighbors(oldBackends) 2182 } 2183 2184 func (s *Service) deleteBackendNeighbors(obsoleteBackends []*lb.Backend) { 2185 for _, b := range obsoleteBackends { 2186 s.backendDiscovery.DeleteMiscNeighbor(backendToNode(b)) 2187 } 2188 }