github.com/cilium/cilium@v1.16.2/pkg/endpointmanager/manager.go

// SPDX-License-Identifier: Apache-2.0
// Copyright Authors of Cilium

package endpointmanager

import (
	"context"
	"errors"
	"fmt"
	"maps"
	"net/netip"
	"sync"

	"github.com/cilium/hive/cell"
	"github.com/sirupsen/logrus"

	"github.com/cilium/cilium/pkg/completion"
	"github.com/cilium/cilium/pkg/controller"
	"github.com/cilium/cilium/pkg/endpoint"
	endpointid "github.com/cilium/cilium/pkg/endpoint/id"
	"github.com/cilium/cilium/pkg/endpoint/regeneration"
	"github.com/cilium/cilium/pkg/identity/cache"
	"github.com/cilium/cilium/pkg/ipcache"
	"github.com/cilium/cilium/pkg/lock"
	"github.com/cilium/cilium/pkg/logging"
	"github.com/cilium/cilium/pkg/logging/logfields"
	"github.com/cilium/cilium/pkg/mcastmanager"
	"github.com/cilium/cilium/pkg/metrics"
	"github.com/cilium/cilium/pkg/metrics/metric"
	"github.com/cilium/cilium/pkg/node"
	"github.com/cilium/cilium/pkg/option"
	"github.com/cilium/cilium/pkg/policy"
	"github.com/cilium/cilium/pkg/time"
)

var (
	log         = logging.DefaultLogger.WithField(logfields.LogSubsys, "endpoint-manager")
	metricsOnce sync.Once
	launchTime  = 30 * time.Second

	endpointGCControllerGroup = controller.NewGroup("endpoint-gc")
)

// endpointManager is a structure designed for containing state about the
// collection of locally running endpoints.
type endpointManager struct {
	health cell.Health

	// mutex protects endpoints and endpointsAux
	mutex lock.RWMutex

	// endpoints is the global list of endpoints indexed by ID. mutex must
	// be held to read and write.
	endpoints    map[uint16]*endpoint.Endpoint
	endpointsAux map[string]*endpoint.Endpoint

	// mcastManager handles IPv6 multicast group join/leave for pods. This is required for the
	// node to receive ICMPv6 NDP messages, especially NS (Neighbor Solicitation) messages, so
	// that a pod's IPv6 address is discoverable.
	mcastManager *mcastmanager.MCastManager

	// EndpointResourceSynchronizer updates external resources (e.g., Kubernetes) with
	// up-to-date information about endpoints managed by the endpoint manager.
	EndpointResourceSynchronizer

	// subscribers are notified when events occur in the endpointManager.
	subscribers map[Subscriber]struct{}

	// checkHealth supports endpoint garbage collection by verifying the health
	// of an endpoint.
	checkHealth EndpointCheckerFunc

	// deleteEndpoint is the function used to remove the endpoint from the
	// endpointManager and clean it up. Always set to removeEndpoint.
	deleteEndpoint endpointDeleteFunc

	// A mark-and-sweep garbage collector may operate on the endpoint list.
	// This is configured via WithPeriodicEndpointGC() and will mark
	// endpoints for removal on one run of the controller, then in the
	// subsequent controller run will remove the endpoints.
	markedEndpoints []uint16

	// controllers associated with the endpoint manager.
	controllers *controller.Manager

	policyMapPressure *policyMapPressure

	// localNodeStore allows retrieving information and observing changes
	// about the local node.
	localNodeStore *node.LocalNodeStore

	// Allocator for local endpoint identifiers.
	epIDAllocator *epIDAllocator
}

// endpointDeleteFunc is used to abstract away concrete Endpoint Delete
// functionality from endpoint management for testing purposes.
type endpointDeleteFunc func(*endpoint.Endpoint, endpoint.DeleteConfig) []error
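// Illustrative sketch (not part of the upstream file): because deleteEndpoint
// is stored as an endpointDeleteFunc, a test can swap in a stub that records
// which endpoints would have been deleted instead of tearing them down. The
// helper name below is hypothetical.
func newRecordingDeleteFunc(deleted *[]uint16) endpointDeleteFunc {
	return func(ep *endpoint.Endpoint, _ endpoint.DeleteConfig) []error {
		*deleted = append(*deleted, ep.ID)
		return nil
	}
}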
// New creates a new endpointManager.
func New(epSynchronizer EndpointResourceSynchronizer, lns *node.LocalNodeStore, health cell.Health) *endpointManager {
	mgr := endpointManager{
		health:                       health,
		endpoints:                    make(map[uint16]*endpoint.Endpoint),
		endpointsAux:                 make(map[string]*endpoint.Endpoint),
		mcastManager:                 mcastmanager.New(option.Config.IPv6MCastDevice),
		EndpointResourceSynchronizer: epSynchronizer,
		subscribers:                  make(map[Subscriber]struct{}),
		controllers:                  controller.NewManager(),
		localNodeStore:               lns,
		epIDAllocator:                newEPIDAllocator(),
	}
	mgr.deleteEndpoint = mgr.removeEndpoint
	mgr.policyMapPressure = newPolicyMapPressure()
	return &mgr
}

// WithPeriodicEndpointGC runs a controller to periodically garbage collect
// endpoints that match the specified EndpointCheckerFunc.
func (mgr *endpointManager) WithPeriodicEndpointGC(ctx context.Context, checkHealth EndpointCheckerFunc, interval time.Duration) *endpointManager {
	mgr.checkHealth = checkHealth
	mgr.controllers.UpdateController("endpoint-gc",
		controller.ControllerParams{
			Group:       endpointGCControllerGroup,
			DoFunc:      mgr.markAndSweep,
			RunInterval: interval,
			Context:     ctx,
			Health:      mgr.health.NewScope("endpoint-gc"),
		})
	return mgr
}

// waitForProxyCompletions blocks until all proxy changes have been completed.
func waitForProxyCompletions(proxyWaitGroup *completion.WaitGroup) error {
	err := proxyWaitGroup.Context().Err()
	if err != nil {
		return fmt.Errorf("context cancelled before waiting for proxy updates: %w", err)
	}

	start := time.Now()
	log.Debug("Waiting for proxy updates to complete...")
	err = proxyWaitGroup.Wait()
	if err != nil {
		return fmt.Errorf("proxy updates failed: %w", err)
	}
	log.Debug("Wait time for proxy updates: ", time.Since(start))

	return nil
}

// UpdatePolicyMaps returns a WaitGroup which is signaled once all endpoints
// have had their PolicyMaps updated against the Endpoint's desired policy state.
//
// Endpoints will wait on the 'notifyWg' parameter before updating policy maps.
func (mgr *endpointManager) UpdatePolicyMaps(ctx context.Context, notifyWg *sync.WaitGroup) *sync.WaitGroup {
	var epWG sync.WaitGroup
	var wg sync.WaitGroup

	proxyWaitGroup := completion.NewWaitGroup(ctx)

	eps := mgr.GetEndpoints()
	epWG.Add(len(eps))
	wg.Add(1)

	// This is in a goroutine to allow the caller to proceed with other tasks before waiting for the ACKs to complete
	go func() {
		// Wait for all the eps to have applied policy map
		// changes before waiting for the changes to be ACKed
		epWG.Wait()
		if err := waitForProxyCompletions(proxyWaitGroup); err != nil {
			log.WithError(err).Warning("Failed to apply L7 proxy policy changes. These will be re-applied in future updates.")
		}
		wg.Done()
	}()

	// TODO: bound by number of CPUs?
	for _, ep := range eps {
		go func(ep *endpoint.Endpoint) {
			// Proceed only after all notifications have been delivered to endpoints
			notifyWg.Wait()
			if err := ep.ApplyPolicyMapChanges(proxyWaitGroup); err != nil && !errors.Is(err, endpoint.ErrNotAlive) {
				ep.Logger("endpointmanager").WithError(err).Warning("Failed to apply policy map changes. These will be re-applied in future updates.")
			}
			epWG.Done()
		}(ep)
	}

	return &wg
}
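// Illustrative sketch (not part of the upstream file): a hypothetical caller of
// UpdatePolicyMaps. The notify WaitGroup holds endpoints back until the caller
// has finished delivering its policy update notifications; the returned
// WaitGroup is signaled once the map updates and proxy ACKs have completed.
func applyPolicyMapUpdates(ctx context.Context, mgr *endpointManager) {
	var notifyWg sync.WaitGroup
	notifyWg.Add(1)

	wg := mgr.UpdatePolicyMaps(ctx, &notifyWg)

	// ... deliver policy update notifications to the endpoints here ...
	notifyWg.Done() // release the endpoints to apply their policy map changes

	wg.Wait() // block until the changes have been applied and ACKed
}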
// InitMetrics hooks the endpointManager into the metrics subsystem. This can
// only be done once, globally, otherwise the metrics library will panic.
func (mgr *endpointManager) InitMetrics(registry *metrics.Registry) {
	if option.Config.DryMode {
		return
	}
	metricsOnce.Do(func() {
		// Endpoint is a function used to collect this metric. We cannot
		// increment/decrement a gauge since we invoke Remove gratuitously and that
		// would result in negative counts.
		// It must be thread-safe.
		metrics.Endpoint = metric.NewGaugeFunc(metric.GaugeOpts{
			Namespace: metrics.Namespace,
			Name:      "endpoint",
			Help:      "Number of endpoints managed by this agent",
		},
			func() float64 { return float64(len(mgr.GetEndpoints())) },
		)
		registry.MustRegister(metrics.Endpoint)
	})
}

// allocateID checks if the ID can be reused. If it cannot, returns an error.
// If an ID of 0 is provided, a new ID is allocated. If a new ID cannot be
// allocated, returns an error.
func (mgr *endpointManager) allocateID(currID uint16) (uint16, error) {
	var newID uint16
	if currID != 0 {
		if err := mgr.epIDAllocator.reuse(currID); err != nil {
			return 0, fmt.Errorf("unable to reuse endpoint ID: %w", err)
		}
		newID = currID
	} else {
		id := mgr.epIDAllocator.allocate()
		if id == uint16(0) {
			return 0, fmt.Errorf("no more endpoint IDs available")
		}
		newID = id
	}

	return newID, nil
}

func (mgr *endpointManager) removeIDLocked(currID uint16) {
	delete(mgr.endpoints, currID)
}

// RemoveID removes the id from the endpoints map in the endpointManager.
func (mgr *endpointManager) RemoveID(currID uint16) {
	mgr.mutex.Lock()
	defer mgr.mutex.Unlock()
	mgr.removeIDLocked(currID)
}
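// Illustrative sketch (not part of the upstream file): how the allocation rules
// above play out. A restored endpoint passes its previous ID so it can be
// reused; a brand-new endpoint passes 0 and receives the next free local ID.
// The helper is hypothetical and in-package only, since allocateID is unexported.
func exampleAllocateIDs(mgr *endpointManager) {
	if freshID, err := mgr.allocateID(0); err == nil {
		_ = freshID // newly allocated local endpoint ID
	}
	if reusedID, err := mgr.allocateID(42); err == nil {
		_ = reusedID // 42 was still free, so it was reused as-is
	}
}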
// Lookup looks up the endpoint by prefix id
func (mgr *endpointManager) Lookup(id string) (*endpoint.Endpoint, error) {
	mgr.mutex.RLock()
	defer mgr.mutex.RUnlock()

	prefix, eid, err := endpointid.Parse(id)
	if err != nil {
		return nil, err
	}

	switch prefix {
	case endpointid.CiliumLocalIdPrefix:
		n, err := endpointid.ParseCiliumID(id)
		if err != nil {
			return nil, err
		}
		if n > endpointid.MaxEndpointID {
			return nil, fmt.Errorf("%d: endpoint ID too large", n)
		}
		return mgr.lookupCiliumID(uint16(n)), nil

	case endpointid.CiliumGlobalIdPrefix:
		return nil, ErrUnsupportedID

	case endpointid.CNIAttachmentIdPrefix:
		return mgr.lookupCNIAttachmentID(eid), nil

	case endpointid.ContainerIdPrefix:
		return mgr.lookupContainerID(eid), nil

	case endpointid.DockerEndpointPrefix:
		return mgr.lookupDockerEndpoint(eid), nil

	case endpointid.ContainerNamePrefix:
		return mgr.lookupDockerContainerName(eid), nil

	case endpointid.PodNamePrefix:
		return mgr.lookupPodNameLocked(eid), nil

	case endpointid.CEPNamePrefix:
		return mgr.lookupCEPNameLocked(eid), nil

	case endpointid.IPv4Prefix:
		return mgr.lookupIPv4(eid), nil

	case endpointid.IPv6Prefix:
		return mgr.lookupIPv6(eid), nil

	default:
		return nil, ErrInvalidPrefix{InvalidPrefix: prefix.String()}
	}
}

// LookupCiliumID looks up endpoint by endpoint ID
func (mgr *endpointManager) LookupCiliumID(id uint16) *endpoint.Endpoint {
	mgr.mutex.RLock()
	ep := mgr.lookupCiliumID(id)
	mgr.mutex.RUnlock()
	return ep
}

// LookupCNIAttachmentID looks up endpoint by CNI attachment ID
func (mgr *endpointManager) LookupCNIAttachmentID(id string) *endpoint.Endpoint {
	mgr.mutex.RLock()
	ep := mgr.lookupCNIAttachmentID(id)
	mgr.mutex.RUnlock()
	return ep
}

// LookupIPv4 looks up endpoint by IPv4 address
func (mgr *endpointManager) LookupIPv4(ipv4 string) *endpoint.Endpoint {
	mgr.mutex.RLock()
	ep := mgr.lookupIPv4(ipv4)
	mgr.mutex.RUnlock()
	return ep
}

// LookupIPv6 looks up endpoint by IPv6 address
func (mgr *endpointManager) LookupIPv6(ipv6 string) *endpoint.Endpoint {
	mgr.mutex.RLock()
	ep := mgr.lookupIPv6(ipv6)
	mgr.mutex.RUnlock()
	return ep
}

// LookupIP looks up endpoint by IP address
func (mgr *endpointManager) LookupIP(ip netip.Addr) (ep *endpoint.Endpoint) {
	ipStr := ip.Unmap().String()
	mgr.mutex.RLock()
	if ip.Is4() {
		ep = mgr.lookupIPv4(ipStr)
	} else {
		ep = mgr.lookupIPv6(ipStr)
	}
	mgr.mutex.RUnlock()
	return ep
}

// LookupCEPName looks up an endpoint by its K8s namespace + cep name
func (mgr *endpointManager) LookupCEPName(namespacedName string) *endpoint.Endpoint {
	mgr.mutex.RLock()
	ep := mgr.lookupCEPNameLocked(namespacedName)
	mgr.mutex.RUnlock()
	return ep
}

// GetEndpointsByPodName looks up endpoints by namespace + pod name
func (mgr *endpointManager) GetEndpointsByPodName(namespacedName string) []*endpoint.Endpoint {
	mgr.mutex.RLock()
	defer mgr.mutex.RUnlock()
	eps := make([]*endpoint.Endpoint, 0, 1)
	for _, ep := range mgr.endpoints {
		if ep.GetK8sNamespaceAndPodName() == namespacedName {
			eps = append(eps, ep)
		}
	}

	return eps
}
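// Illustrative sketch (not part of the upstream file): the Lookup* helpers
// above accept different reference formats. Lookup itself takes a prefixed ID
// string, which can be built with endpointid.NewID; the ID value and CEP name
// below are arbitrary examples.
func lookupExamples(mgr *endpointManager) {
	// By local Cilium endpoint ID.
	if ep, err := mgr.Lookup(endpointid.NewID(endpointid.CiliumLocalIdPrefix, "1234")); err == nil && ep != nil {
		_ = ep
	}

	// By CiliumEndpoint name, assuming the usual "<namespace>/<name>" form.
	_ = mgr.LookupCEPName("default/my-pod")
}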
// GetEndpointsByContainerID looks up endpoints by container ID
func (mgr *endpointManager) GetEndpointsByContainerID(containerID string) []*endpoint.Endpoint {
	mgr.mutex.RLock()
	defer mgr.mutex.RUnlock()

	eps := make([]*endpoint.Endpoint, 0, 1)
	for _, ep := range mgr.endpoints {
		if ep.GetContainerID() == containerID {
			eps = append(eps, ep)
		}
	}
	return eps
}

// ReleaseID releases the ID of the specified endpoint from the endpointManager.
// Returns an error if the ID cannot be released.
func (mgr *endpointManager) ReleaseID(ep *endpoint.Endpoint) error {
	return mgr.epIDAllocator.release(ep.ID)
}

// unexpose removes the endpoint from the endpointmanager, so subsequent
// lookups will no longer find the endpoint.
func (mgr *endpointManager) unexpose(ep *endpoint.Endpoint) {
	defer ep.Close()
	identifiers := ep.Identifiers()

	previousState := ep.GetState()

	mgr.mutex.Lock()
	defer mgr.mutex.Unlock()

	// This must be done before the ID is released for the endpoint!
	mgr.removeIDLocked(ep.ID)
	mgr.mcastManager.RemoveAddress(ep.IPv6)

	// We haven't yet allocated the ID for a restoring endpoint, so no
	// need to release it.
	if previousState != endpoint.StateRestoring {
		if err := mgr.ReleaseID(ep); err != nil {
			log.WithError(err).WithFields(logrus.Fields{
				"state":                   previousState,
				logfields.CNIAttachmentID: identifiers[endpointid.CNIAttachmentIdPrefix],
				logfields.CEPName:         identifiers[endpointid.CEPNamePrefix],
			}).Warning("Unable to release endpoint ID")
		}
	}

	mgr.removeReferencesLocked(identifiers)
}

// removeEndpoint stops the active handling of events by the specified endpoint,
// and prevents the endpoint from being globally accessible via other packages.
func (mgr *endpointManager) removeEndpoint(ep *endpoint.Endpoint, conf endpoint.DeleteConfig) []error {
	mgr.unexpose(ep)
	result := ep.Delete(conf)

	mgr.mutex.RLock()
	for s := range mgr.subscribers {
		s.EndpointDeleted(ep, conf)
	}
	mgr.mutex.RUnlock()

	return result
}

// RemoveEndpoint stops the active handling of events by the specified endpoint,
// and prevents the endpoint from being globally accessible via other packages.
func (mgr *endpointManager) RemoveEndpoint(ep *endpoint.Endpoint, conf endpoint.DeleteConfig) []error {
	return mgr.deleteEndpoint(ep, conf)
}
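// Illustrative sketch (not part of the upstream file): removing an endpoint via
// the exported entry point. A zero-value DeleteConfig requests the default,
// full cleanup; other DeleteConfig fields can skip parts of it. The helper name
// is hypothetical.
func removeAndLog(mgr *endpointManager, ep *endpoint.Endpoint) {
	for _, err := range mgr.RemoveEndpoint(ep, endpoint.DeleteConfig{}) {
		log.WithError(err).Warning("Endpoint removal reported an error")
	}
}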
// lookupCiliumID looks up endpoint by endpoint ID
func (mgr *endpointManager) lookupCiliumID(id uint16) *endpoint.Endpoint {
	if ep, ok := mgr.endpoints[id]; ok {
		return ep
	}
	return nil
}

func (mgr *endpointManager) lookupDockerEndpoint(id string) *endpoint.Endpoint {
	if ep, ok := mgr.endpointsAux[endpointid.NewID(endpointid.DockerEndpointPrefix, id)]; ok {
		return ep
	}
	return nil
}

func (mgr *endpointManager) lookupPodNameLocked(name string) *endpoint.Endpoint {
	if ep, ok := mgr.endpointsAux[endpointid.NewID(endpointid.PodNamePrefix, name)]; ok {
		return ep
	}
	return nil
}

func (mgr *endpointManager) lookupCEPNameLocked(name string) *endpoint.Endpoint {
	if ep, ok := mgr.endpointsAux[endpointid.NewID(endpointid.CEPNamePrefix, name)]; ok {
		return ep
	}
	return nil
}

func (mgr *endpointManager) lookupDockerContainerName(name string) *endpoint.Endpoint {
	if ep, ok := mgr.endpointsAux[endpointid.NewID(endpointid.ContainerNamePrefix, name)]; ok {
		return ep
	}
	return nil
}

func (mgr *endpointManager) lookupIPv4(ipv4 string) *endpoint.Endpoint {
	if ep, ok := mgr.endpointsAux[endpointid.NewID(endpointid.IPv4Prefix, ipv4)]; ok {
		return ep
	}
	return nil
}

func (mgr *endpointManager) lookupIPv6(ipv6 string) *endpoint.Endpoint {
	if ep, ok := mgr.endpointsAux[endpointid.NewID(endpointid.IPv6Prefix, ipv6)]; ok {
		return ep
	}
	return nil
}

func (mgr *endpointManager) lookupContainerID(id string) *endpoint.Endpoint {
	if ep, ok := mgr.endpointsAux[endpointid.NewID(endpointid.ContainerIdPrefix, id)]; ok {
		return ep
	}
	return nil
}

func (mgr *endpointManager) lookupCNIAttachmentID(id string) *endpoint.Endpoint {
	if ep, ok := mgr.endpointsAux[endpointid.NewID(endpointid.CNIAttachmentIdPrefix, id)]; ok {
		return ep
	}
	return nil
}

// updateIDReferenceLocked updates the endpoints map in the endpointManager for
// the given Endpoint.
func (mgr *endpointManager) updateIDReferenceLocked(ep *endpoint.Endpoint) {
	if ep == nil {
		return
	}
	mgr.endpoints[ep.ID] = ep
}

func (mgr *endpointManager) updateReferencesLocked(ep *endpoint.Endpoint, identifiers endpointid.Identifiers) {
	for k := range identifiers {
		id := endpointid.NewID(k, identifiers[k])
		mgr.endpointsAux[id] = ep
	}
}

// UpdateReferences maps the endpoint's current identifiers to the specified
// endpoint in the endpointManager.
func (mgr *endpointManager) UpdateReferences(ep *endpoint.Endpoint) error {
	mgr.mutex.Lock()
	defer mgr.mutex.Unlock()

	identifiers := ep.Identifiers()
	mgr.updateReferencesLocked(ep, identifiers)

	return nil
}

// removeReferencesLocked removes the mappings from the endpointmanager.
func (mgr *endpointManager) removeReferencesLocked(identifiers endpointid.Identifiers) {
	for prefix := range identifiers {
		id := endpointid.NewID(prefix, identifiers[prefix])
		delete(mgr.endpointsAux, id)
	}
}

// RegenerateAllEndpoints attempts a state transition on each endpoint and
// regenerates it if the transition is valid. During this process, the endpoint
// list is locked and cannot be modified.
// Returns a wait group that can be used to know when all of the endpoints have
// been regenerated.
func (mgr *endpointManager) RegenerateAllEndpoints(regenMetadata *regeneration.ExternalRegenerationMetadata) *sync.WaitGroup {
	var wg sync.WaitGroup

	eps := mgr.GetEndpoints()
	wg.Add(len(eps))

	// Dereference "reason" field outside of logging statement; see
	// https://github.com/sirupsen/logrus/issues/1003.
	reason := regenMetadata.Reason
	log.WithFields(logrus.Fields{"reason": reason}).Info("regenerating all endpoints")
	for _, ep := range eps {
		go func(ep *endpoint.Endpoint) {
			<-ep.RegenerateIfAlive(regenMetadata)
			wg.Done()
		}(ep)
	}

	return &wg
}
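// Illustrative sketch (not part of the upstream file): triggering a
// regeneration of all current endpoints and blocking until they are done. Only
// the Reason field of ExternalRegenerationMetadata is shown, since it is the
// one this file dereferences.
func regenerateAllAndWait(mgr *endpointManager, reason string) {
	wg := mgr.RegenerateAllEndpoints(&regeneration.ExternalRegenerationMetadata{
		Reason: reason,
	})
	wg.Wait()
}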
// OverrideEndpointOpts applies the given options to all endpoints.
func (mgr *endpointManager) OverrideEndpointOpts(om option.OptionMap) {
	for _, ep := range mgr.GetEndpoints() {
		if _, err := ep.ApplyOpts(om); err != nil && !errors.Is(err, endpoint.ErrNotAlive) {
			log.WithError(err).WithFields(logrus.Fields{
				"ep": ep.GetID(),
			}).Error("Override endpoint options failed")
		}
	}
}

// HasGlobalCT returns true if any endpoint is using the global conntrack
// table, false otherwise.
func (mgr *endpointManager) HasGlobalCT() bool {
	eps := mgr.GetEndpoints()
	for _, e := range eps {
		if !e.Options.IsEnabled(option.ConntrackLocal) {
			return true
		}
	}
	return false
}

// GetEndpoints returns a slice of all endpoints present in endpoint manager.
func (mgr *endpointManager) GetEndpoints() []*endpoint.Endpoint {
	mgr.mutex.RLock()
	eps := make([]*endpoint.Endpoint, 0, len(mgr.endpoints))
	for _, ep := range mgr.endpoints {
		eps = append(eps, ep)
	}
	mgr.mutex.RUnlock()
	return eps
}

// GetPolicyEndpoints returns a map of all endpoints present in endpoint
// manager as policy.Endpoint interface set for the map key.
func (mgr *endpointManager) GetPolicyEndpoints() map[policy.Endpoint]struct{} {
	mgr.mutex.RLock()
	eps := make(map[policy.Endpoint]struct{}, len(mgr.endpoints))
	for _, ep := range mgr.endpoints {
		eps[ep] = struct{}{}
	}
	mgr.mutex.RUnlock()
	return eps
}

func (mgr *endpointManager) expose(ep *endpoint.Endpoint) error {
	newID, err := mgr.allocateID(ep.ID)
	if err != nil {
		return err
	}

	mgr.mutex.Lock()
	// Get a copy of the identifiers before exposing the endpoint
	identifiers := ep.Identifiers()
	ep.PolicyMapPressureUpdater = mgr.policyMapPressure
	ep.Start(newID)
	mgr.mcastManager.AddAddress(ep.IPv6)
	mgr.updateIDReferenceLocked(ep)
	mgr.updateReferencesLocked(ep, identifiers)
	mgr.mutex.Unlock()

	ep.InitEndpointHealth(mgr.health)
	mgr.RunK8sCiliumEndpointSync(ep, ep.GetReporter("cep-k8s-sync"))

	return nil
}

// RestoreEndpoint exposes the specified endpoint to other subsystems via the
// manager.
func (mgr *endpointManager) RestoreEndpoint(ep *endpoint.Endpoint) error {
	ep.SetDefaultConfiguration()
	err := mgr.expose(ep)
	if err != nil {
		return err
	}
	mgr.mutex.RLock()
	// Unlock the mutex after reading the subscribers list to not block
	// the endpoint restore operation. This could potentially mean that
	// subscribers are called even after they've unsubscribed. However,
	// consumers unsubscribe during the tear-down phase, so the restore
	// callbacks are unlikely to race with unsubscribe calls.
	subscribers := maps.Clone(mgr.subscribers)
	mgr.mutex.RUnlock()
	for s := range subscribers {
		s.EndpointRestored(ep)
	}

	return nil
}
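// Illustrative sketch (not part of the upstream file): a hypothetical restore
// loop for endpoints parsed back from disk at agent startup. RestoreEndpoint
// exposes each endpoint and notifies subscribers via EndpointRestored.
func restoreAll(mgr *endpointManager, restored []*endpoint.Endpoint) []error {
	var errs []error
	for _, ep := range restored {
		if err := mgr.RestoreEndpoint(ep); err != nil {
			errs = append(errs, fmt.Errorf("restoring endpoint %d: %w", ep.ID, err))
		}
	}
	return errs
}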
// AddEndpoint takes the prepared endpoint object and starts managing it.
func (mgr *endpointManager) AddEndpoint(owner regeneration.Owner, ep *endpoint.Endpoint) (err error) {
	if ep.ID != 0 {
		return fmt.Errorf("Endpoint ID is already set to %d", ep.ID)
	}

	// Update the logger to re-populate the pod-related fields: when the
	// endpoint and its logger are created, pod details are not yet known,
	// so subsequent logs would otherwise carry empty fields such as the
	// IP addresses and k8sPodName. This update fills them in.
	ep.UpdateLogger(map[string]interface{}{
		logfields.ContainerID: ep.GetShortContainerID(),
		logfields.IPv4:        ep.GetIPv4Address(),
		logfields.IPv6:        ep.GetIPv6Address(),
		logfields.K8sPodName:  ep.GetK8sNamespaceAndPodName(),
		logfields.CEPName:     ep.GetK8sNamespaceAndCEPName(),
	})

	err = mgr.expose(ep)
	if err != nil {
		return err
	}

	mgr.mutex.RLock()
	for s := range mgr.subscribers {
		s.EndpointCreated(ep)
	}
	mgr.mutex.RUnlock()

	return nil
}

func (mgr *endpointManager) AddIngressEndpoint(
	ctx context.Context,
	owner regeneration.Owner,
	policyGetter policyRepoGetter,
	ipcache *ipcache.IPCache,
	proxy endpoint.EndpointProxy,
	allocator cache.IdentityAllocator,
) error {
	ep, err := endpoint.CreateIngressEndpoint(owner, policyGetter, ipcache, proxy, allocator)
	if err != nil {
		return err
	}

	if err := mgr.AddEndpoint(owner, ep); err != nil {
		return err
	}

	ep.InitWithIngressLabels(ctx, launchTime)

	return nil
}

func (mgr *endpointManager) AddHostEndpoint(
	ctx context.Context,
	owner regeneration.Owner,
	policyGetter policyRepoGetter,
	ipcache *ipcache.IPCache,
	proxy endpoint.EndpointProxy,
	allocator cache.IdentityAllocator,
) error {
	ep, err := endpoint.CreateHostEndpoint(owner, policyGetter, ipcache, proxy, allocator)
	if err != nil {
		return err
	}

	if err := mgr.AddEndpoint(owner, ep); err != nil {
		return err
	}

	node.SetEndpointID(ep.GetID())

	mgr.initHostEndpointLabels(ctx, ep)

	return nil
}

type policyRepoGetter interface {
	GetPolicyRepository() *policy.Repository
}

// InitHostEndpointLabels initializes the host endpoint's labels with the
// node's known labels.
func (mgr *endpointManager) InitHostEndpointLabels(ctx context.Context) {
	ep := mgr.GetHostEndpoint()
	if ep == nil {
		log.Error("Attempted to init host endpoint labels but host endpoint not set.")
		return
	}

	mgr.initHostEndpointLabels(ctx, ep)
}

func (mgr *endpointManager) initHostEndpointLabels(ctx context.Context, ep *endpoint.Endpoint) {
	// initHostEndpointLabels is executed by the daemon start hook, and
	// at that point we are guaranteed that the local node has already
	// been initialized, and this Get() operation returns immediately.
	ln, err := mgr.localNodeStore.Get(ctx)
	if err != nil {
		// An error may be returned here only if the context has been canceled,
		// which means that we are already shutting down. In that case, let's
		// just return immediately, as we cannot do anything else.
		return
	}

	ep.InitWithNodeLabels(ctx, ln.Labels, launchTime)

	// Start the observer to keep the labels synchronized in case they change
	mgr.startNodeLabelsObserver(ln.Labels)
}
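// Illustrative sketch (not part of the upstream file): wiring up the host and
// ingress endpoints at agent bootstrap. Every parameter is assumed to be an
// already-initialized dependency supplied by the caller; the helper name is
// hypothetical.
func addSpecialEndpoints(
	ctx context.Context,
	mgr *endpointManager,
	owner regeneration.Owner,
	policyGetter policyRepoGetter,
	ipc *ipcache.IPCache,
	proxy endpoint.EndpointProxy,
	allocator cache.IdentityAllocator,
) error {
	if err := mgr.AddHostEndpoint(ctx, owner, policyGetter, ipc, proxy, allocator); err != nil {
		return err
	}
	return mgr.AddIngressEndpoint(ctx, owner, policyGetter, ipc, proxy, allocator)
}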
// WaitForEndpointsAtPolicyRev waits for all endpoints which existed at the time
// this function is called to be at a given policy revision.
// New endpoints appearing while waiting are ignored.
func (mgr *endpointManager) WaitForEndpointsAtPolicyRev(ctx context.Context, rev uint64) error {
	eps := mgr.GetEndpoints()
	for i := range eps {
		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-eps[i].WaitForPolicyRevision(ctx, rev, nil):
			if ctx.Err() != nil {
				return ctx.Err()
			}
		}
	}
	return nil
}

// CallbackForEndpointsAtPolicyRev registers a callback on all endpoints that
// exist when invoked. It is similar to WaitForEndpointsAtPolicyRev but
// each endpoint that reaches the desired revision calls 'done' independently.
// The provided callback should not block and generally be lightweight.
func (mgr *endpointManager) CallbackForEndpointsAtPolicyRev(ctx context.Context, rev uint64, done func(time.Time)) error {
	eps := mgr.GetEndpoints()
	for i := range eps {
		eps[i].WaitForPolicyRevision(ctx, rev, done)
	}
	return nil
}

// EndpointExists returns whether the endpoint with id exists.
func (mgr *endpointManager) EndpointExists(id uint16) bool {
	return mgr.LookupCiliumID(id) != nil
}

// GetEndpointNetnsCookieByIP returns the netns cookie for the endpoint with the
// given IP address, if found.
func (mgr *endpointManager) GetEndpointNetnsCookieByIP(ip netip.Addr) (uint64, error) {
	ep := mgr.LookupIP(ip)
	if ep == nil {
		return 0, fmt.Errorf("endpoint not found by ip %v", ip)
	}

	return ep.GetEndpointNetnsCookie(), nil
}
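// Illustrative sketch (not part of the upstream file): fetching the netns
// cookie for a pod IP once every endpoint that existed at call time has caught
// up to a given policy revision. The caller is expected to bound ctx with a
// deadline; the revision and IP are arbitrary inputs.
func cookieAtRevision(ctx context.Context, mgr *endpointManager, ip netip.Addr, rev uint64) (uint64, error) {
	if err := mgr.WaitForEndpointsAtPolicyRev(ctx, rev); err != nil {
		return 0, err
	}
	return mgr.GetEndpointNetnsCookieByIP(ip)
}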