github.com/cilium/cilium@v1.16.2/pkg/endpointmanager/gc.go (about) 1 // SPDX-License-Identifier: Apache-2.0 2 // Copyright Authors of Cilium 3 4 package endpointmanager 5 6 import ( 7 "context" 8 9 "github.com/sirupsen/logrus" 10 11 "github.com/cilium/cilium/pkg/endpoint" 12 "github.com/cilium/cilium/pkg/logging/logfields" 13 ) 14 15 // EndpointCheckerFunc can verify whether an endpoint is currently healthy. 16 type EndpointCheckerFunc func(*endpoint.Endpoint) error 17 18 // markAndSweep performs a two-phase garbage collection of endpoints using the 19 // configured EndpointChecker. 20 // 21 // 1) Mark all endpoints that require GC. Do not GC these endpoints this round. 22 // 2) Sweep all endpoints marked as requiring GC during the previous iteration. 23 // 24 // This way, if there is a temporary condition that will be resolved by other 25 // components in the system, then we will not flag warnings about the system 26 // getting out-of-sync. 27 func (mgr *endpointManager) markAndSweep(ctx context.Context) error { 28 marked := mgr.markEndpoints() 29 30 mgr.mutex.Lock() 31 toSweep := mgr.markedEndpoints 32 mgr.markedEndpoints = marked 33 mgr.mutex.Unlock() 34 35 // Avoid returning an error which would cause the calling controller to 36 // re-run the garbage collection more frequently than the RunInterval. 37 mgr.sweepEndpoints(toSweep) 38 return nil 39 } 40 41 // markEndpoints runs all endpoints in the manager against the configured 42 // EndpointChecker and returns a slice of endpoint ids that require garbage 43 // collection. 44 func (mgr *endpointManager) markEndpoints() []uint16 { 45 mgr.mutex.RLock() 46 defer mgr.mutex.RUnlock() 47 48 needsGC := make([]uint16, 0, len(mgr.endpoints)) 49 for eid, ep := range mgr.endpoints { 50 if err := mgr.checkHealth(ep); err != nil { 51 needsGC = append(needsGC, eid) 52 } 53 } 54 return needsGC 55 } 56 57 // sweepEndpoints iterates through the specified list of endpoints marked for 58 // deletion and attempts to garbage-collect them if they still exist. 59 func (mgr *endpointManager) sweepEndpoints(markedEndpoints []uint16) { 60 toSweep := make([]*endpoint.Endpoint, 0, len(markedEndpoints)) 61 62 // 'markedEndpoints' were marked during the previous mark round, so 63 // they may no longer be valid endpoints. Narrow the list to only the 64 // endpoints that remain. Then, release the lock so RemoveEndpoint() 65 // below can independently grab it. 66 mgr.mutex.RLock() 67 for _, id := range markedEndpoints { 68 if ep, ok := mgr.endpoints[id]; ok { 69 toSweep = append(toSweep, ep) 70 } 71 } 72 mgr.mutex.RUnlock() 73 74 for _, ep := range toSweep { 75 log.WithFields(logrus.Fields{ 76 logfields.EndpointID: ep.StringID(), 77 logfields.ContainerID: ep.GetShortContainerID(), 78 logfields.K8sPodName: ep.GetK8sNamespaceAndPodName(), 79 logfields.CEPName: ep.GetK8sNamespaceAndCEPName(), 80 logfields.URL: "https://github.com/kubernetes/kubernetes/issues/86944", 81 }).Warning("Stray endpoint found. You may be affected by upstream Kubernetes issue #86944.") 82 errs := mgr.RemoveEndpoint(ep, endpoint.DeleteConfig{ 83 NoIPRelease: ep.DatapathConfiguration.ExternalIpam, 84 }) 85 if len(errs) > 0 { 86 scopedLog := log.WithField(logfields.EndpointID, ep.GetID()) 87 for _, err := range errs { 88 scopedLog.WithError(err).Warn("Ignoring error while garbage collecting endpoint") 89 } 90 } 91 } 92 }