github.com/cilium/cilium@v1.16.2/pkg/endpointmanager/gc.go (about)

     1  // SPDX-License-Identifier: Apache-2.0
     2  // Copyright Authors of Cilium
     3  
     4  package endpointmanager
     5  
     6  import (
     7  	"context"
     8  
     9  	"github.com/sirupsen/logrus"
    10  
    11  	"github.com/cilium/cilium/pkg/endpoint"
    12  	"github.com/cilium/cilium/pkg/logging/logfields"
    13  )
    14  
    15  // EndpointCheckerFunc can verify whether an endpoint is currently healthy.
    16  type EndpointCheckerFunc func(*endpoint.Endpoint) error
    17  
    18  // markAndSweep performs a two-phase garbage collection of endpoints using the
    19  // configured EndpointChecker.
    20  //
    21  // 1) Mark all endpoints that require GC. Do not GC these endpoints this round.
    22  // 2) Sweep all endpoints marked as requiring GC during the previous iteration.
    23  //
    24  // This way, if there is a temporary condition that will be resolved by other
    25  // components in the system, then we will not flag warnings about the system
    26  // getting out-of-sync.
    27  func (mgr *endpointManager) markAndSweep(ctx context.Context) error {
    28  	marked := mgr.markEndpoints()
    29  
    30  	mgr.mutex.Lock()
    31  	toSweep := mgr.markedEndpoints
    32  	mgr.markedEndpoints = marked
    33  	mgr.mutex.Unlock()
    34  
    35  	// Avoid returning an error which would cause the calling controller to
    36  	// re-run the garbage collection more frequently than the RunInterval.
    37  	mgr.sweepEndpoints(toSweep)
    38  	return nil
    39  }
    40  
    41  // markEndpoints runs all endpoints in the manager against the configured
    42  // EndpointChecker and returns a slice of endpoint ids that require garbage
    43  // collection.
    44  func (mgr *endpointManager) markEndpoints() []uint16 {
    45  	mgr.mutex.RLock()
    46  	defer mgr.mutex.RUnlock()
    47  
    48  	needsGC := make([]uint16, 0, len(mgr.endpoints))
    49  	for eid, ep := range mgr.endpoints {
    50  		if err := mgr.checkHealth(ep); err != nil {
    51  			needsGC = append(needsGC, eid)
    52  		}
    53  	}
    54  	return needsGC
    55  }
    56  
    57  // sweepEndpoints iterates through the specified list of endpoints marked for
    58  // deletion and attempts to garbage-collect them if they still exist.
    59  func (mgr *endpointManager) sweepEndpoints(markedEndpoints []uint16) {
    60  	toSweep := make([]*endpoint.Endpoint, 0, len(markedEndpoints))
    61  
    62  	// 'markedEndpoints' were marked during the previous mark round, so
    63  	// they may no longer be valid endpoints. Narrow the list to only the
    64  	// endpoints that remain. Then, release the lock so RemoveEndpoint()
    65  	// below can independently grab it.
    66  	mgr.mutex.RLock()
    67  	for _, id := range markedEndpoints {
    68  		if ep, ok := mgr.endpoints[id]; ok {
    69  			toSweep = append(toSweep, ep)
    70  		}
    71  	}
    72  	mgr.mutex.RUnlock()
    73  
    74  	for _, ep := range toSweep {
    75  		log.WithFields(logrus.Fields{
    76  			logfields.EndpointID:  ep.StringID(),
    77  			logfields.ContainerID: ep.GetShortContainerID(),
    78  			logfields.K8sPodName:  ep.GetK8sNamespaceAndPodName(),
    79  			logfields.CEPName:     ep.GetK8sNamespaceAndCEPName(),
    80  			logfields.URL:         "https://github.com/kubernetes/kubernetes/issues/86944",
    81  		}).Warning("Stray endpoint found. You may be affected by upstream Kubernetes issue #86944.")
    82  		errs := mgr.RemoveEndpoint(ep, endpoint.DeleteConfig{
    83  			NoIPRelease: ep.DatapathConfiguration.ExternalIpam,
    84  		})
    85  		if len(errs) > 0 {
    86  			scopedLog := log.WithField(logfields.EndpointID, ep.GetID())
    87  			for _, err := range errs {
    88  				scopedLog.WithError(err).Warn("Ignoring error while garbage collecting endpoint")
    89  			}
    90  		}
    91  	}
    92  }