github.com/cilium/cilium@v1.16.2/operator/identitygc/crd_gc.go (about)

     1  // SPDX-License-Identifier: Apache-2.0
     2  // Copyright Authors of Cilium
     3  
     4  package identitygc
     5  
     6  import (
     7  	"context"
     8  	"strconv"
     9  	"time"
    10  
    11  	"github.com/sirupsen/logrus"
    12  	k8serrors "k8s.io/apimachinery/pkg/api/errors"
    13  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    14  
    15  	operatorK8s "github.com/cilium/cilium/operator/k8s"
    16  	"github.com/cilium/cilium/pkg/controller"
    17  	v2 "github.com/cilium/cilium/pkg/k8s/apis/cilium.io/v2"
    18  	"github.com/cilium/cilium/pkg/k8s/apis/cilium.io/v2alpha1"
    19  	"github.com/cilium/cilium/pkg/k8s/identitybackend"
    20  	"github.com/cilium/cilium/pkg/k8s/resource"
    21  	"github.com/cilium/cilium/pkg/logging/logfields"
    22  	"github.com/cilium/cilium/pkg/option"
    23  )
    24  
// crdIdentityGCControllerGroup is the controller group under which
// startCRDModeGC registers the "crd-identity-gc" controller.
var crdIdentityGCControllerGroup = controller.NewGroup("crd-identity-gc")
    26  
    27  func (igc *GC) startCRDModeGC(ctx context.Context) error {
    28  	if igc.gcInterval == 0 {
    29  		igc.logger.Debug("CRD identity garbage collector disabled with interval set to 0")
    30  		return nil
    31  	}
    32  
    33  	igc.logger.WithField(logfields.Interval, igc.gcInterval).Info("Starting CRD identity garbage collector")
    34  
    35  	igc.mgr = controller.NewManager()
    36  	igc.mgr.UpdateController("crd-identity-gc",
    37  		controller.ControllerParams{
    38  			Group:        crdIdentityGCControllerGroup,
    39  			RunInterval:  igc.gcInterval,
    40  			DoFunc:       igc.gc,
    41  			NoErrorRetry: true,
    42  		})
    43  
    44  	return igc.wp.Submit("heartbeat-updater", igc.runHeartbeatUpdater)
    45  }
    46  
    47  func (igc *GC) runHeartbeatUpdater(ctx context.Context) error {
    48  	for event := range igc.identity.Events(ctx) {
    49  		switch event.Kind {
    50  		case resource.Upsert:
    51  			// Identity is marked as alive if it is new or it has
    52  			// been updated.
    53  			igc.heartbeatStore.markAlive(event.Object.Name, time.Now())
    54  		case resource.Delete:
    55  			// When the identity is deleted, delete the
    56  			// heartbeat entry as well. This will not be
    57  			// 100% accurate as the CiliumEndpoint can live
    58  			// longer than the CiliumIdentity. See
    59  			// heartbeatStore.gc()
    60  			igc.heartbeatStore.delete(event.Object.Name)
    61  		}
    62  		event.Done(nil)
    63  	}
    64  	return nil
    65  }
    66  
    67  // gc is a single iteration of a garbage collection. It will
    68  // delete identities that have not had its heartbeat lifesign updated
    69  // since HeartbeatTimeout.
    70  func (igc *GC) gc(ctx context.Context) error {
    71  	igc.logger.Debug("Running CRD identity garbage collector")
    72  	cepStore, err := igc.ciliumEndpoint.Store(ctx)
    73  	if err != nil {
    74  		igc.logger.WithError(err).Error("unable to get CEP store")
    75  		return err
    76  	}
    77  	identitiesStore, err := igc.identity.Store(ctx)
    78  	if err != nil {
    79  		igc.logger.WithError(err).Error("unable to get Cilium identities from local store")
    80  		return err
    81  	}
    82  
    83  	var idsInCESs map[string]bool
    84  	cesEnabled := option.Config.EnableCiliumEndpointSlice
    85  	if cesEnabled {
    86  		cesStore, err := igc.ciliumEndpointSlice.Store(ctx)
    87  		if err != nil {
    88  			igc.logger.WithError(err).Warning("unable to get CES  store")
    89  		} else {
    90  			idsInCESs = usedIdentitiesInCESs(cesStore)
    91  		}
    92  	}
    93  
    94  	identities := identitiesStore.List()
    95  	totalEntries := len(identities)
    96  	deletedEntries := 0
    97  
    98  	timeNow := time.Now()
    99  	for _, identity := range identities {
   100  		foundInCES := false
   101  		if cesEnabled {
   102  			_, foundInCES = idsInCESs[identity.Name]
   103  		}
   104  		// The identity is definitely alive if there's a CE or CES using it.
   105  		alive := foundInCES || hasCEWithIdentity(cepStore, identity.Name)
   106  
   107  		if alive {
   108  			igc.heartbeatStore.markAlive(identity.Name, timeNow)
   109  			continue
   110  		}
   111  
   112  		if !igc.heartbeatStore.isAlive(identity.Name) {
   113  			ts, ok := identity.Annotations[identitybackend.HeartBeatAnnotation]
   114  			if !ok {
   115  				log.WithFields(logrus.Fields{
   116  					logfields.Identity: identity.Name,
   117  					logfields.K8sUID:   identity.UID,
   118  				}).Info("Marking identity for later deletion")
   119  
   120  				// Deep copy so we get a version we are allowed to update
   121  				identity = identity.DeepCopy()
   122  				if identity.Annotations == nil {
   123  					identity.Annotations = make(map[string]string)
   124  				}
   125  
   126  				identity.Annotations[identitybackend.HeartBeatAnnotation] = timeNow.Format(time.RFC3339Nano)
   127  				if err := igc.updateIdentity(ctx, identity); err != nil {
   128  					log.WithError(err).
   129  						WithField(logfields.Identity, identity).
   130  						Error("Marking identity for later deletion")
   131  					return err
   132  				}
   133  
   134  				continue
   135  			}
   136  
   137  			log.WithFields(logrus.Fields{
   138  				logfields.Identity: identity,
   139  			}).Debugf("Deleting unused identity; marked for deletion at %s", ts)
   140  
   141  			err := igc.deleteIdentity(ctx, identity)
   142  			if err != nil {
   143  				if k8serrors.IsConflict(err) {
   144  					// If a conflict arises, defer deletion to the next gc
   145  					// run and permit gc to continue. This prevents
   146  					// identities from accumulating if there are frequent
   147  					// conflicts.
   148  					log.WithFields(logrus.Fields{
   149  						logfields.Identity: identity.Name,
   150  						logfields.K8sUID:   identity.UID,
   151  					}).Warn("Could not delete identity due to conflict")
   152  					continue
   153  				}
   154  
   155  				log.WithError(err).WithFields(logrus.Fields{
   156  					logfields.Identity: identity,
   157  				}).Error("Deleting unused identity")
   158  				return err
   159  			} else {
   160  				deletedEntries++
   161  			}
   162  		}
   163  
   164  		// If Context was canceled we should break
   165  		if ctx.Err() != nil {
   166  			break
   167  		}
   168  	}
   169  
   170  	if ctx.Err() == nil {
   171  		igc.successfulRuns++
   172  		igc.metrics.IdentityGCRuns.WithLabelValues(LabelValueOutcomeSuccess).Set(float64(igc.successfulRuns))
   173  	} else {
   174  		igc.failedRuns++
   175  		igc.metrics.IdentityGCRuns.WithLabelValues(LabelValueOutcomeFail).Set(float64(igc.failedRuns))
   176  	}
   177  	aliveEntries := totalEntries - deletedEntries
   178  	igc.metrics.IdentityGCSize.WithLabelValues(LabelValueOutcomeAlive).Set(float64(aliveEntries))
   179  	igc.metrics.IdentityGCSize.WithLabelValues(LabelValueOutcomeDeleted).Set(float64(deletedEntries))
   180  
   181  	igc.heartbeatStore.gc()
   182  
   183  	return nil
   184  }
   185  
   186  // deleteIdentity deletes an identity. It includes the resource version and
   187  // will error if the object has since been changed.
   188  func (igc *GC) deleteIdentity(ctx context.Context, identity *v2.CiliumIdentity) error {
   189  	// Wait until we can delete an identity
   190  	if err := igc.rateLimiter.Wait(ctx); err != nil {
   191  		return err
   192  	}
   193  
   194  	// Delete the identity from the auth identity store
   195  	if err := igc.authIdentityClient.Delete(ctx, identity.Name); err != nil {
   196  		return err
   197  	}
   198  
   199  	if err := igc.clientset.Delete(
   200  		ctx,
   201  		identity.Name,
   202  		metav1.DeleteOptions{
   203  			Preconditions: &metav1.Preconditions{
   204  				UID:             &identity.UID,
   205  				ResourceVersion: &identity.ResourceVersion,
   206  			},
   207  		},
   208  	); err != nil {
   209  		return err
   210  	}
   211  
   212  	log.WithField(logfields.Identity, identity.GetName()).Debug("Garbage collected identity")
   213  
   214  	return nil
   215  }
   216  
   217  func (igc *GC) updateIdentity(ctx context.Context, identity *v2.CiliumIdentity) error {
   218  	if _, err := igc.clientset.Update(
   219  		ctx,
   220  		identity,
   221  		metav1.UpdateOptions{},
   222  	); err != nil {
   223  		return err
   224  	}
   225  
   226  	log.WithField(logfields.Identity, identity.GetName()).Debug("Updated identity")
   227  
   228  	return nil
   229  }
   230  
   231  func hasCEWithIdentity(cepStore resource.Store[*v2.CiliumEndpoint], identity string) bool {
   232  	ces, _ := cepStore.IndexKeys(operatorK8s.CiliumEndpointIndexIdentity, identity)
   233  	return len(ces) != 0
   234  }
   235  
   236  func usedIdentitiesInCESs(cesStore resource.Store[*v2alpha1.CiliumEndpointSlice]) map[string]bool {
   237  	usedIdentities := make(map[string]bool)
   238  	cesObjList := cesStore.List()
   239  	for _, ces := range cesObjList {
   240  		for _, cep := range ces.Endpoints {
   241  			id := strconv.FormatInt(cep.IdentityID, 10)
   242  			usedIdentities[id] = true
   243  		}
   244  	}
   245  	return usedIdentities
   246  }