github.com/cilium/cilium@v1.16.2/operator/watchers/cilium_node_gc.go (about)

     1  // SPDX-License-Identifier: Apache-2.0
     2  // Copyright Authors of Cilium
     3  
     4  package watchers
     5  
import (
	"context"
	"fmt"
	"sync"
	"time"

	k8serrors "k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/tools/cache"

	"github.com/cilium/cilium/pkg/controller"
	cilium_v2 "github.com/cilium/cilium/pkg/k8s/apis/cilium.io/v2"
	k8sClient "github.com/cilium/cilium/pkg/k8s/client"
	ciliumv2 "github.com/cilium/cilium/pkg/k8s/client/clientset/versioned/typed/cilium.io/v2"
	"github.com/cilium/cilium/pkg/lock"
	"github.com/cilium/cilium/pkg/logging/logfields"
)
    22  
    23  var ciliumNodeGCControllerGroup = controller.NewGroup("cilium-node-gc")
    24  
    25  // ciliumNodeGCCandidate keeps track of cilium nodes, which are candidate for GC.
    26  // Underlying there is a map with node name as key, and last marked timestamp as value.
    27  type ciliumNodeGCCandidate struct {
    28  	lock          lock.RWMutex
    29  	nodesToRemove map[string]time.Time
    30  }
    31  
    32  func newCiliumNodeGCCandidate() *ciliumNodeGCCandidate {
    33  	return &ciliumNodeGCCandidate{
    34  		nodesToRemove: map[string]time.Time{},
    35  	}
    36  }
    37  
    38  func (c *ciliumNodeGCCandidate) Get(nodeName string) (time.Time, bool) {
    39  	c.lock.RLock()
    40  	defer c.lock.RUnlock()
    41  	val, exists := c.nodesToRemove[nodeName]
    42  	return val, exists
    43  }
    44  
    45  func (c *ciliumNodeGCCandidate) Add(nodeName string) {
    46  	c.lock.Lock()
    47  	defer c.lock.Unlock()
    48  	c.nodesToRemove[nodeName] = time.Now()
    49  }
    50  
    51  func (c *ciliumNodeGCCandidate) Delete(nodeName string) {
    52  	c.lock.Lock()
    53  	defer c.lock.Unlock()
    54  	delete(c.nodesToRemove, nodeName)
    55  }
    56  
    57  // RunCiliumNodeGC performs garbage collector for cilium node resource
    58  func RunCiliumNodeGC(ctx context.Context, wg *sync.WaitGroup, clientset k8sClient.Clientset, ciliumNodeStore cache.Store, interval time.Duration) {
    59  	nodesInit(wg, clientset.Slim(), ctx.Done())
    60  
    61  	// wait for k8s nodes synced is done
    62  	select {
    63  	case <-slimNodeStoreSynced:
    64  	case <-ctx.Done():
    65  		return
    66  	}
    67  
    68  	log.Info("Starting to garbage collect stale CiliumNode custom resources")
    69  
    70  	candidateStore := newCiliumNodeGCCandidate()
    71  	// create the controller to perform mark and sweep operation for cilium nodes
    72  	ctrlMgr.UpdateController("cilium-node-gc",
    73  		controller.ControllerParams{
    74  			Group:   ciliumNodeGCControllerGroup,
    75  			Context: ctx,
    76  			DoFunc: func(ctx context.Context) error {
    77  				return performCiliumNodeGC(ctx, clientset.CiliumV2().CiliumNodes(), ciliumNodeStore,
    78  					nodeGetter{}, interval, candidateStore)
    79  			},
    80  			RunInterval: interval,
    81  		},
    82  	)
    83  
    84  	wg.Add(1)
    85  	go func() {
    86  		defer wg.Done()
    87  		<-ctx.Done()
    88  		ctrlMgr.RemoveControllerAndWait("cilium-node-gc")
    89  	}()
    90  }
    91  
    92  func performCiliumNodeGC(ctx context.Context, client ciliumv2.CiliumNodeInterface, ciliumNodeStore cache.Store,
    93  	nodeGetter slimNodeGetter, interval time.Duration, candidateStore *ciliumNodeGCCandidate) error {
    94  	for _, nodeName := range ciliumNodeStore.ListKeys() {
    95  		scopedLog := log.WithField(logfields.NodeName, nodeName)
    96  		_, err := nodeGetter.GetK8sSlimNode(nodeName)
    97  		if err == nil {
    98  			scopedLog.Debugf("CiliumNode is valid, no garbage collection required")
    99  			continue
   100  		}
   101  
   102  		if !k8serrors.IsNotFound(err) {
   103  			scopedLog.WithError(err).Error("Unable to fetch k8s node from store")
   104  			return err
   105  		}
   106  
   107  		obj, _, err := ciliumNodeStore.GetByKey(nodeName)
   108  		if err != nil {
   109  			scopedLog.WithError(err).Error("Unable to fetch CiliumNode from store")
   110  			return err
   111  		}
   112  
   113  		cn, ok := obj.(*cilium_v2.CiliumNode)
   114  		if !ok {
   115  			scopedLog.Errorf("Object stored in store is not *cilium_v2.CiliumNode but %T", obj)
   116  			return err
   117  		}
   118  
   119  		// if there is owner references, let k8s handle garbage collection
   120  		if len(cn.GetOwnerReferences()) > 0 {
   121  			continue
   122  		}
   123  
   124  		lastMarkedTime, exists := candidateStore.Get(nodeName)
   125  		if !exists {
   126  			scopedLog.Info("Add CiliumNode to garbage collector candidates")
   127  			candidateStore.Add(nodeName)
   128  			continue
   129  		}
   130  
   131  		// only remove the node if last marked time is more than running interval
   132  		if lastMarkedTime.Before(time.Now().Add(-interval)) {
   133  			scopedLog.Info("Perform GC for invalid CiliumNode")
   134  			err = client.Delete(ctx, nodeName, metav1.DeleteOptions{})
   135  			if err != nil && !k8serrors.IsNotFound(err) {
   136  				scopedLog.WithError(err).Error("Failed to delete invalid CiliumNode")
   137  				return err
   138  			}
   139  			scopedLog.Info("CiliumNode is garbage collected successfully")
   140  			candidateStore.Delete(nodeName)
   141  		}
   142  	}
   143  	return nil
   144  }