github.com/cilium/cilium@v1.16.2/operator/watchers/cilium_node_gc.go (about) 1 // SPDX-License-Identifier: Apache-2.0 2 // Copyright Authors of Cilium 3 4 package watchers 5 6 import ( 7 "context" 8 "sync" 9 "time" 10 11 k8serrors "k8s.io/apimachinery/pkg/api/errors" 12 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 13 "k8s.io/client-go/tools/cache" 14 15 "github.com/cilium/cilium/pkg/controller" 16 cilium_v2 "github.com/cilium/cilium/pkg/k8s/apis/cilium.io/v2" 17 k8sClient "github.com/cilium/cilium/pkg/k8s/client" 18 ciliumv2 "github.com/cilium/cilium/pkg/k8s/client/clientset/versioned/typed/cilium.io/v2" 19 "github.com/cilium/cilium/pkg/lock" 20 "github.com/cilium/cilium/pkg/logging/logfields" 21 ) 22 23 var ciliumNodeGCControllerGroup = controller.NewGroup("cilium-node-gc") 24 25 // ciliumNodeGCCandidate keeps track of cilium nodes, which are candidate for GC. 26 // Underlying there is a map with node name as key, and last marked timestamp as value. 27 type ciliumNodeGCCandidate struct { 28 lock lock.RWMutex 29 nodesToRemove map[string]time.Time 30 } 31 32 func newCiliumNodeGCCandidate() *ciliumNodeGCCandidate { 33 return &ciliumNodeGCCandidate{ 34 nodesToRemove: map[string]time.Time{}, 35 } 36 } 37 38 func (c *ciliumNodeGCCandidate) Get(nodeName string) (time.Time, bool) { 39 c.lock.RLock() 40 defer c.lock.RUnlock() 41 val, exists := c.nodesToRemove[nodeName] 42 return val, exists 43 } 44 45 func (c *ciliumNodeGCCandidate) Add(nodeName string) { 46 c.lock.Lock() 47 defer c.lock.Unlock() 48 c.nodesToRemove[nodeName] = time.Now() 49 } 50 51 func (c *ciliumNodeGCCandidate) Delete(nodeName string) { 52 c.lock.Lock() 53 defer c.lock.Unlock() 54 delete(c.nodesToRemove, nodeName) 55 } 56 57 // RunCiliumNodeGC performs garbage collector for cilium node resource 58 func RunCiliumNodeGC(ctx context.Context, wg *sync.WaitGroup, clientset k8sClient.Clientset, ciliumNodeStore cache.Store, interval time.Duration) { 59 nodesInit(wg, clientset.Slim(), ctx.Done()) 60 61 // wait for k8s nodes synced is done 62 select { 63 case <-slimNodeStoreSynced: 64 case <-ctx.Done(): 65 return 66 } 67 68 log.Info("Starting to garbage collect stale CiliumNode custom resources") 69 70 candidateStore := newCiliumNodeGCCandidate() 71 // create the controller to perform mark and sweep operation for cilium nodes 72 ctrlMgr.UpdateController("cilium-node-gc", 73 controller.ControllerParams{ 74 Group: ciliumNodeGCControllerGroup, 75 Context: ctx, 76 DoFunc: func(ctx context.Context) error { 77 return performCiliumNodeGC(ctx, clientset.CiliumV2().CiliumNodes(), ciliumNodeStore, 78 nodeGetter{}, interval, candidateStore) 79 }, 80 RunInterval: interval, 81 }, 82 ) 83 84 wg.Add(1) 85 go func() { 86 defer wg.Done() 87 <-ctx.Done() 88 ctrlMgr.RemoveControllerAndWait("cilium-node-gc") 89 }() 90 } 91 92 func performCiliumNodeGC(ctx context.Context, client ciliumv2.CiliumNodeInterface, ciliumNodeStore cache.Store, 93 nodeGetter slimNodeGetter, interval time.Duration, candidateStore *ciliumNodeGCCandidate) error { 94 for _, nodeName := range ciliumNodeStore.ListKeys() { 95 scopedLog := log.WithField(logfields.NodeName, nodeName) 96 _, err := nodeGetter.GetK8sSlimNode(nodeName) 97 if err == nil { 98 scopedLog.Debugf("CiliumNode is valid, no garbage collection required") 99 continue 100 } 101 102 if !k8serrors.IsNotFound(err) { 103 scopedLog.WithError(err).Error("Unable to fetch k8s node from store") 104 return err 105 } 106 107 obj, _, err := ciliumNodeStore.GetByKey(nodeName) 108 if err != nil { 109 scopedLog.WithError(err).Error("Unable to fetch CiliumNode from store") 110 return err 111 } 112 113 cn, ok := obj.(*cilium_v2.CiliumNode) 114 if !ok { 115 scopedLog.Errorf("Object stored in store is not *cilium_v2.CiliumNode but %T", obj) 116 return err 117 } 118 119 // if there is owner references, let k8s handle garbage collection 120 if len(cn.GetOwnerReferences()) > 0 { 121 continue 122 } 123 124 lastMarkedTime, exists := candidateStore.Get(nodeName) 125 if !exists { 126 scopedLog.Info("Add CiliumNode to garbage collector candidates") 127 candidateStore.Add(nodeName) 128 continue 129 } 130 131 // only remove the node if last marked time is more than running interval 132 if lastMarkedTime.Before(time.Now().Add(-interval)) { 133 scopedLog.Info("Perform GC for invalid CiliumNode") 134 err = client.Delete(ctx, nodeName, metav1.DeleteOptions{}) 135 if err != nil && !k8serrors.IsNotFound(err) { 136 scopedLog.WithError(err).Error("Failed to delete invalid CiliumNode") 137 return err 138 } 139 scopedLog.Info("CiliumNode is garbage collected successfully") 140 candidateStore.Delete(nodeName) 141 } 142 } 143 return nil 144 }