github.com/looshlee/beatles@v0.0.0-20220727174639-742810ab631c/operator/k8s_cep_gc.go (about) 1 // Copyright 2016-2018 Authors of Cilium 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package main 16 17 import ( 18 "context" 19 "time" 20 21 "github.com/cilium/cilium/pkg/controller" 22 "github.com/cilium/cilium/pkg/k8s" 23 "github.com/cilium/cilium/pkg/logging/logfields" 24 25 "github.com/sirupsen/logrus" 26 core_v1 "k8s.io/api/core/v1" 27 k8serrors "k8s.io/apimachinery/pkg/api/errors" 28 meta_v1 "k8s.io/apimachinery/pkg/apis/meta/v1" 29 ) 30 31 var ( 32 // ciliumEndpointGCInterval is the interval between attempts of the CEP GC 33 // controller. 34 // Note that only one node per cluster should run this, and most iterations 35 // will simply return. 36 ciliumEndpointGCInterval time.Duration 37 ) 38 39 // enableCiliumEndpointSyncGC starts the node-singleton sweeper for 40 // CiliumEndpoint objects where the managing node is no longer running. These 41 // objects are created by the sync-to-k8s-ciliumendpoint controller on each 42 // Endpoint. 43 // The general steps are: 44 // - get list of nodes 45 // - only run with probability 1/nodes 46 // - get list of CEPs 47 // - for each CEP 48 // delete CEP if the corresponding pod does not exist 49 // CiliumEndpoint objects have the same name as the pod they represent 50 func enableCiliumEndpointSyncGC() { 51 var ( 52 controllerName = "to-k8s-ciliumendpoint-gc" 53 scopedLog = log.WithField("controller", controllerName) 54 ) 55 56 log.Info("Starting to garbage collect stale CiliumEndpoint custom resources...") 57 58 ciliumClient := ciliumK8sClient.CiliumV2() 59 60 // this dummy manager is needed only to add this controller to the global list 61 controller.NewManager().UpdateController(controllerName, 62 controller.ControllerParams{ 63 RunInterval: ciliumEndpointGCInterval, 64 DoFunc: func(ctx context.Context) error { 65 var ( 66 listOpts = meta_v1.ListOptions{Limit: 10} 67 loopStop = time.Now().Add(ciliumEndpointGCInterval) 68 ) 69 70 pods, err := k8s.Client().CoreV1().Pods("").List(meta_v1.ListOptions{}) 71 if err != nil { 72 return err 73 } 74 75 podsCache := map[string]*core_v1.Pod{} 76 for _, pod := range pods.Items { 77 podsCache[pod.Namespace+"/"+pod.Name] = &pod 78 } 79 80 perCEPFetch: 81 for time.Now().Before(loopStop) { // Guard against no-break bugs 82 time.Sleep(time.Second) // Throttle lookups in case of a busy loop 83 84 ceps, err := ciliumClient.CiliumEndpoints(meta_v1.NamespaceAll).List(listOpts) 85 switch { 86 case err != nil && k8serrors.IsResourceExpired(err) && ceps.Continue != "": 87 // This combination means we saw a 410 ResourceExpired error but we 88 // can iterate on the now-current snapshot. We need to refetch, 89 // however. 90 // See https://github.com/kubernetes/apimachinery/blob/master/pkg/apis/meta/v1/types.go#L350-L381 91 // or the docs for k8s.io/apimachinery/pkg/apis/meta/v1.ListOptions 92 // vendored into this repo. 93 listOpts.Continue = ceps.Continue 94 continue perCEPFetch 95 96 case err != nil: 97 scopedLog.WithError(err).Debug("Cannot list CEPs") 98 return err 99 } 100 101 // setup listOpts for the next iteration 102 listOpts.Continue = ceps.Continue 103 104 // For each CEP we fetched, check if we know about it 105 for _, cep := range ceps.Items { 106 cepFullName := cep.Namespace + "/" + cep.Name 107 _, exists := podsCache[cepFullName] 108 if !exists { 109 // delete 110 scopedLog = scopedLog.WithFields(logrus.Fields{ 111 logfields.EndpointID: cep.Status.ID, 112 logfields.K8sPodName: cepFullName, 113 }) 114 scopedLog.Debug("Orphaned CiliumEndpoint is being garbage collected") 115 PropagationPolicy := meta_v1.DeletePropagationBackground // because these are const strings but the API wants pointers 116 if err := ciliumClient.CiliumEndpoints(cep.Namespace).Delete(cep.Name, &meta_v1.DeleteOptions{PropagationPolicy: &PropagationPolicy}); err != nil { 117 scopedLog.WithError(err).Debug("Unable to delete orphaned CEP") 118 return err 119 } 120 } 121 } 122 if ceps.Continue != "" { 123 // there is more data, continue 124 continue perCEPFetch 125 } 126 break perCEPFetch // break out as a safe default to avoid spammy loops 127 } 128 return nil 129 }, 130 }) 131 }