github.com/aporeto-inc/trireme-lib@v10.358.0+incompatible/monitor/internal/pod/delete_controller.go (about) 1 // +build !windows 2 3 package podmonitor 4 5 import ( 6 "context" 7 "time" 8 9 "go.aporeto.io/trireme-lib/common" 10 "go.aporeto.io/trireme-lib/monitor/config" 11 "go.aporeto.io/trireme-lib/monitor/extractors" 12 "go.aporeto.io/trireme-lib/policy" 13 "go.uber.org/zap" 14 corev1 "k8s.io/api/core/v1" 15 "k8s.io/apimachinery/pkg/api/errors" 16 "sigs.k8s.io/controller-runtime/pkg/client" 17 "sigs.k8s.io/controller-runtime/pkg/event" 18 ) 19 20 // deleteControllerReconcileFunc is the reconciler function signature for the DeleteController 21 type deleteControllerReconcileFunc func(context.Context, client.Client, string, *config.ProcessorConfig, time.Duration, map[string]DeleteObject, extractors.PodSandboxExtractor, chan event.GenericEvent) 22 23 // DeleteController is responsible for cleaning up after Kubernetes because we 24 // are missing our native ID on the last reconcile event where the pod has already 25 // been deleted. This is also more reliable because we are filling this controller 26 // with events starting from the time when we first see a deletion timestamp on a pod. 27 // It pretty much facilitates the work of a finalizer without needing a finalizer and 28 // also only kicking in once a pod has *really* been deleted. 29 type DeleteController struct { 30 client client.Client 31 nodeName string 32 handler *config.ProcessorConfig 33 34 deleteCh chan DeleteEvent 35 reconcileCh chan struct{} 36 reconcileFunc deleteControllerReconcileFunc 37 tickerPeriod time.Duration 38 itemProcessTimeout time.Duration 39 sandboxExtractor extractors.PodSandboxExtractor 40 eventsCh chan event.GenericEvent 41 } 42 43 // DeleteObject is the obj used to store in the event map. 44 type DeleteObject struct { 45 podUID string 46 sandboxID string 47 podName client.ObjectKey 48 } 49 50 // NewDeleteController creates a new DeleteController. 51 func NewDeleteController(c client.Client, nodeName string, pc *config.ProcessorConfig, sandboxExtractor extractors.PodSandboxExtractor, eventsCh chan event.GenericEvent) *DeleteController { 52 return &DeleteController{ 53 client: c, 54 nodeName: nodeName, 55 handler: pc, 56 deleteCh: make(chan DeleteEvent, 1000), 57 reconcileCh: make(chan struct{}), 58 reconcileFunc: deleteControllerReconcile, 59 tickerPeriod: 5 * time.Second, 60 itemProcessTimeout: 30 * time.Second, 61 sandboxExtractor: sandboxExtractor, 62 eventsCh: eventsCh, 63 } 64 } 65 66 // GetDeleteCh returns the delete channel on which to queue delete events 67 func (c *DeleteController) GetDeleteCh() chan<- DeleteEvent { 68 return c.deleteCh 69 } 70 71 // GetReconcileCh returns the channel on which to notify the controller about an immediate reconcile event 72 func (c *DeleteController) GetReconcileCh() chan<- struct{} { 73 return c.reconcileCh 74 } 75 76 // Start implemets the Runnable interface 77 func (c *DeleteController) Start(z <-chan struct{}) error { 78 backgroundCtx := context.Background() 79 t := time.NewTicker(c.tickerPeriod) 80 m := make(map[string]DeleteObject) 81 82 // the poor man's controller loop 83 for { 84 select { 85 case ev := <-c.deleteCh: 86 obj := DeleteObject{podUID: ev.PodUID, sandboxID: ev.SandboxID, podName: ev.NamespaceName} 87 // here don't update the map, insert only if not present. 88 if _, ok := m[ev.PodUID]; !ok { 89 m[ev.PodUID] = obj 90 } 91 case <-c.reconcileCh: 92 c.reconcileFunc(backgroundCtx, c.client, c.nodeName, c.handler, c.itemProcessTimeout, m, c.sandboxExtractor, c.eventsCh) 93 case <-t.C: 94 c.reconcileFunc(backgroundCtx, c.client, c.nodeName, c.handler, c.itemProcessTimeout, m, c.sandboxExtractor, c.eventsCh) 95 case <-z: 96 t.Stop() 97 return nil 98 } 99 } 100 } 101 102 // deleteControllerReconcile is the real reconciler implementation for the DeleteController 103 func deleteControllerReconcile(backgroundCtx context.Context, c client.Client, nodeName string, pc *config.ProcessorConfig, itemProcessTimeout time.Duration, m map[string]DeleteObject, sandboxExtractor extractors.PodSandboxExtractor, eventCh chan event.GenericEvent) { 104 for podUID, req := range m { 105 deleteControllerProcessItem(backgroundCtx, c, nodeName, pc, itemProcessTimeout, m, podUID, req.podName, sandboxExtractor, eventCh) 106 } 107 } 108 109 func deleteControllerProcessItem(backgroundCtx context.Context, c client.Client, nodeName string, pc *config.ProcessorConfig, itemProcessTimeout time.Duration, m map[string]DeleteObject, podUID string, req client.ObjectKey, sandboxExtractor extractors.PodSandboxExtractor, eventCh chan event.GenericEvent) { 110 var ok bool 111 var delObj DeleteObject 112 if delObj, ok = m[podUID]; !ok { 113 zap.L().Warn("DeleteController: nativeID not found in delete controller map", zap.String("nativeID", podUID)) 114 return 115 } 116 ctx, cancel := context.WithTimeout(backgroundCtx, itemProcessTimeout) 117 defer cancel() 118 pod := &corev1.Pod{} 119 if err := c.Get(ctx, req, pod); err != nil { 120 if errors.IsNotFound(err) { 121 // this is the normal case: a pod is gone 122 // so just send a destroy event 123 zap.L().Warn("DeleteController: the pod is deleted in the cluster, so call the destroy PU") 124 if err := pc.Policy.HandlePUEvent( 125 ctx, 126 podUID, 127 common.EventDestroy, 128 policy.NewPURuntimeWithDefaults(), 129 ); err != nil { 130 // we don't really care, we just warn 131 zap.L().Warn("DeleteController: Failed to handle destroy event", zap.String("puID", podUID), zap.String("namespacedName", req.String()), zap.Error(err)) 132 } 133 // we only fire events away, we don't really care about the error anyway 134 // it is up to the policy engine to make sense of that 135 delete(m, podUID) 136 } else { 137 // we don't really care, we just warn 138 zap.L().Warn("DeleteController: Failed to get pod from Kubernetes API", zap.String("puID", podUID), zap.String("namespacedName", req.String()), zap.Error(err)) 139 } 140 return 141 } 142 143 // For StatefulSets we need to account for another special case: pods that move between nodes *keep* the same UID, so they won't fit the check below. 144 // However, we can simply double-check the node name in the same way how we already filter events in the watcher/monitor 145 if pod.Spec.NodeName != nodeName { 146 zap.L().Warn("DeleteController: the pod is now on a different node, send destroy event and delete the cache", zap.String("puID", podUID), zap.String("namespacedName", req.String()), zap.String("podNodeName", pod.Spec.NodeName), zap.String("nodeName", nodeName)) 147 if err := pc.Policy.HandlePUEvent( 148 ctx, 149 podUID, 150 common.EventDestroy, 151 policy.NewPURuntimeWithDefaults(), 152 ); err != nil { 153 // we don't really care, we just warn 154 zap.L().Warn("DeleteController: Failed to handle destroy event", zap.String("puID", podUID), zap.String("namespacedName", req.String()), zap.Error(err)) 155 } 156 // we only fire events away, we don't really care about the error anyway 157 // it is up to the policy engine to make sense of that 158 delete(m, podUID) 159 return 160 } 161 162 // the edge case: a pod with the same namespaced name came up and we have missed a delete event 163 // this means that this pod belongs to a different PU and must live, therefore we try to delete the old one 164 165 // the following code also takes care of any restarts in the Pod, the restarts can be caused by either 166 // the sandbox getting killed or all the containers restarting due a crash or kill. 167 168 // Now destroy the PU only if the following 169 // 1. Simple case if the pod UID don't match then go ahead and destroy the PU. 170 // 2. When the pod UID match then do the following: 171 // 2.a Get the current SandboxID from the pod. 172 // 2.b Get the sandboxID from the map. 173 // 2.c If the sandBoxID differ then send the destroy event for the old(map) sandBoxID. 174 175 // 1st case, simple if the pod UID don't match then just call the destroy PU event and delete the map entry with the old key. 176 if string(pod.UID) != delObj.podUID { 177 178 zap.L().Warn("DeleteController: Pod does not have expected native ID, we must have missed an event and the same pod was recreated. Trying to destroy PU", zap.String("puID", podUID), zap.String("namespacedName", req.String()), zap.String("podUID", string(pod.GetUID()))) 179 if err := pc.Policy.HandlePUEvent( 180 ctx, 181 podUID, 182 common.EventDestroy, 183 policy.NewPURuntimeWithDefaults(), 184 ); err != nil { 185 // we don't really care, we just warn 186 zap.L().Warn("DeleteController: Failed to handle destroy event", zap.String("puID", podUID), zap.String("namespacedName", req.String()), zap.Error(err)) 187 } 188 // we only fire events away, we don't really care about the error anyway 189 // it is up to the policy engine to make sense of that 190 delete(m, podUID) 191 return 192 } 193 194 // now the 2nd case, when pod UID match 195 if string(pod.UID) == delObj.podUID { 196 zap.L().Debug("DeleteController: the pod UID Match happened for", zap.String("podName:", req.String()), zap.String("podUID", string(pod.UID))) 197 // 2a get the current sandboxID 198 if sandboxExtractor == nil { 199 return 200 } 201 currentSandboxID, err := sandboxExtractor(ctx, pod) 202 if err != nil { 203 zap.L().Debug("DeleteController: cannot extract the SandboxID, return", zap.String("namespacedName", req.String()), zap.String("podUID", string(pod.GetUID()))) 204 return 205 } 206 // update the map with the sandboxID 207 // here we update the map only if the sandboxID has not been extracted. 208 // The extraction of the sandboxID if missed by the main controller then we will update the map below. 209 if delObj.sandboxID == "" { 210 delObj = DeleteObject{podUID: podUID, sandboxID: currentSandboxID, podName: req} 211 m[podUID] = delObj 212 } 213 // 2b get the pod/old sandboxID 214 oldSandboxID := delObj.sandboxID 215 216 zap.L().Debug("DeleteController:", zap.String(" the sandboxID, curr:", currentSandboxID), zap.String(" old sandboxID: ", oldSandboxID)) 217 // 2c compare the oldSandboxID and currentSandboxID, if they differ then destroy the PU 218 if oldSandboxID != currentSandboxID { 219 zap.L().Warn("DeleteController: Pod SandboxID differ. Trying to destroy PU", zap.String("namespacedName", req.String()), zap.String("currentSandboxID", currentSandboxID), zap.String("oldSandboxID", oldSandboxID)) 220 if err := pc.Policy.HandlePUEvent( 221 ctx, 222 podUID, 223 common.EventDestroy, 224 policy.NewPURuntimeWithDefaults(), 225 ); err != nil { 226 // we don't really care, we just warn 227 zap.L().Warn("DeleteController: Failed to handle destroy event", zap.String("puID", podUID), zap.String("namespacedName", req.String()), zap.Error(err)) 228 } 229 // we only fire events away, we don't really care about the error anyway 230 // it is up to the policy engine to make sense of that 231 delete(m, podUID) 232 zap.L().Warn("DeleteController: PU destroyed, now send event for the pod-controller to reconcile", zap.String(" podName: ", req.String())) 233 // below we send event to the main pod-controller to reconcile again and to create a PU if it is not created yet. 234 eventCh <- event.GenericEvent{ 235 Object: pod, 236 Meta: pod.GetObjectMeta(), 237 } 238 return 239 } 240 } 241 }