github.com/cilium/cilium@v1.16.2/operator/cmd/k8s_pod_controller.go (about) 1 // SPDX-License-Identifier: Apache-2.0 2 // Copyright Authors of Cilium 3 4 package cmd 5 6 import ( 7 "context" 8 "fmt" 9 "sync" 10 "time" 11 12 "github.com/sirupsen/logrus" 13 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 14 15 operatorOption "github.com/cilium/cilium/operator/option" 16 "github.com/cilium/cilium/operator/watchers" 17 "github.com/cilium/cilium/pkg/controller" 18 k8sClient "github.com/cilium/cilium/pkg/k8s/client" 19 slim_corev1 "github.com/cilium/cilium/pkg/k8s/slim/k8s/api/core/v1" 20 "github.com/cilium/cilium/pkg/logging/logfields" 21 ) 22 23 const ( 24 minimalPodRestartInterval = 5 * time.Minute 25 unmanagedPodMinimalAge = 30 * time.Second 26 ) 27 28 var ( 29 lastPodRestart = map[string]time.Time{} 30 31 restartUnmanagedPodsControllerGroup = controller.NewGroup("restart-unmanaged-pods") 32 ) 33 34 func enableUnmanagedController(ctx context.Context, wg *sync.WaitGroup, clientset k8sClient.Clientset) { 35 // These functions will block until the resources are synced with k8s. 36 watchers.CiliumEndpointsInit(ctx, wg, clientset) 37 watchers.UnmanagedPodsInit(ctx, wg, clientset) 38 39 mgr := controller.NewManager() 40 41 wg.Add(1) 42 go func() { 43 defer wg.Done() 44 <-ctx.Done() 45 mgr.RemoveAllAndWait() 46 }() 47 48 mgr.UpdateController("restart-unmanaged-pods", 49 controller.ControllerParams{ 50 Group: restartUnmanagedPodsControllerGroup, 51 RunInterval: time.Duration(operatorOption.Config.UnmanagedPodWatcherInterval) * time.Second, 52 DoFunc: func(ctx context.Context) error { 53 for podName, lastRestart := range lastPodRestart { 54 if time.Since(lastRestart) > 2*minimalPodRestartInterval { 55 delete(lastPodRestart, podName) 56 } 57 } 58 for _, podItem := range watchers.UnmanagedPodStore.List() { 59 pod, ok := podItem.(*slim_corev1.Pod) 60 if !ok { 61 log.Errorf("unexpected type mapping: found %T, expected %T", pod, &slim_corev1.Pod{}) 62 continue 63 } 64 if pod.Spec.HostNetwork { 65 continue 66 } 67 cep, exists, err := watchers.HasCE(pod.Namespace, pod.Name) 68 if err != nil { 69 log.WithError(err).WithField(logfields.EndpointID, fmt.Sprintf("%s/%s", pod.Namespace, pod.Name)). 70 Errorf("Unexpected error when getting CiliumEndpoint") 71 continue 72 } 73 podID := fmt.Sprintf("%s/%s", pod.Namespace, pod.Name) 74 if exists { 75 log.WithFields(logrus.Fields{ 76 logfields.K8sPodName: podID, 77 logfields.Identity: cep.Status.ID, 78 }).Debug("Found managed pod due to presence of a CEP") 79 } else { 80 log.WithField(logfields.K8sPodName, podID).Debugf("Found unmanaged pod") 81 if startTime := pod.Status.StartTime; startTime != nil { 82 if age := time.Since((*startTime).Time); age > unmanagedPodMinimalAge { 83 if lastRestart, ok := lastPodRestart[podID]; ok { 84 if timeSinceRestart := time.Since(lastRestart); timeSinceRestart < minimalPodRestartInterval { 85 log.WithField(logfields.K8sPodName, podID). 86 Debugf("Not restarting unmanaged pod, only %s since last restart", timeSinceRestart) 87 continue 88 } 89 } 90 91 log.WithField(logfields.K8sPodName, podID).Infof("Restarting unmanaged pod, started %s ago", age) 92 if err := clientset.CoreV1().Pods(pod.Namespace).Delete(ctx, pod.Name, metav1.DeleteOptions{}); err != nil { 93 log.WithError(err).WithField(logfields.K8sPodName, podID).Warning("Unable to restart pod") 94 } else { 95 lastPodRestart[podID] = time.Now() 96 97 // Delete a single pod per iteration to avoid killing all replicas at once 98 return nil 99 } 100 101 } 102 } 103 } 104 } 105 106 return nil 107 }, 108 }) 109 }