github.com/cilium/cilium@v1.16.2/operator/cmd/k8s_pod_controller.go (about)

     1  // SPDX-License-Identifier: Apache-2.0
     2  // Copyright Authors of Cilium
     3  
     4  package cmd
     5  
     6  import (
     7  	"context"
     8  	"fmt"
     9  	"sync"
    10  	"time"
    11  
    12  	"github.com/sirupsen/logrus"
    13  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    14  
    15  	operatorOption "github.com/cilium/cilium/operator/option"
    16  	"github.com/cilium/cilium/operator/watchers"
    17  	"github.com/cilium/cilium/pkg/controller"
    18  	k8sClient "github.com/cilium/cilium/pkg/k8s/client"
    19  	slim_corev1 "github.com/cilium/cilium/pkg/k8s/slim/k8s/api/core/v1"
    20  	"github.com/cilium/cilium/pkg/logging/logfields"
    21  )
    22  
    23  const (
    24  	minimalPodRestartInterval = 5 * time.Minute
    25  	unmanagedPodMinimalAge    = 30 * time.Second
    26  )
    27  
    28  var (
    29  	lastPodRestart = map[string]time.Time{}
    30  
    31  	restartUnmanagedPodsControllerGroup = controller.NewGroup("restart-unmanaged-pods")
    32  )
    33  
    34  func enableUnmanagedController(ctx context.Context, wg *sync.WaitGroup, clientset k8sClient.Clientset) {
    35  	// These functions will block until the resources are synced with k8s.
    36  	watchers.CiliumEndpointsInit(ctx, wg, clientset)
    37  	watchers.UnmanagedPodsInit(ctx, wg, clientset)
    38  
    39  	mgr := controller.NewManager()
    40  
    41  	wg.Add(1)
    42  	go func() {
    43  		defer wg.Done()
    44  		<-ctx.Done()
    45  		mgr.RemoveAllAndWait()
    46  	}()
    47  
    48  	mgr.UpdateController("restart-unmanaged-pods",
    49  		controller.ControllerParams{
    50  			Group:       restartUnmanagedPodsControllerGroup,
    51  			RunInterval: time.Duration(operatorOption.Config.UnmanagedPodWatcherInterval) * time.Second,
    52  			DoFunc: func(ctx context.Context) error {
    53  				for podName, lastRestart := range lastPodRestart {
    54  					if time.Since(lastRestart) > 2*minimalPodRestartInterval {
    55  						delete(lastPodRestart, podName)
    56  					}
    57  				}
    58  				for _, podItem := range watchers.UnmanagedPodStore.List() {
    59  					pod, ok := podItem.(*slim_corev1.Pod)
    60  					if !ok {
    61  						log.Errorf("unexpected type mapping: found %T, expected %T", pod, &slim_corev1.Pod{})
    62  						continue
    63  					}
    64  					if pod.Spec.HostNetwork {
    65  						continue
    66  					}
    67  					cep, exists, err := watchers.HasCE(pod.Namespace, pod.Name)
    68  					if err != nil {
    69  						log.WithError(err).WithField(logfields.EndpointID, fmt.Sprintf("%s/%s", pod.Namespace, pod.Name)).
    70  							Errorf("Unexpected error when getting CiliumEndpoint")
    71  						continue
    72  					}
    73  					podID := fmt.Sprintf("%s/%s", pod.Namespace, pod.Name)
    74  					if exists {
    75  						log.WithFields(logrus.Fields{
    76  							logfields.K8sPodName: podID,
    77  							logfields.Identity:   cep.Status.ID,
    78  						}).Debug("Found managed pod due to presence of a CEP")
    79  					} else {
    80  						log.WithField(logfields.K8sPodName, podID).Debugf("Found unmanaged pod")
    81  						if startTime := pod.Status.StartTime; startTime != nil {
    82  							if age := time.Since((*startTime).Time); age > unmanagedPodMinimalAge {
    83  								if lastRestart, ok := lastPodRestart[podID]; ok {
    84  									if timeSinceRestart := time.Since(lastRestart); timeSinceRestart < minimalPodRestartInterval {
    85  										log.WithField(logfields.K8sPodName, podID).
    86  											Debugf("Not restarting unmanaged pod, only %s since last restart", timeSinceRestart)
    87  										continue
    88  									}
    89  								}
    90  
    91  								log.WithField(logfields.K8sPodName, podID).Infof("Restarting unmanaged pod, started %s ago", age)
    92  								if err := clientset.CoreV1().Pods(pod.Namespace).Delete(ctx, pod.Name, metav1.DeleteOptions{}); err != nil {
    93  									log.WithError(err).WithField(logfields.K8sPodName, podID).Warning("Unable to restart pod")
    94  								} else {
    95  									lastPodRestart[podID] = time.Now()
    96  
    97  									// Delete a single pod per iteration to avoid killing all replicas at once
    98  									return nil
    99  								}
   100  
   101  							}
   102  						}
   103  					}
   104  				}
   105  
   106  				return nil
   107  			},
   108  		})
   109  }