github.com/juju/juju@v0.0.0-20240430160146-1752b71fcf00/caas/kubernetes/provider/resources/pod.go (about)

     1  // Copyright 2020 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  package resources
     5  
     6  import (
     7  	"context"
     8  	"fmt"
     9  	"time"
    10  
    11  	"github.com/juju/errors"
    12  	corev1 "k8s.io/api/core/v1"
    13  	k8serrors "k8s.io/apimachinery/pkg/api/errors"
    14  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    15  	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
    16  	"k8s.io/apimachinery/pkg/runtime"
    17  	types "k8s.io/apimachinery/pkg/types"
    18  	"k8s.io/client-go/kubernetes"
    19  
    20  	k8spod "github.com/juju/juju/caas/kubernetes/pod"
    21  	k8sconstants "github.com/juju/juju/caas/kubernetes/provider/constants"
    22  	"github.com/juju/juju/core/status"
    23  )
    24  
// Pod extends the k8s pod. It embeds corev1.Pod, so every field and method of
// the Kubernetes object is available directly on the wrapper, and adds the
// resource operations defined on *Pod in this file (Clone, ID, Apply, Get,
// Delete, Events and ComputeStatus).
type Pod struct {
	corev1.Pod
}
    29  
    30  // NewPod creates a new service resource.
    31  func NewPod(name string, namespace string, in *corev1.Pod) *Pod {
    32  	if in == nil {
    33  		in = &corev1.Pod{}
    34  	}
    35  	in.SetName(name)
    36  	in.SetNamespace(namespace)
    37  	return &Pod{*in}
    38  }
    39  
    40  // ListPods returns a list of Pods.
    41  func ListPods(ctx context.Context, client kubernetes.Interface, namespace string, opts metav1.ListOptions) ([]Pod, error) {
    42  	api := client.CoreV1().Pods(namespace)
    43  	var items []Pod
    44  	for {
    45  		res, err := api.List(ctx, opts)
    46  		if err != nil {
    47  			return nil, errors.Trace(err)
    48  		}
    49  		for _, v := range res.Items {
    50  			items = append(items, Pod{Pod: v})
    51  		}
    52  		if res.RemainingItemCount == nil || *res.RemainingItemCount == 0 {
    53  			break
    54  		}
    55  		opts.Continue = res.Continue
    56  	}
    57  	return items, nil
    58  }
    59  
    60  // Clone returns a copy of the resource.
    61  func (p *Pod) Clone() Resource {
    62  	clone := *p
    63  	return &clone
    64  }
    65  
    66  // ID returns a comparable ID for the Resource
    67  func (p *Pod) ID() ID {
    68  	return ID{"Pod", p.Name, p.Namespace}
    69  }
    70  
    71  // Apply patches the resource change.
    72  func (p *Pod) Apply(ctx context.Context, client kubernetes.Interface) error {
    73  	api := client.CoreV1().Pods(p.Namespace)
    74  	data, err := runtime.Encode(unstructured.UnstructuredJSONScheme, &p.Pod)
    75  	if err != nil {
    76  		return errors.Trace(err)
    77  	}
    78  	res, err := api.Patch(ctx, p.Name, types.StrategicMergePatchType, data, metav1.PatchOptions{
    79  		FieldManager: JujuFieldManager,
    80  	})
    81  	if k8serrors.IsNotFound(err) {
    82  		res, err = api.Create(ctx, &p.Pod, metav1.CreateOptions{
    83  			FieldManager: JujuFieldManager,
    84  		})
    85  	}
    86  	if k8serrors.IsConflict(err) {
    87  		return errors.Annotatef(errConflict, "pod %q", p.Name)
    88  	}
    89  	if err != nil {
    90  		return errors.Trace(err)
    91  	}
    92  	p.Pod = *res
    93  	return nil
    94  }
    95  
    96  // Get refreshes the resource.
    97  func (p *Pod) Get(ctx context.Context, client kubernetes.Interface) error {
    98  	api := client.CoreV1().Pods(p.Namespace)
    99  	res, err := api.Get(ctx, p.Name, metav1.GetOptions{})
   100  	if k8serrors.IsNotFound(err) {
   101  		return errors.NewNotFound(err, "k8s")
   102  	} else if err != nil {
   103  		return errors.Trace(err)
   104  	}
   105  	p.Pod = *res
   106  	return nil
   107  }
   108  
   109  // Delete removes the resource.
   110  func (p *Pod) Delete(ctx context.Context, client kubernetes.Interface) error {
   111  	api := client.CoreV1().Pods(p.Namespace)
   112  	err := api.Delete(ctx, p.Name, metav1.DeleteOptions{
   113  		PropagationPolicy: k8sconstants.DefaultPropagationPolicy(),
   114  	})
   115  	if k8serrors.IsNotFound(err) {
   116  		return nil
   117  	} else if err != nil {
   118  		return errors.Trace(err)
   119  	}
   120  	return nil
   121  }
   122  
   123  // Events emitted by the resource.
   124  func (p *Pod) Events(ctx context.Context, client kubernetes.Interface) ([]corev1.Event, error) {
   125  	return ListEventsForObject(ctx, client, p.Namespace, p.Name, "Pod")
   126  }
   127  
   128  // ComputeStatus returns a juju status for the resource.
   129  func (p *Pod) ComputeStatus(ctx context.Context, client kubernetes.Interface, now time.Time) (string, status.Status, time.Time, error) {
   130  	return PodToJujuStatus(p.Pod, now, func() ([]corev1.Event, error) { return p.Events(ctx, client) })
   131  }
   132  
// EventGetter is a callback that returns a list of Kubernetes events, or an
// error if they could not be obtained. PodToJujuStatus accepts one, and
// ComputeStatus supplies one that lists the events of its pod.
type EventGetter func() ([]corev1.Event, error)
   134  
// Reason strings that the status-mapping code in this file compares against
// the Reason field of pod conditions and of waiting/terminated container
// states.
// NOTE(review): the values presumably mirror the reasons reported by
// Kubernetes itself — confirm against the upstream sources before changing.
const (
	PodReasonCompleted                = "Completed"
	PodReasonContainerCreating        = "ContainerCreating"
	PodReasonContainersNotInitialized = "ContainersNotInitialized"
	PodReasonContainersNotReady       = "ContainersNotReady"
	PodReasonCrashLoopBackoff         = "CrashLoopBackOff"
	PodReasonError                    = "Error"
	PodReasonImagePull                = "ErrImagePull"
	PodReasonInitializing             = "PodInitializing"
)
   145  
// Lookup tables used by PodToJujuStatus to translate the reason of a pod
// condition that is false into a juju status. A reason missing from a table
// falls back to the default chosen by PodToJujuStatus for that condition.
var (
	// podContainersReadyReasonsMap holds the reasons recognised for the
	// ContainersReady condition.
	podContainersReadyReasonsMap = map[string]status.Status{
		PodReasonContainersNotReady: status.Maintenance,
	}

	// podInitializedReasonsMap holds the reasons recognised for the
	// Initialized condition.
	podInitializedReasonsMap = map[string]status.Status{
		PodReasonContainersNotInitialized: status.Maintenance,
	}

	// podReadyReasonMap holds the reasons recognised for the Ready condition.
	podReadyReasonMap = map[string]status.Status{
		PodReasonContainersNotReady:       status.Maintenance,
		PodReasonContainersNotInitialized: status.Maintenance,
	}

	// podScheduledReasonsMap holds the reasons recognised for the
	// PodScheduled condition.
	podScheduledReasonsMap = map[string]status.Status{
		corev1.PodReasonUnschedulable: status.Blocked,
	}
)
   164  
   165  // PodToJujuStatus takes a Kubernetes pod and translates it to a known Juju
   166  // status. If this function can't determine the reason for a pod's state either
   167  // a status of error or unknown is returned. Function returns the status message,
   168  // juju status, the time of the status event and any errors that occurred.
   169  func PodToJujuStatus(
   170  	pod corev1.Pod,
   171  	now time.Time,
   172  	events EventGetter,
   173  ) (string, status.Status, time.Time, error) {
   174  	since := now
   175  	defaultStatusMessage := pod.Status.Message
   176  
   177  	if pod.DeletionTimestamp != nil {
   178  		return defaultStatusMessage, status.Terminated, since, nil
   179  	}
   180  
   181  	// conditionHandler tries to handle the state of the supplied condition.
   182  	// if the condition status is true true is returned from this function.
   183  	// Otherwise if the condition is unknown or false the function attempts to
   184  	// map the condition reason onto a known juju status
   185  	conditionHandler := func(
   186  		pc *corev1.PodCondition,
   187  		reasonMapper func(reason string) status.Status,
   188  	) (bool, status.Status, string) {
   189  		if pc.Status == corev1.ConditionTrue {
   190  			return true, "", ""
   191  		} else if pc.Status == corev1.ConditionUnknown {
   192  			return false, status.Unknown, pc.Message
   193  		}
   194  		return false, reasonMapper(pc.Reason), pc.Message
   195  	}
   196  
   197  	// reasonMapper takes a mapping of Kubernetes pod reasons to juju statuses.
   198  	// If no reason is found in the map the default reason supplied is returned
   199  	reasonMapper := func(
   200  		reasons map[string]status.Status,
   201  		def status.Status) func(string) status.Status {
   202  		return func(r string) status.Status {
   203  			if stat, ok := reasons[r]; ok {
   204  				return stat
   205  			}
   206  			return def
   207  		}
   208  	}
   209  
   210  	// Start by processing the pod conditions in their lifecycle order
   211  	// Has the pod been scheduled?
   212  	_, cond := k8spod.GetPodCondition(&pod.Status, corev1.PodScheduled)
   213  	if cond == nil {
   214  		// Doesn't have scheduling information. Should not get here.
   215  		return defaultStatusMessage, status.Unknown, since, nil
   216  	} else if r, s, m := conditionHandler(cond, reasonMapper(podScheduledReasonsMap, status.Allocating)); !r {
   217  		return m, s, cond.LastProbeTime.Time, nil
   218  	}
   219  
   220  	// Have the init containers run?
   221  	if _, cond := k8spod.GetPodCondition(&pod.Status, corev1.PodInitialized); cond != nil {
   222  		r, s, m := conditionHandler(cond, reasonMapper(podInitializedReasonsMap, status.Maintenance))
   223  		if errM, isErr := interrogatePodContainerStatus(pod.Status.InitContainerStatuses); !r && isErr {
   224  			return errM, status.Error, cond.LastProbeTime.Time, nil
   225  		} else if !r {
   226  			return m, s, cond.LastProbeTime.Time, nil
   227  		}
   228  	}
   229  
   230  	// Have the containers started/finished?
   231  	_, cond = k8spod.GetPodCondition(&pod.Status, corev1.ContainersReady)
   232  	if cond == nil {
   233  		return defaultStatusMessage, status.Unknown, since, nil
   234  	} else if r, s, m := conditionHandler(
   235  		cond, reasonMapper(podContainersReadyReasonsMap, status.Maintenance)); !r {
   236  		if errM, isErr := interrogatePodContainerStatus(pod.Status.ContainerStatuses); isErr {
   237  			return errM, status.Error, cond.LastProbeTime.Time, nil
   238  		}
   239  		return m, s, cond.LastProbeTime.Time, nil
   240  	}
   241  
   242  	// Made it this far are we ready?
   243  	_, cond = k8spod.GetPodCondition(&pod.Status, corev1.PodReady)
   244  	if cond == nil {
   245  		return defaultStatusMessage, status.Unknown, since, nil
   246  	} else if r, s, m := conditionHandler(
   247  		cond, reasonMapper(podReadyReasonMap, status.Maintenance)); !r {
   248  		return m, s, cond.LastProbeTime.Time, nil
   249  	} else if r {
   250  		return "", status.Running, since, nil
   251  	}
   252  
   253  	// If we have made it this far then something is very wrong in the state
   254  	// of the pod.
   255  
   256  	// If we can't find a status message lets take a look at the event log
   257  	if defaultStatusMessage == "" {
   258  		eventList, err := events()
   259  		if err != nil {
   260  			return "", "", time.Time{}, errors.Trace(err)
   261  		}
   262  
   263  		if count := len(eventList); count > 0 {
   264  			defaultStatusMessage = eventList[count-1].Message
   265  		}
   266  	}
   267  	return defaultStatusMessage, status.Unknown, since, nil
   268  }
   269  
   270  // interrogatePodContainerStatus combs a set of container statuses. If a
   271  // container is found to be in an error state, its error message and true are
   272  // returned, Otherwise an empty message and false.
   273  func interrogatePodContainerStatus(containers []corev1.ContainerStatus) (string, bool) {
   274  	for _, c := range containers {
   275  		if c.State.Running != nil {
   276  			continue
   277  		}
   278  
   279  		if c.State.Waiting != nil {
   280  			m, isError := isContainerReasonError(c.State.Waiting.Reason)
   281  			if isError {
   282  				m = fmt.Sprintf("%s: %s", m, c.State.Waiting.Message)
   283  			}
   284  			return m, isError
   285  		}
   286  
   287  		if c.State.Terminated != nil {
   288  			m, isError := isContainerReasonError(c.State.Terminated.Reason)
   289  			if isError {
   290  				m = fmt.Sprintf("%s: %s", m, c.State.Terminated.Message)
   291  			}
   292  			return m, isError
   293  		}
   294  	}
   295  	return "", false
   296  }
   297  
   298  // isContainerReasonError decides if a reason on a container status is
   299  // considered to be an error. If an error is found on the reason then a
   300  // description of the error is returned with true. Otherwise an empty
   301  // description and false.
   302  func isContainerReasonError(reason string) (string, bool) {
   303  	switch reason {
   304  	case PodReasonContainerCreating:
   305  		return "creating pod container(s)", false
   306  	case PodReasonError:
   307  		return "container error", true
   308  	case PodReasonImagePull:
   309  		return "OCI image pull error", true
   310  	case PodReasonCrashLoopBackoff:
   311  		return "crash loop backoff", true
   312  	case PodReasonCompleted:
   313  		return "", false
   314  	case PodReasonInitializing:
   315  		return "pod initializing", false
   316  	default:
   317  		return fmt.Sprintf("unknown container reason %q", reason), true
   318  	}
   319  }