k8s.io/kubernetes@v1.29.3/pkg/kubelet/util/pod_startup_latency_tracker_test.go

/*
Copyright 2022 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package util

import (
	"strings"
	"testing"
	"time"

	"github.com/stretchr/testify/assert"
	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/types"

	"k8s.io/component-base/metrics/testutil"
	"k8s.io/kubernetes/pkg/kubelet/metrics"
	testingclock "k8s.io/utils/clock/testing"
)

var frozenTime = time.Date(2020, 1, 1, 0, 0, 0, 0, time.UTC)

const (
	uid         = "3c1df8a9-11a8-4791-aeae-184c18cca686"
	metricsName = "kubelet_pod_start_sli_duration_seconds"
)

func TestNoEvents(t *testing.T) {
	t.Run("metrics registered; no incoming events", func(t *testing.T) {

		// expects no metrics in the output
		wants := ""

		metrics.Register()

		tracker := &basicPodStartupLatencyTracker{
			pods: map[types.UID]*perPodState{},
		}

		if err := testutil.GatherAndCompare(metrics.GetGather(), strings.NewReader(wants), metricsName); err != nil {
			t.Fatal(err)
		}

		assert.Empty(t, tracker.pods)
		metrics.PodStartSLIDuration.Reset()
	})
}

func TestPodsRunningBeforeKubeletStarted(t *testing.T) {
	t.Run("pod was running for 10m before kubelet started", func(t *testing.T) {

		// expects no metrics in the output
		wants := ""

		metrics.Register()

		tracker := &basicPodStartupLatencyTracker{
			pods: map[types.UID]*perPodState{},
		}

		if err := testutil.GatherAndCompare(metrics.GetGather(), strings.NewReader(wants), metricsName); err != nil {
			t.Fatal(err)
		}

		podStarted := &corev1.Pod{
			Status: corev1.PodStatus{
				StartTime: &metav1.Time{Time: frozenTime.Add(-10 * time.Minute)},
			},
		}
		tracker.ObservedPodOnWatch(podStarted, frozenTime)
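		// The pod's StartTime is 10 minutes before the kubelet's first observation,
		// so the tracker treats it as already running: no per-pod state is kept and
		// no SLI metric is recorded, as the assertions below verify.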

		assert.Empty(t, tracker.pods)
		metrics.PodStartSLIDuration.Reset()
	})
}

func TestSinglePodOneImageDownloadRecorded(t *testing.T) {

	t.Run("single pod; started in 3s, image pulling 100ms", func(t *testing.T) {

		wants := `
# HELP kubelet_pod_start_sli_duration_seconds [ALPHA] Duration in seconds to start a pod, excluding time to pull images and run init containers, measured from pod creation timestamp to when all its containers are reported as started and observed via watch
# TYPE kubelet_pod_start_sli_duration_seconds histogram
kubelet_pod_start_sli_duration_seconds_bucket{le="0.5"} 0
kubelet_pod_start_sli_duration_seconds_bucket{le="1"} 0
kubelet_pod_start_sli_duration_seconds_bucket{le="2"} 0
kubelet_pod_start_sli_duration_seconds_bucket{le="3"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="4"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="5"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="6"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="8"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="10"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="20"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="30"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="45"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="60"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="120"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="180"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="240"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="300"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="360"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="480"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="600"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="900"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="1200"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="1800"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="2700"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="3600"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="+Inf"} 1
kubelet_pod_start_sli_duration_seconds_sum 2.9
kubelet_pod_start_sli_duration_seconds_count 1
		`
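		// Expected SLI duration: the pod is observed running 3s after its creation
		// timestamp, and the single 100ms image pull is excluded, so the recorded
		// value is 3s - 0.1s = 2.9s (hence the sum of 2.9 and the first non-zero
		// bucket at le="3").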

		fakeClock := testingclock.NewFakeClock(frozenTime)

		metrics.Register()

		tracker := &basicPodStartupLatencyTracker{
			pods:  map[types.UID]*perPodState{},
			clock: fakeClock,
		}

		podInit := buildInitializingPod()
		tracker.ObservedPodOnWatch(podInit, frozenTime)

		// image pulling took 100ms
		tracker.RecordImageStartedPulling(podInit.UID)
		fakeClock.Step(time.Millisecond * 100)
		tracker.RecordImageFinishedPulling(podInit.UID)

		podState, ok := tracker.pods[podInit.UID]
		if !ok {
			t.Errorf("expected to track pod: %s, but pod not found", podInit.UID)
		}
		if !podState.lastFinishedPulling.Equal(podState.firstStartedPulling.Add(time.Millisecond * 100)) {
			t.Errorf("expected pod firstStartedPulling: %s and lastFinishedPulling: %s but got firstStartedPulling: %s and lastFinishedPulling: %s",
				podState.firstStartedPulling, podState.firstStartedPulling.Add(time.Millisecond*100), podState.firstStartedPulling, podState.lastFinishedPulling)
		}

		podStarted := buildRunningPod()
		tracker.RecordStatusUpdated(podStarted)

		// 3s later, observe the same pod on watch
		tracker.ObservedPodOnWatch(podStarted, frozenTime.Add(time.Second*3))

		if err := testutil.GatherAndCompare(metrics.GetGather(), strings.NewReader(wants), metricsName); err != nil {
			t.Fatal(err)
		}

		// cleanup
		tracker.DeletePodStartupState(podStarted.UID)

		assert.Empty(t, tracker.pods)
		metrics.PodStartSLIDuration.Reset()
	})
}

func TestSinglePodMultipleDownloadsAndRestartsRecorded(t *testing.T) {

	t.Run("single pod; started in 30s, image pulling between 10th and 20th seconds", func(t *testing.T) {

		wants := `
# HELP kubelet_pod_start_sli_duration_seconds [ALPHA] Duration in seconds to start a pod, excluding time to pull images and run init containers, measured from pod creation timestamp to when all its containers are reported as started and observed via watch
# TYPE kubelet_pod_start_sli_duration_seconds histogram
kubelet_pod_start_sli_duration_seconds_bucket{le="0.5"} 0
kubelet_pod_start_sli_duration_seconds_bucket{le="1"} 0
kubelet_pod_start_sli_duration_seconds_bucket{le="2"} 0
kubelet_pod_start_sli_duration_seconds_bucket{le="3"} 0
kubelet_pod_start_sli_duration_seconds_bucket{le="4"} 0
kubelet_pod_start_sli_duration_seconds_bucket{le="5"} 0
kubelet_pod_start_sli_duration_seconds_bucket{le="6"} 0
kubelet_pod_start_sli_duration_seconds_bucket{le="8"} 0
kubelet_pod_start_sli_duration_seconds_bucket{le="10"} 0
kubelet_pod_start_sli_duration_seconds_bucket{le="20"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="30"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="45"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="60"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="120"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="180"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="240"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="300"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="360"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="480"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="600"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="900"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="1200"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="1800"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="2700"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="3600"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="+Inf"} 1
kubelet_pod_start_sli_duration_seconds_sum 20
kubelet_pod_start_sli_duration_seconds_count 1
`
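		// Expected SLI duration: the pod is observed running 30s after its creation
		// timestamp, and the image pull window from 10s to 20s (10s total) is
		// excluded, so the recorded value is 30s - 10s = 20s (hence the sum of 20
		// and the first non-zero bucket at le="20").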

		fakeClock := testingclock.NewFakeClock(frozenTime)

		metrics.Register()

		tracker := &basicPodStartupLatencyTracker{
			pods:  map[types.UID]*perPodState{},
			clock: fakeClock,
		}

		podInitializing := buildInitializingPod()
		tracker.ObservedPodOnWatch(podInitializing, frozenTime)

		// image pulling started at 10s and the last one finished at 20s
		// first image starts pulling at 10s
		fakeClock.SetTime(frozenTime.Add(time.Second * 10))
		tracker.RecordImageStartedPulling(podInitializing.UID)
		// second image starts pulling at 11s
		fakeClock.SetTime(frozenTime.Add(time.Second * 11))
		tracker.RecordImageStartedPulling(podInitializing.UID)
		// third image starts pulling at 14s
		fakeClock.SetTime(frozenTime.Add(time.Second * 14))
		tracker.RecordImageStartedPulling(podInitializing.UID)
		// first image finished pulling at 18s
		fakeClock.SetTime(frozenTime.Add(time.Second * 18))
		tracker.RecordImageFinishedPulling(podInitializing.UID)
		// second and third finished pulling at 20s
		fakeClock.SetTime(frozenTime.Add(time.Second * 20))
		tracker.RecordImageFinishedPulling(podInitializing.UID)
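		// Only the earliest RecordImageStartedPulling and the latest
		// RecordImageFinishedPulling matter for the SLI: the tracked pull window is
		// [10s, 20s], as the checks below confirm.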

		podState, ok := tracker.pods[podInitializing.UID]
		if !ok {
			t.Errorf("expected to track pod: %s, but pod not found", podInitializing.UID)
		}
		if !podState.firstStartedPulling.Equal(frozenTime.Add(time.Second * 10)) { // second and third image start pulling should not affect pod firstStartedPulling
			t.Errorf("expected pod firstStartedPulling: %s but got firstStartedPulling: %s",
				frozenTime.Add(time.Second*10), podState.firstStartedPulling)
		}
		if !podState.lastFinishedPulling.Equal(frozenTime.Add(time.Second * 20)) { // should be updated when the pod's last image finished pulling
			t.Errorf("expected pod lastFinishedPulling: %s but got lastFinishedPulling: %s",
				frozenTime.Add(time.Second*20), podState.lastFinishedPulling)
		}

		// pod started
		podStarted := buildRunningPod()
		tracker.RecordStatusUpdated(podStarted)

		// at 30s observe the same pod on watch
		tracker.ObservedPodOnWatch(podStarted, frozenTime.Add(time.Second*30))

		if err := testutil.GatherAndCompare(metrics.GetGather(), strings.NewReader(wants), metricsName); err != nil {
			t.Fatal(err)
		}

		// any new pod observations should not impact the metrics, as the pod should be recorded only once
		tracker.ObservedPodOnWatch(podStarted, frozenTime.Add(time.Second*150))
		tracker.ObservedPodOnWatch(podStarted, frozenTime.Add(time.Second*200))
		tracker.ObservedPodOnWatch(podStarted, frozenTime.Add(time.Second*250))

		if err := testutil.GatherAndCompare(metrics.GetGather(), strings.NewReader(wants), metricsName); err != nil {
			t.Fatal(err)
		}

		// cleanup
		tracker.DeletePodStartupState(podStarted.UID)

		assert.Empty(t, tracker.pods)
		metrics.PodStartSLIDuration.Reset()
	})
}

func buildInitializingPod() *corev1.Pod {
	return buildPodWithStatus([]corev1.ContainerStatus{
		{State: corev1.ContainerState{Waiting: &corev1.ContainerStateWaiting{Reason: "PodInitializing"}}},
	})
}

func buildRunningPod() *corev1.Pod {
	return buildPodWithStatus([]corev1.ContainerStatus{
		{State: corev1.ContainerState{Running: &corev1.ContainerStateRunning{StartedAt: metav1.NewTime(frozenTime)}}},
	})
}

func buildPodWithStatus(cs []corev1.ContainerStatus) *corev1.Pod {
	return &corev1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			UID:               types.UID(uid),
			CreationTimestamp: metav1.NewTime(frozenTime),
		},
		Status: corev1.PodStatus{
			ContainerStatuses: cs,
		},
	}
}