k8s.io/kubernetes@v1.29.3/pkg/kubelet/util/pod_startup_latency_tracker_test.go

/*
Copyright 2022 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package util

import (
	"strings"
	"testing"
	"time"

	"github.com/stretchr/testify/assert"
	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/types"

	"k8s.io/component-base/metrics/testutil"
	"k8s.io/kubernetes/pkg/kubelet/metrics"
	testingclock "k8s.io/utils/clock/testing"
)

var frozenTime = time.Date(2020, 1, 1, 0, 0, 0, 0, time.UTC)

const (
	uid         = "3c1df8a9-11a8-4791-aeae-184c18cca686"
	metricsName = "kubelet_pod_start_sli_duration_seconds"
)

func TestNoEvents(t *testing.T) {
	t.Run("metrics registered; no incoming events", func(t *testing.T) {

		// expects no metrics in the output
		wants := ""

		metrics.Register()

		tracker := &basicPodStartupLatencyTracker{
			pods: map[types.UID]*perPodState{},
		}

		if err := testutil.GatherAndCompare(metrics.GetGather(), strings.NewReader(wants), metricsName); err != nil {
			t.Fatal(err)
		}

		assert.Empty(t, tracker.pods)
		metrics.PodStartSLIDuration.Reset()
	})
}

func TestPodsRunningBeforeKubeletStarted(t *testing.T) {
	t.Run("pod was running for 10m before kubelet started", func(t *testing.T) {

		// expects no metrics in the output
		wants := ""

		metrics.Register()

		tracker := &basicPodStartupLatencyTracker{
			pods: map[types.UID]*perPodState{},
		}

		if err := testutil.GatherAndCompare(metrics.GetGather(), strings.NewReader(wants), metricsName); err != nil {
			t.Fatal(err)
		}

		podStarted := &corev1.Pod{
			Status: corev1.PodStatus{
				StartTime: &metav1.Time{Time: frozenTime.Add(-10 * time.Minute)},
			},
		}
		tracker.ObservedPodOnWatch(podStarted, frozenTime)

		assert.Empty(t, tracker.pods)
		metrics.PodStartSLIDuration.Reset()
	})
}

func TestSinglePodOneImageDownloadRecorded(t *testing.T) {

	t.Run("single pod; started in 3s, image pulling 100ms", func(t *testing.T) {

		wants := `
# HELP kubelet_pod_start_sli_duration_seconds [ALPHA] Duration in seconds to start a pod, excluding time to pull images and run init containers, measured from pod creation timestamp to when all its containers are reported as started and observed via watch
# TYPE kubelet_pod_start_sli_duration_seconds histogram
kubelet_pod_start_sli_duration_seconds_bucket{le="0.5"} 0
kubelet_pod_start_sli_duration_seconds_bucket{le="1"} 0
kubelet_pod_start_sli_duration_seconds_bucket{le="2"} 0
kubelet_pod_start_sli_duration_seconds_bucket{le="3"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="4"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="5"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="6"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="8"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="10"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="20"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="30"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="45"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="60"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="120"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="180"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="240"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="300"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="360"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="480"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="600"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="900"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="1200"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="1800"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="2700"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="3600"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="+Inf"} 1
kubelet_pod_start_sli_duration_seconds_sum 2.9
kubelet_pod_start_sli_duration_seconds_count 1
`

		fakeClock := testingclock.NewFakeClock(frozenTime)

		metrics.Register()

		tracker := &basicPodStartupLatencyTracker{
			pods:  map[types.UID]*perPodState{},
			clock: fakeClock,
		}

		podInit := buildInitializingPod()
		tracker.ObservedPodOnWatch(podInit, frozenTime)

		// image pulling took 100ms
		tracker.RecordImageStartedPulling(podInit.UID)
		fakeClock.Step(time.Millisecond * 100)
		tracker.RecordImageFinishedPulling(podInit.UID)

		podState, ok := tracker.pods[podInit.UID]
		if !ok {
			t.Errorf("expected to track pod: %s, but pod not found", podInit.UID)
		}
		if !podState.lastFinishedPulling.Equal(podState.firstStartedPulling.Add(time.Millisecond * 100)) {
			t.Errorf("expected pod firstStartedPulling: %s and lastFinishedPulling: %s but got firstStartedPulling: %s and lastFinishedPulling: %s",
				podState.firstStartedPulling, podState.firstStartedPulling.Add(time.Millisecond*100), podState.firstStartedPulling, podState.lastFinishedPulling)
		}

		podStarted := buildRunningPod()
		tracker.RecordStatusUpdated(podStarted)

		// 3s later, observe the same pod on watch
		tracker.ObservedPodOnWatch(podStarted, frozenTime.Add(time.Second*3))

		if err := testutil.GatherAndCompare(metrics.GetGather(), strings.NewReader(wants), metricsName); err != nil {
			t.Fatal(err)
		}

		// cleanup
		tracker.DeletePodStartupState(podStarted.UID)

		assert.Empty(t, tracker.pods)
		metrics.PodStartSLIDuration.Reset()
	})
}

func TestSinglePodMultipleDownloadsAndRestartsRecorded(t *testing.T) {

	t.Run("single pod; started in 30s, image pulling between 10th and 20th seconds", func(t *testing.T) {

		wants := `
# HELP kubelet_pod_start_sli_duration_seconds [ALPHA] Duration in seconds to start a pod, excluding time to pull images and run init containers, measured from pod creation timestamp to when all its containers are reported as started and observed via watch
# TYPE kubelet_pod_start_sli_duration_seconds histogram
kubelet_pod_start_sli_duration_seconds_bucket{le="0.5"} 0
kubelet_pod_start_sli_duration_seconds_bucket{le="1"} 0
kubelet_pod_start_sli_duration_seconds_bucket{le="2"} 0
kubelet_pod_start_sli_duration_seconds_bucket{le="3"} 0
kubelet_pod_start_sli_duration_seconds_bucket{le="4"} 0
kubelet_pod_start_sli_duration_seconds_bucket{le="5"} 0
kubelet_pod_start_sli_duration_seconds_bucket{le="6"} 0
kubelet_pod_start_sli_duration_seconds_bucket{le="8"} 0
kubelet_pod_start_sli_duration_seconds_bucket{le="10"} 0
kubelet_pod_start_sli_duration_seconds_bucket{le="20"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="30"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="45"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="60"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="120"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="180"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="240"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="300"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="360"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="480"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="600"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="900"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="1200"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="1800"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="2700"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="3600"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="+Inf"} 1
kubelet_pod_start_sli_duration_seconds_sum 20
kubelet_pod_start_sli_duration_seconds_count 1
`

		fakeClock := testingclock.NewFakeClock(frozenTime)

		metrics.Register()

		tracker := &basicPodStartupLatencyTracker{
			pods:  map[types.UID]*perPodState{},
			clock: fakeClock,
		}

		podInitializing := buildInitializingPod()
		tracker.ObservedPodOnWatch(podInitializing, frozenTime)

		// image pulling started at 10s and the last one finished at 20s
		// first image starts pulling at 10s
		fakeClock.SetTime(frozenTime.Add(time.Second * 10))
		tracker.RecordImageStartedPulling(podInitializing.UID)
		// second image starts pulling at 11s
		fakeClock.SetTime(frozenTime.Add(time.Second * 11))
		tracker.RecordImageStartedPulling(podInitializing.UID)
		// third image starts pulling at 14s
		fakeClock.SetTime(frozenTime.Add(time.Second * 14))
		tracker.RecordImageStartedPulling(podInitializing.UID)
		// first image finished pulling at 18s
		fakeClock.SetTime(frozenTime.Add(time.Second * 18))
		tracker.RecordImageFinishedPulling(podInitializing.UID)
		// second and third finished pulling at 20s
		fakeClock.SetTime(frozenTime.Add(time.Second * 20))
		tracker.RecordImageFinishedPulling(podInitializing.UID)

		podState, ok := tracker.pods[podInitializing.UID]
		if !ok {
			t.Errorf("expected to track pod: %s, but pod not found", podInitializing.UID)
		}
		if !podState.firstStartedPulling.Equal(frozenTime.Add(time.Second * 10)) { // second and third image start pulling should not affect pod firstStartedPulling
			t.Errorf("expected pod firstStartedPulling: %s but got firstStartedPulling: %s",
				frozenTime.Add(time.Second*10), podState.firstStartedPulling)
		}
		if !podState.lastFinishedPulling.Equal(frozenTime.Add(time.Second * 20)) { // should be updated when the pod's last image finished pulling
			t.Errorf("expected pod lastFinishedPulling: %s but got lastFinishedPulling: %s",
				frozenTime.Add(time.Second*20), podState.lastFinishedPulling)
		}

		// pod started
		podStarted := buildRunningPod()
		tracker.RecordStatusUpdated(podStarted)

		// at 30s observe the same pod on watch
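		// the SLI duration excludes image pulling, so the expected value here is
		// 30s (creation to observed running) - 10s (pulling) = 20s, matching the sum of 20 in wants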
		tracker.ObservedPodOnWatch(podStarted, frozenTime.Add(time.Second*30))

		if err := testutil.GatherAndCompare(metrics.GetGather(), strings.NewReader(wants), metricsName); err != nil {
			t.Fatal(err)
		}

		// any new pod observations should not impact the metrics, as the pod should be recorded only once
		tracker.ObservedPodOnWatch(podStarted, frozenTime.Add(time.Second*150))
		tracker.ObservedPodOnWatch(podStarted, frozenTime.Add(time.Second*200))
		tracker.ObservedPodOnWatch(podStarted, frozenTime.Add(time.Second*250))

		if err := testutil.GatherAndCompare(metrics.GetGather(), strings.NewReader(wants), metricsName); err != nil {
			t.Fatal(err)
		}

		// cleanup
		tracker.DeletePodStartupState(podStarted.UID)

		assert.Empty(t, tracker.pods)
		metrics.PodStartSLIDuration.Reset()
	})
}

func buildInitializingPod() *corev1.Pod {
	return buildPodWithStatus([]corev1.ContainerStatus{
		{State: corev1.ContainerState{Waiting: &corev1.ContainerStateWaiting{Reason: "PodInitializing"}}},
	})
}

func buildRunningPod() *corev1.Pod {
	return buildPodWithStatus([]corev1.ContainerStatus{
		{State: corev1.ContainerState{Running: &corev1.ContainerStateRunning{StartedAt: metav1.NewTime(frozenTime)}}},
	})
}

func buildPodWithStatus(cs []corev1.ContainerStatus) *corev1.Pod {
	return &corev1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			UID:               types.UID(uid),
			CreationTimestamp: metav1.NewTime(frozenTime),
		},
		Status: corev1.PodStatus{
			ContainerStatuses: cs,
		},
	}
}