k8s.io/kubernetes@v1.29.3/pkg/kubelet/prober/worker_test.go

/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package prober

import (
	"context"
	"fmt"
	"os"
	"testing"
	"time"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/util/wait"
	"k8s.io/client-go/kubernetes/fake"
	kubepod "k8s.io/kubernetes/pkg/kubelet/pod"
	"k8s.io/kubernetes/pkg/kubelet/prober/results"
	"k8s.io/kubernetes/pkg/kubelet/status"
	statustest "k8s.io/kubernetes/pkg/kubelet/status/testing"
	kubeletutil "k8s.io/kubernetes/pkg/kubelet/util"
	"k8s.io/kubernetes/pkg/probe"
)

func init() {
}

func TestDoProbe(t *testing.T) {
	m := newTestManager()

	for _, probeType := range [...]probeType{liveness, readiness, startup} {
		// Test statuses.
		runningStatus := getTestRunningStatusWithStarted(probeType != startup)
		pendingStatus := getTestRunningStatusWithStarted(probeType != startup)
		pendingStatus.ContainerStatuses[0].State.Running = nil
		terminatedStatus := getTestRunningStatusWithStarted(probeType != startup)
		terminatedStatus.ContainerStatuses[0].State.Running = nil
		terminatedStatus.ContainerStatuses[0].State.Terminated = &v1.ContainerStateTerminated{
			StartedAt: metav1.Now(),
		}
		otherStatus := getTestRunningStatusWithStarted(probeType != startup)
		otherStatus.ContainerStatuses[0].Name = "otherContainer"
		failedStatus := getTestRunningStatusWithStarted(probeType != startup)
		failedStatus.Phase = v1.PodFailed

		tests := []struct {
			probe                v1.Probe
			podStatus            *v1.PodStatus
			expectContinue       map[string]bool
			expectSet            bool
			expectedResult       results.Result
			setDeletionTimestamp bool
		}{
			{ // No status.
				expectContinue: map[string]bool{
					liveness.String():  true,
					readiness.String(): true,
					startup.String():   true,
				},
			},
			{ // Pod failed
				podStatus: &failedStatus,
			},
			{ // Pod deletion
				podStatus:            &runningStatus,
				setDeletionTimestamp: true,
				expectSet:            true,
				expectContinue: map[string]bool{
					readiness.String(): true,
				},
				expectedResult: results.Success,
			},
			{ // No container status
				podStatus: &otherStatus,
				expectContinue: map[string]bool{
					liveness.String():  true,
					readiness.String(): true,
					startup.String():   true,
				},
			},
			{ // Container waiting
				podStatus: &pendingStatus,
				expectContinue: map[string]bool{
					liveness.String():  true,
					readiness.String(): true,
					startup.String():   true,
				},
				expectSet:      true,
				expectedResult: results.Failure,
			},
			{ // Container terminated
				podStatus:      &terminatedStatus,
				expectSet:      true,
				expectedResult: results.Failure,
			},
			{ // Probe successful.
				podStatus: &runningStatus,
				expectContinue: map[string]bool{
					liveness.String():  true,
					readiness.String(): true,
					startup.String():   true,
				},
				expectSet:      true,
				expectedResult: results.Success,
			},
			{ // Initial delay passed
				podStatus: &runningStatus,
				probe: v1.Probe{
					InitialDelaySeconds: -100,
				},
				expectContinue: map[string]bool{
					liveness.String():  true,
					readiness.String(): true,
					startup.String():   true,
				},
				expectSet:      true,
				expectedResult: results.Success,
			},
		}

		for i, test := range tests {
			ctx := context.Background()
			w := newTestWorker(m, probeType, test.probe)
			if test.podStatus != nil {
				m.statusManager.SetPodStatus(w.pod, *test.podStatus)
			}
			if test.setDeletionTimestamp {
				now := metav1.Now()
				w.pod.ObjectMeta.DeletionTimestamp = &now
			}
			if c := w.doProbe(ctx); c != test.expectContinue[probeType.String()] {
				t.Errorf("[%s-%d] Expected continue to be %v but got %v", probeType, i, test.expectContinue[probeType.String()], c)
			}
			result, ok := resultsManager(m, probeType).Get(testContainerID)
			if ok != test.expectSet {
				t.Errorf("[%s-%d] Expected to have result: %v but got %v", probeType, i, test.expectSet, ok)
			}
			if result != test.expectedResult {
				t.Errorf("[%s-%d] Expected result: %v but got %v", probeType, i, test.expectedResult, result)
			}

			// Clean up.
			testRootDir := ""
			if tempDir, err := os.MkdirTemp("", "kubelet_test."); err != nil {
				t.Fatalf("can't make a temp rootdir: %v", err)
			} else {
				testRootDir = tempDir
			}
			m.statusManager = status.NewManager(&fake.Clientset{}, kubepod.NewBasicPodManager(), &statustest.FakePodDeletionSafetyProvider{}, kubeletutil.NewPodStartupLatencyTracker(), testRootDir)
			resultsManager(m, probeType).Remove(testContainerID)
		}
	}
}

func TestInitialDelay(t *testing.T) {
	ctx := context.Background()
	m := newTestManager()

	for _, probeType := range [...]probeType{liveness, readiness, startup} {
		w := newTestWorker(m, probeType, v1.Probe{
			InitialDelaySeconds: 10,
		})
		m.statusManager.SetPodStatus(w.pod, getTestRunningStatusWithStarted(probeType != startup))

		expectContinue(t, w, w.doProbe(ctx), "during initial delay")
		// Default value depends on probe, Success for liveness, Failure for readiness, Unknown for startup
		switch probeType {
		case liveness:
			expectResult(t, w, results.Success, "during initial delay")
		case readiness:
			expectResult(t, w, results.Failure, "during initial delay")
		case startup:
			expectResult(t, w, results.Unknown, "during initial delay")
		}

		// 100 seconds later...
		laterStatus := getTestRunningStatusWithStarted(probeType != startup)
		laterStatus.ContainerStatuses[0].State.Running.StartedAt.Time =
			time.Now().Add(-100 * time.Second)
		m.statusManager.SetPodStatus(w.pod, laterStatus)

		// Second call should succeed (already waited).
		expectContinue(t, w, w.doProbe(ctx), "after initial delay")
		expectResult(t, w, results.Success, "after initial delay")
	}
}

func TestFailureThreshold(t *testing.T) {
	ctx := context.Background()
	m := newTestManager()
	w := newTestWorker(m, readiness, v1.Probe{SuccessThreshold: 1, FailureThreshold: 3})
	m.statusManager.SetPodStatus(w.pod, getTestRunningStatus())

	for i := 0; i < 2; i++ {
		// First probe should succeed.
		m.prober.exec = fakeExecProber{probe.Success, nil}

		for j := 0; j < 3; j++ {
			msg := fmt.Sprintf("%d success (%d)", j+1, i)
			expectContinue(t, w, w.doProbe(ctx), msg)
			expectResult(t, w, results.Success, msg)
		}

		// Prober starts failing :(
		m.prober.exec = fakeExecProber{probe.Failure, nil}

		// Next 2 probes should still be "success".
		for j := 0; j < 2; j++ {
			msg := fmt.Sprintf("%d failing (%d)", j+1, i)
			expectContinue(t, w, w.doProbe(ctx), msg)
			expectResult(t, w, results.Success, msg)
		}

		// Third & following fail.
		for j := 0; j < 3; j++ {
			msg := fmt.Sprintf("%d failure (%d)", j+3, i)
			expectContinue(t, w, w.doProbe(ctx), msg)
			expectResult(t, w, results.Failure, msg)
		}
	}
}

func TestSuccessThreshold(t *testing.T) {
	ctx := context.Background()
	m := newTestManager()
	w := newTestWorker(m, readiness, v1.Probe{SuccessThreshold: 3, FailureThreshold: 1})
	m.statusManager.SetPodStatus(w.pod, getTestRunningStatus())

	// Start out failure.
	w.resultsManager.Set(testContainerID, results.Failure, &v1.Pod{})

	for i := 0; i < 2; i++ {
		// Probe defaults to Failure.
		for j := 0; j < 2; j++ {
			msg := fmt.Sprintf("%d success (%d)", j+1, i)
			expectContinue(t, w, w.doProbe(ctx), msg)
			expectResult(t, w, results.Failure, msg)
		}

		// Continuing success!
		for j := 0; j < 3; j++ {
			msg := fmt.Sprintf("%d success (%d)", j+3, i)
			expectContinue(t, w, w.doProbe(ctx), msg)
			expectResult(t, w, results.Success, msg)
		}

		// Prober flakes :(
		m.prober.exec = fakeExecProber{probe.Failure, nil}
		msg := fmt.Sprintf("1 failure (%d)", i)
		expectContinue(t, w, w.doProbe(ctx), msg)
		expectResult(t, w, results.Failure, msg)

		// Back to success.
		m.prober.exec = fakeExecProber{probe.Success, nil}
	}
}

func TestCleanUp(t *testing.T) {
	m := newTestManager()

	for _, probeType := range [...]probeType{liveness, readiness, startup} {
		key := probeKey{testPodUID, testContainerName, probeType}
		w := newTestWorker(m, probeType, v1.Probe{})
		m.statusManager.SetPodStatus(w.pod, getTestRunningStatusWithStarted(probeType != startup))
		go w.run()
		m.workers[key] = w

		// Wait for worker to run.
		condition := func() (bool, error) {
			ready, _ := resultsManager(m, probeType).Get(testContainerID)
			return ready == results.Success, nil
		}
		if ready, _ := condition(); !ready {
			if err := wait.Poll(100*time.Millisecond, wait.ForeverTestTimeout, condition); err != nil {
				t.Fatalf("[%s] Error waiting for worker ready: %v", probeType, err)
			}
		}

		for i := 0; i < 10; i++ {
			w.stop() // Stop should be callable multiple times without consequence.
		}
		if err := waitForWorkerExit(t, m, []probeKey{key}); err != nil {
			t.Fatalf("[%s] error waiting for worker exit: %v", probeType, err)
		}

		if _, ok := resultsManager(m, probeType).Get(testContainerID); ok {
			t.Errorf("[%s] Expected result to be cleared.", probeType)
		}
		if _, ok := m.workers[key]; ok {
			t.Errorf("[%s] Expected worker to be cleared.", probeType)
		}
	}
}

func expectResult(t *testing.T, w *worker, expectedResult results.Result, msg string) {
	result, ok := resultsManager(w.probeManager, w.probeType).Get(w.containerID)
	if !ok {
		t.Errorf("[%s - %s] Expected result to be set, but was not set", w.probeType, msg)
	} else if result != expectedResult {
		t.Errorf("[%s - %s] Expected result to be %v, but was %v",
			w.probeType, msg, expectedResult, result)
	}
}

func expectContinue(t *testing.T, w *worker, c bool, msg string) {
	if !c {
		t.Errorf("[%s - %s] Expected to continue, but did not", w.probeType, msg)
	}
}

func resultsManager(m *manager, probeType probeType) results.Manager {
	switch probeType {
	case readiness:
		return m.readinessManager
	case liveness:
		return m.livenessManager
	case startup:
		return m.startupManager
	}
	panic(fmt.Errorf("Unhandled case: %v", probeType))
}

func TestOnHoldOnLivenessOrStartupCheckFailure(t *testing.T) {
	ctx := context.Background()
	m := newTestManager()

	for _, probeType := range [...]probeType{liveness, startup} {
		w := newTestWorker(m, probeType, v1.Probe{SuccessThreshold: 1, FailureThreshold: 1})
		status := getTestRunningStatusWithStarted(probeType != startup)
		m.statusManager.SetPodStatus(w.pod, status)

		// First probe should fail.
		m.prober.exec = fakeExecProber{probe.Failure, nil}
		msg := "first probe"
		expectContinue(t, w, w.doProbe(ctx), msg)
		expectResult(t, w, results.Failure, msg)
		if !w.onHold {
			t.Errorf("Prober should be on hold due to %s check failure", probeType)
		}
		// Set fakeExecProber to return success. However, the result will remain
		// failure because the worker is on hold and won't probe.
		m.prober.exec = fakeExecProber{probe.Success, nil}
		msg = "while on hold"
		expectContinue(t, w, w.doProbe(ctx), msg)
		expectResult(t, w, results.Failure, msg)
		if !w.onHold {
			t.Errorf("Prober should be on hold due to %s check failure", probeType)
		}

		// Set a new container ID to lift the hold. The next probe will succeed.
		status.ContainerStatuses[0].ContainerID = "test://newCont_ID"
		m.statusManager.SetPodStatus(w.pod, status)
		msg = "hold lifted"
		expectContinue(t, w, w.doProbe(ctx), msg)
		expectResult(t, w, results.Success, msg)
		if w.onHold {
			t.Errorf("Prober should not be on hold anymore")
		}
	}
}

func TestResultRunOnLivenessCheckFailure(t *testing.T) {
	ctx := context.Background()
	m := newTestManager()
	w := newTestWorker(m, liveness, v1.Probe{SuccessThreshold: 1, FailureThreshold: 3})
	m.statusManager.SetPodStatus(w.pod, getTestRunningStatus())

	m.prober.exec = fakeExecProber{probe.Success, nil}
	msg := "initial probe success"
	expectContinue(t, w, w.doProbe(ctx), msg)
	expectResult(t, w, results.Success, msg)
	if w.resultRun != 1 {
		t.Errorf("Prober resultRun should be 1")
	}

	m.prober.exec = fakeExecProber{probe.Failure, nil}
	msg = "probe failure, result success"
	expectContinue(t, w, w.doProbe(ctx), msg)
	expectResult(t, w, results.Success, msg)
	if w.resultRun != 1 {
		t.Errorf("Prober resultRun should be 1")
	}

	m.prober.exec = fakeExecProber{probe.Failure, nil}
	msg = "2nd probe failure, result success"
	expectContinue(t, w, w.doProbe(ctx), msg)
	expectResult(t, w, results.Success, msg)
	if w.resultRun != 2 {
		t.Errorf("Prober resultRun should be 2")
	}

	// Exceeding FailureThreshold should cause resultRun to
	// reset to 0 so that the probe on the restarted pod
	// also gets FailureThreshold attempts to succeed.
	m.prober.exec = fakeExecProber{probe.Failure, nil}
	msg = "3rd probe failure, result failure"
	expectContinue(t, w, w.doProbe(ctx), msg)
	expectResult(t, w, results.Failure, msg)
	if w.resultRun != 0 {
		t.Errorf("Prober resultRun should be reset to 0")
	}
}

func TestResultRunOnStartupCheckFailure(t *testing.T) {
	ctx := context.Background()
	m := newTestManager()
	w := newTestWorker(m, startup, v1.Probe{SuccessThreshold: 1, FailureThreshold: 3})
	m.statusManager.SetPodStatus(w.pod, getTestRunningStatusWithStarted(false))

	// Below FailureThreshold leaves probe state unchanged
	// which is failed for startup at first.
	m.prober.exec = fakeExecProber{probe.Failure, nil}
	msg := "probe failure, result unknown"
	expectContinue(t, w, w.doProbe(ctx), msg)
	expectResult(t, w, results.Unknown, msg)
	if w.resultRun != 1 {
		t.Errorf("Prober resultRun should be 1")
	}

	m.prober.exec = fakeExecProber{probe.Failure, nil}
	msg = "2nd probe failure, result unknown"
	expectContinue(t, w, w.doProbe(ctx), msg)
	expectResult(t, w, results.Unknown, msg)
	if w.resultRun != 2 {
		t.Errorf("Prober resultRun should be 2")
	}

	// Exceeding FailureThreshold should cause resultRun to
	// reset to 0 so that the probe on the restarted pod
	// also gets FailureThreshold attempts to succeed.
	m.prober.exec = fakeExecProber{probe.Failure, nil}
	msg = "3rd probe failure, result failure"
	expectContinue(t, w, w.doProbe(ctx), msg)
	expectResult(t, w, results.Failure, msg)
	if w.resultRun != 0 {
		t.Errorf("Prober resultRun should be reset to 0")
	}
}

func TestLivenessProbeDisabledByStarted(t *testing.T) {
	ctx := context.Background()
	m := newTestManager()
	w := newTestWorker(m, liveness, v1.Probe{SuccessThreshold: 1, FailureThreshold: 1})
	m.statusManager.SetPodStatus(w.pod, getTestRunningStatusWithStarted(false))
	// livenessProbe fails, but is disabled
	m.prober.exec = fakeExecProber{probe.Failure, nil}
	msg := "Not started, probe failure, result success"
	expectContinue(t, w, w.doProbe(ctx), msg)
	expectResult(t, w, results.Success, msg)
	// setting started state
	m.statusManager.SetContainerStartup(w.pod.UID, w.containerID, true)
	// livenessProbe fails
	m.prober.exec = fakeExecProber{probe.Failure, nil}
	msg = "Started, probe failure, result failure"
	expectContinue(t, w, w.doProbe(ctx), msg)
	expectResult(t, w, results.Failure, msg)
}

func TestStartupProbeDisabledByStarted(t *testing.T) {
	ctx := context.Background()
	m := newTestManager()
	w := newTestWorker(m, startup, v1.Probe{SuccessThreshold: 1, FailureThreshold: 2})
	m.statusManager.SetPodStatus(w.pod, getTestRunningStatusWithStarted(false))
	// startupProbe fails < FailureThreshold, stays unknown
	m.prober.exec = fakeExecProber{probe.Failure, nil}
	msg := "Not started, probe failure, result unknown"
	expectContinue(t, w, w.doProbe(ctx), msg)
	expectResult(t, w, results.Unknown, msg)
	// startupProbe succeeds
	m.prober.exec = fakeExecProber{probe.Success, nil}
	msg = "Started, probe success, result success"
	expectContinue(t, w, w.doProbe(ctx), msg)
	expectResult(t, w, results.Success, msg)
	// setting started state
	m.statusManager.SetContainerStartup(w.pod.UID, w.containerID, true)
	// startupProbe fails, but is disabled
	m.prober.exec = fakeExecProber{probe.Failure, nil}
	msg = "Started, probe failure, result success"
	expectContinue(t, w, w.doProbe(ctx), msg)
	expectResult(t, w, results.Success, msg)
}