k8s.io/kubernetes@v1.29.3/pkg/kubelet/prober/worker_test.go

/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package prober

import (
	"context"
	"fmt"
	"os"
	"testing"
	"time"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/util/wait"
	"k8s.io/client-go/kubernetes/fake"
	kubepod "k8s.io/kubernetes/pkg/kubelet/pod"
	"k8s.io/kubernetes/pkg/kubelet/prober/results"
	"k8s.io/kubernetes/pkg/kubelet/status"
	statustest "k8s.io/kubernetes/pkg/kubelet/status/testing"
	kubeletutil "k8s.io/kubernetes/pkg/kubelet/util"
	"k8s.io/kubernetes/pkg/probe"
)

func init() {
}

func TestDoProbe(t *testing.T) {
	m := newTestManager()

	for _, probeType := range [...]probeType{liveness, readiness, startup} {
		// Test statuses.
		runningStatus := getTestRunningStatusWithStarted(probeType != startup)
		pendingStatus := getTestRunningStatusWithStarted(probeType != startup)
		pendingStatus.ContainerStatuses[0].State.Running = nil
		terminatedStatus := getTestRunningStatusWithStarted(probeType != startup)
		terminatedStatus.ContainerStatuses[0].State.Running = nil
		terminatedStatus.ContainerStatuses[0].State.Terminated = &v1.ContainerStateTerminated{
			StartedAt: metav1.Now(),
		}
		otherStatus := getTestRunningStatusWithStarted(probeType != startup)
		otherStatus.ContainerStatuses[0].Name = "otherContainer"
		failedStatus := getTestRunningStatusWithStarted(probeType != startup)
		failedStatus.Phase = v1.PodFailed

		tests := []struct {
			probe                v1.Probe
			podStatus            *v1.PodStatus
			expectContinue       map[string]bool
			expectSet            bool
			expectedResult       results.Result
			setDeletionTimestamp bool
		}{
			{ // No status.
				expectContinue: map[string]bool{
					liveness.String():  true,
					readiness.String(): true,
					startup.String():   true,
				},
			},
			{ // Pod failed
				podStatus: &failedStatus,
			},
			{ // Pod deletion
				podStatus:            &runningStatus,
				setDeletionTimestamp: true,
				expectSet:            true,
				expectContinue: map[string]bool{
					readiness.String(): true,
				},
				expectedResult: results.Success,
			},
			{ // No container status
				podStatus: &otherStatus,
				expectContinue: map[string]bool{
					liveness.String():  true,
					readiness.String(): true,
					startup.String():   true,
				},
			},
			{ // Container waiting
				podStatus: &pendingStatus,
				expectContinue: map[string]bool{
					liveness.String():  true,
					readiness.String(): true,
					startup.String():   true,
				},
				expectSet:      true,
				expectedResult: results.Failure,
			},
			{ // Container terminated
				podStatus:      &terminatedStatus,
				expectSet:      true,
				expectedResult: results.Failure,
			},
			{ // Probe successful.
				podStatus: &runningStatus,
				expectContinue: map[string]bool{
					liveness.String():  true,
					readiness.String(): true,
					startup.String():   true,
				},
				expectSet:      true,
				expectedResult: results.Success,
			},
			{ // Initial delay passed
				podStatus: &runningStatus,
				probe: v1.Probe{
					InitialDelaySeconds: -100,
				},
				expectContinue: map[string]bool{
					liveness.String():  true,
					readiness.String(): true,
					startup.String():   true,
				},
				expectSet:      true,
				expectedResult: results.Success,
			},
		}

		for i, test := range tests {
			ctx := context.Background()
			w := newTestWorker(m, probeType, test.probe)
			if test.podStatus != nil {
				m.statusManager.SetPodStatus(w.pod, *test.podStatus)
			}
			if test.setDeletionTimestamp {
				now := metav1.Now()
				w.pod.ObjectMeta.DeletionTimestamp = &now
			}
			if c := w.doProbe(ctx); c != test.expectContinue[probeType.String()] {
				t.Errorf("[%s-%d] Expected continue to be %v but got %v", probeType, i, test.expectContinue[probeType.String()], c)
			}
			result, ok := resultsManager(m, probeType).Get(testContainerID)
			if ok != test.expectSet {
				t.Errorf("[%s-%d] Expected to have result: %v but got %v", probeType, i, test.expectSet, ok)
			}
			if result != test.expectedResult {
				t.Errorf("[%s-%d] Expected result: %v but got %v", probeType, i, test.expectedResult, result)
			}

			// Clean up.
			testRootDir := ""
			if tempDir, err := os.MkdirTemp("", "kubelet_test."); err != nil {
				t.Fatalf("can't make a temp rootdir: %v", err)
			} else {
				testRootDir = tempDir
			}
			m.statusManager = status.NewManager(&fake.Clientset{}, kubepod.NewBasicPodManager(), &statustest.FakePodDeletionSafetyProvider{}, kubeletutil.NewPodStartupLatencyTracker(), testRootDir)
			resultsManager(m, probeType).Remove(testContainerID)
		}
	}
}

func TestInitialDelay(t *testing.T) {
	ctx := context.Background()
	m := newTestManager()

	for _, probeType := range [...]probeType{liveness, readiness, startup} {
		w := newTestWorker(m, probeType, v1.Probe{
			InitialDelaySeconds: 10,
		})
		m.statusManager.SetPodStatus(w.pod, getTestRunningStatusWithStarted(probeType != startup))

		expectContinue(t, w, w.doProbe(ctx), "during initial delay")
		// Default value depends on probe, Success for liveness, Failure for readiness, Unknown for startup
		switch probeType {
		case liveness:
			expectResult(t, w, results.Success, "during initial delay")
		case readiness:
			expectResult(t, w, results.Failure, "during initial delay")
		case startup:
			expectResult(t, w, results.Unknown, "during initial delay")
		}

		// 100 seconds later...
		laterStatus := getTestRunningStatusWithStarted(probeType != startup)
		laterStatus.ContainerStatuses[0].State.Running.StartedAt.Time =
			time.Now().Add(-100 * time.Second)
		m.statusManager.SetPodStatus(w.pod, laterStatus)

		// Second call should succeed (already waited).
		expectContinue(t, w, w.doProbe(ctx), "after initial delay")
		expectResult(t, w, results.Success, "after initial delay")
	}
}

func TestFailureThreshold(t *testing.T) {
	ctx := context.Background()
	m := newTestManager()
	w := newTestWorker(m, readiness, v1.Probe{SuccessThreshold: 1, FailureThreshold: 3})
	m.statusManager.SetPodStatus(w.pod, getTestRunningStatus())

	for i := 0; i < 2; i++ {
		// First probe should succeed.
		m.prober.exec = fakeExecProber{probe.Success, nil}

		for j := 0; j < 3; j++ {
			msg := fmt.Sprintf("%d success (%d)", j+1, i)
			expectContinue(t, w, w.doProbe(ctx), msg)
			expectResult(t, w, results.Success, msg)
		}

		// Prober starts failing :(
		m.prober.exec = fakeExecProber{probe.Failure, nil}

		// Next 2 probes should still be "success".
		for j := 0; j < 2; j++ {
			msg := fmt.Sprintf("%d failing (%d)", j+1, i)
			expectContinue(t, w, w.doProbe(ctx), msg)
			expectResult(t, w, results.Success, msg)
		}

		// Third & following fail.
		for j := 0; j < 3; j++ {
			msg := fmt.Sprintf("%d failure (%d)", j+3, i)
			expectContinue(t, w, w.doProbe(ctx), msg)
			expectResult(t, w, results.Failure, msg)
		}
	}
}

func TestSuccessThreshold(t *testing.T) {
	ctx := context.Background()
	m := newTestManager()
	w := newTestWorker(m, readiness, v1.Probe{SuccessThreshold: 3, FailureThreshold: 1})
	m.statusManager.SetPodStatus(w.pod, getTestRunningStatus())

	// Start out failure.
	w.resultsManager.Set(testContainerID, results.Failure, &v1.Pod{})

	for i := 0; i < 2; i++ {
		// Probe defaults to Failure.
		for j := 0; j < 2; j++ {
			msg := fmt.Sprintf("%d success (%d)", j+1, i)
			expectContinue(t, w, w.doProbe(ctx), msg)
			expectResult(t, w, results.Failure, msg)
		}

		// Continuing success!
		for j := 0; j < 3; j++ {
			msg := fmt.Sprintf("%d success (%d)", j+3, i)
			expectContinue(t, w, w.doProbe(ctx), msg)
			expectResult(t, w, results.Success, msg)
		}

		// Prober flakes :(
		m.prober.exec = fakeExecProber{probe.Failure, nil}
		msg := fmt.Sprintf("1 failure (%d)", i)
		expectContinue(t, w, w.doProbe(ctx), msg)
		expectResult(t, w, results.Failure, msg)

		// Back to success.
		m.prober.exec = fakeExecProber{probe.Success, nil}
	}
}

func TestCleanUp(t *testing.T) {
	m := newTestManager()

	for _, probeType := range [...]probeType{liveness, readiness, startup} {
		key := probeKey{testPodUID, testContainerName, probeType}
		w := newTestWorker(m, probeType, v1.Probe{})
		m.statusManager.SetPodStatus(w.pod, getTestRunningStatusWithStarted(probeType != startup))
		go w.run()
		m.workers[key] = w

		// Wait for worker to run.
		condition := func() (bool, error) {
			ready, _ := resultsManager(m, probeType).Get(testContainerID)
			return ready == results.Success, nil
		}
		if ready, _ := condition(); !ready {
			if err := wait.Poll(100*time.Millisecond, wait.ForeverTestTimeout, condition); err != nil {
				t.Fatalf("[%s] Error waiting for worker ready: %v", probeType, err)
			}
		}

		for i := 0; i < 10; i++ {
			w.stop() // Stop should be callable multiple times without consequence.
		}
		if err := waitForWorkerExit(t, m, []probeKey{key}); err != nil {
			t.Fatalf("[%s] error waiting for worker exit: %v", probeType, err)
		}

		if _, ok := resultsManager(m, probeType).Get(testContainerID); ok {
			t.Errorf("[%s] Expected result to be cleared.", probeType)
		}
		if _, ok := m.workers[key]; ok {
			t.Errorf("[%s] Expected worker to be cleared.", probeType)
		}
	}
}

func expectResult(t *testing.T, w *worker, expectedResult results.Result, msg string) {
	result, ok := resultsManager(w.probeManager, w.probeType).Get(w.containerID)
	if !ok {
		t.Errorf("[%s - %s] Expected result to be set, but was not set", w.probeType, msg)
	} else if result != expectedResult {
		t.Errorf("[%s - %s] Expected result to be %v, but was %v",
			w.probeType, msg, expectedResult, result)
	}
}

func expectContinue(t *testing.T, w *worker, c bool, msg string) {
	if !c {
		t.Errorf("[%s - %s] Expected to continue, but did not", w.probeType, msg)
	}
}

func resultsManager(m *manager, probeType probeType) results.Manager {
	switch probeType {
	case readiness:
		return m.readinessManager
	case liveness:
		return m.livenessManager
	case startup:
		return m.startupManager
	}
	panic(fmt.Errorf("Unhandled case: %v", probeType))
}

func TestOnHoldOnLivenessOrStartupCheckFailure(t *testing.T) {
	ctx := context.Background()
	m := newTestManager()

	for _, probeType := range [...]probeType{liveness, startup} {
		w := newTestWorker(m, probeType, v1.Probe{SuccessThreshold: 1, FailureThreshold: 1})
		status := getTestRunningStatusWithStarted(probeType != startup)
		m.statusManager.SetPodStatus(w.pod, status)

		// First probe should fail.
		m.prober.exec = fakeExecProber{probe.Failure, nil}
		msg := "first probe"
		expectContinue(t, w, w.doProbe(ctx), msg)
		expectResult(t, w, results.Failure, msg)
		if !w.onHold {
			t.Errorf("Prober should be on hold due to %s check failure", probeType)
		}
		// Set fakeExecProber to return success. However, the result will remain
		// failure because the worker is on hold and won't probe.
		m.prober.exec = fakeExecProber{probe.Success, nil}
		msg = "while on hold"
		expectContinue(t, w, w.doProbe(ctx), msg)
		expectResult(t, w, results.Failure, msg)
		if !w.onHold {
			t.Errorf("Prober should be on hold due to %s check failure", probeType)
		}

		// Set a new container ID to lift the hold. The next probe will succeed.
		status.ContainerStatuses[0].ContainerID = "test://newCont_ID"
		m.statusManager.SetPodStatus(w.pod, status)
		msg = "hold lifted"
		expectContinue(t, w, w.doProbe(ctx), msg)
		expectResult(t, w, results.Success, msg)
		if w.onHold {
			t.Errorf("Prober should not be on hold anymore")
		}
	}
}

func TestResultRunOnLivenessCheckFailure(t *testing.T) {
	ctx := context.Background()
	m := newTestManager()
	w := newTestWorker(m, liveness, v1.Probe{SuccessThreshold: 1, FailureThreshold: 3})
	m.statusManager.SetPodStatus(w.pod, getTestRunningStatus())

	m.prober.exec = fakeExecProber{probe.Success, nil}
	msg := "initial probe success"
	expectContinue(t, w, w.doProbe(ctx), msg)
	expectResult(t, w, results.Success, msg)
	if w.resultRun != 1 {
		t.Errorf("Prober resultRun should be 1")
	}

	m.prober.exec = fakeExecProber{probe.Failure, nil}
	msg = "probe failure, result success"
	expectContinue(t, w, w.doProbe(ctx), msg)
	expectResult(t, w, results.Success, msg)
	if w.resultRun != 1 {
		t.Errorf("Prober resultRun should be 1")
	}

	m.prober.exec = fakeExecProber{probe.Failure, nil}
	msg = "2nd probe failure, result success"
	expectContinue(t, w, w.doProbe(ctx), msg)
	expectResult(t, w, results.Success, msg)
	if w.resultRun != 2 {
		t.Errorf("Prober resultRun should be 2")
	}

	// Exceeding FailureThreshold should cause resultRun to
	// reset to 0 so that the probe on the restarted pod
	// also gets FailureThreshold attempts to succeed.
	m.prober.exec = fakeExecProber{probe.Failure, nil}
	msg = "3rd probe failure, result failure"
	expectContinue(t, w, w.doProbe(ctx), msg)
	expectResult(t, w, results.Failure, msg)
	if w.resultRun != 0 {
		t.Errorf("Prober resultRun should be reset to 0")
	}
}

func TestResultRunOnStartupCheckFailure(t *testing.T) {
	ctx := context.Background()
	m := newTestManager()
	w := newTestWorker(m, startup, v1.Probe{SuccessThreshold: 1, FailureThreshold: 3})
	m.statusManager.SetPodStatus(w.pod, getTestRunningStatusWithStarted(false))

	// Below FailureThreshold leaves probe state unchanged
	// which is failed for startup at first.
	m.prober.exec = fakeExecProber{probe.Failure, nil}
	msg := "probe failure, result unknown"
	expectContinue(t, w, w.doProbe(ctx), msg)
	expectResult(t, w, results.Unknown, msg)
	if w.resultRun != 1 {
		t.Errorf("Prober resultRun should be 1")
	}

	m.prober.exec = fakeExecProber{probe.Failure, nil}
	msg = "2nd probe failure, result unknown"
	expectContinue(t, w, w.doProbe(ctx), msg)
	expectResult(t, w, results.Unknown, msg)
	if w.resultRun != 2 {
		t.Errorf("Prober resultRun should be 2")
	}

	// Exceeding FailureThreshold should cause resultRun to
	// reset to 0 so that the probe on the restarted pod
	// also gets FailureThreshold attempts to succeed.
	m.prober.exec = fakeExecProber{probe.Failure, nil}
	msg = "3rd probe failure, result failure"
	expectContinue(t, w, w.doProbe(ctx), msg)
	expectResult(t, w, results.Failure, msg)
	if w.resultRun != 0 {
		t.Errorf("Prober resultRun should be reset to 0")
	}
}

func TestLivenessProbeDisabledByStarted(t *testing.T) {
	ctx := context.Background()
	m := newTestManager()
	w := newTestWorker(m, liveness, v1.Probe{SuccessThreshold: 1, FailureThreshold: 1})
	m.statusManager.SetPodStatus(w.pod, getTestRunningStatusWithStarted(false))
	// livenessProbe fails, but is disabled
	m.prober.exec = fakeExecProber{probe.Failure, nil}
	msg := "Not started, probe failure, result success"
	expectContinue(t, w, w.doProbe(ctx), msg)
	expectResult(t, w, results.Success, msg)
	// setting started state
	m.statusManager.SetContainerStartup(w.pod.UID, w.containerID, true)
	// livenessProbe fails
	m.prober.exec = fakeExecProber{probe.Failure, nil}
	msg = "Started, probe failure, result failure"
	expectContinue(t, w, w.doProbe(ctx), msg)
	expectResult(t, w, results.Failure, msg)
}

func TestStartupProbeDisabledByStarted(t *testing.T) {
	ctx := context.Background()
	m := newTestManager()
	w := newTestWorker(m, startup, v1.Probe{SuccessThreshold: 1, FailureThreshold: 2})
	m.statusManager.SetPodStatus(w.pod, getTestRunningStatusWithStarted(false))
	// startupProbe fails < FailureThreshold, stays unknown
	m.prober.exec = fakeExecProber{probe.Failure, nil}
	msg := "Not started, probe failure, result unknown"
	expectContinue(t, w, w.doProbe(ctx), msg)
	expectResult(t, w, results.Unknown, msg)
	// startupProbe succeeds
	m.prober.exec = fakeExecProber{probe.Success, nil}
	msg = "Started, probe success, result success"
	expectContinue(t, w, w.doProbe(ctx), msg)
	expectResult(t, w, results.Success, msg)
	// setting started state
	m.statusManager.SetContainerStartup(w.pod.UID, w.containerID, true)
	// startupProbe fails, but is disabled
	m.prober.exec = fakeExecProber{probe.Failure, nil}
	msg = "Started, probe failure, result success"
	expectContinue(t, w, w.doProbe(ctx), msg)
	expectResult(t, w, results.Success, msg)
}