k8s.io/kubernetes@v1.29.3/test/integration/podgc/podgc_test.go (about)

     1  /*
     2  Copyright 2022 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package podgc
    18  
import (
	"context"
	"testing"
	"time"

	"github.com/google/go-cmp/cmp"
	"github.com/google/go-cmp/cmp/cmpopts"
	v1 "k8s.io/api/core/v1"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/util/wait"
	utilfeature "k8s.io/apiserver/pkg/util/feature"
	"k8s.io/client-go/informers"
	featuregatetesting "k8s.io/component-base/featuregate/testing"
	"k8s.io/klog/v2"
	podutil "k8s.io/kubernetes/pkg/api/v1/pod"
	"k8s.io/kubernetes/pkg/controller/podgc"
	"k8s.io/kubernetes/pkg/features"
	testutils "k8s.io/kubernetes/test/integration/util"
	"k8s.io/utils/ptr"
)
    39  
    40  // TestPodGcOrphanedPodsWithFinalizer tests deletion of orphaned pods
    41  func TestPodGcOrphanedPodsWithFinalizer(t *testing.T) {
    42  	tests := map[string]struct {
    43  		enablePodDisruptionConditions bool
    44  		enableJobPodReplacementPolicy bool
    45  		phase                         v1.PodPhase
    46  		wantPhase                     v1.PodPhase
    47  		wantDisruptionTarget          *v1.PodCondition
    48  	}{
    49  		"PodDisruptionConditions enabled": {
    50  			enablePodDisruptionConditions: true,
    51  			phase:                         v1.PodPending,
    52  			wantPhase:                     v1.PodFailed,
    53  			wantDisruptionTarget: &v1.PodCondition{
    54  				Type:    v1.DisruptionTarget,
    55  				Status:  v1.ConditionTrue,
    56  				Reason:  "DeletionByPodGC",
    57  				Message: "PodGC: node no longer exists",
    58  			},
    59  		},
    60  		"PodDisruptionConditions and PodReplacementPolicy enabled": {
    61  			enablePodDisruptionConditions: true,
    62  			enableJobPodReplacementPolicy: true,
    63  			phase:                         v1.PodPending,
    64  			wantPhase:                     v1.PodFailed,
    65  			wantDisruptionTarget: &v1.PodCondition{
    66  				Type:    v1.DisruptionTarget,
    67  				Status:  v1.ConditionTrue,
    68  				Reason:  "DeletionByPodGC",
    69  				Message: "PodGC: node no longer exists",
    70  			},
    71  		},
    72  		"Only PodReplacementPolicy enabled; no PodDisruptionCondition": {
    73  			enablePodDisruptionConditions: false,
    74  			enableJobPodReplacementPolicy: true,
    75  			phase:                         v1.PodPending,
    76  			wantPhase:                     v1.PodFailed,
    77  		},
    78  		"PodDisruptionConditions disabled": {
    79  			enablePodDisruptionConditions: false,
    80  			phase:                         v1.PodPending,
    81  			wantPhase:                     v1.PodPending,
    82  		},
    83  		"PodDisruptionConditions enabled; succeeded pod": {
    84  			enablePodDisruptionConditions: true,
    85  			phase:                         v1.PodSucceeded,
    86  			wantPhase:                     v1.PodSucceeded,
    87  		},
    88  		"PodDisruptionConditions enabled; failed pod": {
    89  			enablePodDisruptionConditions: true,
    90  			phase:                         v1.PodFailed,
    91  			wantPhase:                     v1.PodFailed,
    92  		},
    93  	}
    94  
    95  	for name, test := range tests {
    96  		t.Run(name, func(t *testing.T) {
    97  			defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.PodDisruptionConditions, test.enablePodDisruptionConditions)()
    98  			defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.JobPodReplacementPolicy, test.enableJobPodReplacementPolicy)()
    99  			testCtx := setup(t, "podgc-orphaned")
   100  			cs := testCtx.ClientSet
   101  
   102  			node := &v1.Node{
   103  				ObjectMeta: metav1.ObjectMeta{
   104  					Name: "node",
   105  				},
   106  				Spec: v1.NodeSpec{},
   107  				Status: v1.NodeStatus{
   108  					Conditions: []v1.NodeCondition{
   109  						{
   110  							Type:   v1.NodeReady,
   111  							Status: v1.ConditionTrue,
   112  						},
   113  					},
   114  				},
   115  			}
   116  			node, err := cs.CoreV1().Nodes().Create(testCtx.Ctx, node, metav1.CreateOptions{})
   117  			if err != nil {
   118  				t.Fatalf("Failed to create node '%v', err: %v", node.Name, err)
   119  			}
   120  
   121  			pod := &v1.Pod{
   122  				ObjectMeta: metav1.ObjectMeta{
   123  					Name:       "testpod",
   124  					Namespace:  testCtx.NS.Name,
   125  					Finalizers: []string{"test.k8s.io/finalizer"},
   126  				},
   127  				Spec: v1.PodSpec{
   128  					NodeName: node.Name,
   129  					Containers: []v1.Container{
   130  						{Name: "foo", Image: "bar"},
   131  					},
   132  				},
   133  			}
   134  
   135  			pod, err = cs.CoreV1().Pods(testCtx.NS.Name).Create(testCtx.Ctx, pod, metav1.CreateOptions{})
   136  			if err != nil {
   137  				t.Fatalf("Error %v, while creating pod: %v", err, klog.KObj(pod))
   138  			}
   139  			defer testutils.RemovePodFinalizers(testCtx.Ctx, testCtx.ClientSet, t, *pod)
   140  
   141  			pod.Status.Phase = test.phase
   142  			if _, err := testCtx.ClientSet.CoreV1().Pods(testCtx.NS.Name).UpdateStatus(testCtx.Ctx, pod, metav1.UpdateOptions{}); err != nil {
   143  				t.Fatalf("Error %v, while setting phase %v for pod: %v", err, test.phase, klog.KObj(pod))
   144  			}
   145  
   146  			// we delete the node to orphan the pod
   147  			err = cs.CoreV1().Nodes().Delete(testCtx.Ctx, pod.Spec.NodeName, metav1.DeleteOptions{})
   148  			if err != nil {
   149  				t.Fatalf("Failed to delete node: %v, err: %v", pod.Spec.NodeName, err)
   150  			}
   151  			err = wait.PollUntilContextTimeout(testCtx.Ctx, time.Second, time.Second*15, true, testutils.PodIsGettingEvicted(cs, pod.Namespace, pod.Name))
   152  			if err != nil {
   153  				t.Fatalf("Error '%v' while waiting for the pod '%v' to be terminating", err, klog.KObj(pod))
   154  			}
   155  			pod, err = cs.CoreV1().Pods(testCtx.NS.Name).Get(testCtx.Ctx, pod.Name, metav1.GetOptions{})
   156  			if err != nil {
   157  				t.Fatalf("Error: '%v' while updating pod info: '%v'", err, klog.KObj(pod))
   158  			}
   159  			_, gotDisruptionTarget := podutil.GetPodCondition(&pod.Status, v1.DisruptionTarget)
   160  			if diff := cmp.Diff(test.wantDisruptionTarget, gotDisruptionTarget, cmpopts.IgnoreFields(v1.PodCondition{}, "LastTransitionTime")); diff != "" {
   161  				t.Errorf("Pod %v has unexpected DisruptionTarget condition: %s", klog.KObj(pod), diff)
   162  			}
   163  			if pod.Status.Phase != test.wantPhase {
   164  				t.Errorf("Unexpected phase for pod %q. Got: %q, want: %q", klog.KObj(pod), pod.Status.Phase, test.wantPhase)
   165  			}
   166  		})
   167  	}
   168  }
   169  
   170  // TestTerminatingOnOutOfServiceNode tests deletion pods terminating on out-of-service nodes
   171  func TestTerminatingOnOutOfServiceNode(t *testing.T) {
   172  	tests := map[string]struct {
   173  		enablePodDisruptionConditions bool
   174  		enableJobPodReplacementPolicy bool
   175  		withFinalizer                 bool
   176  		wantPhase                     v1.PodPhase
   177  	}{
   178  		"pod has phase changed to Failed when PodDisruptionConditions enabled": {
   179  			enablePodDisruptionConditions: true,
   180  			withFinalizer:                 true,
   181  			wantPhase:                     v1.PodFailed,
   182  		},
   183  		"pod has phase unchanged when PodDisruptionConditions disabled": {
   184  			enablePodDisruptionConditions: false,
   185  			withFinalizer:                 true,
   186  			wantPhase:                     v1.PodPending,
   187  		},
   188  		"pod is getting deleted when no finalizer and PodDisruptionConditions enabled": {
   189  			enablePodDisruptionConditions: true,
   190  			withFinalizer:                 false,
   191  		},
   192  		"pod is getting deleted when no finalizer and PodDisruptionConditions disabled": {
   193  			enablePodDisruptionConditions: false,
   194  			withFinalizer:                 false,
   195  		},
   196  		"pod has phase changed when PodDisruptionConditions disabled, but JobPodReplacementPolicy enabled": {
   197  			enablePodDisruptionConditions: false,
   198  			enableJobPodReplacementPolicy: true,
   199  			withFinalizer:                 true,
   200  			wantPhase:                     v1.PodFailed,
   201  		},
   202  	}
   203  
   204  	for name, test := range tests {
   205  		t.Run(name, func(t *testing.T) {
   206  			defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.PodDisruptionConditions, test.enablePodDisruptionConditions)()
   207  			defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.NodeOutOfServiceVolumeDetach, true)()
   208  			defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.JobPodReplacementPolicy, test.enableJobPodReplacementPolicy)()
   209  			testCtx := setup(t, "podgc-out-of-service")
   210  			cs := testCtx.ClientSet
   211  
   212  			node := &v1.Node{
   213  				ObjectMeta: metav1.ObjectMeta{
   214  					Name: "node",
   215  				},
   216  				Spec: v1.NodeSpec{},
   217  				Status: v1.NodeStatus{
   218  					Conditions: []v1.NodeCondition{
   219  						{
   220  							Type:   v1.NodeReady,
   221  							Status: v1.ConditionFalse,
   222  						},
   223  					},
   224  				},
   225  			}
   226  			node, err := cs.CoreV1().Nodes().Create(testCtx.Ctx, node, metav1.CreateOptions{})
   227  			if err != nil {
   228  				t.Fatalf("Failed to create node '%v', err: %v", node.Name, err)
   229  			}
   230  
   231  			pod := &v1.Pod{
   232  				ObjectMeta: metav1.ObjectMeta{
   233  					Name:      "testpod",
   234  					Namespace: testCtx.NS.Name,
   235  				},
   236  				Spec: v1.PodSpec{
   237  					NodeName: node.Name,
   238  					Containers: []v1.Container{
   239  						{Name: "foo", Image: "bar"},
   240  					},
   241  				},
   242  				Status: v1.PodStatus{
   243  					Phase: v1.PodRunning,
   244  				},
   245  			}
   246  			if test.withFinalizer {
   247  				pod.ObjectMeta.Finalizers = []string{"test.k8s.io/finalizer"}
   248  			}
   249  
   250  			pod, err = cs.CoreV1().Pods(testCtx.NS.Name).Create(testCtx.Ctx, pod, metav1.CreateOptions{})
   251  			if err != nil {
   252  				t.Fatalf("Error %v, while creating pod: %v", err, klog.KObj(pod))
   253  			}
   254  			if test.withFinalizer {
   255  				defer testutils.RemovePodFinalizers(testCtx.Ctx, testCtx.ClientSet, t, *pod)
   256  			}
   257  
   258  			// trigger termination of the pod, but with long grace period so that it is not removed immediately
   259  			err = cs.CoreV1().Pods(testCtx.NS.Name).Delete(testCtx.Ctx, pod.Name, metav1.DeleteOptions{GracePeriodSeconds: ptr.To[int64](300)})
   260  			if err != nil {
   261  				t.Fatalf("Error: '%v' while deleting pod: '%v'", err, klog.KObj(pod))
   262  			}
   263  			// wait until the pod is terminating
   264  			err = wait.PollUntilContextTimeout(testCtx.Ctx, time.Second, time.Second*15, true, testutils.PodIsGettingEvicted(cs, pod.Namespace, pod.Name))
   265  			if err != nil {
   266  				t.Fatalf("Error '%v' while waiting for the pod '%v' to be terminating", err, klog.KObj(pod))
   267  			}
   268  			// taint the node with the out-of-service taint
   269  			err = testutils.AddTaintToNode(cs, pod.Spec.NodeName, v1.Taint{Key: v1.TaintNodeOutOfService, Value: "", Effect: v1.TaintEffectNoExecute})
   270  			if err != nil {
   271  				t.Fatalf("Failed to taint node: %v, err: %v", pod.Spec.NodeName, err)
   272  			}
   273  			if test.withFinalizer {
   274  				// wait until the pod phase is set as expected
   275  				err = wait.Poll(time.Second, time.Second*15, func() (bool, error) {
   276  					var e error
   277  					pod, e = cs.CoreV1().Pods(pod.Namespace).Get(testCtx.Ctx, pod.Name, metav1.GetOptions{})
   278  					if e != nil {
   279  						return true, e
   280  					}
   281  					return test.wantPhase == pod.Status.Phase, nil
   282  				})
   283  				if err != nil {
   284  					t.Errorf("Error %q while waiting for the pod %q to be in expected phase", err, klog.KObj(pod))
   285  				}
   286  				_, cond := podutil.GetPodCondition(&pod.Status, v1.DisruptionTarget)
   287  				if cond != nil {
   288  					t.Errorf("Pod %q has an unexpected condition: %q", klog.KObj(pod), v1.DisruptionTarget)
   289  				}
   290  			} else {
   291  				// wait until the pod is deleted
   292  				err = wait.PollImmediate(time.Second, time.Second*15, func() (bool, error) {
   293  					var e error
   294  					pod, e = cs.CoreV1().Pods(pod.Namespace).Get(testCtx.Ctx, pod.Name, metav1.GetOptions{})
   295  					if e == nil {
   296  						return pod == nil, nil
   297  					}
   298  					// there was an error
   299  					if apierrors.IsNotFound(e) {
   300  						return true, nil
   301  					}
   302  					return false, e
   303  				})
   304  				if err != nil {
   305  					t.Errorf("Error %q while waiting for the pod %q to be deleted", err, klog.KObj(pod))
   306  				}
   307  			}
   308  		})
   309  	}
   310  }
   311  
   312  // TestPodGcForPodsWithDuplicatedFieldKeys regression test for https://issues.k8s.io/118261
   313  func TestPodGcForPodsWithDuplicatedFieldKeys(t *testing.T) {
   314  	tests := map[string]struct {
   315  		pod                  *v1.Pod
   316  		wantDisruptionTarget *v1.PodCondition
   317  	}{
   318  		"Orphan pod with duplicated env vars": {
   319  			pod: &v1.Pod{
   320  				ObjectMeta: metav1.ObjectMeta{
   321  					Name:       "testpod",
   322  					Finalizers: []string{"test.k8s.io/finalizer"},
   323  				},
   324  				Spec: v1.PodSpec{
   325  					NodeName: "non-existing-node",
   326  					Containers: []v1.Container{
   327  						{
   328  							Name:  "foo",
   329  							Image: "bar",
   330  							Env: []v1.EnvVar{
   331  								{
   332  									Name:  "XYZ",
   333  									Value: "1",
   334  								},
   335  								{
   336  									Name:  "XYZ",
   337  									Value: "2",
   338  								},
   339  							},
   340  						},
   341  					},
   342  				},
   343  			},
   344  			wantDisruptionTarget: &v1.PodCondition{
   345  				Type:    v1.DisruptionTarget,
   346  				Status:  v1.ConditionTrue,
   347  				Reason:  "DeletionByPodGC",
   348  				Message: "PodGC: node no longer exists",
   349  			},
   350  		},
   351  		"Orphan pod with duplicated ports; scenario from https://issues.k8s.io/113482": {
   352  			pod: &v1.Pod{
   353  				ObjectMeta: metav1.ObjectMeta{
   354  					Name:       "testpod",
   355  					Finalizers: []string{"test.k8s.io/finalizer"},
   356  				},
   357  				Spec: v1.PodSpec{
   358  					NodeName: "non-existing-node",
   359  					Containers: []v1.Container{
   360  						{
   361  							Name:  "foo",
   362  							Image: "bar",
   363  							Ports: []v1.ContainerPort{
   364  								{
   365  									ContainerPort: 93,
   366  									HostPort:      9376,
   367  								},
   368  								{
   369  									ContainerPort: 93,
   370  									HostPort:      9377,
   371  								},
   372  							},
   373  						},
   374  					},
   375  				},
   376  			},
   377  			wantDisruptionTarget: &v1.PodCondition{
   378  				Type:    v1.DisruptionTarget,
   379  				Status:  v1.ConditionTrue,
   380  				Reason:  "DeletionByPodGC",
   381  				Message: "PodGC: node no longer exists",
   382  			},
   383  		},
   384  	}
   385  
   386  	for name, test := range tests {
   387  		t.Run(name, func(t *testing.T) {
   388  			defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.PodDisruptionConditions, true)()
   389  			testCtx := setup(t, "podgc-orphaned")
   390  			cs := testCtx.ClientSet
   391  
   392  			pod := test.pod
   393  			pod.Namespace = testCtx.NS.Namespace
   394  			pod, err := cs.CoreV1().Pods(testCtx.NS.Name).Create(testCtx.Ctx, pod, metav1.CreateOptions{})
   395  			if err != nil {
   396  				t.Fatalf("Error %v, while creating pod: %v", err, klog.KObj(pod))
   397  			}
   398  			defer testutils.RemovePodFinalizers(testCtx.Ctx, testCtx.ClientSet, t, *pod)
   399  
   400  			// getting evicted due to NodeName being "non-existing-node"
   401  			err = wait.PollUntilContextTimeout(testCtx.Ctx, time.Second, time.Second*15, true, testutils.PodIsGettingEvicted(cs, pod.Namespace, pod.Name))
   402  			if err != nil {
   403  				t.Fatalf("Error '%v' while waiting for the pod '%v' to be terminating", err, klog.KObj(pod))
   404  			}
   405  			pod, err = cs.CoreV1().Pods(testCtx.NS.Name).Get(testCtx.Ctx, pod.Name, metav1.GetOptions{})
   406  			if err != nil {
   407  				t.Fatalf("Error: '%v' while updating pod info: '%v'", err, klog.KObj(pod))
   408  			}
   409  			_, gotDisruptionTarget := podutil.GetPodCondition(&pod.Status, v1.DisruptionTarget)
   410  			if diff := cmp.Diff(test.wantDisruptionTarget, gotDisruptionTarget, cmpopts.IgnoreFields(v1.PodCondition{}, "LastTransitionTime")); diff != "" {
   411  				t.Errorf("Pod %v has unexpected DisruptionTarget condition: %s", klog.KObj(pod), diff)
   412  			}
   413  			if gotDisruptionTarget != nil && gotDisruptionTarget.LastTransitionTime.IsZero() {
   414  				t.Errorf("Pod %v has DisruptionTarget condition without LastTransitionTime", klog.KObj(pod))
   415  			}
   416  			if pod.Status.Phase != v1.PodFailed {
   417  				t.Errorf("Unexpected phase for pod %q. Got: %q, want: %q", klog.KObj(pod), pod.Status.Phase, v1.PodFailed)
   418  			}
   419  		})
   420  	}
   421  }
   422  
   423  func setup(t *testing.T, name string) *testutils.TestContext {
   424  	testCtx := testutils.InitTestAPIServer(t, name, nil)
   425  	externalInformers := informers.NewSharedInformerFactory(testCtx.ClientSet, time.Second)
   426  
   427  	podgc := podgc.NewPodGCInternal(testCtx.Ctx,
   428  		testCtx.ClientSet,
   429  		externalInformers.Core().V1().Pods(),
   430  		externalInformers.Core().V1().Nodes(),
   431  		0,
   432  		500*time.Millisecond,
   433  		time.Second)
   434  
   435  	// Waiting for all controllers to sync
   436  	externalInformers.Start(testCtx.Ctx.Done())
   437  	externalInformers.WaitForCacheSync(testCtx.Ctx.Done())
   438  
   439  	go podgc.Run(testCtx.Ctx)
   440  	return testCtx
   441  }