k8s.io/kubernetes@v1.29.3/pkg/controller/job/indexed_job_utils_test.go

/*
Copyright 2021 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package job

import (
	"math"
	"strconv"
	"testing"
	"time"

	"github.com/google/go-cmp/cmp"
	batch "k8s.io/api/batch/v1"
	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/util/sets"
	"k8s.io/apiserver/pkg/util/feature"
	featuregatetesting "k8s.io/component-base/featuregate/testing"
	"k8s.io/klog/v2/ktesting"
	"k8s.io/kubernetes/pkg/controller"
	"k8s.io/kubernetes/pkg/features"
	"k8s.io/utils/ptr"
)

const noIndex = "-"

func TestCalculateSucceededIndexes(t *testing.T) {
	logger, _ := ktesting.NewTestContext(t)
	cases := map[string]struct {
		prevSucceeded       string
		pods                []indexPhase
		completions         int32
		wantStatusIntervals orderedIntervals
		wantIntervals       orderedIntervals
	}{
		"one index": {
			pods: []indexPhase{{"1", v1.PodSucceeded}},
			completions: 2,
			wantIntervals: []interval{{1, 1}},
		},
		"two separate": {
			pods: []indexPhase{
				{"2", v1.PodFailed},
				{"5", v1.PodSucceeded},
				{"5", v1.PodSucceeded},
				{"10", v1.PodFailed},
				{"10", v1.PodSucceeded},
			},
			completions: 11,
			wantIntervals: []interval{{5, 5}, {10, 10}},
		},
		"two intervals": {
			pods: []indexPhase{
				{"0", v1.PodRunning},
				{"1", v1.PodPending},
				{"2", v1.PodSucceeded},
				{"3", v1.PodSucceeded},
				{"5", v1.PodSucceeded},
				{"6", v1.PodSucceeded},
				{"7", v1.PodSucceeded},
			},
			completions: 8,
			wantIntervals: []interval{{2, 3}, {5, 7}},
		},
		"one index and one interval": {
			pods: []indexPhase{
				{"0", v1.PodSucceeded},
				{"1", v1.PodFailed},
				{"2", v1.PodSucceeded},
				{"3", v1.PodSucceeded},
				{"4", v1.PodSucceeded},
				{"5", v1.PodSucceeded},
				{noIndex, v1.PodSucceeded},
				{"-2", v1.PodSucceeded},
			},
			completions: 6,
			wantIntervals: []interval{{0, 0}, {2, 5}},
		},
		"out of range": {
			pods: []indexPhase{
				{"0", v1.PodSucceeded},
				{"1", v1.PodSucceeded},
				{"2", v1.PodSucceeded},
				{"3", v1.PodFailed},
				{"4", v1.PodSucceeded},
				{"5", v1.PodSucceeded},
				{noIndex, v1.PodSucceeded},
				{"-2", v1.PodSucceeded},
			},
			completions: 5,
			wantIntervals: []interval{{0, 2}, {4, 4}},
		},
		"prev interval out of range": {
			prevSucceeded: "0-5,8-10",
			completions: 8,
			wantStatusIntervals: []interval{{0, 5}},
			wantIntervals: []interval{{0, 5}},
		},
		"prev interval partially out of range": {
			prevSucceeded: "0-5,8-10",
			completions: 10,
			wantStatusIntervals: []interval{{0, 5}, {8, 9}},
			wantIntervals: []interval{{0, 5}, {8, 9}},
		},
		"prev and new separate": {
			prevSucceeded: "0,4,5,10-12",
			pods: []indexPhase{
				{"2", v1.PodSucceeded},
				{"7", v1.PodSucceeded},
				{"8", v1.PodSucceeded},
			},
			completions: 13,
			wantStatusIntervals: []interval{
				{0, 0},
				{4, 5},
				{10, 12},
			},
			wantIntervals: []interval{
				{0, 0},
				{2, 2},
				{4, 5},
				{7, 8},
				{10, 12},
			},
		},
		"prev between new": {
			prevSucceeded: "3,4,6",
			pods: []indexPhase{
				{"2", v1.PodSucceeded},
				{"7", v1.PodSucceeded},
				{"8", v1.PodSucceeded},
			},
			completions: 9,
			wantStatusIntervals: []interval{
				{3, 4},
				{6, 6},
			},
			wantIntervals: []interval{
				{2, 4},
				{6, 8},
			},
		},
		"new between prev": {
			prevSucceeded: "2,7,8",
			pods: []indexPhase{
				{"3", v1.PodSucceeded},
				{"4", v1.PodSucceeded},
				{"6", v1.PodSucceeded},
			},
			completions: 9,
			wantStatusIntervals: []interval{
				{2, 2},
				{7, 8},
			},
			wantIntervals: []interval{
				{2, 4},
				{6, 8},
			},
		},
		"new within prev": {
			prevSucceeded: "2-7",
			pods: []indexPhase{
				{"0", v1.PodSucceeded},
				{"3", v1.PodSucceeded},
				{"5", v1.PodSucceeded},
				{"9", v1.PodSucceeded},
			},
			completions: 10,
			wantStatusIntervals: []interval{
				{2, 7},
			},
			wantIntervals: []interval{
				{0, 0},
				{2, 7},
				{9, 9},
			},
		},
		"corrupted interval": {
			prevSucceeded: "0,1-foo,bar",
			pods: []indexPhase{
				{"3", v1.PodSucceeded},
			},
			completions: 4,
			wantStatusIntervals: []interval{
				{0, 0},
			},
			wantIntervals: []interval{
				{0, 0},
				{3, 3},
			},
		},
	}
	for name, tc := range cases {
		t.Run(name, func(t *testing.T) {
			job := &batch.Job{
				Status: batch.JobStatus{
					CompletedIndexes: tc.prevSucceeded,
				},
				Spec: batch.JobSpec{
					Completions: ptr.To(tc.completions),
				},
			}
			pods := hollowPodsWithIndexPhase(tc.pods)
			for _, p := range pods {
				p.Finalizers = append(p.Finalizers, batch.JobTrackingFinalizer)
			}
			gotStatusIntervals, gotIntervals := calculateSucceededIndexes(logger, job, pods)
			if diff := cmp.Diff(tc.wantStatusIntervals, gotStatusIntervals); diff != "" {
				t.Errorf("Unexpected completed indexes from status (-want,+got):\n%s", diff)
			}
			if diff := cmp.Diff(tc.wantIntervals, gotIntervals); diff != "" {
				t.Errorf("Unexpected completed indexes (-want,+got):\n%s", diff)
			}
		})
	}
}

func TestIsIndexFailed(t *testing.T) {
	logger, _ := ktesting.NewTestContext(t)
	cases := map[string]struct {
		enableJobPodFailurePolicy bool
		job                       batch.Job
		pod                       *v1.Pod
		wantResult                bool
	}{
		"failed pod exceeding backoffLimitPerIndex, when backoffLimitPerIndex=0": {
			job: batch.Job{
				Spec: batch.JobSpec{
					Completions: ptr.To[int32](2),
					BackoffLimitPerIndex: ptr.To[int32](0),
				},
			},
			pod: buildPod().indexFailureCount("0").phase(v1.PodFailed).index("0").trackingFinalizer().Pod,
			wantResult: true,
		},
		"failed pod exceeding backoffLimitPerIndex, when backoffLimitPerIndex=1": {
			job: batch.Job{
				Spec: batch.JobSpec{
					Completions: ptr.To[int32](2),
					BackoffLimitPerIndex: ptr.To[int32](1),
				},
			},
			pod: buildPod().indexFailureCount("1").phase(v1.PodFailed).index("1").trackingFinalizer().Pod,
			wantResult: true,
		},
		"matching FailIndex pod failure policy; JobPodFailurePolicy enabled": {
			enableJobPodFailurePolicy: true,
			job: batch.Job{
				Spec: batch.JobSpec{
					Completions: ptr.To[int32](2),
					BackoffLimitPerIndex: ptr.To[int32](1),
					PodFailurePolicy: &batch.PodFailurePolicy{
						Rules: []batch.PodFailurePolicyRule{
							{
								Action: batch.PodFailurePolicyActionFailIndex,
								OnExitCodes: &batch.PodFailurePolicyOnExitCodesRequirement{
									Operator: batch.PodFailurePolicyOnExitCodesOpIn,
									Values: []int32{3},
								},
							},
						},
					},
				},
			},
			pod: buildPod().indexFailureCount("0").status(v1.PodStatus{
				Phase: v1.PodFailed,
				ContainerStatuses: []v1.ContainerStatus{
					{
						State: v1.ContainerState{
							Terminated: &v1.ContainerStateTerminated{
								ExitCode: 3,
							},
						},
					},
				},
			}).index("0").trackingFinalizer().Pod,
			wantResult: true,
		},
		"matching FailIndex pod failure policy; JobPodFailurePolicy disabled": {
			enableJobPodFailurePolicy: false,
			job: batch.Job{
				Spec: batch.JobSpec{
					Completions: ptr.To[int32](2),
					BackoffLimitPerIndex: ptr.To[int32](1),
					PodFailurePolicy: &batch.PodFailurePolicy{
						Rules: []batch.PodFailurePolicyRule{
							{
								Action: batch.PodFailurePolicyActionFailIndex,
								OnExitCodes: &batch.PodFailurePolicyOnExitCodesRequirement{
									Operator: batch.PodFailurePolicyOnExitCodesOpIn,
									Values: []int32{3},
								},
							},
						},
					},
				},
			},
			pod: buildPod().indexFailureCount("0").status(v1.PodStatus{
				Phase: v1.PodFailed,
				ContainerStatuses: []v1.ContainerStatus{
					{
						State: v1.ContainerState{
							Terminated: &v1.ContainerStateTerminated{
								ExitCode: 3,
							},
						},
					},
				},
			}).index("0").trackingFinalizer().Pod,
			wantResult: false,
		},
	}
	for name, tc := range cases {
		t.Run(name, func(t *testing.T) {
			defer featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.JobBackoffLimitPerIndex, true)()
			defer featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.JobPodFailurePolicy, tc.enableJobPodFailurePolicy)()
			gotResult := isIndexFailed(logger, &tc.job, tc.pod)
			if diff := cmp.Diff(tc.wantResult, gotResult); diff != "" {
				t.Errorf("Unexpected result (-want,+got):\n%s", diff)
			}
		})
	}
}

func TestCalculateFailedIndexes(t *testing.T) {
	logger, _ := ktesting.NewTestContext(t)
	cases := map[string]struct {
		enableJobPodFailurePolicy bool
		job                       batch.Job
		pods                      []*v1.Pod
		wantPrevFailedIndexes     orderedIntervals
		wantFailedIndexes         orderedIntervals
	}{
		"one new index failed": {
			job: batch.Job{
				Spec: batch.JobSpec{
					Completions: ptr.To[int32](2),
					BackoffLimitPerIndex: ptr.To[int32](1),
				},
			},
			pods: []*v1.Pod{
				buildPod().indexFailureCount("0").phase(v1.PodFailed).index("0").trackingFinalizer().Pod,
				buildPod().indexFailureCount("1").phase(v1.PodFailed).index("1").trackingFinalizer().Pod,
			},
			wantFailedIndexes: []interval{{1, 1}},
		},
		"pod without finalizer is ignored": {
			job: batch.Job{
				Spec: batch.JobSpec{
					Completions: ptr.To[int32](2),
					BackoffLimitPerIndex: ptr.To[int32](0),
				},
			},
			pods: []*v1.Pod{
				buildPod().indexFailureCount("0").phase(v1.PodFailed).index("0").Pod,
			},
			wantFailedIndexes: nil,
		},
		"pod outside completions is ignored": {
			job: batch.Job{
				Spec: batch.JobSpec{
					Completions: ptr.To[int32](2),
					BackoffLimitPerIndex: ptr.To[int32](0),
				},
			},
			pods: []*v1.Pod{
				buildPod().indexFailureCount("0").phase(v1.PodFailed).index("3").Pod,
			},
			wantFailedIndexes: nil,
		},
		"extend the failed indexes": {
			job: batch.Job{
				Status: batch.JobStatus{
					FailedIndexes: ptr.To("0"),
				},
				Spec: batch.JobSpec{
					Completions: ptr.To[int32](2),
					BackoffLimitPerIndex: ptr.To[int32](0),
				},
			},
			pods: []*v1.Pod{
				buildPod().indexFailureCount("0").phase(v1.PodFailed).index("1").trackingFinalizer().Pod,
			},
			wantFailedIndexes: []interval{{0, 1}},
		},
		"prev failed indexes empty": {
			job: batch.Job{
				Status: batch.JobStatus{
					FailedIndexes: ptr.To(""),
				},
				Spec: batch.JobSpec{
					Completions: ptr.To[int32](2),
					BackoffLimitPerIndex: ptr.To[int32](0),
				},
			},
			pods: []*v1.Pod{
				buildPod().indexFailureCount("0").phase(v1.PodFailed).index("1").trackingFinalizer().Pod,
			},
			wantFailedIndexes: []interval{{1, 1}},
		},
		"prev failed indexes outside the completions": {
			job: batch.Job{
				Status: batch.JobStatus{
					FailedIndexes: ptr.To("9"),
				},
				Spec: batch.JobSpec{
					Completions: ptr.To[int32](2),
					BackoffLimitPerIndex: ptr.To[int32](0),
				},
			},
			pods: []*v1.Pod{
				buildPod().indexFailureCount("0").phase(v1.PodFailed).index("1").trackingFinalizer().Pod,
			},
			wantFailedIndexes: []interval{{1, 1}},
		},
	}
	for name, tc := range cases {
		t.Run(name, func(t *testing.T) {
			failedIndexes := calculateFailedIndexes(logger, &tc.job, tc.pods)
			if diff := cmp.Diff(&tc.wantFailedIndexes, failedIndexes); diff != "" {
				t.Errorf("Unexpected failed indexes (-want,+got):\n%s", diff)
			}
		})
	}
}

func TestGetPodsWithDelayedDeletionPerIndex(t *testing.T) {
	logger, _ := ktesting.NewTestContext(t)
	now := time.Now()
	cases := map[string]struct {
		enableJobPodFailurePolicy           bool
		job                                 batch.Job
		pods                                []*v1.Pod
		expectedRmFinalizers                sets.Set[string]
		wantPodsWithDelayedDeletionPerIndex []string
	}{
		"failed pods corresponding to non-failed indexes are kept": {
			job: batch.Job{
				Spec: batch.JobSpec{
					Completions: ptr.To[int32](3),
					BackoffLimitPerIndex: ptr.To[int32](1),
				},
			},
			pods: []*v1.Pod{
				buildPod().uid("a").indexFailureCount("0").phase(v1.PodFailed).index("0").trackingFinalizer().Pod,
				buildPod().uid("b").indexFailureCount("1").phase(v1.PodFailed).index("1").trackingFinalizer().Pod,
				buildPod().uid("c").indexFailureCount("0").phase(v1.PodFailed).index("2").trackingFinalizer().Pod,
			},
			wantPodsWithDelayedDeletionPerIndex: []string{"a", "c"},
		},
		"failed pod without finalizer; the pod's deletion is not delayed as it already started": {
			job: batch.Job{
				Spec: batch.JobSpec{
					Completions: ptr.To[int32](2),
					BackoffLimitPerIndex: ptr.To[int32](0),
				},
			},
			pods: []*v1.Pod{
				buildPod().uid("a").indexFailureCount("0").phase(v1.PodFailed).index("0").Pod,
			},
			wantPodsWithDelayedDeletionPerIndex: []string{},
		},
		"failed pod with expected finalizer removal; the pod's deletion is not delayed as it already started": {
			job: batch.Job{
				Spec: batch.JobSpec{
					Completions: ptr.To[int32](2),
					BackoffLimitPerIndex: ptr.To[int32](0),
				},
			},
			pods: []*v1.Pod{
				buildPod().uid("a").indexFailureCount("0").phase(v1.PodFailed).index("0").trackingFinalizer().Pod,
			},
			expectedRmFinalizers: sets.New("a"),
			wantPodsWithDelayedDeletionPerIndex: []string{},
		},
		"failed pod with index outside of completions; the pod's deletion is not delayed": {
			job: batch.Job{
				Spec: batch.JobSpec{
					Completions: ptr.To[int32](2),
					BackoffLimitPerIndex: ptr.To[int32](0),
				},
			},
			pods: []*v1.Pod{
				buildPod().uid("a").indexFailureCount("0").phase(v1.PodFailed).index("4").trackingFinalizer().Pod,
			},
			wantPodsWithDelayedDeletionPerIndex: []string{},
		},
		"failed pod for active index; the pod's deletion is not delayed as it is already replaced": {
			job: batch.Job{
				Spec: batch.JobSpec{
					Completions: ptr.To[int32](2),
					BackoffLimitPerIndex: ptr.To[int32](1),
				},
			},
			pods: []*v1.Pod{
				buildPod().uid("a1").indexFailureCount("0").phase(v1.PodFailed).index("0").trackingFinalizer().Pod,
				buildPod().uid("a2").indexFailureCount("1").phase(v1.PodRunning).index("0").trackingFinalizer().Pod,
			},
			wantPodsWithDelayedDeletionPerIndex: []string{},
		},
		"failed pod for succeeded index; the pod's deletion is not delayed as it is already replaced": {
			job: batch.Job{
				Spec: batch.JobSpec{
					Completions: ptr.To[int32](2),
					BackoffLimitPerIndex: ptr.To[int32](1),
				},
			},
			pods: []*v1.Pod{
				buildPod().uid("a1").indexFailureCount("0").phase(v1.PodFailed).index("0").trackingFinalizer().Pod,
				buildPod().uid("a2").indexFailureCount("1").phase(v1.PodSucceeded).index("0").trackingFinalizer().Pod,
			},
			wantPodsWithDelayedDeletionPerIndex: []string{},
		},
		"multiple failed pods for index with different failure count; only the pod with highest failure count is kept": {
			job: batch.Job{
				Spec: batch.JobSpec{
					Completions: ptr.To[int32](2),
					BackoffLimitPerIndex: ptr.To[int32](4),
				},
			},
			pods: []*v1.Pod{
				buildPod().uid("a1").indexFailureCount("0").phase(v1.PodFailed).index("0").trackingFinalizer().Pod,
				buildPod().uid("a3").indexFailureCount("2").phase(v1.PodFailed).index("0").trackingFinalizer().Pod,
				buildPod().uid("a2").indexFailureCount("1").phase(v1.PodFailed).index("0").trackingFinalizer().Pod,
			},
			wantPodsWithDelayedDeletionPerIndex: []string{"a3"},
		},
		"multiple failed pods for index with different finish times; only the last failed pod is kept": {
			job: batch.Job{
				Spec: batch.JobSpec{
					Completions: ptr.To[int32](2),
					BackoffLimitPerIndex: ptr.To[int32](4),
				},
			},
			pods: []*v1.Pod{
				buildPod().uid("a1").indexFailureCount("1").phase(v1.PodFailed).index("0").customDeletionTimestamp(now.Add(-time.Second)).trackingFinalizer().Pod,
				buildPod().uid("a3").indexFailureCount("1").phase(v1.PodFailed).index("0").customDeletionTimestamp(now).trackingFinalizer().Pod,
				buildPod().uid("a2").indexFailureCount("1").phase(v1.PodFailed).index("0").customDeletionTimestamp(now.Add(-2 * time.Second)).trackingFinalizer().Pod,
			},
			wantPodsWithDelayedDeletionPerIndex: []string{"a3"},
		},
	}
	for name, tc := range cases {
		t.Run(name, func(t *testing.T) {
			defer featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.JobBackoffLimitPerIndex, true)()
			activePods := controller.FilterActivePods(logger, tc.pods)
			failedIndexes := calculateFailedIndexes(logger, &tc.job, tc.pods)
			_, succeededIndexes := calculateSucceededIndexes(logger, &tc.job, tc.pods)
			jobCtx := &syncJobCtx{
				job: &tc.job,
				pods: tc.pods,
				activePods: activePods,
				succeededIndexes: succeededIndexes,
				failedIndexes: failedIndexes,
				expectedRmFinalizers: tc.expectedRmFinalizers,
			}
			gotPodsWithDelayedDeletionPerIndex := getPodsWithDelayedDeletionPerIndex(logger, jobCtx)
			gotPodsWithDelayedDeletionPerIndexSet := sets.New[string]()
			for _, pod := range gotPodsWithDelayedDeletionPerIndex {
				gotPodsWithDelayedDeletionPerIndexSet.Insert(string(pod.UID))
			}
			if diff := cmp.Diff(tc.wantPodsWithDelayedDeletionPerIndex, sets.List(gotPodsWithDelayedDeletionPerIndexSet)); diff != "" {
				t.Errorf("Unexpected set of pods with delayed deletion (-want,+got):\n%s", diff)
			}
		})
	}
}

func TestGetNewIndexFailureCountValue(t *testing.T) {
	logger, _ := ktesting.NewTestContext(t)
	cases := map[string]struct {
		enableJobPodFailurePolicy       bool
		job                             batch.Job
		pod                             *v1.Pod
		wantNewIndexFailureCount        int32
		wantNewIndexIgnoredFailureCount int32
	}{
		"first pod created": {
			job: batch.Job{},
			wantNewIndexFailureCount: 0,
		},
		"failed pod being replaced with 0 index failure count": {
			job: batch.Job{},
			pod: buildPod().uid("a").indexFailureCount("0").phase(v1.PodFailed).index("0").trackingFinalizer().Pod,
			wantNewIndexFailureCount: 1,
		},
		"failed pod being replaced with >0 index failure count": {
			job: batch.Job{},
			pod: buildPod().uid("a").indexFailureCount("3").phase(v1.PodFailed).index("0").trackingFinalizer().Pod,
			wantNewIndexFailureCount: 4,
		},
		"failed pod being replaced, matching the ignore rule; JobPodFailurePolicy enabled": {
			enableJobPodFailurePolicy: true,
			job: batch.Job{
				Spec: batch.JobSpec{
					PodFailurePolicy: &batch.PodFailurePolicy{
						Rules: []batch.PodFailurePolicyRule{
							{
								Action: batch.PodFailurePolicyActionIgnore,
								OnPodConditions: []batch.PodFailurePolicyOnPodConditionsPattern{
									{
										Type: v1.DisruptionTarget,
										Status: v1.ConditionTrue,
									},
								},
							},
						},
					},
				},
			},
			pod: buildPod().uid("a").indexFailureCount("3").status(v1.PodStatus{
				Phase: v1.PodFailed,
				Conditions: []v1.PodCondition{
					{
						Type: v1.DisruptionTarget,
						Status: v1.ConditionTrue,
					},
				},
			}).index("3").trackingFinalizer().Pod,
			wantNewIndexFailureCount: 3,
			wantNewIndexIgnoredFailureCount: 1,
		},
	}
	for name, tc := range cases {
		t.Run(name, func(t *testing.T) {
			defer featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.JobBackoffLimitPerIndex, true)()
			defer featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.JobPodFailurePolicy, tc.enableJobPodFailurePolicy)()
			gotNewIndexFailureCount, gotNewIndexIgnoredFailureCount := getNewIndexFailureCounts(logger, &tc.job, tc.pod)
			if diff := cmp.Diff(tc.wantNewIndexFailureCount, gotNewIndexFailureCount); diff != "" {
				t.Errorf("Unexpected new index failure count (-want,+got):\n%s", diff)
			}
			if diff := cmp.Diff(tc.wantNewIndexIgnoredFailureCount, gotNewIndexIgnoredFailureCount); diff != "" {
				t.Errorf("Unexpected new index ignored failure count (-want,+got):\n%s", diff)
			}
		})
	}
}

func TestIntervalsHaveIndex(t *testing.T) {
	cases := map[string]struct {
		intervals orderedIntervals
		index     int
		wantHas   bool
	}{
		"empty": {
			index: 4,
		},
		"before all": {
			index: 1,
			intervals: []interval{{2, 4}, {5, 7}},
		},
		"after all": {
			index: 9,
			intervals: []interval{{2, 4}, {6, 8}},
		},
		"in between": {
			index: 5,
			intervals: []interval{{2, 4}, {6, 8}},
		},
		"in first": {
			index: 2,
			intervals: []interval{{2, 4}, {6, 8}},
			wantHas: true,
		},
		"in second": {
			index: 8,
			intervals: []interval{{2, 4}, {6, 8}},
			wantHas: true,
		},
	}
	for name, tc := range cases {
		t.Run(name, func(t *testing.T) {
			has := tc.intervals.has(tc.index)
			if has != tc.wantHas {
				t.Errorf("intervals.has(%d) = %t, want %t", tc.index, has, tc.wantHas)
			}
		})
	}
}

func TestFirstPendingIndexes(t *testing.T) {
	cases := map[string]struct {
		cnt              int
		completions      int
		activePods       []indexPhase
		succeededIndexes []interval
		failedIndexes    *orderedIntervals
		want             []int
	}{
		"cnt greater than completions": {
			cnt: 5,
			completions: 3,
			want: []int{0, 1, 2},
		},
		"cnt less than completions": {
			cnt: 2,
			completions: 5,
			want: []int{0, 1},
		},
		"first pods active": {
			activePods: []indexPhase{
				{"0", v1.PodRunning},
				{"1", v1.PodPending},
			},
			cnt: 3,
			completions: 10,
			want: []int{2, 3, 4},
		},
		"last pods active or succeeded": {
			activePods: []indexPhase{
				{"6", v1.PodPending},
			},
			succeededIndexes: []interval{{4, 5}},
			cnt: 6,
			completions: 6,
			want: []int{0, 1, 2, 3},
		},
		"mixed": {
			activePods: []indexPhase{
				{"3", v1.PodPending},
				{"5", v1.PodRunning},
				{"8", v1.PodPending},
				{noIndex, v1.PodRunning},
				{"-3", v1.PodRunning},
			},
			succeededIndexes: []interval{{2, 4}, {9, 9}},
			cnt: 5,
			completions: 20,
			want: []int{0, 1, 6, 7, 10},
		},
		"with failed indexes": {
			activePods: []indexPhase{
				{"3", v1.PodPending},
				{"9", v1.PodPending},
			},
			succeededIndexes: []interval{{1, 1}, {5, 5}, {9, 9}},
			failedIndexes: &orderedIntervals{{2, 2}, {6, 7}},
			cnt: 5,
			completions: 20,
			want: []int{0, 4, 8, 10, 11},
		},
	}
	for name, tc := range cases {
		t.Run(name, func(t *testing.T) {
			jobCtx := &syncJobCtx{
				activePods: hollowPodsWithIndexPhase(tc.activePods),
				succeededIndexes: tc.succeededIndexes,
				failedIndexes: tc.failedIndexes,
				job: newJob(1, 1, 1, batch.IndexedCompletion),
			}
			got := firstPendingIndexes(jobCtx, tc.cnt, tc.completions)
			if diff := cmp.Diff(tc.want, got); diff != "" {
				t.Errorf("Wrong first pending indexes (-want,+got):\n%s", diff)
			}
		})
	}
}

func TestAppendDuplicatedIndexPodsForRemoval(t *testing.T) {
	cases := map[string]struct {
		pods        []indexPhase
		wantRm      []indexPhase
		wantLeft    []indexPhase
		completions int32
	}{
		"all unique": {
			pods: []indexPhase{
				{noIndex, v1.PodPending},
				{"2", v1.PodPending},
				{"5", v1.PodRunning},
				{"6", v1.PodRunning},
			},
			wantRm: []indexPhase{
				{noIndex, v1.PodPending},
				{"6", v1.PodRunning},
			},
			wantLeft: []indexPhase{
				{"2", v1.PodPending},
				{"5", v1.PodRunning},
			},
			completions: 6,
		},
		"all with index": {
			pods: []indexPhase{
				{"5", v1.PodPending},
				{"0", v1.PodRunning},
				{"3", v1.PodPending},
				{"0", v1.PodRunning},
				{"3", v1.PodRunning},
				{"0", v1.PodPending},
				{"6", v1.PodRunning},
				{"6", v1.PodPending},
			},
			wantRm: []indexPhase{
				{"0", v1.PodPending},
				{"0", v1.PodRunning},
				{"3", v1.PodPending},
				{"6", v1.PodRunning},
				{"6", v1.PodPending},
			},
			wantLeft: []indexPhase{
				{"0", v1.PodRunning},
				{"3", v1.PodRunning},
				{"5", v1.PodPending},
			},
			completions: 6,
		},
		"mixed": {
			pods: []indexPhase{
				{noIndex, v1.PodPending},
				{"invalid", v1.PodRunning},
				{"-2", v1.PodRunning},
				{"0", v1.PodPending},
				{"1", v1.PodPending},
				{"1", v1.PodPending},
				{"1", v1.PodRunning},
			},
			wantRm: []indexPhase{
				{noIndex, v1.PodPending},
				{"invalid", v1.PodRunning},
				{"-2", v1.PodRunning},
				{"1", v1.PodPending},
				{"1", v1.PodPending},
			},
			wantLeft: []indexPhase{
				{"0", v1.PodPending},
				{"1", v1.PodRunning},
			},
			completions: 6,
		},
	}
	for name, tc := range cases {
		t.Run(name, func(t *testing.T) {
			pods := hollowPodsWithIndexPhase(tc.pods)
			rm, left := appendDuplicatedIndexPodsForRemoval(nil, nil, pods, int(tc.completions))
			rmInt := toIndexPhases(rm)
			leftInt := toIndexPhases(left)
			if diff := cmp.Diff(tc.wantRm, rmInt); diff != "" {
				t.Errorf("Unexpected pods for removal (-want,+got):\n%s", diff)
			}
			if diff := cmp.Diff(tc.wantLeft, leftInt); diff != "" {
				t.Errorf("Unexpected pods to keep (-want,+got):\n%s", diff)
			}
		})
	}
}

func TestPodGenerateNameWithIndex(t *testing.T) {
	cases := map[string]struct {
		jobname             string
		index               int
		wantPodGenerateName string
	}{
		"short job name": {
			jobname: "indexed-job",
			index: 1,
			wantPodGenerateName: "indexed-job-1-",
		},
		"job name exceeds MaxGeneratedNameLength": {
			jobname: "hhhhhooooohhhhhooooohhhhhooooohhhhhooooohhhhhooooohhhhhooooohhhhhooooo",
			index: 1,
			wantPodGenerateName: "hhhhhooooohhhhhooooohhhhhooooohhhhhooooohhhhhooooohhhhh-1-",
		},
		"job name with index suffix exceeds MaxGeneratedNameLength": {
			jobname: "hhhhhooooohhhhhooooohhhhhooooohhhhhooooohhhhhooooohhhhhoo",
			index: 1,
			wantPodGenerateName: "hhhhhooooohhhhhooooohhhhhooooohhhhhooooohhhhhooooohhhhh-1-",
		},
	}
	for name, tc := range cases {
		t.Run(name, func(t *testing.T) {
			podGenerateName := podGenerateNameWithIndex(tc.jobname, tc.index)
			if diff := cmp.Equal(tc.wantPodGenerateName, podGenerateName); !diff {
				t.Errorf("Got pod generateName %s, want %s", podGenerateName, tc.wantPodGenerateName)
			}
		})
	}
}

func TestGetIndexFailureCount(t *testing.T) {
	logger, _ := ktesting.NewTestContext(t)
	cases := map[string]struct {
		pod        *v1.Pod
		wantResult int32
	}{
		"no annotation": {
			pod: &v1.Pod{},
			wantResult: 0,
		},
		"valid value": {
			pod: buildPod().indexFailureCount("2").Pod,
			wantResult: 2,
		},
		"valid maxint32 value": {
			pod: buildPod().indexFailureCount(strconv.Itoa(math.MaxInt32)).Pod,
			wantResult: math.MaxInt32,
		},
		"too large value": {
			pod: buildPod().indexFailureCount(strconv.Itoa(math.MaxInt32 + 1)).Pod,
			wantResult: 0,
		},
		"negative value": {
			pod: buildPod().indexFailureCount("-1").Pod,
			wantResult: 0,
		},
		"invalid int value": {
			pod: buildPod().indexFailureCount("xyz").Pod,
			wantResult: 0,
		},
	}
	for name, tc := range cases {
		t.Run(name, func(t *testing.T) {
			gotResult := getIndexFailureCount(logger, tc.pod)
			if diff := cmp.Equal(tc.wantResult, gotResult); !diff {
				t.Errorf("Unexpected result. want: %d, got: %d", tc.wantResult, gotResult)
			}
		})
	}
}

// hollowPodsWithIndexPhase builds minimal Pods carrying only a phase and,
// unless the index is noIndex, the completion index annotation.
func hollowPodsWithIndexPhase(descs []indexPhase) []*v1.Pod {
	pods := make([]*v1.Pod, 0, len(descs))
	for _, desc := range descs {
		p := &v1.Pod{
			Status: v1.PodStatus{
				Phase: desc.Phase,
			},
		}
		if desc.Index != noIndex {
			p.Annotations = map[string]string{
				batch.JobCompletionIndexAnnotation: desc.Index,
			}
		}
		pods = append(pods, p)
	}
	return pods
}

// indexPhase pairs a completion index annotation value with a pod phase,
// giving the tests a compact fixture notation.
type indexPhase struct {
	Index string
	Phase v1.PodPhase
}

// toIndexPhases converts pods back into indexPhase descriptors so results
// can be compared against the fixture notation.
func toIndexPhases(pods []*v1.Pod) []indexPhase {
	result := make([]indexPhase, len(pods))
	for i, p := range pods {
		index := noIndex
		if p.Annotations != nil {
			index = p.Annotations[batch.JobCompletionIndexAnnotation]
		}
		result[i] = indexPhase{index, p.Status.Phase}
	}
	return result
}