sigs.k8s.io/kueue@v0.6.2/pkg/controller/jobs/mpijob/mpijob_controller_test.go (about)

     1  /*
     2  Copyright 2023 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package mpijob
    18  
    19  import (
    20  	"testing"
    21  
    22  	"github.com/google/go-cmp/cmp"
    23  	"github.com/google/go-cmp/cmp/cmpopts"
    24  	common "github.com/kubeflow/common/pkg/apis/common/v1"
    25  	kubeflow "github.com/kubeflow/mpi-operator/pkg/apis/kubeflow/v2beta1"
    26  	corev1 "k8s.io/api/core/v1"
    27  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    28  	"k8s.io/client-go/tools/record"
    29  	"sigs.k8s.io/controller-runtime/pkg/client"
    30  	"sigs.k8s.io/controller-runtime/pkg/reconcile"
    31  
    32  	kueue "sigs.k8s.io/kueue/apis/kueue/v1beta1"
    33  	"sigs.k8s.io/kueue/pkg/constants"
    34  	"sigs.k8s.io/kueue/pkg/controller/jobframework"
    35  	utiltesting "sigs.k8s.io/kueue/pkg/util/testing"
    36  	testingmpijob "sigs.k8s.io/kueue/pkg/util/testingjobs/mpijob"
    37  )
    38  
    39  func TestCalcPriorityClassName(t *testing.T) {
    40  	testcases := map[string]struct {
    41  		job                   kubeflow.MPIJob
    42  		wantPriorityClassName string
    43  	}{
    44  		"none priority class name specified": {
    45  			job:                   kubeflow.MPIJob{},
    46  			wantPriorityClassName: "",
    47  		},
    48  		"priority specified at runPolicy and replicas; use priority in runPolicy": {
    49  			job: kubeflow.MPIJob{
    50  				Spec: kubeflow.MPIJobSpec{
    51  					RunPolicy: kubeflow.RunPolicy{
    52  						SchedulingPolicy: &kubeflow.SchedulingPolicy{
    53  							PriorityClass: "scheduling-priority",
    54  						},
    55  					},
    56  					MPIReplicaSpecs: map[kubeflow.MPIReplicaType]*common.ReplicaSpec{
    57  						kubeflow.MPIReplicaTypeLauncher: {
    58  							Template: corev1.PodTemplateSpec{
    59  								Spec: corev1.PodSpec{
    60  									PriorityClassName: "launcher-priority",
    61  								},
    62  							},
    63  						},
    64  						kubeflow.MPIReplicaTypeWorker: {
    65  							Template: corev1.PodTemplateSpec{
    66  								Spec: corev1.PodSpec{
    67  									PriorityClassName: "worker-priority",
    68  								},
    69  							},
    70  						},
    71  					},
    72  				},
    73  			},
    74  			wantPriorityClassName: "scheduling-priority",
    75  		},
    76  		"runPolicy present, but without priority; fallback to launcher": {
    77  			job: kubeflow.MPIJob{
    78  				Spec: kubeflow.MPIJobSpec{
    79  					RunPolicy: kubeflow.RunPolicy{
    80  						SchedulingPolicy: &kubeflow.SchedulingPolicy{},
    81  					},
    82  					MPIReplicaSpecs: map[kubeflow.MPIReplicaType]*common.ReplicaSpec{
    83  						kubeflow.MPIReplicaTypeLauncher: {
    84  							Template: corev1.PodTemplateSpec{
    85  								Spec: corev1.PodSpec{
    86  									PriorityClassName: "launcher-priority",
    87  								},
    88  							},
    89  						},
    90  					},
    91  				},
    92  			},
    93  			wantPriorityClassName: "launcher-priority",
    94  		},
    95  		"specified on launcher takes precedence over worker": {
    96  			job: kubeflow.MPIJob{
    97  				Spec: kubeflow.MPIJobSpec{
    98  					MPIReplicaSpecs: map[kubeflow.MPIReplicaType]*common.ReplicaSpec{
    99  						kubeflow.MPIReplicaTypeLauncher: {
   100  							Template: corev1.PodTemplateSpec{
   101  								Spec: corev1.PodSpec{
   102  									PriorityClassName: "launcher-priority",
   103  								},
   104  							},
   105  						},
   106  						kubeflow.MPIReplicaTypeWorker: {
   107  							Template: corev1.PodTemplateSpec{
   108  								Spec: corev1.PodSpec{
   109  									PriorityClassName: "worker-priority",
   110  								},
   111  							},
   112  						},
   113  					},
   114  				},
   115  			},
   116  			wantPriorityClassName: "launcher-priority",
   117  		},
   118  		"launcher present, but without priority; fallback to worker": {
   119  			job: kubeflow.MPIJob{
   120  				Spec: kubeflow.MPIJobSpec{
   121  					MPIReplicaSpecs: map[kubeflow.MPIReplicaType]*common.ReplicaSpec{
   122  						kubeflow.MPIReplicaTypeLauncher: {
   123  							Template: corev1.PodTemplateSpec{
   124  								Spec: corev1.PodSpec{},
   125  							},
   126  						},
   127  						kubeflow.MPIReplicaTypeWorker: {
   128  							Template: corev1.PodTemplateSpec{
   129  								Spec: corev1.PodSpec{
   130  									PriorityClassName: "worker-priority",
   131  								},
   132  							},
   133  						},
   134  					},
   135  				},
   136  			},
   137  			wantPriorityClassName: "worker-priority",
   138  		},
   139  		"specified on worker only": {
   140  			job: kubeflow.MPIJob{
   141  				Spec: kubeflow.MPIJobSpec{
   142  					MPIReplicaSpecs: map[kubeflow.MPIReplicaType]*common.ReplicaSpec{
   143  						kubeflow.MPIReplicaTypeLauncher: {},
   144  						kubeflow.MPIReplicaTypeWorker: {
   145  							Template: corev1.PodTemplateSpec{
   146  								Spec: corev1.PodSpec{
   147  									PriorityClassName: "worker-priority",
   148  								},
   149  							},
   150  						},
   151  					},
   152  				},
   153  			},
   154  			wantPriorityClassName: "worker-priority",
   155  		},
   156  		"worker present, but without priority; fallback to empty": {
   157  			job: kubeflow.MPIJob{
   158  				Spec: kubeflow.MPIJobSpec{
   159  					MPIReplicaSpecs: map[kubeflow.MPIReplicaType]*common.ReplicaSpec{
   160  						kubeflow.MPIReplicaTypeLauncher: {},
   161  						kubeflow.MPIReplicaTypeWorker: {
   162  							Template: corev1.PodTemplateSpec{
   163  								Spec: corev1.PodSpec{},
   164  							},
   165  						},
   166  					},
   167  				},
   168  			},
   169  			wantPriorityClassName: "",
   170  		},
   171  	}
   172  
   173  	for name, tc := range testcases {
   174  		t.Run(name, func(t *testing.T) {
   175  			mpiJob := (*MPIJob)(&tc.job)
   176  			gotPriorityClassName := mpiJob.PriorityClass()
   177  			if tc.wantPriorityClassName != gotPriorityClassName {
   178  				t.Errorf("Unexpected response (want: %v, got: %v)", tc.wantPriorityClassName, gotPriorityClassName)
   179  			}
   180  		})
   181  	}
   182  }
   183  
   184  var (
   185  	jobCmpOpts = []cmp.Option{
   186  		cmpopts.EquateEmpty(),
   187  		cmpopts.IgnoreFields(kubeflow.MPIJob{}, "TypeMeta", "ObjectMeta"),
   188  	}
   189  	workloadCmpOpts = []cmp.Option{
   190  		cmpopts.EquateEmpty(),
   191  		cmpopts.IgnoreFields(kueue.Workload{}, "TypeMeta", "ObjectMeta"),
   192  		cmpopts.IgnoreFields(kueue.WorkloadSpec{}, "Priority"),
   193  		cmpopts.IgnoreFields(metav1.Condition{}, "LastTransitionTime"),
   194  		cmpopts.IgnoreFields(kueue.PodSet{}, "Template"),
   195  	}
   196  )
   197  
   198  func TestReconciler(t *testing.T) {
   199  	baseWPCWrapper := utiltesting.MakeWorkloadPriorityClass("test-wpc").
   200  		PriorityValue(100)
   201  	basePCWrapper := utiltesting.MakePriorityClass("test-pc").
   202  		PriorityValue(200)
   203  	cases := map[string]struct {
   204  		reconcilerOptions []jobframework.Option
   205  		job               *kubeflow.MPIJob
   206  		priorityClasses   []client.Object
   207  		wantJob           *kubeflow.MPIJob
   208  		wantWorkloads     []kueue.Workload
   209  		wantErr           error
   210  	}{
   211  		"workload is created with podsets": {
   212  			reconcilerOptions: []jobframework.Option{
   213  				jobframework.WithManageJobsWithoutQueueName(true),
   214  			},
   215  			job:     testingmpijob.MakeMPIJob("mpijob", "ns").Parallelism(2).Obj(),
   216  			wantJob: testingmpijob.MakeMPIJob("mpijob", "ns").Parallelism(2).Obj(),
   217  			wantWorkloads: []kueue.Workload{
   218  				*utiltesting.MakeWorkload("mpijob", "ns").
   219  					PodSets(
   220  						*utiltesting.MakePodSet("launcher", 1).Obj(),
   221  						*utiltesting.MakePodSet("worker", 2).Obj(),
   222  					).
   223  					Obj(),
   224  			},
   225  		},
   226  		"workload is created with podsets and workloadPriorityClass": {
   227  			reconcilerOptions: []jobframework.Option{
   228  				jobframework.WithManageJobsWithoutQueueName(true),
   229  			},
   230  			job: testingmpijob.MakeMPIJob("mpijob", "ns").Parallelism(2).WorkloadPriorityClass("test-wpc").Obj(),
   231  			priorityClasses: []client.Object{
   232  				baseWPCWrapper.Obj(),
   233  			},
   234  			wantJob: testingmpijob.MakeMPIJob("mpijob", "ns").Parallelism(2).WorkloadPriorityClass("test-wpc").Obj(),
   235  			wantWorkloads: []kueue.Workload{
   236  				*utiltesting.MakeWorkload("mpijob", "ns").
   237  					PodSets(
   238  						*utiltesting.MakePodSet("launcher", 1).Obj(),
   239  						*utiltesting.MakePodSet("worker", 2).Obj(),
   240  					).PriorityClass("test-wpc").Priority(100).
   241  					PriorityClassSource(constants.WorkloadPriorityClassSource).
   242  					Obj(),
   243  			},
   244  		},
   245  		"workload is created with podsets and PriorityClass": {
   246  			reconcilerOptions: []jobframework.Option{
   247  				jobframework.WithManageJobsWithoutQueueName(true),
   248  			},
   249  			job: testingmpijob.MakeMPIJob("mpijob", "ns").Parallelism(2).PriorityClass("test-pc").Obj(),
   250  			priorityClasses: []client.Object{
   251  				basePCWrapper.Obj(),
   252  			},
   253  			wantJob: testingmpijob.MakeMPIJob("mpijob", "ns").Parallelism(2).PriorityClass("test-pc").Obj(),
   254  			wantWorkloads: []kueue.Workload{
   255  				*utiltesting.MakeWorkload("mpijob", "ns").
   256  					PodSets(
   257  						*utiltesting.MakePodSet("launcher", 1).Obj(),
   258  						*utiltesting.MakePodSet("worker", 2).Obj(),
   259  					).PriorityClass("test-pc").Priority(200).
   260  					PriorityClassSource(constants.PodPriorityClassSource).
   261  					Obj(),
   262  			},
   263  		},
   264  		"workload is created with podsets, workloadPriorityClass and PriorityClass": {
   265  			reconcilerOptions: []jobframework.Option{
   266  				jobframework.WithManageJobsWithoutQueueName(true),
   267  			},
   268  			job: testingmpijob.MakeMPIJob("mpijob", "ns").Parallelism(2).
   269  				WorkloadPriorityClass("test-wpc").PriorityClass("test-pc").Obj(),
   270  			priorityClasses: []client.Object{
   271  				basePCWrapper.Obj(), baseWPCWrapper.Obj(),
   272  			},
   273  			wantJob: testingmpijob.MakeMPIJob("mpijob", "ns").Parallelism(2).
   274  				WorkloadPriorityClass("test-wpc").PriorityClass("test-pc").Obj(),
   275  			wantWorkloads: []kueue.Workload{
   276  				*utiltesting.MakeWorkload("mpijob", "ns").
   277  					PodSets(
   278  						*utiltesting.MakePodSet("launcher", 1).Obj(),
   279  						*utiltesting.MakePodSet("worker", 2).Obj(),
   280  					).PriorityClass("test-wpc").Priority(100).
   281  					PriorityClassSource(constants.WorkloadPriorityClassSource).
   282  					Obj(),
   283  			},
   284  		},
   285  	}
   286  
   287  	for name, tc := range cases {
   288  		t.Run(name, func(t *testing.T) {
   289  			ctx, _ := utiltesting.ContextWithLog(t)
   290  			clientBuilder := utiltesting.NewClientBuilder(kubeflow.AddToScheme)
   291  			if err := SetupIndexes(ctx, utiltesting.AsIndexer(clientBuilder)); err != nil {
   292  				t.Fatalf("Could not setup indexes: %v", err)
   293  			}
   294  			objs := append(tc.priorityClasses, tc.job)
   295  			kClient := clientBuilder.WithObjects(objs...).Build()
   296  			recorder := record.NewBroadcaster().NewRecorder(kClient.Scheme(), corev1.EventSource{Component: "test"})
   297  			reconciler := NewReconciler(kClient, recorder, tc.reconcilerOptions...)
   298  
   299  			jobKey := client.ObjectKeyFromObject(tc.job)
   300  			_, err := reconciler.Reconcile(ctx, reconcile.Request{
   301  				NamespacedName: jobKey,
   302  			})
   303  			if diff := cmp.Diff(tc.wantErr, err, cmpopts.EquateErrors()); diff != "" {
   304  				t.Errorf("Reconcile returned error (-want,+got):\n%s", diff)
   305  			}
   306  
   307  			var gotMpiJob kubeflow.MPIJob
   308  			if err := kClient.Get(ctx, jobKey, &gotMpiJob); err != nil {
   309  				t.Fatalf("Could not get Job after reconcile: %v", err)
   310  			}
   311  			if diff := cmp.Diff(tc.wantJob, &gotMpiJob, jobCmpOpts...); diff != "" {
   312  				t.Errorf("Job after reconcile (-want,+got):\n%s", diff)
   313  			}
   314  			var gotWorkloads kueue.WorkloadList
   315  			if err := kClient.List(ctx, &gotWorkloads); err != nil {
   316  				t.Fatalf("Could not get Workloads after reconcile: %v", err)
   317  			}
   318  			if diff := cmp.Diff(tc.wantWorkloads, gotWorkloads.Items, workloadCmpOpts...); diff != "" {
   319  				t.Errorf("Workloads after reconcile (-want,+got):\n%s", diff)
   320  			}
   321  		})
   322  	}
   323  
   324  }