sigs.k8s.io/kueue@v0.6.2/pkg/controller/admissionchecks/multikueue/multikueuecluster_test.go (about) 1 /* 2 Copyright 2024 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package multikueue 18 19 import ( 20 "context" 21 "errors" 22 "testing" 23 "time" 24 25 "github.com/google/go-cmp/cmp" 26 "github.com/google/go-cmp/cmp/cmpopts" 27 batchv1 "k8s.io/api/batch/v1" 28 corev1 "k8s.io/api/core/v1" 29 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 30 "k8s.io/apimachinery/pkg/types" 31 "k8s.io/apimachinery/pkg/watch" 32 "sigs.k8s.io/controller-runtime/pkg/client" 33 "sigs.k8s.io/controller-runtime/pkg/client/interceptor" 34 "sigs.k8s.io/controller-runtime/pkg/reconcile" 35 36 kueuealpha "sigs.k8s.io/kueue/apis/kueue/v1alpha1" 37 kueue "sigs.k8s.io/kueue/apis/kueue/v1beta1" 38 "sigs.k8s.io/kueue/pkg/util/slices" 39 utiltesting "sigs.k8s.io/kueue/pkg/util/testing" 40 testingjob "sigs.k8s.io/kueue/pkg/util/testingjobs/job" 41 ) 42 43 var ( 44 errInvalidConfig = errors.New("invalid kubeconfig") 45 errCannotWatch = errors.New("client cannot watch") 46 ) 47 48 func fakeClientBuilder(kubeconfig []byte, options client.Options) (client.WithWatch, error) { 49 if string(kubeconfig) == "invalid" { 50 return nil, errInvalidConfig 51 } 52 b, _ := getClientBuilder() 53 b = b.WithInterceptorFuncs(interceptor.Funcs{ 54 Watch: func(ctx context.Context, client client.WithWatch, obj client.ObjectList, opts ...client.ListOption) (watch.Interface, error) { 55 if string(kubeconfig) == "nowatch" { 56 return nil, errCannotWatch 57 } 58 return client.Watch(ctx, obj, opts...) 59 }, 60 }) 61 return b.Build(), nil 62 } 63 64 func newTestClient(config string, watchCancel func()) *remoteClient { 65 b, _ := getClientBuilder() 66 localClient := b.Build() 67 ret := &remoteClient{ 68 kubeconfig: []byte(config), 69 localClient: localClient, 70 watchCancel: watchCancel, 71 72 builderOverride: fakeClientBuilder, 73 } 74 return ret 75 } 76 77 func setReconnectState(rc *remoteClient, a uint) *remoteClient { 78 rc.failedConnAttempts = a 79 rc.forceReconnect.Store(true) 80 return rc 81 } 82 83 func makeTestSecret(name string, kubeconfig string) corev1.Secret { 84 return corev1.Secret{ 85 ObjectMeta: metav1.ObjectMeta{ 86 Name: name, 87 Namespace: TestNamespace, 88 }, 89 Data: map[string][]byte{ 90 kueuealpha.MultiKueueConfigSecretKey: []byte(kubeconfig), 91 }, 92 } 93 } 94 95 func TestUpdateConfig(t *testing.T) { 96 cancelCalledCount := 0 97 cancelCalled := func() { cancelCalledCount++ } 98 99 cases := map[string]struct { 100 reconcileFor string 101 remoteClients map[string]*remoteClient 102 clusters []kueuealpha.MultiKueueCluster 103 secrets []corev1.Secret 104 105 wantRemoteClients map[string]*remoteClient 106 wantClusters []kueuealpha.MultiKueueCluster 107 wantRequeueAfter time.Duration 108 wantCancelCalled int 109 }{ 110 "new valid client is added": { 111 reconcileFor: "worker1", 112 clusters: []kueuealpha.MultiKueueCluster{ 113 *utiltesting.MakeMultiKueueCluster("worker1").KubeConfig(kueuealpha.SecretLocationType, "worker1").Obj(), 114 }, 115 secrets: []corev1.Secret{ 116 makeTestSecret("worker1", "worker1 kubeconfig"), 117 }, 118 wantClusters: []kueuealpha.MultiKueueCluster{ 119 *utiltesting.MakeMultiKueueCluster("worker1").KubeConfig(kueuealpha.SecretLocationType, "worker1").Active(metav1.ConditionTrue, "Active", "Connected").Obj(), 120 }, 121 wantRemoteClients: map[string]*remoteClient{ 122 "worker1": { 123 kubeconfig: []byte("worker1 kubeconfig"), 124 }, 125 }, 126 }, 127 "update client with valid secret config": { 128 reconcileFor: "worker1", 129 clusters: []kueuealpha.MultiKueueCluster{ 130 *utiltesting.MakeMultiKueueCluster("worker1").KubeConfig(kueuealpha.SecretLocationType, "worker1").Obj(), 131 }, 132 secrets: []corev1.Secret{ 133 makeTestSecret("worker1", "worker1 kubeconfig"), 134 }, 135 remoteClients: map[string]*remoteClient{ 136 "worker1": newTestClient("worker1 old kubeconfig", cancelCalled), 137 }, 138 wantClusters: []kueuealpha.MultiKueueCluster{ 139 *utiltesting.MakeMultiKueueCluster("worker1").KubeConfig(kueuealpha.SecretLocationType, "worker1").Active(metav1.ConditionTrue, "Active", "Connected").Obj(), 140 }, 141 wantRemoteClients: map[string]*remoteClient{ 142 "worker1": { 143 kubeconfig: []byte("worker1 kubeconfig"), 144 }, 145 }, 146 wantCancelCalled: 1, 147 }, 148 "update client with valid path config": { 149 reconcileFor: "worker1", 150 clusters: []kueuealpha.MultiKueueCluster{ 151 *utiltesting.MakeMultiKueueCluster("worker1").KubeConfig(kueuealpha.PathLocationType, "testdata/worker1KubeConfig").Obj(), 152 }, 153 remoteClients: map[string]*remoteClient{ 154 "worker1": newTestClient("worker1 old kubeconfig", cancelCalled), 155 }, 156 wantClusters: []kueuealpha.MultiKueueCluster{ 157 *utiltesting.MakeMultiKueueCluster("worker1").KubeConfig(kueuealpha.PathLocationType, "testdata/worker1KubeConfig").Active(metav1.ConditionTrue, "Active", "Connected").Obj(), 158 }, 159 wantRemoteClients: map[string]*remoteClient{ 160 "worker1": { 161 kubeconfig: []byte("worker1 kubeconfig"), 162 }, 163 }, 164 wantCancelCalled: 1, 165 }, 166 "update client with invalid secret config": { 167 reconcileFor: "worker1", 168 clusters: []kueuealpha.MultiKueueCluster{ 169 *utiltesting.MakeMultiKueueCluster("worker1").KubeConfig(kueuealpha.SecretLocationType, "worker1").Obj(), 170 }, 171 secrets: []corev1.Secret{ 172 makeTestSecret("worker1", "invalid"), 173 }, 174 remoteClients: map[string]*remoteClient{ 175 "worker1": newTestClient("worker1 old kubeconfig", cancelCalled), 176 }, 177 wantRemoteClients: map[string]*remoteClient{ 178 "worker1": newTestClient("invalid", nil), 179 }, 180 wantClusters: []kueuealpha.MultiKueueCluster{ 181 *utiltesting.MakeMultiKueueCluster("worker1").KubeConfig(kueuealpha.SecretLocationType, "worker1").Active(metav1.ConditionFalse, "ClientConnectionFailed", "invalid kubeconfig").Obj(), 182 }, 183 wantCancelCalled: 1, 184 }, 185 "update client with invalid path config": { 186 reconcileFor: "worker1", 187 clusters: []kueuealpha.MultiKueueCluster{ 188 *utiltesting.MakeMultiKueueCluster("worker1").KubeConfig(kueuealpha.PathLocationType, "").Obj(), 189 }, 190 remoteClients: map[string]*remoteClient{ 191 "worker1": newTestClient("worker1 old kubeconfig", cancelCalled), 192 }, 193 wantClusters: []kueuealpha.MultiKueueCluster{ 194 *utiltesting.MakeMultiKueueCluster("worker1").KubeConfig(kueuealpha.PathLocationType, "").Active(metav1.ConditionFalse, "BadConfig", "open : no such file or directory").Obj(), 195 }, 196 wantCancelCalled: 1, 197 }, 198 "missing cluster is removed": { 199 reconcileFor: "worker2", 200 clusters: []kueuealpha.MultiKueueCluster{ 201 *utiltesting.MakeMultiKueueCluster("worker1").KubeConfig(kueuealpha.SecretLocationType, "worker1").Obj(), 202 }, 203 remoteClients: map[string]*remoteClient{ 204 "worker1": newTestClient("worker1 kubeconfig", cancelCalled), 205 "worker2": newTestClient("worker2 kubeconfig", cancelCalled), 206 }, 207 wantClusters: []kueuealpha.MultiKueueCluster{ 208 *utiltesting.MakeMultiKueueCluster("worker1").KubeConfig(kueuealpha.SecretLocationType, "worker1").Obj(), 209 }, 210 wantRemoteClients: map[string]*remoteClient{ 211 "worker1": { 212 kubeconfig: []byte("worker1 kubeconfig"), 213 }, 214 }, 215 wantCancelCalled: 1, 216 }, 217 "update client config, nowatch": { 218 reconcileFor: "worker1", 219 clusters: []kueuealpha.MultiKueueCluster{ 220 *utiltesting.MakeMultiKueueCluster("worker1").KubeConfig(kueuealpha.SecretLocationType, "worker1").Obj(), 221 }, 222 secrets: []corev1.Secret{ 223 makeTestSecret("worker1", "nowatch"), 224 }, 225 remoteClients: map[string]*remoteClient{ 226 "worker1": newTestClient("worker1 old kubeconfig", cancelCalled), 227 }, 228 wantRemoteClients: map[string]*remoteClient{ 229 "worker1": setReconnectState(newTestClient("nowatch", nil), 1), 230 }, 231 wantClusters: []kueuealpha.MultiKueueCluster{ 232 *utiltesting.MakeMultiKueueCluster("worker1").KubeConfig(kueuealpha.SecretLocationType, "worker1").Active(metav1.ConditionFalse, "ClientConnectionFailed", "client cannot watch").Obj(), 233 }, 234 wantRequeueAfter: 5 * time.Second, 235 wantCancelCalled: 1, 236 }, 237 "update client config, nowatch 3rd try": { 238 reconcileFor: "worker1", 239 clusters: []kueuealpha.MultiKueueCluster{ 240 *utiltesting.MakeMultiKueueCluster("worker1").KubeConfig(kueuealpha.SecretLocationType, "worker1").Active(metav1.ConditionFalse, "ClientConnectionFailed", "client cannot watch").Obj(), 241 }, 242 secrets: []corev1.Secret{ 243 makeTestSecret("worker1", "nowatch"), 244 }, 245 remoteClients: map[string]*remoteClient{ 246 "worker1": setReconnectState(newTestClient("nowatch", cancelCalled), 2), 247 }, 248 wantRemoteClients: map[string]*remoteClient{ 249 "worker1": setReconnectState(newTestClient("nowatch", nil), 3), 250 }, 251 wantClusters: []kueuealpha.MultiKueueCluster{ 252 *utiltesting.MakeMultiKueueCluster("worker1").KubeConfig(kueuealpha.SecretLocationType, "worker1").Active(metav1.ConditionFalse, "ClientConnectionFailed", "client cannot watch").Obj(), 253 }, 254 wantRequeueAfter: 20 * time.Second, 255 wantCancelCalled: 1, 256 }, 257 "failed attempts are set to 0 on successful connection": { 258 reconcileFor: "worker1", 259 clusters: []kueuealpha.MultiKueueCluster{ 260 *utiltesting.MakeMultiKueueCluster("worker1"). 261 KubeConfig(kueuealpha.SecretLocationType, "worker1"). 262 Active(metav1.ConditionFalse, "ClientConnectionFailed", "client cannot watch"). 263 Obj(), 264 }, 265 secrets: []corev1.Secret{ 266 makeTestSecret("worker1", "good config"), 267 }, 268 remoteClients: map[string]*remoteClient{ 269 "worker1": setReconnectState(newTestClient("nowatch", cancelCalled), 5), 270 }, 271 wantRemoteClients: map[string]*remoteClient{ 272 "worker1": newTestClient("good config", nil), 273 }, 274 wantClusters: []kueuealpha.MultiKueueCluster{ 275 *utiltesting.MakeMultiKueueCluster("worker1"). 276 KubeConfig(kueuealpha.SecretLocationType, "worker1"). 277 Active(metav1.ConditionTrue, "Active", "Connected"). 278 Obj(), 279 }, 280 wantCancelCalled: 1, 281 }, 282 "failed attempts are set to 0 on config change": { 283 reconcileFor: "worker1", 284 clusters: []kueuealpha.MultiKueueCluster{ 285 *utiltesting.MakeMultiKueueCluster("worker1"). 286 KubeConfig(kueuealpha.SecretLocationType, "worker1"). 287 Active(metav1.ConditionFalse, "ClientConnectionFailed", "client cannot watch"). 288 Obj(), 289 }, 290 secrets: []corev1.Secret{ 291 makeTestSecret("worker1", "invalid"), 292 }, 293 remoteClients: map[string]*remoteClient{ 294 "worker1": setReconnectState(newTestClient("nowatch", cancelCalled), 5), 295 }, 296 wantRemoteClients: map[string]*remoteClient{ 297 "worker1": newTestClient("invalid", nil), 298 }, 299 wantClusters: []kueuealpha.MultiKueueCluster{ 300 *utiltesting.MakeMultiKueueCluster("worker1"). 301 KubeConfig(kueuealpha.SecretLocationType, "worker1"). 302 Active(metav1.ConditionFalse, "ClientConnectionFailed", "invalid kubeconfig"). 303 Obj(), 304 }, 305 wantCancelCalled: 1, 306 }, 307 } 308 309 for name, tc := range cases { 310 t.Run(name, func(t *testing.T) { 311 builder, ctx := getClientBuilder() 312 builder = builder.WithLists(&kueuealpha.MultiKueueClusterList{Items: tc.clusters}) 313 builder = builder.WithLists(&corev1.SecretList{Items: tc.secrets}) 314 builder = builder.WithStatusSubresource(slices.Map(tc.clusters, func(c *kueuealpha.MultiKueueCluster) client.Object { return c })...) 315 c := builder.Build() 316 317 reconciler := newClustersReconciler(c, TestNamespace, 0, defaultOrigin) 318 reconciler.rootContext = ctx 319 320 if len(tc.remoteClients) > 0 { 321 reconciler.remoteClients = tc.remoteClients 322 } 323 reconciler.builderOverride = fakeClientBuilder 324 325 cancelCalledCount = 0 326 res, gotErr := reconciler.Reconcile(ctx, reconcile.Request{NamespacedName: types.NamespacedName{Name: tc.reconcileFor}}) 327 if gotErr != nil { 328 t.Errorf("unexpected reconcile error: %s", gotErr) 329 } 330 331 if diff := cmp.Diff(tc.wantRequeueAfter, res.RequeueAfter); diff != "" { 332 t.Errorf("unexpected requeue after (-want/+got):\n%s", diff) 333 } 334 335 if tc.wantCancelCalled != cancelCalledCount { 336 t.Errorf("unexpected watch cancel call count want: %d, got: %d", tc.wantCancelCalled, cancelCalledCount) 337 } 338 339 lst := &kueuealpha.MultiKueueClusterList{} 340 gotErr = c.List(ctx, lst) 341 if gotErr != nil { 342 t.Errorf("unexpected list clusters error: %s", gotErr) 343 } 344 345 if diff := cmp.Diff(tc.wantClusters, lst.Items, cmpopts.EquateEmpty(), 346 cmpopts.IgnoreFields(metav1.ObjectMeta{}, "ResourceVersion"), 347 cmpopts.IgnoreFields(metav1.Condition{}, "LastTransitionTime")); diff != "" { 348 t.Errorf("unexpected clusters (-want/+got):\n%s", diff) 349 } 350 351 if diff := cmp.Diff(tc.wantRemoteClients, reconciler.remoteClients, cmpopts.EquateEmpty(), 352 cmp.Comparer(func(a, b *remoteClient) bool { 353 if a.failedConnAttempts != b.failedConnAttempts { 354 return false 355 } 356 return string(a.kubeconfig) == string(b.kubeconfig) 357 })); diff != "" { 358 t.Errorf("unexpected controllers (-want/+got):\n%s", diff) 359 } 360 }) 361 } 362 } 363 364 func TestRemoteClientGC(t *testing.T) { 365 baseJobBuilder := testingjob.MakeJob("job1", TestNamespace) 366 baseWlBuilder := utiltesting.MakeWorkload("wl1", TestNamespace).ControllerReference(batchv1.SchemeGroupVersion.WithKind("Job"), "job1", "test-uuid") 367 368 cases := map[string]struct { 369 managersWorkloads []kueue.Workload 370 workersWorkloads []kueue.Workload 371 managersJobs []batchv1.Job 372 workersJobs []batchv1.Job 373 374 wantWorkersWorkloads []kueue.Workload 375 wantWorkersJobs []batchv1.Job 376 }{ 377 "existing workers and jobs are not deleted": { 378 managersWorkloads: []kueue.Workload{ 379 *baseWlBuilder.Clone(). 380 Obj(), 381 }, 382 workersWorkloads: []kueue.Workload{ 383 *baseWlBuilder.Clone(). 384 Label(kueuealpha.MultiKueueOriginLabel, defaultOrigin). 385 Obj(), 386 }, 387 managersJobs: []batchv1.Job{ 388 *baseJobBuilder.Clone(). 389 Obj(), 390 }, 391 workersJobs: []batchv1.Job{ 392 *baseJobBuilder.Clone(). 393 Obj(), 394 }, 395 wantWorkersWorkloads: []kueue.Workload{ 396 *baseWlBuilder.Clone(). 397 Label(kueuealpha.MultiKueueOriginLabel, defaultOrigin). 398 Obj(), 399 }, 400 wantWorkersJobs: []batchv1.Job{ 401 *baseJobBuilder.Clone(). 402 Obj(), 403 }, 404 }, 405 "missing worker workloads are deleted": { 406 workersWorkloads: []kueue.Workload{ 407 *baseWlBuilder.Clone(). 408 Label(kueuealpha.MultiKueueOriginLabel, defaultOrigin). 409 Obj(), 410 }, 411 managersJobs: []batchv1.Job{ 412 *baseJobBuilder.Clone(). 413 Obj(), 414 }, 415 }, 416 "missing worker workloads are deleted (no job adapter)": { 417 workersWorkloads: []kueue.Workload{ 418 *baseWlBuilder.Clone(). 419 ControllerReference(batchv1.SchemeGroupVersion.WithKind("NptAJob"), "job1", "test-uuid"). 420 Label(kueuealpha.MultiKueueOriginLabel, defaultOrigin). 421 Obj(), 422 }, 423 }, 424 "missing worker workloads and their owner jobs are deleted": { 425 workersWorkloads: []kueue.Workload{ 426 *baseWlBuilder.Clone(). 427 Label(kueuealpha.MultiKueueOriginLabel, defaultOrigin). 428 Obj(), 429 }, 430 managersJobs: []batchv1.Job{ 431 *baseJobBuilder.Clone(). 432 Obj(), 433 }, 434 workersJobs: []batchv1.Job{ 435 *baseJobBuilder.Clone(). 436 Obj(), 437 }, 438 }, 439 "unrelated workers and jobs are not deleted": { 440 workersWorkloads: []kueue.Workload{ 441 *baseWlBuilder.Clone(). 442 Label(kueuealpha.MultiKueueOriginLabel, "other-gc-key"). 443 Obj(), 444 }, 445 workersJobs: []batchv1.Job{ 446 *baseJobBuilder.Clone(). 447 Obj(), 448 }, 449 wantWorkersWorkloads: []kueue.Workload{ 450 *baseWlBuilder.Clone(). 451 Label(kueuealpha.MultiKueueOriginLabel, "other-gc-key"). 452 Obj(), 453 }, 454 wantWorkersJobs: []batchv1.Job{ 455 *baseJobBuilder.Clone(). 456 Obj(), 457 }, 458 }, 459 } 460 461 objCheckOpts := []cmp.Option{ 462 cmpopts.IgnoreFields(metav1.ObjectMeta{}, "ResourceVersion"), 463 cmpopts.EquateEmpty(), 464 } 465 466 for name, tc := range cases { 467 t.Run(name, func(t *testing.T) { 468 manageBuilder, ctx := getClientBuilder() 469 manageBuilder = manageBuilder.WithLists(&kueue.WorkloadList{Items: tc.managersWorkloads}, &batchv1.JobList{Items: tc.managersJobs}) 470 managerClient := manageBuilder.Build() 471 472 worker1Builder, _ := getClientBuilder() 473 worker1Builder = worker1Builder.WithLists(&kueue.WorkloadList{Items: tc.workersWorkloads}, &batchv1.JobList{Items: tc.workersJobs}) 474 worker1Client := worker1Builder.Build() 475 476 w1remoteClient := newRemoteClient(managerClient, nil, nil, defaultOrigin, "") 477 w1remoteClient.client = worker1Client 478 479 w1remoteClient.runGC(ctx) 480 481 gotWorker1Wokloads := &kueue.WorkloadList{} 482 err := worker1Client.List(ctx, gotWorker1Wokloads) 483 if err != nil { 484 t.Error("unexpected list worker's workloads error") 485 } 486 487 if diff := cmp.Diff(tc.wantWorkersWorkloads, gotWorker1Wokloads.Items, objCheckOpts...); diff != "" { 488 t.Errorf("unexpected worker's workloads (-want/+got):\n%s", diff) 489 } 490 491 gotWorker1Job := &batchv1.JobList{} 492 err = worker1Client.List(ctx, gotWorker1Job) 493 if err != nil { 494 t.Error("unexpected list worker's jobs error") 495 } 496 497 if diff := cmp.Diff(tc.wantWorkersJobs, gotWorker1Job.Items, objCheckOpts...); diff != "" { 498 t.Errorf("unexpected worker's jobs (-want/+got):\n%s", diff) 499 } 500 }) 501 } 502 }