volcano.sh/volcano@v1.9.0/test/e2e/schedulingaction/reclaim.go (about) 1 /* 2 Copyright 2021 The Volcano Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package schedulingaction 18 19 import ( 20 "context" 21 "fmt" 22 "time" 23 24 . "github.com/onsi/ginkgo/v2" 25 . "github.com/onsi/gomega" 26 "gopkg.in/yaml.v2" 27 v1 "k8s.io/api/core/v1" 28 "k8s.io/apimachinery/pkg/api/resource" 29 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 30 "k8s.io/apimachinery/pkg/labels" 31 32 batchv1alpha1 "volcano.sh/apis/pkg/apis/batch/v1alpha1" 33 schedulingv1beta1 "volcano.sh/apis/pkg/apis/scheduling/v1beta1" 34 35 e2eutil "volcano.sh/volcano/test/e2e/util" 36 ) 37 38 var _ = Describe("Reclaim E2E Test", func() { 39 40 CreateReclaimJob := func(ctx *e2eutil.TestContext, req v1.ResourceList, name string, queue string, pri string, nodeName string, waitTaskReady bool) (*batchv1alpha1.Job, error) { 41 job := &e2eutil.JobSpec{ 42 Tasks: []e2eutil.TaskSpec{ 43 { 44 Img: e2eutil.DefaultNginxImage, 45 Req: req, 46 Min: 1, 47 Rep: 1, 48 Labels: map[string]string{schedulingv1beta1.PodPreemptable: "true"}, 49 }, 50 }, 51 Name: name, 52 Queue: queue, 53 NodeName: nodeName, 54 } 55 if pri != "" { 56 job.Pri = pri 57 } 58 batchJob, err := e2eutil.CreateJobInner(ctx, job) 59 if err != nil { 60 return nil, err 61 } 62 if waitTaskReady { 63 err = e2eutil.WaitTasksReady(ctx, batchJob, 1) 64 } 65 return batchJob, err 66 } 67 68 WaitQueueStatus := func(ctx *e2eutil.TestContext, status string, num int32, queue string) error { 69 err := e2eutil.WaitQueueStatus(func() (bool, error) { 70 queue, err := ctx.Vcclient.SchedulingV1beta1().Queues().Get(context.TODO(), queue, metav1.GetOptions{}) 71 Expect(err).NotTo(HaveOccurred(), "Get queue %s failed", queue) 72 switch status { 73 case "Running": 74 return queue.Status.Running == num, nil 75 case "Open": 76 return queue.Status.State == schedulingv1beta1.QueueStateOpen, nil 77 case "Pending": 78 return queue.Status.Pending == num, nil 79 case "Inqueue": 80 return queue.Status.Inqueue == num, nil 81 default: 82 return false, nil 83 } 84 }) 85 return err 86 } 87 88 It("Reclaim Case 1: New queue with job created no reclaim when resource is enough", func() { 89 q1 := e2eutil.DefaultQueue 90 q2 := "reclaim-q2" 91 ctx := e2eutil.InitTestContext(e2eutil.Options{ 92 Queues: []string{q2}, 93 NodesNumLimit: 4, 94 NodesResourceLimit: e2eutil.CPU1Mem1, 95 }) 96 97 defer e2eutil.CleanupTestContext(ctx) 98 99 By("Setup initial jobs") 100 101 _, err := CreateReclaimJob(ctx, e2eutil.CPU1Mem1, "reclaim-j1", q1, "", "", true) 102 Expect(err).NotTo(HaveOccurred(), "Wait for job1 failed") 103 104 _, err = CreateReclaimJob(ctx, e2eutil.CPU1Mem1, "reclaim-j2", q2, "", "", true) 105 Expect(err).NotTo(HaveOccurred(), "Wait for job2 failed") 106 107 By("Create new coming queue and job") 108 q3 := "reclaim-q3" 109 ctx.Queues = append(ctx.Queues, q3) 110 e2eutil.CreateQueues(ctx) 111 112 err = WaitQueueStatus(ctx, "Open", 1, q1) 113 Expect(err).NotTo(HaveOccurred(), "Error waiting for queue open") 114 115 _, err = CreateReclaimJob(ctx, e2eutil.CPU1Mem1, "reclaim-j3", q3, "", "", true) 116 Expect(err).NotTo(HaveOccurred(), "Wait for job3 failed") 117 118 By("Make sure all job running") 119 120 err = WaitQueueStatus(ctx, "Running", 1, q1) 121 Expect(err).NotTo(HaveOccurred(), "Error waiting for queue running") 122 123 err = WaitQueueStatus(ctx, "Running", 1, q2) 124 Expect(err).NotTo(HaveOccurred(), "Error waiting for queue running") 125 126 err = WaitQueueStatus(ctx, "Running", 1, q3) 127 Expect(err).NotTo(HaveOccurred(), "Error waiting for queue running") 128 129 }) 130 131 It("Reclaim Case 3: New queue with job created no reclaim when job.PodGroup.Status.Phase pending", func() { 132 q1 := e2eutil.DefaultQueue 133 q2 := "reclaim-q2" 134 j1 := "reclaim-j1" 135 j2 := "reclaim-j2" 136 j3 := "reclaim-j3" 137 138 ctx := e2eutil.InitTestContext(e2eutil.Options{ 139 Queues: []string{q2}, 140 NodesNumLimit: 3, 141 NodesResourceLimit: e2eutil.CPU1Mem1, 142 PriorityClasses: map[string]int32{ 143 "low-priority": 10, 144 "high-priority": 10000, 145 }, 146 }) 147 148 defer e2eutil.CleanupTestContext(ctx) 149 150 By("Setup initial jobs") 151 152 _, err := CreateReclaimJob(ctx, e2eutil.CPU1Mem1, j1, q1, "", "", true) 153 Expect(err).NotTo(HaveOccurred(), "Wait for job1 failed") 154 155 _, err = CreateReclaimJob(ctx, e2eutil.CPU1Mem1, j2, q2, "", "", true) 156 Expect(err).NotTo(HaveOccurred(), "Wait for job2 failed") 157 158 By("Create new coming queue and job") 159 q3 := "reclaim-q3" 160 ctx.Queues = append(ctx.Queues, q3) 161 e2eutil.CreateQueues(ctx) 162 163 err = WaitQueueStatus(ctx, "Open", 1, q1) 164 Expect(err).NotTo(HaveOccurred(), "Error waiting for queue open") 165 166 _, err = CreateReclaimJob(ctx, e2eutil.CPU1Mem1, j3, q3, "", "", true) 167 Expect(err).NotTo(HaveOccurred(), "Wait for job3 failed") 168 169 // delete pod of job3 to make sure reclaim-j3 podgroup is pending 170 listOptions := metav1.ListOptions{ 171 LabelSelector: labels.Set(map[string]string{batchv1alpha1.JobNameKey: j3}).String(), 172 } 173 174 job3pods, err := ctx.Kubeclient.CoreV1().Pods(ctx.Namespace).List(context.TODO(), listOptions) 175 Expect(err).NotTo(HaveOccurred(), "Get %s pod failed", j3) 176 177 By("Make sure q1 q2 with job running in it.") 178 err = WaitQueueStatus(ctx, "Running", 1, q1) 179 Expect(err).NotTo(HaveOccurred(), "Error waiting for queue running") 180 181 err = WaitQueueStatus(ctx, "Running", 1, q2) 182 Expect(err).NotTo(HaveOccurred(), "Error waiting for queue running") 183 184 for _, pod := range job3pods.Items { 185 err = ctx.Kubeclient.CoreV1().Pods(pod.Namespace).Delete(context.TODO(), pod.Name, metav1.DeleteOptions{}) 186 Expect(err).NotTo(HaveOccurred(), "Failed to delete pod %s", pod.Name) 187 } 188 189 By("Q3 pending when we delete it.") 190 err = WaitQueueStatus(ctx, "Pending", 1, q3) 191 Expect(err).NotTo(HaveOccurred(), "Error waiting for queue pending") 192 }) 193 194 It("Reclaim Case 4: New queue with job created no reclaim when new queue is not created", func() { 195 q1 := e2eutil.DefaultQueue 196 q2 := "reclaim-q2" 197 ctx := e2eutil.InitTestContext(e2eutil.Options{ 198 Queues: []string{q2}, 199 NodesNumLimit: 3, 200 NodesResourceLimit: e2eutil.CPU1Mem1, 201 PriorityClasses: map[string]int32{ 202 "low-priority": 10, 203 "high-priority": 10000, 204 }, 205 }) 206 207 defer e2eutil.CleanupTestContext(ctx) 208 209 By("Setup initial jobs") 210 211 _, err := CreateReclaimJob(ctx, e2eutil.CPU1Mem1, "reclaim-j1", q1, "", "", true) 212 Expect(err).NotTo(HaveOccurred(), "Wait for job1 failed") 213 214 _, err = CreateReclaimJob(ctx, e2eutil.CPU1Mem1, "reclaim-j2", q2, "", "", true) 215 Expect(err).NotTo(HaveOccurred(), "Wait for job2 failed") 216 217 By("Create new coming job") 218 q3 := "reclaim-q3" 219 220 _, err = CreateReclaimJob(ctx, e2eutil.CPU1Mem1, "reclaim-j3", q3, "", "", true) 221 Expect(err).Should(HaveOccurred(), "job3 create failed when queue3 is not created") 222 223 By("Make sure all job running") 224 225 err = WaitQueueStatus(ctx, "Running", 1, q1) 226 Expect(err).NotTo(HaveOccurred(), "Error waiting for queue running") 227 228 err = WaitQueueStatus(ctx, "Running", 1, q2) 229 Expect(err).NotTo(HaveOccurred(), "Error waiting for queue running") 230 }) 231 232 // As we agreed, this is not intended behavior, actually, it is a bug. 233 It("Reclaim Case 5: New queue with job created no reclaim when job or task is low-priority", func() { 234 q1 := e2eutil.DefaultQueue 235 q2 := "reclaim-q2" 236 ctx := e2eutil.InitTestContext(e2eutil.Options{ 237 Queues: []string{q2}, 238 NodesNumLimit: 3, 239 NodesResourceLimit: e2eutil.CPU1Mem1, 240 PriorityClasses: map[string]int32{ 241 "low-priority": 10, 242 "high-priority": 10000, 243 }, 244 }) 245 246 defer e2eutil.CleanupTestContext(ctx) 247 248 By("Setup initial jobs") 249 250 _, err := CreateReclaimJob(ctx, e2eutil.CPU1Mem1, "reclaim-j1", q1, "high-priority", "", true) 251 Expect(err).NotTo(HaveOccurred(), "Wait for job1 failed") 252 253 _, err = CreateReclaimJob(ctx, e2eutil.CPU1Mem1, "reclaim-j2", q2, "high-priority", "", true) 254 Expect(err).NotTo(HaveOccurred(), "Wait for job2 failed") 255 256 By("Create new coming queue and job") 257 q3 := "reclaim-q3" 258 259 err = WaitQueueStatus(ctx, "Open", 1, q1) 260 Expect(err).NotTo(HaveOccurred(), "Error waiting for queue open") 261 262 _, err = CreateReclaimJob(ctx, e2eutil.CPU1Mem1, "reclaim-j3", q3, "low-priority", "", true) 263 Expect(err).Should(HaveOccurred(), "job3 create failed when queue3 is not created") 264 265 By("Make sure all job running") 266 267 err = WaitQueueStatus(ctx, "Running", 1, q1) 268 Expect(err).NotTo(HaveOccurred(), "Error waiting for queue running") 269 270 err = WaitQueueStatus(ctx, "Running", 1, q2) 271 Expect(err).NotTo(HaveOccurred(), "Error waiting for queue running") 272 }) 273 274 It("Reclaim Case 6: New queue with job created no reclaim when overused", func() { 275 q1 := e2eutil.DefaultQueue 276 q2 := "reclaim-q2" 277 q3 := "reclaim-q3" 278 ctx := e2eutil.InitTestContext(e2eutil.Options{ 279 Queues: []string{q2, q3}, 280 NodesNumLimit: 3, 281 NodesResourceLimit: e2eutil.CPU1Mem1, 282 PriorityClasses: map[string]int32{ 283 "low-priority": 10, 284 "high-priority": 10000, 285 }, 286 }) 287 288 defer e2eutil.CleanupTestContext(ctx) 289 290 By("Setup initial jobs") 291 292 _, err := CreateReclaimJob(ctx, e2eutil.CPU1Mem1, "reclaim-j1", q1, "", "", true) 293 Expect(err).NotTo(HaveOccurred(), "Wait for job1 failed") 294 295 _, err = CreateReclaimJob(ctx, e2eutil.CPU1Mem1, "reclaim-j2", q2, "", "", true) 296 Expect(err).NotTo(HaveOccurred(), "Wait for job2 failed") 297 298 _, err = CreateReclaimJob(ctx, e2eutil.CPU1Mem1, "reclaim-j3", q3, "", "", true) 299 Expect(err).NotTo(HaveOccurred(), "Wait for job3 failed") 300 301 By("Create job4 to testing overused cases.") 302 _, err = CreateReclaimJob(ctx, e2eutil.CPU1Mem1, "reclaim-j4", q3, "", "", false) 303 Expect(err).NotTo(HaveOccurred(), "Wait for job4 failed") 304 305 time.Sleep(10 * time.Second) 306 By("Make sure all job running") 307 308 err = WaitQueueStatus(ctx, "Running", 1, q1) 309 Expect(err).NotTo(HaveOccurred(), "Error waiting for queue running") 310 311 err = WaitQueueStatus(ctx, "Running", 1, q2) 312 Expect(err).NotTo(HaveOccurred(), "Error waiting for queue running") 313 314 err = WaitQueueStatus(ctx, "Running", 1, q3) 315 Expect(err).NotTo(HaveOccurred(), "Error waiting for queue running") 316 317 err = WaitQueueStatus(ctx, "Inqueue", 1, q3) 318 Expect(err).NotTo(HaveOccurred(), "Error waiting for queue Inqueue") 319 }) 320 321 It("Reclaim Case 7: New queue with job created no reclaim when job not satisfied with predicates", func() { 322 q1 := e2eutil.DefaultQueue 323 q2 := "reclaim-q2" 324 ctx := e2eutil.InitTestContext(e2eutil.Options{ 325 Queues: []string{q2}, 326 NodesNumLimit: 3, 327 NodesResourceLimit: e2eutil.CPU1Mem1, 328 }) 329 330 defer e2eutil.CleanupTestContext(ctx) 331 332 By("Setup initial jobs") 333 334 _, err := CreateReclaimJob(ctx, e2eutil.CPU1Mem1, "reclaim-j1", q1, "", "", true) 335 Expect(err).NotTo(HaveOccurred(), "Wait for job1 failed") 336 337 _, err = CreateReclaimJob(ctx, e2eutil.CPU1Mem1, "reclaim-j2", q2, "", "", true) 338 Expect(err).NotTo(HaveOccurred(), "Wait for job2 failed") 339 340 By("Create new coming queue and job") 341 q3 := "reclaim-q3" 342 ctx.Queues = append(ctx.Queues, q3) 343 e2eutil.CreateQueues(ctx) 344 345 err = WaitQueueStatus(ctx, "Open", 1, q1) 346 Expect(err).NotTo(HaveOccurred(), "Error waiting for queue open") 347 348 _, err = CreateReclaimJob(ctx, e2eutil.CPU1Mem1, "reclaim-j3", q3, "", "fake-node", false) 349 Expect(err).NotTo(HaveOccurred(), "Wait for job3 failed") 350 351 time.Sleep(10 * time.Second) 352 By("Make sure all job running") 353 354 err = WaitQueueStatus(ctx, "Running", 1, q1) 355 Expect(err).NotTo(HaveOccurred(), "Error waiting for queue running") 356 357 err = WaitQueueStatus(ctx, "Running", 1, q2) 358 Expect(err).NotTo(HaveOccurred(), "Error waiting for queue running") 359 360 // TODO: it is a bug : the job status is pending but podgroup status is running 361 err = WaitQueueStatus(ctx, "Running", 1, q3) 362 Expect(err).NotTo(HaveOccurred(), "Error waiting for queue Running") 363 364 }) 365 366 It("Reclaim Case 8: New queue with job created no reclaim when task resources less than reclaimable resource", func() { 367 q1 := e2eutil.DefaultQueue 368 q2 := "reclaim-q2" 369 ctx := e2eutil.InitTestContext(e2eutil.Options{ 370 Queues: []string{q2}, 371 NodesNumLimit: 3, 372 NodesResourceLimit: e2eutil.CPU1Mem1, 373 PriorityClasses: map[string]int32{ 374 "low-priority": 10, 375 "high-priority": 10000, 376 }, 377 }) 378 379 defer e2eutil.CleanupTestContext(ctx) 380 381 By("Setup initial jobs") 382 383 _, err := CreateReclaimJob(ctx, e2eutil.CPU1Mem1, "reclaim-j1", q1, "", "", true) 384 Expect(err).NotTo(HaveOccurred(), "Wait for job1 failed") 385 386 _, err = CreateReclaimJob(ctx, e2eutil.CPU1Mem1, "reclaim-j2", q2, "", "", true) 387 Expect(err).NotTo(HaveOccurred(), "Wait for job2 failed") 388 389 By("Create new coming queue and job") 390 q3 := "reclaim-q3" 391 ctx.Queues = append(ctx.Queues, q3) 392 e2eutil.CreateQueues(ctx) 393 394 err = WaitQueueStatus(ctx, "Open", 1, q1) 395 Expect(err).NotTo(HaveOccurred(), "Error waiting for queue open") 396 397 job := &e2eutil.JobSpec{ 398 Tasks: []e2eutil.TaskSpec{ 399 { 400 Img: e2eutil.DefaultNginxImage, 401 Req: e2eutil.CPU4Mem4, 402 Min: 1, 403 Rep: 1, 404 }, 405 }, 406 Name: "reclaim-j4", 407 Queue: q3, 408 } 409 e2eutil.CreateJob(ctx, job) 410 411 time.Sleep(10 * time.Second) 412 By("Make sure all job running") 413 414 err = WaitQueueStatus(ctx, "Running", 1, q1) 415 Expect(err).NotTo(HaveOccurred(), "Error waiting for queue running") 416 417 err = WaitQueueStatus(ctx, "Running", 1, q2) 418 Expect(err).NotTo(HaveOccurred(), "Error waiting for queue running") 419 420 err = WaitQueueStatus(ctx, "Inqueue", 1, q3) 421 Expect(err).NotTo(HaveOccurred(), "Error waiting for queue Inqueue") 422 }) 423 424 It("Reclaim Case 9: New queue with job created, all queues.spec.reclaimable is false, no reclaim", func() { 425 q1 := e2eutil.DefaultQueue 426 q2 := "reclaim-q2" 427 ctx := e2eutil.InitTestContext(e2eutil.Options{ 428 Queues: []string{q2}, 429 NodesNumLimit: 3, 430 NodesResourceLimit: e2eutil.CPU1Mem1, 431 }) 432 433 defer e2eutil.CleanupTestContext(ctx) 434 435 By("Setup initial jobs") 436 437 _, err := CreateReclaimJob(ctx, e2eutil.CPU1Mem1, "reclaim-j1", q1, "", "", true) 438 Expect(err).NotTo(HaveOccurred(), "Wait for job1 failed") 439 440 _, err = CreateReclaimJob(ctx, e2eutil.CPU1Mem1, "reclaim-j2", q2, "", "", true) 441 Expect(err).NotTo(HaveOccurred(), "Wait for job2 failed") 442 443 By("Create new coming queue and job") 444 q3 := "reclaim-q3" 445 ctx.Queues = append(ctx.Queues, q3) 446 e2eutil.CreateQueues(ctx) 447 448 e2eutil.SetQueueReclaimable(ctx, []string{q1, q2}, false) 449 defer e2eutil.SetQueueReclaimable(ctx, []string{q1}, true) 450 451 err = WaitQueueStatus(ctx, "Open", 1, q1) 452 Expect(err).NotTo(HaveOccurred(), "Error waiting for queue open") 453 454 _, err = CreateReclaimJob(ctx, e2eutil.CPU1Mem1, "reclaim-j3", q3, "", "", true) 455 Expect(err).NotTo(HaveOccurred(), "Wait for job3 failed") 456 457 By("Make sure all job running") 458 459 err = WaitQueueStatus(ctx, "Running", 1, q1) 460 Expect(err).NotTo(HaveOccurred(), "Error waiting for queue running") 461 462 err = WaitQueueStatus(ctx, "Running", 1, q2) 463 Expect(err).NotTo(HaveOccurred(), "Error waiting for queue running") 464 465 err = WaitQueueStatus(ctx, "Running", 1, q3) 466 Expect(err).NotTo(HaveOccurred(), "Error waiting for queue running") 467 468 }) 469 470 // Reclaim rely on priority is a bug here. 471 It("Reclaim Case 10: Multi reclaimed queue", func() { 472 q1 := e2eutil.DefaultQueue 473 q2 := "reclaim-q2" 474 q3 := "reclaim-q3" 475 q4 := "reclaim-q4" 476 ctx := e2eutil.InitTestContext(e2eutil.Options{ 477 Queues: []string{q2, q3, q4}, 478 NodesNumLimit: 4, 479 NodesResourceLimit: e2eutil.CPU1Mem1, 480 PriorityClasses: map[string]int32{ 481 "low-priority": 10, 482 "high-priority": 10000, 483 }, 484 }) 485 486 defer e2eutil.CleanupTestContext(ctx) 487 488 By("Setup initial jobs") 489 490 spec := &e2eutil.JobSpec{ 491 Tasks: []e2eutil.TaskSpec{ 492 { 493 Img: e2eutil.DefaultNginxImage, 494 Req: e2eutil.CPU1Mem1, 495 Min: 1, 496 Rep: 2, 497 Labels: map[string]string{schedulingv1beta1.PodPreemptable: "true"}, 498 }, 499 }, 500 } 501 502 spec.Name = "reclaim-j1" 503 spec.Queue = q1 504 spec.Pri = "low-priority" 505 job1 := e2eutil.CreateJob(ctx, spec) 506 err := e2eutil.WaitJobReady(ctx, job1) 507 Expect(err).NotTo(HaveOccurred()) 508 509 spec.Name = "reclaim-j2" 510 spec.Queue = q2 511 spec.Pri = "low-priority" 512 job2 := e2eutil.CreateJob(ctx, spec) 513 err = e2eutil.WaitJobReady(ctx, job2) 514 Expect(err).NotTo(HaveOccurred()) 515 516 err = WaitQueueStatus(ctx, "Running", 1, q1) 517 Expect(err).NotTo(HaveOccurred(), "Error waiting for queue1 running") 518 519 err = WaitQueueStatus(ctx, "Running", 1, q2) 520 Expect(err).NotTo(HaveOccurred(), "Error waiting for queue2 running") 521 522 By("Create coming jobs") 523 524 _, err = CreateReclaimJob(ctx, e2eutil.CPU1Mem1, "reclaim-j3", q3, "high-priority", "", true) 525 Expect(err).NotTo(HaveOccurred(), "Wait for job3 failed") 526 527 _, err = CreateReclaimJob(ctx, e2eutil.CPU1Mem1, "reclaim-j4", q4, "high-priority", "", true) 528 Expect(err).NotTo(HaveOccurred(), "Wait for job4 failed") 529 530 By("Make sure all job running") 531 532 err = WaitQueueStatus(ctx, "Running", 1, q3) 533 Expect(err).NotTo(HaveOccurred(), "Error waiting for queue3 running") 534 535 err = WaitQueueStatus(ctx, "Running", 1, q4) 536 Expect(err).NotTo(HaveOccurred(), "Error waiting for queue4 running") 537 538 }) 539 540 // Reclaim for capacity plugin. 541 It("Capacity Reclaim Case 11: Multi reclaimed queue", func() { 542 // First replace proportion with capacity plugin. 543 cmc := e2eutil.NewConfigMapCase("volcano-system", "integration-scheduler-configmap") 544 cmc.ChangeBy(func(data map[string]string) (changed bool, changedBefore map[string]string) { 545 vcScheConfStr, ok := data["volcano-scheduler-ci.conf"] 546 Expect(ok).To(BeTrue()) 547 548 schedulerConf := &e2eutil.SchedulerConfiguration{} 549 err := yaml.Unmarshal([]byte(vcScheConfStr), schedulerConf) 550 Expect(err).NotTo(HaveOccurred()) 551 for _, tier := range schedulerConf.Tiers { 552 for i, plugin := range tier.Plugins { 553 if plugin.Name == "proportion" { 554 tier.Plugins[i].Name = "capacity" 555 break 556 } 557 } 558 } 559 560 newVCScheConfBytes, err := yaml.Marshal(schedulerConf) 561 Expect(err).NotTo(HaveOccurred()) 562 563 changed = true 564 changedBefore = make(map[string]string) 565 changedBefore["volcano-scheduler-ci.conf"] = vcScheConfStr 566 data["volcano-scheduler-ci.conf"] = string(newVCScheConfBytes) 567 return 568 }) 569 defer cmc.UndoChanged() 570 571 q1 := "reclaim-q1" 572 q2 := "reclaim-q2" 573 q3 := "reclaim-q3" 574 q4 := "reclaim-q4" 575 ctx := e2eutil.InitTestContext(e2eutil.Options{ 576 Queues: []string{q1, q2, q3, q4}, 577 NodesNumLimit: 4, 578 DeservedResource: map[string]v1.ResourceList{ 579 q1: {v1.ResourceCPU: *resource.NewQuantity(1, resource.DecimalSI), v1.ResourceMemory: *resource.NewQuantity(1024*1024*1024, resource.BinarySI)}, 580 q2: {v1.ResourceCPU: *resource.NewQuantity(1, resource.DecimalSI), v1.ResourceMemory: *resource.NewQuantity(1024*1024*1024, resource.BinarySI)}, 581 q3: {v1.ResourceCPU: *resource.NewQuantity(2, resource.DecimalSI), v1.ResourceMemory: *resource.NewQuantity(2*1024*1024*1024, resource.BinarySI)}, 582 q4: {v1.ResourceCPU: *resource.NewQuantity(4, resource.DecimalSI), v1.ResourceMemory: *resource.NewQuantity(4*1024*1024*1024, resource.BinarySI)}, 583 }, 584 NodesResourceLimit: e2eutil.CPU2Mem2, 585 PriorityClasses: map[string]int32{ 586 "low-priority": 10, 587 "high-priority": 10000, 588 }, 589 }) 590 591 defer e2eutil.CleanupTestContext(ctx) 592 593 By("Setup initial jobs") 594 595 spec := &e2eutil.JobSpec{ 596 Tasks: []e2eutil.TaskSpec{ 597 { 598 Img: e2eutil.DefaultNginxImage, 599 Req: e2eutil.CPU1Mem1, 600 Min: 1, 601 Rep: 4, 602 Labels: map[string]string{schedulingv1beta1.PodPreemptable: "true"}, 603 }, 604 }, 605 } 606 607 spec.Name = "reclaim-j1" 608 spec.Queue = q1 609 spec.Pri = "low-priority" 610 job1 := e2eutil.CreateJob(ctx, spec) 611 err := e2eutil.WaitJobReady(ctx, job1) 612 Expect(err).NotTo(HaveOccurred()) 613 614 spec.Name = "reclaim-j2" 615 spec.Queue = q2 616 spec.Pri = "low-priority" 617 job2 := e2eutil.CreateJob(ctx, spec) 618 err = e2eutil.WaitJobReady(ctx, job2) 619 Expect(err).NotTo(HaveOccurred()) 620 621 err = WaitQueueStatus(ctx, "Running", 1, q1) 622 Expect(err).NotTo(HaveOccurred(), "Error waiting for queue1 running") 623 624 err = WaitQueueStatus(ctx, "Running", 1, q2) 625 Expect(err).NotTo(HaveOccurred(), "Error waiting for queue2 running") 626 627 By("Create coming jobs") 628 629 _, err = CreateReclaimJob(ctx, e2eutil.CPU2Mem2, "reclaim-j3", q3, "high-priority", "", true) 630 Expect(err).NotTo(HaveOccurred(), "Wait for job3 failed") 631 632 _, err = CreateReclaimJob(ctx, e2eutil.CPU2Mem2, "reclaim-j4", q4, "high-priority", "", true) 633 Expect(err).NotTo(HaveOccurred(), "Wait for job4 failed") 634 635 _, err = CreateReclaimJob(ctx, e2eutil.CPU1Mem1, "reclaim-j5", q4, "high-priority", "", true) 636 Expect(err).NotTo(HaveOccurred(), "Wait for job5 failed") 637 638 _, err = CreateReclaimJob(ctx, e2eutil.CPU1Mem1, "reclaim-j6", q4, "high-priority", "", true) 639 Expect(err).NotTo(HaveOccurred(), "Wait for job6 failed") 640 641 By("Make sure all job running") 642 643 err = WaitQueueStatus(ctx, "Running", 1, q3) 644 Expect(err).NotTo(HaveOccurred(), "Error waiting for queue3 running") 645 646 err = WaitQueueStatus(ctx, "Running", 3, q4) 647 Expect(err).NotTo(HaveOccurred(), "Error waiting for queue4 running") 648 649 }) 650 651 It("Reclaim", func() { 652 Skip("skip: the case has some problem") 653 q1, q2 := "reclaim-q1", "reclaim-q2" 654 ctx := e2eutil.InitTestContext(e2eutil.Options{ 655 Queues: []string{q1, q2}, 656 PriorityClasses: map[string]int32{ 657 "low-priority": 10, 658 "high-priority": 10000, 659 }, 660 }) 661 defer e2eutil.CleanupTestContext(ctx) 662 663 slot := e2eutil.OneCPU 664 rep := e2eutil.ClusterSize(ctx, slot) 665 666 spec := &e2eutil.JobSpec{ 667 Tasks: []e2eutil.TaskSpec{ 668 { 669 Img: e2eutil.DefaultNginxImage, 670 Req: slot, 671 Min: 1, 672 Rep: rep, 673 Labels: map[string]string{schedulingv1beta1.PodPreemptable: "true"}, 674 }, 675 }, 676 } 677 678 spec.Name = "q1-qj-1" 679 spec.Queue = q1 680 spec.Pri = "low-priority" 681 job1 := e2eutil.CreateJob(ctx, spec) 682 err := e2eutil.WaitJobReady(ctx, job1) 683 Expect(err).NotTo(HaveOccurred()) 684 685 err = e2eutil.WaitQueueStatus(func() (bool, error) { 686 queue, err := ctx.Vcclient.SchedulingV1beta1().Queues().Get(context.TODO(), q1, metav1.GetOptions{}) 687 Expect(err).NotTo(HaveOccurred()) 688 return queue.Status.Running == 1, nil 689 }) 690 Expect(err).NotTo(HaveOccurred()) 691 692 expected := int(rep) / 2 693 // Reduce one pod to tolerate decimal fraction. 694 if expected > 1 { 695 expected-- 696 } else { 697 err := fmt.Errorf("expected replica <%d> is too small", expected) 698 Expect(err).NotTo(HaveOccurred()) 699 } 700 701 spec.Name = "q2-qj-2" 702 spec.Queue = q2 703 spec.Pri = "high-priority" 704 job2 := e2eutil.CreateJob(ctx, spec) 705 err = e2eutil.WaitTasksReady(ctx, job2, expected) 706 Expect(err).NotTo(HaveOccurred()) 707 708 err = e2eutil.WaitTasksReady(ctx, job1, expected) 709 Expect(err).NotTo(HaveOccurred()) 710 711 // Test Queue status 712 spec = &e2eutil.JobSpec{ 713 Name: "q1-qj-2", 714 Queue: q1, 715 Tasks: []e2eutil.TaskSpec{ 716 { 717 Img: e2eutil.DefaultNginxImage, 718 Req: slot, 719 Min: rep * 2, 720 Rep: rep * 2, 721 }, 722 }, 723 } 724 job3 := e2eutil.CreateJob(ctx, spec) 725 err = e2eutil.WaitJobStatePending(ctx, job3) 726 Expect(err).NotTo(HaveOccurred()) 727 err = e2eutil.WaitQueueStatus(func() (bool, error) { 728 queue, err := ctx.Vcclient.SchedulingV1beta1().Queues().Get(context.TODO(), q1, metav1.GetOptions{}) 729 Expect(err).NotTo(HaveOccurred()) 730 return queue.Status.Pending == 1, nil 731 }) 732 Expect(err).NotTo(HaveOccurred()) 733 }) 734 })