volcano.sh/volcano@v1.9.0/docs/ut_coverage/UT_coverage_v1.4.0.html (about) 1 2 <!DOCTYPE html> 3 <html> 4 <head> 5 <meta http-equiv="Content-Type" content="text/html; charset=utf-8"> 6 <title>job: Go Coverage Report</title> 7 <style> 8 body { 9 background: black; 10 color: rgb(80, 80, 80); 11 } 12 body, pre, #legend span { 13 font-family: Menlo, monospace; 14 font-weight: bold; 15 } 16 #topbar { 17 background: black; 18 position: fixed; 19 top: 0; left: 0; right: 0; 20 height: 42px; 21 border-bottom: 1px solid rgb(80, 80, 80); 22 } 23 #content { 24 margin-top: 50px; 25 } 26 #nav, #legend { 27 float: left; 28 margin-left: 10px; 29 } 30 #legend { 31 margin-top: 12px; 32 } 33 #nav { 34 margin-top: 10px; 35 } 36 #legend span { 37 margin: 0 5px; 38 } 39 .cov0 { color: rgb(192, 0, 0) } 40 .cov1 { color: rgb(128, 128, 128) } 41 .cov2 { color: rgb(116, 140, 131) } 42 .cov3 { color: rgb(104, 152, 134) } 43 .cov4 { color: rgb(92, 164, 137) } 44 .cov5 { color: rgb(80, 176, 140) } 45 .cov6 { color: rgb(68, 188, 143) } 46 .cov7 { color: rgb(56, 200, 146) } 47 .cov8 { color: rgb(44, 212, 149) } 48 .cov9 { color: rgb(32, 224, 152) } 49 .cov10 { color: rgb(20, 236, 155) } 50 51 </style> 52 </head> 53 <body> 54 <div id="topbar"> 55 <div id="nav"> 56 <select id="files"> 57 58 <option value="file0">volcano.sh/volcano/pkg/cli/job/common.go (100.0%)</option> 59 60 <option value="file1">volcano.sh/volcano/pkg/cli/job/delete.go (73.3%)</option> 61 62 <option value="file2">volcano.sh/volcano/pkg/cli/job/list.go (78.8%)</option> 63 64 <option value="file3">volcano.sh/volcano/pkg/cli/job/resume.go (70.0%)</option> 65 66 <option value="file4">volcano.sh/volcano/pkg/cli/job/run.go (66.0%)</option> 67 68 <option value="file5">volcano.sh/volcano/pkg/cli/job/suspend.go (70.0%)</option> 69 70 <option value="file6">volcano.sh/volcano/pkg/cli/job/util.go (90.2%)</option> 71 72 <option value="file7">volcano.sh/volcano/pkg/cli/job/view.go (90.8%)</option> 73 74 <option 
value="file8">volcano.sh/volcano/pkg/cli/queue/common.go (100.0%)</option> 75 76 <option value="file9">volcano.sh/volcano/pkg/cli/queue/create.go (83.3%)</option> 77 78 <option value="file10">volcano.sh/volcano/pkg/cli/queue/delete.go (88.9%)</option> 79 80 <option value="file11">volcano.sh/volcano/pkg/cli/queue/get.go (80.0%)</option> 81 82 <option value="file12">volcano.sh/volcano/pkg/cli/queue/list.go (80.0%)</option> 83 84 <option value="file13">volcano.sh/volcano/pkg/cli/queue/operate.go (95.5%)</option> 85 86 <option value="file14">volcano.sh/volcano/pkg/cli/queue/util.go (76.9%)</option> 87 88 <option value="file15">volcano.sh/volcano/pkg/cli/util/util.go (40.8%)</option> 89 90 <option value="file16">volcano.sh/volcano/pkg/cli/vcancel/cancel.go (73.3%)</option> 91 92 <option value="file17">volcano.sh/volcano/pkg/cli/vresume/resume.go (70.0%)</option> 93 94 <option value="file18">volcano.sh/volcano/pkg/cli/vsuspend/suspend.go (70.0%)</option> 95 96 <option value="file19">volcano.sh/volcano/pkg/controllers/apis/job_info.go (75.0%)</option> 97 98 <option value="file20">volcano.sh/volcano/pkg/controllers/apis/request.go (100.0%)</option> 99 100 <option value="file21">volcano.sh/volcano/pkg/controllers/cache/cache.go (58.2%)</option> 101 102 <option value="file22">volcano.sh/volcano/pkg/controllers/garbagecollector/garbagecollector.go (37.4%)</option> 103 104 <option value="file23">volcano.sh/volcano/pkg/controllers/job/job_controller.go (45.7%)</option> 105 106 <option value="file24">volcano.sh/volcano/pkg/controllers/job/job_controller_actions.go (57.0%)</option> 107 108 <option value="file25">volcano.sh/volcano/pkg/controllers/job/job_controller_handler.go (49.0%)</option> 109 110 <option value="file26">volcano.sh/volcano/pkg/controllers/job/job_controller_plugins.go (72.2%)</option> 111 112 <option value="file27">volcano.sh/volcano/pkg/controllers/job/job_controller_resync.go (3.8%)</option> 113 114 <option 
value="file28">volcano.sh/volcano/pkg/controllers/job/job_controller_util.go (82.2%)</option> 115 116 <option value="file29">volcano.sh/volcano/pkg/controllers/job/plugins/ssh/ssh.go (10.6%)</option> 117 118 <option value="file30">volcano.sh/volcano/pkg/controllers/podgroup/pg_controller.go (32.5%)</option> 119 120 <option value="file31">volcano.sh/volcano/pkg/controllers/podgroup/pg_controller_handler.go (51.6%)</option> 121 122 <option value="file32">volcano.sh/volcano/pkg/controllers/queue/queue_controller.go (33.9%)</option> 123 124 <option value="file33">volcano.sh/volcano/pkg/controllers/queue/queue_controller_action.go (25.0%)</option> 125 126 <option value="file34">volcano.sh/volcano/pkg/controllers/queue/queue_controller_handler.go (57.8%)</option> 127 128 <option value="file35">volcano.sh/volcano/pkg/controllers/queue/queue_controller_util.go (0.0%)</option> 129 130 <option value="file36">volcano.sh/volcano/pkg/scheduler/actions/allocate/allocate.go (77.0%)</option> 131 132 <option value="file37">volcano.sh/volcano/pkg/scheduler/actions/preempt/preempt.go (85.8%)</option> 133 134 <option value="file38">volcano.sh/volcano/pkg/scheduler/actions/reclaim/reclaim.go (78.4%)</option> 135 136 <option value="file39">volcano.sh/volcano/pkg/scheduler/api/cluster_info.go (0.0%)</option> 137 138 <option value="file40">volcano.sh/volcano/pkg/scheduler/api/device_info.go (43.8%)</option> 139 140 <option value="file41">volcano.sh/volcano/pkg/scheduler/api/helpers.go (40.0%)</option> 141 142 <option value="file42">volcano.sh/volcano/pkg/scheduler/api/job_info.go (47.8%)</option> 143 144 <option value="file43">volcano.sh/volcano/pkg/scheduler/api/namespace_info.go (83.9%)</option> 145 146 <option value="file44">volcano.sh/volcano/pkg/scheduler/api/node_info.go (29.5%)</option> 147 148 <option value="file45">volcano.sh/volcano/pkg/scheduler/api/numa_info.go (0.0%)</option> 149 150 <option value="file46">volcano.sh/volcano/pkg/scheduler/api/pod_info.go (38.0%)</option> 151 
152 <option value="file47">volcano.sh/volcano/pkg/scheduler/api/queue_info.go (0.0%)</option> 153 154 <option value="file48">volcano.sh/volcano/pkg/scheduler/api/resource_info.go (70.7%)</option> 155 156 <option value="file49">volcano.sh/volcano/pkg/scheduler/api/silo_cluster_info.go (0.0%)</option> 157 158 <option value="file50">volcano.sh/volcano/pkg/scheduler/api/test_utils.go (100.0%)</option> 159 160 <option value="file51">volcano.sh/volcano/pkg/scheduler/api/types.go (25.0%)</option> 161 162 <option value="file52">volcano.sh/volcano/pkg/scheduler/api/unschedule_info.go (51.9%)</option> 163 164 <option value="file53">volcano.sh/volcano/pkg/scheduler/cache/cache.go (6.8%)</option> 165 166 <option value="file54">volcano.sh/volcano/pkg/scheduler/cache/event_handlers.go (29.6%)</option> 167 168 <option value="file55">volcano.sh/volcano/pkg/scheduler/cache/util.go (0.0%)</option> 169 170 <option value="file56">volcano.sh/volcano/pkg/scheduler/framework/arguments.go (67.6%)</option> 171 172 <option value="file57">volcano.sh/volcano/pkg/scheduler/framework/framework.go (0.0%)</option> 173 174 <option value="file58">volcano.sh/volcano/pkg/scheduler/framework/job_updater.go (0.0%)</option> 175 176 <option value="file59">volcano.sh/volcano/pkg/scheduler/framework/plugins.go (2.7%)</option> 177 178 <option value="file60">volcano.sh/volcano/pkg/scheduler/framework/session.go (0.0%)</option> 179 180 <option value="file61">volcano.sh/volcano/pkg/scheduler/framework/session_plugins.go (0.0%)</option> 181 182 <option value="file62">volcano.sh/volcano/pkg/scheduler/framework/statement.go (0.0%)</option> 183 184 <option value="file63">volcano.sh/volcano/pkg/scheduler/plugins/binpack/binpack.go (69.6%)</option> 185 186 <option value="file64">volcano.sh/volcano/pkg/scheduler/plugins/drf/drf.go (48.4%)</option> 187 188 <option value="file65">volcano.sh/volcano/pkg/scheduler/plugins/numaaware/policy/factory.go (0.0%)</option> 189 190 <option 
value="file66">volcano.sh/volcano/pkg/scheduler/plugins/numaaware/policy/policy.go (80.4%)</option> 191 192 <option value="file67">volcano.sh/volcano/pkg/scheduler/plugins/numaaware/policy/policy_best_effort.go (100.0%)</option> 193 194 <option value="file68">volcano.sh/volcano/pkg/scheduler/plugins/numaaware/policy/policy_none.go (0.0%)</option> 195 196 <option value="file69">volcano.sh/volcano/pkg/scheduler/plugins/numaaware/policy/policy_restricted.go (100.0%)</option> 197 198 <option value="file70">volcano.sh/volcano/pkg/scheduler/plugins/numaaware/policy/policy_single_numa_node.go (94.4%)</option> 199 200 <option value="file71">volcano.sh/volcano/pkg/scheduler/plugins/numaaware/provider/cpumanager/cpu_assignment.go (92.4%)</option> 201 202 <option value="file72">volcano.sh/volcano/pkg/scheduler/plugins/numaaware/provider/cpumanager/cpu_mng.go (77.0%)</option> 203 204 <option value="file73">volcano.sh/volcano/pkg/scheduler/plugins/predicates/cache.go (3.8%)</option> 205 206 <option value="file74">volcano.sh/volcano/pkg/scheduler/plugins/predicates/gpu.go (0.0%)</option> 207 208 <option value="file75">volcano.sh/volcano/pkg/scheduler/plugins/predicates/predicates.go (52.0%)</option> 209 210 <option value="file76">volcano.sh/volcano/pkg/scheduler/plugins/predicates/proportional.go (100.0%)</option> 211 212 <option value="file77">volcano.sh/volcano/pkg/scheduler/plugins/task-topology/bucket.go (0.0%)</option> 213 214 <option value="file78">volcano.sh/volcano/pkg/scheduler/plugins/task-topology/manager.go (0.0%)</option> 215 216 <option value="file79">volcano.sh/volcano/pkg/scheduler/plugins/task-topology/topology.go (32.1%)</option> 217 218 <option value="file80">volcano.sh/volcano/pkg/scheduler/plugins/task-topology/util.go (0.0%)</option> 219 220 <option value="file81">volcano.sh/volcano/pkg/scheduler/plugins/tdm/tdm.go (68.6%)</option> 221 222 <option value="file82">volcano.sh/volcano/pkg/scheduler/scheduler.go (0.0%)</option> 223 224 <option 
value="file83">volcano.sh/volcano/pkg/scheduler/util.go (68.0%)</option> 225 226 <option value="file84">volcano.sh/volcano/pkg/scheduler/util/priority_queue.go (0.0%)</option> 227 228 <option value="file85">volcano.sh/volcano/pkg/scheduler/util/scheduler_helper.go (14.4%)</option> 229 230 <option value="file86">volcano.sh/volcano/pkg/scheduler/util/test_utils.go (0.0%)</option> 231 232 <option value="file87">volcano.sh/volcano/pkg/webhooks/admission/jobs/mutate/mutate_job.go (25.0%)</option> 233 234 <option value="file88">volcano.sh/volcano/pkg/webhooks/admission/jobs/validate/admit_job.go (77.1%)</option> 235 236 <option value="file89">volcano.sh/volcano/pkg/webhooks/admission/jobs/validate/util.go (97.4%)</option> 237 238 <option value="file90">volcano.sh/volcano/pkg/webhooks/admission/pods/mutate/annotation.go (81.0%)</option> 239 240 <option value="file91">volcano.sh/volcano/pkg/webhooks/admission/pods/mutate/factory.go (100.0%)</option> 241 242 <option value="file92">volcano.sh/volcano/pkg/webhooks/admission/pods/mutate/mutate_pod.go (64.3%)</option> 243 244 <option value="file93">volcano.sh/volcano/pkg/webhooks/admission/pods/mutate/namespace.go (85.7%)</option> 245 246 <option value="file94">volcano.sh/volcano/pkg/webhooks/admission/pods/validate/admit_pod.go (39.7%)</option> 247 248 <option value="file95">volcano.sh/volcano/pkg/webhooks/admission/queues/mutate/mutate_queue.go (88.5%)</option> 249 250 <option value="file96">volcano.sh/volcano/pkg/webhooks/admission/queues/validate/validate_queue.go (95.2%)</option> 251 252 </select> 253 </div> 254 <div id="legend"> 255 <span>not tracked</span> 256 257 <span class="cov0">not covered</span> 258 <span class="cov8">covered</span> 259 260 </div> 261 </div> 262 <div id="content"> 263 264 <pre class="file" id="file0" style="display: none">/* 265 Copyright 2018 The Volcano Authors. 
266 267 Licensed under the Apache License, Version 2.0 (the "License"); 268 you may not use this file except in compliance with the License. 269 You may obtain a copy of the License at 270 271 http://www.apache.org/licenses/LICENSE-2.0 272 273 Unless required by applicable law or agreed to in writing, software 274 distributed under the License is distributed on an "AS IS" BASIS, 275 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 276 See the License for the specific language governing permissions and 277 limitations under the License. 278 */ 279 280 package job 281 282 import ( 283 "os" 284 "path/filepath" 285 286 "github.com/spf13/cobra" 287 ) 288 289 type commonFlags struct { 290 Master string 291 Kubeconfig string 292 } 293 294 func initFlags(cmd *cobra.Command, cf *commonFlags) <span class="cov8" title="1">{ 295 cmd.Flags().StringVarP(&cf.Master, "master", "s", "", "the address of apiserver") 296 297 kubeConfFile := os.Getenv("KUBECONFIG") 298 if kubeConfFile == "" </span><span class="cov8" title="1">{ 299 if home := homeDir(); home != "" </span><span class="cov8" title="1">{ 300 kubeConfFile = filepath.Join(home, ".kube", "config") 301 }</span> 302 } 303 <span class="cov8" title="1">cmd.Flags().StringVarP(&cf.Kubeconfig, "kubeconfig", "k", kubeConfFile, "(optional) absolute path to the kubeconfig file")</span> 304 } 305 </pre> 306 307 <pre class="file" id="file1" style="display: none">/* 308 Copyright 2019 The Volcano Authors. 309 310 Licensed under the Apache License, Version 2.0 (the "License"); 311 you may not use this file except in compliance with the License. 312 You may obtain a copy of the License at 313 314 http://www.apache.org/licenses/LICENSE-2.0 315 316 Unless required by applicable law or agreed to in writing, software 317 distributed under the License is distributed on an "AS IS" BASIS, 318 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
319 See the License for the specific language governing permissions and 320 limitations under the License. 321 */ 322 323 package job 324 325 import ( 326 "context" 327 "fmt" 328 329 "github.com/spf13/cobra" 330 331 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 332 333 "volcano.sh/apis/pkg/client/clientset/versioned" 334 "volcano.sh/volcano/pkg/cli/util" 335 ) 336 337 type deleteFlags struct { 338 commonFlags 339 340 Namespace string 341 JobName string 342 } 343 344 var deleteJobFlags = &deleteFlags{} 345 346 // InitDeleteFlags init the delete command flags. 347 func InitDeleteFlags(cmd *cobra.Command) <span class="cov8" title="1">{ 348 initFlags(cmd, &deleteJobFlags.commonFlags) 349 350 cmd.Flags().StringVarP(&deleteJobFlags.Namespace, "namespace", "n", "default", "the namespace of job") 351 cmd.Flags().StringVarP(&deleteJobFlags.JobName, "name", "N", "", "the name of job") 352 }</span> 353 354 // DeleteJob delete the job. 355 func DeleteJob() error <span class="cov8" title="1">{ 356 config, err := util.BuildConfig(deleteJobFlags.Master, deleteJobFlags.Kubeconfig) 357 if err != nil </span><span class="cov0" title="0">{ 358 return err 359 }</span> 360 361 <span class="cov8" title="1">if deleteJobFlags.JobName == "" </span><span class="cov0" title="0">{ 362 err := fmt.Errorf("job name is mandatory to delete a particular job") 363 return err 364 }</span> 365 366 <span class="cov8" title="1">jobClient := versioned.NewForConfigOrDie(config) 367 err = jobClient.BatchV1alpha1().Jobs(deleteJobFlags.Namespace).Delete(context.TODO(), deleteJobFlags.JobName, metav1.DeleteOptions{}) 368 if err != nil </span><span class="cov0" title="0">{ 369 return err 370 }</span> 371 <span class="cov8" title="1">fmt.Printf("delete job %v successfully\n", deleteJobFlags.JobName) 372 return nil</span> 373 } 374 </pre> 375 376 <pre class="file" id="file2" style="display: none">/* 377 Copyright 2018 The Volcano Authors. 
378 379 Licensed under the Apache License, Version 2.0 (the "License"); 380 you may not use this file except in compliance with the License. 381 You may obtain a copy of the License at 382 383 http://www.apache.org/licenses/LICENSE-2.0 384 385 Unless required by applicable law or agreed to in writing, software 386 distributed under the License is distributed on an "AS IS" BASIS, 387 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 388 See the License for the specific language governing permissions and 389 limitations under the License. 390 */ 391 392 package job 393 394 import ( 395 "context" 396 "fmt" 397 "io" 398 "os" 399 "strings" 400 401 "github.com/spf13/cobra" 402 403 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 404 405 "volcano.sh/apis/pkg/apis/batch/v1alpha1" 406 "volcano.sh/apis/pkg/client/clientset/versioned" 407 "volcano.sh/volcano/pkg/cli/util" 408 ) 409 410 type listFlags struct { 411 commonFlags 412 413 Namespace string 414 SchedulerName string 415 allNamespace bool 416 selector string 417 } 418 419 const ( 420 421 // Name name etc below key words are used in job print format 422 Name string = "Name" 423 // Creation create 424 Creation string = "Creation" 425 // Phase phase 426 Phase string = "Phase" 427 // Replicas replicas 428 Replicas string = "Replicas" 429 // Min minimum 430 Min string = "Min" 431 // Scheduler scheduler 432 Scheduler string = "Scheduler" 433 // Pending pending 434 Pending string = "Pending" 435 // Running running 436 Running string = "Running" 437 // Succeeded success 438 Succeeded string = "Succeeded" 439 // Terminating terminating 440 Terminating string = "Terminating" 441 // Version version 442 Version string = "Version" 443 // Failed failed 444 Failed string = "Failed" 445 // Unknown pod 446 Unknown string = "Unknown" 447 // RetryCount retry count 448 RetryCount string = "RetryCount" 449 // JobType job type 450 JobType string = "JobType" 451 // Namespace job namespace 452 Namespace string = 
"Namespace" 453 ) 454 455 var listJobFlags = &listFlags{} 456 457 // InitListFlags init list command flags. 458 func InitListFlags(cmd *cobra.Command) <span class="cov8" title="1">{ 459 initFlags(cmd, &listJobFlags.commonFlags) 460 461 cmd.Flags().StringVarP(&listJobFlags.Namespace, "namespace", "n", "default", "the namespace of job") 462 cmd.Flags().StringVarP(&listJobFlags.SchedulerName, "scheduler", "S", "", "list job with specified scheduler name") 463 cmd.Flags().BoolVarP(&listJobFlags.allNamespace, "all-namespaces", "", false, "list jobs in all namespaces") 464 cmd.Flags().StringVarP(&listJobFlags.selector, "selector", "", "", "fuzzy matching jobName") 465 }</span> 466 467 // ListJobs lists all jobs details. 468 func ListJobs() error <span class="cov8" title="1">{ 469 config, err := util.BuildConfig(listJobFlags.Master, listJobFlags.Kubeconfig) 470 if err != nil </span><span class="cov0" title="0">{ 471 return err 472 }</span> 473 <span class="cov8" title="1">if listJobFlags.allNamespace </span><span class="cov8" title="1">{ 474 listJobFlags.Namespace = "" 475 }</span> 476 <span class="cov8" title="1">jobClient := versioned.NewForConfigOrDie(config) 477 jobs, err := jobClient.BatchV1alpha1().Jobs(listJobFlags.Namespace).List(context.TODO(), metav1.ListOptions{}) 478 if err != nil </span><span class="cov0" title="0">{ 479 return err 480 }</span> 481 482 <span class="cov8" title="1">if len(jobs.Items) == 0 </span><span class="cov0" title="0">{ 483 fmt.Printf("No resources found\n") 484 return nil 485 }</span> 486 <span class="cov8" title="1">PrintJobs(jobs, os.Stdout) 487 488 return nil</span> 489 } 490 491 // PrintJobs prints all jobs details. 
492 func PrintJobs(jobs *v1alpha1.JobList, writer io.Writer) <span class="cov8" title="1">{ 493 maxLenInfo := getMaxLen(jobs) 494 495 titleFormat := "%%-%ds%%-15s%%-12s%%-12s%%-12s%%-6s%%-10s%%-10s%%-12s%%-10s%%-12s%%-10s\n" 496 contentFormat := "%%-%ds%%-15s%%-12s%%-12s%%-12d%%-6d%%-10d%%-10d%%-12d%%-10d%%-12d%%-10d\n" 497 498 var err error 499 if listJobFlags.allNamespace </span><span class="cov8" title="1">{ 500 _, err = fmt.Fprintf(writer, fmt.Sprintf("%%-%ds"+titleFormat, maxLenInfo[1], maxLenInfo[0]), 501 Namespace, Name, Creation, Phase, JobType, Replicas, Min, Pending, Running, Succeeded, Failed, Unknown, RetryCount) 502 }</span> else<span class="cov8" title="1"> { 503 _, err = fmt.Fprintf(writer, fmt.Sprintf(titleFormat, maxLenInfo[0]), 504 Name, Creation, Phase, JobType, Replicas, Min, Pending, Running, Succeeded, Failed, Unknown, RetryCount) 505 }</span> 506 <span class="cov8" title="1">if err != nil </span><span class="cov0" title="0">{ 507 fmt.Printf("Failed to print list command result: %s.\n", err) 508 }</span> 509 510 <span class="cov8" title="1">for _, job := range jobs.Items </span><span class="cov8" title="1">{ 511 if listJobFlags.SchedulerName != "" && listJobFlags.SchedulerName != job.Spec.SchedulerName </span><span class="cov0" title="0">{ 512 continue</span> 513 } 514 <span class="cov8" title="1">if !strings.Contains(job.Name, listJobFlags.selector) </span><span class="cov0" title="0">{ 515 continue</span> 516 } 517 <span class="cov8" title="1">replicas := int32(0) 518 for _, ts := range job.Spec.Tasks </span><span class="cov0" title="0">{ 519 replicas += ts.Replicas 520 }</span> 521 <span class="cov8" title="1">jobType := job.ObjectMeta.Labels[v1alpha1.JobTypeKey] 522 if jobType == "" </span><span class="cov8" title="1">{ 523 jobType = "Batch" 524 }</span> 525 526 <span class="cov8" title="1">if listJobFlags.allNamespace </span><span class="cov8" title="1">{ 527 _, err = fmt.Fprintf(writer, fmt.Sprintf("%%-%ds"+contentFormat, maxLenInfo[1], 
maxLenInfo[0]), 528 job.Namespace, job.Name, job.CreationTimestamp.Format("2006-01-02"), job.Status.State.Phase, jobType, replicas, 529 job.Status.MinAvailable, job.Status.Pending, job.Status.Running, job.Status.Succeeded, job.Status.Failed, job.Status.Unknown, job.Status.RetryCount) 530 }</span> else<span class="cov8" title="1"> { 531 _, err = fmt.Fprintf(writer, fmt.Sprintf(contentFormat, maxLenInfo[0]), 532 job.Name, job.CreationTimestamp.Format("2006-01-02"), job.Status.State.Phase, jobType, replicas, 533 job.Status.MinAvailable, job.Status.Pending, job.Status.Running, job.Status.Succeeded, job.Status.Failed, job.Status.Unknown, job.Status.RetryCount) 534 }</span> 535 <span class="cov8" title="1">if err != nil </span><span class="cov0" title="0">{ 536 fmt.Printf("Failed to print list command result: %s.\n", err) 537 }</span> 538 } 539 } 540 541 func getMaxLen(jobs *v1alpha1.JobList) []int <span class="cov8" title="1">{ 542 maxNameLen := len(Name) 543 maxNamespaceLen := len(Namespace) 544 for _, job := range jobs.Items </span><span class="cov8" title="1">{ 545 if len(job.Name) > maxNameLen </span><span class="cov0" title="0">{ 546 maxNameLen = len(job.Name) 547 }</span> 548 <span class="cov8" title="1">if len(job.Namespace) > maxNamespaceLen </span><span class="cov0" title="0">{ 549 maxNamespaceLen = len(job.Namespace) 550 }</span> 551 } 552 553 <span class="cov8" title="1">return []int{maxNameLen + 3, maxNamespaceLen + 3}</span> 554 } 555 </pre> 556 557 <pre class="file" id="file3" style="display: none">/* 558 Copyright 2018 The Volcano Authors. 559 560 Licensed under the Apache License, Version 2.0 (the "License"); 561 you may not use this file except in compliance with the License. 
562 You may obtain a copy of the License at 563 564 http://www.apache.org/licenses/LICENSE-2.0 565 566 Unless required by applicable law or agreed to in writing, software 567 distributed under the License is distributed on an "AS IS" BASIS, 568 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 569 See the License for the specific language governing permissions and 570 limitations under the License. 571 */ 572 573 package job 574 575 import ( 576 "fmt" 577 578 "github.com/spf13/cobra" 579 580 "volcano.sh/apis/pkg/apis/bus/v1alpha1" 581 "volcano.sh/volcano/pkg/cli/util" 582 ) 583 584 type resumeFlags struct { 585 commonFlags 586 587 Namespace string 588 JobName string 589 } 590 591 var resumeJobFlags = &resumeFlags{} 592 593 // InitResumeFlags init resume command flags. 594 func InitResumeFlags(cmd *cobra.Command) <span class="cov8" title="1">{ 595 initFlags(cmd, &resumeJobFlags.commonFlags) 596 597 cmd.Flags().StringVarP(&resumeJobFlags.Namespace, "namespace", "n", "default", "the namespace of job") 598 cmd.Flags().StringVarP(&resumeJobFlags.JobName, "name", "N", "", "the name of job") 599 }</span> 600 601 // ResumeJob resumes the job. 602 func ResumeJob() error <span class="cov8" title="1">{ 603 config, err := util.BuildConfig(resumeJobFlags.Master, resumeJobFlags.Kubeconfig) 604 if err != nil </span><span class="cov0" title="0">{ 605 return err 606 }</span> 607 <span class="cov8" title="1">if resumeJobFlags.JobName == "" </span><span class="cov0" title="0">{ 608 err := fmt.Errorf("job name is mandatory to resume a particular job") 609 return err 610 }</span> 611 612 <span class="cov8" title="1">return createJobCommand(config, 613 resumeJobFlags.Namespace, resumeJobFlags.JobName, 614 v1alpha1.ResumeJobAction)</span> 615 } 616 </pre> 617 618 <pre class="file" id="file4" style="display: none">/* 619 Copyright 2018 The Volcano Authors. 
620 621 Licensed under the Apache License, Version 2.0 (the "License"); 622 you may not use this file except in compliance with the License. 623 You may obtain a copy of the License at 624 625 http://www.apache.org/licenses/LICENSE-2.0 626 627 Unless required by applicable law or agreed to in writing, software 628 distributed under the License is distributed on an "AS IS" BASIS, 629 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 630 See the License for the specific language governing permissions and 631 limitations under the License. 632 */ 633 634 package job 635 636 import ( 637 "context" 638 "fmt" 639 "io/ioutil" 640 "strings" 641 642 "github.com/spf13/cobra" 643 644 v1 "k8s.io/api/core/v1" 645 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 646 "sigs.k8s.io/yaml" 647 648 vcbatch "volcano.sh/apis/pkg/apis/batch/v1alpha1" 649 "volcano.sh/apis/pkg/client/clientset/versioned" 650 "volcano.sh/volcano/pkg/cli/util" 651 ) 652 653 type runFlags struct { 654 commonFlags 655 656 Name string 657 Namespace string 658 Image string 659 660 MinAvailable int 661 Replicas int 662 Requests string 663 Limits string 664 SchedulerName string 665 FileName string 666 } 667 668 var launchJobFlags = &runFlags{} 669 670 // InitRunFlags init the run flags. 
671 func InitRunFlags(cmd *cobra.Command) <span class="cov8" title="1">{ 672 initFlags(cmd, &launchJobFlags.commonFlags) 673 674 cmd.Flags().StringVarP(&launchJobFlags.Image, "image", "i", "busybox", "the container image of job") 675 cmd.Flags().StringVarP(&launchJobFlags.Namespace, "namespace", "n", "default", "the namespace of job") 676 cmd.Flags().StringVarP(&launchJobFlags.Name, "name", "N", "", "the name of job") 677 cmd.Flags().IntVarP(&launchJobFlags.MinAvailable, "min", "m", 1, "the minimal available tasks of job") 678 cmd.Flags().IntVarP(&launchJobFlags.Replicas, "replicas", "r", 1, "the total tasks of job") 679 cmd.Flags().StringVarP(&launchJobFlags.Requests, "requests", "R", "cpu=1000m,memory=100Mi", "the resource request of the task") 680 cmd.Flags().StringVarP(&launchJobFlags.Limits, "limits", "L", "cpu=1000m,memory=100Mi", "the resource limit of the task") 681 cmd.Flags().StringVarP(&launchJobFlags.SchedulerName, "scheduler", "S", "volcano", "the scheduler for this job") 682 cmd.Flags().StringVarP(&launchJobFlags.FileName, "filename", "f", "", "the yaml file of job") 683 }</span> 684 685 var jobName = "job.volcano.sh" 686 687 // RunJob creates the job. 
688 func RunJob() error <span class="cov8" title="1">{ 689 config, err := util.BuildConfig(launchJobFlags.Master, launchJobFlags.Kubeconfig) 690 if err != nil </span><span class="cov0" title="0">{ 691 return err 692 }</span> 693 694 <span class="cov8" title="1">if launchJobFlags.Name == "" && launchJobFlags.FileName == "" </span><span class="cov0" title="0">{ 695 err = fmt.Errorf("job name cannot be left blank") 696 return err 697 }</span> 698 699 <span class="cov8" title="1">req, err := populateResourceListV1(launchJobFlags.Requests) 700 if err != nil </span><span class="cov0" title="0">{ 701 return err 702 }</span> 703 704 <span class="cov8" title="1">limit, err := populateResourceListV1(launchJobFlags.Limits) 705 if err != nil </span><span class="cov0" title="0">{ 706 return err 707 }</span> 708 709 <span class="cov8" title="1">job, err := readFile(launchJobFlags.FileName) 710 if err != nil </span><span class="cov0" title="0">{ 711 return err 712 }</span> 713 714 <span class="cov8" title="1">if job == nil </span><span class="cov8" title="1">{ 715 job = constructLaunchJobFlagsJob(launchJobFlags, req, limit) 716 }</span> 717 718 <span class="cov8" title="1">jobClient := versioned.NewForConfigOrDie(config) 719 newJob, err := jobClient.BatchV1alpha1().Jobs(launchJobFlags.Namespace).Create(context.TODO(), job, metav1.CreateOptions{}) 720 if err != nil </span><span class="cov0" title="0">{ 721 return err 722 }</span> 723 724 <span class="cov8" title="1">if newJob.Spec.Queue == "" </span><span class="cov8" title="1">{ 725 newJob.Spec.Queue = "default" 726 }</span> 727 728 <span class="cov8" title="1">fmt.Printf("run job %v successfully\n", newJob.Name) 729 730 return nil</span> 731 } 732 733 func readFile(filename string) (*vcbatch.Job, error) <span class="cov8" title="1">{ 734 if filename == "" </span><span class="cov8" title="1">{ 735 return nil, nil 736 }</span> 737 738 <span class="cov0" title="0">if !strings.Contains(filename, ".yaml") && 
!strings.Contains(filename, ".yml") </span><span class="cov0" title="0">{ 739 return nil, fmt.Errorf("only support yaml file") 740 }</span> 741 742 <span class="cov0" title="0">file, err := ioutil.ReadFile(filename) 743 if err != nil </span><span class="cov0" title="0">{ 744 return nil, fmt.Errorf("failed to read file, err: %v", err) 745 }</span> 746 747 <span class="cov0" title="0">var job vcbatch.Job 748 if err := yaml.Unmarshal(file, &job); err != nil </span><span class="cov0" title="0">{ 749 return nil, fmt.Errorf("failed to unmarshal file, err: %v", err) 750 }</span> 751 752 <span class="cov0" title="0">return &job, nil</span> 753 } 754 755 func constructLaunchJobFlagsJob(launchJobFlags *runFlags, req, limit v1.ResourceList) *vcbatch.Job <span class="cov8" title="1">{ 756 return &vcbatch.Job{ 757 ObjectMeta: metav1.ObjectMeta{ 758 Name: launchJobFlags.Name, 759 Namespace: launchJobFlags.Namespace, 760 }, 761 Spec: vcbatch.JobSpec{ 762 MinAvailable: int32(launchJobFlags.MinAvailable), 763 SchedulerName: launchJobFlags.SchedulerName, 764 Tasks: []vcbatch.TaskSpec{ 765 { 766 Replicas: int32(launchJobFlags.Replicas), 767 768 Template: v1.PodTemplateSpec{ 769 ObjectMeta: metav1.ObjectMeta{ 770 Name: launchJobFlags.Name, 771 Labels: map[string]string{jobName: launchJobFlags.Name}, 772 }, 773 Spec: v1.PodSpec{ 774 RestartPolicy: v1.RestartPolicyNever, 775 Containers: []v1.Container{ 776 { 777 Image: launchJobFlags.Image, 778 Name: launchJobFlags.Name, 779 ImagePullPolicy: v1.PullIfNotPresent, 780 Resources: v1.ResourceRequirements{ 781 Limits: limit, 782 Requests: req, 783 }, 784 }, 785 }, 786 }, 787 }, 788 }, 789 }, 790 }, 791 } 792 }</span> 793 </pre> 794 795 <pre class="file" id="file5" style="display: none">/* 796 Copyright 2018 The Volcano Authors. 797 798 Licensed under the Apache License, Version 2.0 (the "License"); 799 you may not use this file except in compliance with the License. 
800 You may obtain a copy of the License at 801 802 http://www.apache.org/licenses/LICENSE-2.0 803 804 Unless required by applicable law or agreed to in writing, software 805 distributed under the License is distributed on an "AS IS" BASIS, 806 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 807 See the License for the specific language governing permissions and 808 limitations under the License. 809 */ 810 811 package job 812 813 import ( 814 "fmt" 815 816 "github.com/spf13/cobra" 817 818 "volcano.sh/apis/pkg/apis/bus/v1alpha1" 819 "volcano.sh/volcano/pkg/cli/util" 820 ) 821 822 type suspendFlags struct { 823 commonFlags 824 825 Namespace string 826 JobName string 827 } 828 829 var suspendJobFlags = &suspendFlags{} 830 831 // InitSuspendFlags init suspend related flags. 832 func InitSuspendFlags(cmd *cobra.Command) <span class="cov8" title="1">{ 833 initFlags(cmd, &suspendJobFlags.commonFlags) 834 835 cmd.Flags().StringVarP(&suspendJobFlags.Namespace, "namespace", "n", "default", "the namespace of job") 836 cmd.Flags().StringVarP(&suspendJobFlags.JobName, "name", "N", "", "the name of job") 837 }</span> 838 839 // SuspendJob suspends the job. 840 func SuspendJob() error <span class="cov8" title="1">{ 841 config, err := util.BuildConfig(suspendJobFlags.Master, suspendJobFlags.Kubeconfig) 842 if err != nil </span><span class="cov0" title="0">{ 843 return err 844 }</span> 845 846 <span class="cov8" title="1">if suspendJobFlags.JobName == "" </span><span class="cov0" title="0">{ 847 err := fmt.Errorf("job name is mandatory to suspend a particular job") 848 return err 849 }</span> 850 851 <span class="cov8" title="1">return createJobCommand(config, 852 suspendJobFlags.Namespace, suspendJobFlags.JobName, 853 v1alpha1.AbortJobAction)</span> 854 } 855 </pre> 856 857 <pre class="file" id="file6" style="display: none">/* 858 Copyright 2018 The Volcano Authors. 
859 860 Licensed under the Apache License, Version 2.0 (the "License"); 861 you may not use this file except in compliance with the License. 862 You may obtain a copy of the License at 863 864 http://www.apache.org/licenses/LICENSE-2.0 865 866 Unless required by applicable law or agreed to in writing, software 867 distributed under the License is distributed on an "AS IS" BASIS, 868 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 869 See the License for the specific language governing permissions and 870 limitations under the License. 871 */ 872 873 package job 874 875 import ( 876 "context" 877 "fmt" 878 "os" 879 "strings" 880 "time" 881 882 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 883 884 v1 "k8s.io/api/core/v1" 885 "k8s.io/apimachinery/pkg/api/resource" 886 "k8s.io/client-go/rest" 887 888 vcbus "volcano.sh/apis/pkg/apis/bus/v1alpha1" 889 "volcano.sh/apis/pkg/apis/helpers" 890 "volcano.sh/apis/pkg/client/clientset/versioned" 891 ) 892 893 func homeDir() string <span class="cov8" title="1">{ 894 if h := os.Getenv("HOME"); h != "" </span><span class="cov8" title="1">{ 895 return h 896 }</span> 897 <span class="cov0" title="0">return os.Getenv("USERPROFILE")</span> // windows 898 } 899 900 // populateResourceListV1 takes strings of form <resourceName1>=<value1>,<resourceName1>=<value2> 901 // and returns ResourceList. 
902 func populateResourceListV1(spec string) (v1.ResourceList, error) <span class="cov8" title="1">{ 903 // empty input gets a nil response to preserve generator test expected behaviors 904 if spec == "" </span><span class="cov8" title="1">{ 905 return nil, nil 906 }</span> 907 908 <span class="cov8" title="1">result := v1.ResourceList{} 909 resourceStatements := strings.Split(spec, ",") 910 for _, resourceStatement := range resourceStatements </span><span class="cov8" title="1">{ 911 parts := strings.Split(resourceStatement, "=") 912 if len(parts) != 2 </span><span class="cov0" title="0">{ 913 return nil, fmt.Errorf("invalid argument syntax %v, expected <resource>=<value>", resourceStatement) 914 }</span> 915 <span class="cov8" title="1">resourceName := v1.ResourceName(parts[0]) 916 resourceQuantity, err := resource.ParseQuantity(parts[1]) 917 if err != nil </span><span class="cov0" title="0">{ 918 return nil, err 919 }</span> 920 <span class="cov8" title="1">result[resourceName] = resourceQuantity</span> 921 } 922 <span class="cov8" title="1">return result, nil</span> 923 } 924 925 func createJobCommand(config *rest.Config, ns, name string, action vcbus.Action) error <span class="cov8" title="1">{ 926 jobClient := versioned.NewForConfigOrDie(config) 927 job, err := jobClient.BatchV1alpha1().Jobs(ns).Get(context.TODO(), name, metav1.GetOptions{}) 928 if err != nil </span><span class="cov0" title="0">{ 929 return err 930 }</span> 931 932 <span class="cov8" title="1">ctrlRef := metav1.NewControllerRef(job, helpers.JobKind) 933 cmd := &vcbus.Command{ 934 ObjectMeta: metav1.ObjectMeta{ 935 GenerateName: fmt.Sprintf("%s-%s-", 936 job.Name, strings.ToLower(string(action))), 937 Namespace: job.Namespace, 938 OwnerReferences: []metav1.OwnerReference{ 939 *ctrlRef, 940 }, 941 }, 942 TargetObject: ctrlRef, 943 Action: string(action), 944 } 945 946 if _, err := jobClient.BusV1alpha1().Commands(ns).Create(context.TODO(), cmd, metav1.CreateOptions{}); err != nil </span><span 
class="cov0" title="0">{ 947 return err 948 }</span> 949 950 <span class="cov8" title="1">return nil</span> 951 } 952 953 func translateTimestampSince(timestamp metav1.Time) string <span class="cov8" title="1">{ 954 if timestamp.IsZero() </span><span class="cov8" title="1">{ 955 return "<unknown>" 956 }</span> 957 <span class="cov8" title="1">return HumanDuration(time.Since(timestamp.Time))</span> 958 } 959 960 // HumanDuration translate time.Duration to human readable time string. 961 func HumanDuration(d time.Duration) string <span class="cov8" title="1">{ 962 // Allow deviation no more than 2 seconds(excluded) to tolerate machine time 963 // inconsistence, it can be considered as almost now. 964 if seconds := int(d.Seconds()); seconds < -1 </span><span class="cov8" title="1">{ 965 return "<invalid>" 966 }</span> else<span class="cov8" title="1"> if seconds < 0 </span><span class="cov0" title="0">{ 967 return "0s" 968 }</span> else<span class="cov8" title="1"> if seconds < 60*2 </span><span class="cov8" title="1">{ 969 return fmt.Sprintf("%ds", seconds) 970 }</span> 971 <span class="cov8" title="1">minutes := int(d / time.Minute) 972 if minutes < 10 </span><span class="cov8" title="1">{ 973 s := int(d/time.Second) % 60 974 if s == 0 </span><span class="cov8" title="1">{ 975 return fmt.Sprintf("%dm", minutes) 976 }</span> 977 <span class="cov8" title="1">return fmt.Sprintf("%dm%ds", minutes, s)</span> 978 } else<span class="cov8" title="1"> if minutes < 60*3 </span><span class="cov8" title="1">{ 979 return fmt.Sprintf("%dm", minutes) 980 }</span> 981 <span class="cov8" title="1">hours := int(d / time.Hour) 982 if hours < 8 </span><span class="cov8" title="1">{ 983 m := int(d/time.Minute) % 60 984 if m == 0 </span><span class="cov8" title="1">{ 985 return fmt.Sprintf("%dh", hours) 986 }</span> 987 <span class="cov8" title="1">return fmt.Sprintf("%dh%dm", hours, m)</span> 988 } else<span class="cov8" title="1"> if hours < 48 </span><span class="cov8" title="1">{ 989 
return fmt.Sprintf("%dh", hours) 990 }</span> else<span class="cov8" title="1"> if hours < 24*8 </span><span class="cov8" title="1">{ 991 h := hours % 24 992 if h == 0 </span><span class="cov8" title="1">{ 993 return fmt.Sprintf("%dd", hours/24) 994 }</span> 995 <span class="cov8" title="1">return fmt.Sprintf("%dd%dh", hours/24, h)</span> 996 } else<span class="cov8" title="1"> if hours < 24*365*2 </span><span class="cov8" title="1">{ 997 return fmt.Sprintf("%dd", hours/24) 998 }</span> else<span class="cov8" title="1"> if hours < 24*365*8 </span><span class="cov8" title="1">{ 999 return fmt.Sprintf("%dy%dd", hours/24/365, (hours/24)%365) 1000 }</span> 1001 <span class="cov8" title="1">return fmt.Sprintf("%dy", hours/24/365)</span> 1002 } 1003 </pre> 1004 1005 <pre class="file" id="file7" style="display: none">/* 1006 Copyright 2019 The Volcano Authors. 1007 1008 Licensed under the Apache License, Version 2.0 (the "License"); 1009 you may not use this file except in compliance with the License. 1010 You may obtain a copy of the License at 1011 1012 http://www.apache.org/licenses/LICENSE-2.0 1013 1014 Unless required by applicable law or agreed to in writing, software 1015 distributed under the License is distributed on an "AS IS" BASIS, 1016 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 1017 See the License for the specific language governing permissions and 1018 limitations under the License. 
1019 */ 1020 1021 package job 1022 1023 import ( 1024 "context" 1025 "encoding/json" 1026 "fmt" 1027 "io" 1028 "os" 1029 "strings" 1030 1031 "github.com/spf13/cobra" 1032 1033 coreV1 "k8s.io/api/core/v1" 1034 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 1035 "k8s.io/client-go/kubernetes" 1036 "k8s.io/client-go/rest" 1037 1038 "volcano.sh/apis/pkg/apis/batch/v1alpha1" 1039 "volcano.sh/apis/pkg/client/clientset/versioned" 1040 "volcano.sh/volcano/pkg/cli/util" 1041 ) 1042 1043 type viewFlags struct { 1044 commonFlags 1045 1046 Namespace string 1047 JobName string 1048 } 1049 1050 // level of print indent. 1051 const ( 1052 Level0 = iota 1053 Level1 1054 Level2 1055 ) 1056 1057 var viewJobFlags = &viewFlags{} 1058 1059 // InitViewFlags init the view command flags. 1060 func InitViewFlags(cmd *cobra.Command) <span class="cov8" title="1">{ 1061 initFlags(cmd, &viewJobFlags.commonFlags) 1062 1063 cmd.Flags().StringVarP(&viewJobFlags.Namespace, "namespace", "n", "default", "the namespace of job") 1064 cmd.Flags().StringVarP(&viewJobFlags.JobName, "name", "N", "", "the name of job") 1065 }</span> 1066 1067 // ViewJob gives full details of the job. 
1068 func ViewJob() error <span class="cov8" title="1">{ 1069 config, err := util.BuildConfig(viewJobFlags.Master, viewJobFlags.Kubeconfig) 1070 if err != nil </span><span class="cov0" title="0">{ 1071 return err 1072 }</span> 1073 <span class="cov8" title="1">if viewJobFlags.JobName == "" </span><span class="cov0" title="0">{ 1074 err := fmt.Errorf("job name (specified by --name or -N) is mandatory to view a particular job") 1075 return err 1076 }</span> 1077 1078 <span class="cov8" title="1">jobClient := versioned.NewForConfigOrDie(config) 1079 job, err := jobClient.BatchV1alpha1().Jobs(viewJobFlags.Namespace).Get(context.TODO(), viewJobFlags.JobName, metav1.GetOptions{}) 1080 if err != nil </span><span class="cov0" title="0">{ 1081 return err 1082 }</span> 1083 <span class="cov8" title="1">if job == nil </span><span class="cov0" title="0">{ 1084 fmt.Printf("No resources found\n") 1085 return nil 1086 }</span> 1087 <span class="cov8" title="1">PrintJobInfo(job, os.Stdout) 1088 PrintEvents(GetEvents(config, job), os.Stdout) 1089 return nil</span> 1090 } 1091 1092 // PrintJobInfo print the job detailed info into writer. 
1093 func PrintJobInfo(job *v1alpha1.Job, writer io.Writer) <span class="cov8" title="1">{ 1094 WriteLine(writer, Level0, "Name: \t%s\n", job.Name) 1095 WriteLine(writer, Level0, "Namespace: \t%s\n", job.Namespace) 1096 if len(job.Labels) > 0 </span><span class="cov8" title="1">{ 1097 label, _ := json.Marshal(job.Labels) 1098 WriteLine(writer, Level0, "Labels: \t%s\n", string(label)) 1099 }</span> else<span class="cov0" title="0"> { 1100 WriteLine(writer, Level0, "Labels: \t<none>\n") 1101 }</span> 1102 <span class="cov8" title="1">if len(job.Annotations) > 0 </span><span class="cov8" title="1">{ 1103 annotation, _ := json.Marshal(job.Annotations) 1104 WriteLine(writer, Level0, "Annotations:\t%s\n", string(annotation)) 1105 }</span> else<span class="cov0" title="0"> { 1106 WriteLine(writer, Level0, "Annotations:\t<none>\n") 1107 }</span> 1108 <span class="cov8" title="1">WriteLine(writer, Level0, "API Version:\t%s\n", job.APIVersion) 1109 WriteLine(writer, Level0, "Kind: \t%s\n", job.Kind) 1110 1111 WriteLine(writer, Level0, "Metadata:\n") 1112 WriteLine(writer, Level1, "Creation Timestamp:\t%s\n", job.CreationTimestamp) 1113 WriteLine(writer, Level1, "Generate Name: \t%s\n", job.GenerateName) 1114 WriteLine(writer, Level1, "Generation: \t%d\n", job.Generation) 1115 WriteLine(writer, Level1, "Resource Version: \t%s\n", job.ResourceVersion) 1116 WriteLine(writer, Level1, "Self Link: \t%s\n", job.SelfLink) 1117 WriteLine(writer, Level1, "UID: \t%s\n", job.UID) 1118 1119 WriteLine(writer, Level0, "Spec:\n") 1120 WriteLine(writer, Level1, "Min Available: \t%d\n", job.Spec.MinAvailable) 1121 WriteLine(writer, Level1, "Plugins:\n") 1122 WriteLine(writer, Level2, "Env:\t%v\n", job.Spec.Plugins["env"]) 1123 WriteLine(writer, Level2, "Ssh:\t%v\n", job.Spec.Plugins["ssh"]) 1124 WriteLine(writer, Level1, "Scheduler Name: \t%s\n", job.Spec.SchedulerName) 1125 WriteLine(writer, Level1, "Tasks:\n") 1126 for i := 0; i < len(job.Spec.Tasks); i++ </span><span class="cov8" 
title="1">{ 1127 WriteLine(writer, Level2, "Name:\t%s\n", job.Spec.Tasks[i].Name) 1128 WriteLine(writer, Level2, "Replicas:\t%d\n", job.Spec.Tasks[i].Replicas) 1129 WriteLine(writer, Level2, "Template:\n") 1130 WriteLine(writer, Level2+1, "Metadata:\n") 1131 WriteLine(writer, Level2+2, "Annotations:\n") 1132 WriteLine(writer, Level2+3, "Cri . Cci . Io / Container - Type: \t%s\n", job.Spec.Tasks[i].Template.ObjectMeta.Annotations["cri.cci.io/container-type"]) 1133 WriteLine(writer, Level2+3, "Kubernetes . Io / Availablezone: \t%s\n", job.Spec.Tasks[i].Template.ObjectMeta.Annotations["kubernetes.io/availablezone"]) 1134 WriteLine(writer, Level2+3, "Network . Alpha . Kubernetes . Io / Network:\t%s\n", job.Spec.Tasks[i].Template.ObjectMeta.Annotations["network.alpha.kubernetes.io/network"]) 1135 WriteLine(writer, Level2+2, "Creation Timestamp:\t%s\n", job.Spec.Tasks[i].Template.ObjectMeta.CreationTimestamp) 1136 1137 WriteLine(writer, Level2+1, "Spec:\n") 1138 WriteLine(writer, Level2+2, "Containers:\n") 1139 for j := 0; j < len(job.Spec.Tasks[i].Template.Spec.Containers); j++ </span><span class="cov8" title="1">{ 1140 WriteLine(writer, Level2+3, "Command:\n") 1141 for k := 0; k < len(job.Spec.Tasks[i].Template.Spec.Containers[j].Command); k++ </span><span class="cov8" title="1">{ 1142 WriteLine(writer, Level2+4, "%s\n", job.Spec.Tasks[i].Template.Spec.Containers[j].Command[k]) 1143 }</span> 1144 <span class="cov8" title="1">WriteLine(writer, Level2+3, "Image:\t%s\n", job.Spec.Tasks[i].Template.Spec.Containers[j].Image) 1145 WriteLine(writer, Level2+3, "Name: \t%s\n", job.Spec.Tasks[i].Template.Spec.Containers[j].Name) 1146 WriteLine(writer, Level2+3, "Ports:\n") 1147 for k := 0; k < len(job.Spec.Tasks[i].Template.Spec.Containers[j].Ports); k++ </span><span class="cov8" title="1">{ 1148 WriteLine(writer, Level2+4, "Container Port:\t%d\n", job.Spec.Tasks[i].Template.Spec.Containers[j].Ports[k].ContainerPort) 1149 WriteLine(writer, Level2+4, "Name: \t%s\n", 
job.Spec.Tasks[i].Template.Spec.Containers[j].Ports[k].Name) 1150 }</span> 1151 <span class="cov8" title="1">WriteLine(writer, Level2+3, "Resources:\n") 1152 WriteLine(writer, Level2+4, "Limits:\n") 1153 WriteLine(writer, Level2+5, "Cpu: \t%s\n", job.Spec.Tasks[i].Template.Spec.Containers[j].Resources.Limits.Cpu()) 1154 WriteLine(writer, Level2+5, "Memory:\t%s\n", job.Spec.Tasks[i].Template.Spec.Containers[j].Resources.Limits.Memory()) 1155 WriteLine(writer, Level2+4, "Requests:\n") 1156 WriteLine(writer, Level2+5, "Cpu: \t%s\n", job.Spec.Tasks[i].Template.Spec.Containers[j].Resources.Requests.Cpu()) 1157 WriteLine(writer, Level2+5, "Memory:\t%s\n", job.Spec.Tasks[i].Template.Spec.Containers[j].Resources.Requests.Memory()) 1158 WriteLine(writer, Level2+4, "Working Dir:\t%s\n", job.Spec.Tasks[i].Template.Spec.Containers[j].WorkingDir)</span> 1159 } 1160 <span class="cov8" title="1">WriteLine(writer, Level2+2, "Image Pull Secrets:\n") 1161 for j := 0; j < len(job.Spec.Tasks[i].Template.Spec.ImagePullSecrets); j++ </span><span class="cov8" title="1">{ 1162 WriteLine(writer, Level2+3, "Name: \t%s\n", job.Spec.Tasks[i].Template.Spec.ImagePullSecrets[j].Name) 1163 }</span> 1164 <span class="cov8" title="1">WriteLine(writer, Level2+2, "Restart Policy: \t%s\n", job.Spec.Tasks[i].Template.Spec.RestartPolicy)</span> 1165 } 1166 1167 <span class="cov8" title="1">WriteLine(writer, Level0, "Status:\n") 1168 if job.Status.Succeeded > 0 </span><span class="cov8" title="1">{ 1169 WriteLine(writer, Level1, "Succeeded: \t%d\n", job.Status.Succeeded) 1170 }</span> 1171 <span class="cov8" title="1">if job.Status.Pending > 0 </span><span class="cov8" title="1">{ 1172 WriteLine(writer, Level1, "Pending: \t%d\n", job.Status.Pending) 1173 }</span> 1174 <span class="cov8" title="1">if job.Status.Running > 0 </span><span class="cov8" title="1">{ 1175 WriteLine(writer, Level1, "Running: \t%d\n", job.Status.Running) 1176 }</span> 1177 <span class="cov8" title="1">if job.Status.Failed > 0 
</span><span class="cov8" title="1">{ 1178 WriteLine(writer, Level1, "Failed: \t%d\n", job.Status.Failed) 1179 }</span> 1180 <span class="cov8" title="1">if job.Status.Terminating > 0 </span><span class="cov8" title="1">{ 1181 WriteLine(writer, Level1, "Terminating: \t%d\n", job.Status.Terminating) 1182 }</span> 1183 <span class="cov8" title="1">if job.Status.Unknown > 0 </span><span class="cov8" title="1">{ 1184 WriteLine(writer, Level1, "Unknown: \t%d\n", job.Status.Unknown) 1185 }</span> 1186 <span class="cov8" title="1">if job.Status.RetryCount > 0 </span><span class="cov8" title="1">{ 1187 WriteLine(writer, Level1, "RetryCount: \t%d\n", job.Status.RetryCount) 1188 }</span> 1189 <span class="cov8" title="1">if job.Status.MinAvailable > 0 </span><span class="cov8" title="1">{ 1190 WriteLine(writer, Level1, "Min Available:\t%d\n", job.Status.MinAvailable) 1191 }</span> 1192 <span class="cov8" title="1">if job.Status.Version > 0 </span><span class="cov8" title="1">{ 1193 WriteLine(writer, Level1, "Version: \t%d\n", job.Status.Version) 1194 }</span> 1195 1196 <span class="cov8" title="1">WriteLine(writer, Level1, "State:\n") 1197 WriteLine(writer, Level2, "Phase:\t%s\n", job.Status.State.Phase) 1198 if len(job.Status.ControlledResources) > 0 </span><span class="cov8" title="1">{ 1199 WriteLine(writer, Level1, "Controlled Resources:\n") 1200 for key, value := range job.Status.ControlledResources </span><span class="cov8" title="1">{ 1201 WriteLine(writer, Level2, "%s: \t%s\n", key, value) 1202 }</span> 1203 } 1204 } 1205 1206 // PrintEvents print event info to writer. 
1207 func PrintEvents(events []coreV1.Event, writer io.Writer) <span class="cov8" title="1">{ 1208 if len(events) > 0 </span><span class="cov8" title="1">{ 1209 WriteLine(writer, Level0, "%s:\n%-15s\t%-40s\t%-30s\t%-40s\t%s\n", "Events", "Type", "Reason", "Age", "Form", "Message") 1210 WriteLine(writer, Level0, "%-15s\t%-40s\t%-30s\t%-40s\t%s\n", "-------", "-------", "-------", "-------", "-------") 1211 for _, e := range events </span><span class="cov8" title="1">{ 1212 var interval string 1213 if e.Count > 1 </span><span class="cov8" title="1">{ 1214 interval = fmt.Sprintf("%s (x%d over %s)", translateTimestampSince(e.LastTimestamp), e.Count, translateTimestampSince(e.FirstTimestamp)) 1215 }</span> else<span class="cov8" title="1"> { 1216 interval = translateTimestampSince(e.FirstTimestamp) 1217 }</span> 1218 <span class="cov8" title="1">EventSourceString := []string{e.Source.Component} 1219 if len(e.Source.Host) > 0 </span><span class="cov0" title="0">{ 1220 EventSourceString = append(EventSourceString, e.Source.Host) 1221 }</span> 1222 <span class="cov8" title="1">WriteLine(writer, Level0, "%-15v\t%-40v\t%-30s\t%-40s\t%v\n", 1223 e.Type, 1224 e.Reason, 1225 interval, 1226 strings.Join(EventSourceString, ", "), 1227 strings.TrimSpace(e.Message), 1228 )</span> 1229 } 1230 } else<span class="cov0" title="0"> { 1231 WriteLine(writer, Level0, "Events: \t<none>\n") 1232 }</span> 1233 } 1234 1235 // GetEvents get the job event by config. 
1236 func GetEvents(config *rest.Config, job *v1alpha1.Job) []coreV1.Event <span class="cov8" title="1">{ 1237 kubernetes, err := kubernetes.NewForConfig(config) 1238 if err != nil </span><span class="cov0" title="0">{ 1239 fmt.Printf("%v\n", err) 1240 return nil 1241 }</span> 1242 <span class="cov8" title="1">events, _ := kubernetes.CoreV1().Events(viewJobFlags.Namespace).List(context.TODO(), metav1.ListOptions{}) 1243 var jobEvents []coreV1.Event 1244 for _, v := range events.Items </span><span class="cov8" title="1">{ 1245 if strings.HasPrefix(v.ObjectMeta.Name, job.Name+".") </span><span class="cov8" title="1">{ 1246 jobEvents = append(jobEvents, v) 1247 }</span> 1248 } 1249 <span class="cov8" title="1">return jobEvents</span> 1250 } 1251 1252 // WriteLine write lines with specified indent. 1253 func WriteLine(writer io.Writer, spaces int, content string, params ...interface{}) <span class="cov8" title="1">{ 1254 prefix := "" 1255 for i := 0; i < spaces; i++ </span><span class="cov8" title="1">{ 1256 prefix += " " 1257 }</span> 1258 <span class="cov8" title="1">fmt.Fprintf(writer, prefix+content, params...)</span> 1259 } 1260 </pre> 1261 1262 <pre class="file" id="file8" style="display: none">/* 1263 Copyright 2019 The Volcano Authors. 1264 1265 Licensed under the Apache License, Version 2.0 (the "License"); 1266 you may not use this file except in compliance with the License. 1267 You may obtain a copy of the License at 1268 1269 http://www.apache.org/licenses/LICENSE-2.0 1270 1271 Unless required by applicable law or agreed to in writing, software 1272 distributed under the License is distributed on an "AS IS" BASIS, 1273 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 1274 See the License for the specific language governing permissions and 1275 limitations under the License. 
1276 */ 1277 1278 package queue 1279 1280 import ( 1281 "os" 1282 "path/filepath" 1283 1284 "github.com/spf13/cobra" 1285 ) 1286 1287 type commonFlags struct { 1288 Master string 1289 Kubeconfig string 1290 SchedulerName string 1291 } 1292 1293 func initFlags(cmd *cobra.Command, cf *commonFlags) <span class="cov8" title="1">{ 1294 cmd.Flags().StringVarP(&cf.SchedulerName, "scheduler", "", "volcano", "the scheduler for this job") 1295 cmd.Flags().StringVarP(&cf.Master, "master", "s", "", "the address of apiserver") 1296 1297 kubeConfFile := os.Getenv("KUBECONFIG") 1298 if kubeConfFile == "" </span><span class="cov8" title="1">{ 1299 if home := homeDir(); home != "" </span><span class="cov8" title="1">{ 1300 kubeConfFile = filepath.Join(home, ".kube", "config") 1301 }</span> 1302 } 1303 <span class="cov8" title="1">cmd.Flags().StringVarP(&cf.Kubeconfig, "kubeconfig", "k", kubeConfFile, "(optional) absolute path to the kubeconfig file")</span> 1304 } 1305 </pre> 1306 1307 <pre class="file" id="file9" style="display: none">/* 1308 Copyright 2019 The Volcano Authors. 1309 1310 Licensed under the Apache License, Version 2.0 (the "License"); 1311 you may not use this file except in compliance with the License. 1312 You may obtain a copy of the License at 1313 1314 http://www.apache.org/licenses/LICENSE-2.0 1315 1316 Unless required by applicable law or agreed to in writing, software 1317 distributed under the License is distributed on an "AS IS" BASIS, 1318 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 1319 See the License for the specific language governing permissions and 1320 limitations under the License. 
1321 */ 1322 1323 package queue 1324 1325 import ( 1326 "context" 1327 1328 "github.com/spf13/cobra" 1329 1330 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 1331 1332 schedulingv1beta1 "volcano.sh/apis/pkg/apis/scheduling/v1beta1" 1333 "volcano.sh/apis/pkg/client/clientset/versioned" 1334 ) 1335 1336 type createFlags struct { 1337 commonFlags 1338 1339 Name string 1340 Weight int32 1341 // State is state of Queue 1342 State string 1343 } 1344 1345 var createQueueFlags = &createFlags{} 1346 1347 // InitCreateFlags is used to init all flags during queue creating. 1348 func InitCreateFlags(cmd *cobra.Command) <span class="cov8" title="1">{ 1349 initFlags(cmd, &createQueueFlags.commonFlags) 1350 1351 cmd.Flags().StringVarP(&createQueueFlags.Name, "name", "n", "test", "the name of queue") 1352 cmd.Flags().Int32VarP(&createQueueFlags.Weight, "weight", "w", 1, "the weight of the queue") 1353 1354 cmd.Flags().StringVarP(&createQueueFlags.State, "state", "S", "Open", "the state of queue") 1355 }</span> 1356 1357 // CreateQueue create queue. 
1358 func CreateQueue() error <span class="cov8" title="1">{ 1359 config, err := buildConfig(createQueueFlags.Master, createQueueFlags.Kubeconfig) 1360 if err != nil </span><span class="cov0" title="0">{ 1361 return err 1362 }</span> 1363 1364 <span class="cov8" title="1">queue := &schedulingv1beta1.Queue{ 1365 ObjectMeta: metav1.ObjectMeta{ 1366 Name: createQueueFlags.Name, 1367 }, 1368 Spec: schedulingv1beta1.QueueSpec{ 1369 Weight: createQueueFlags.Weight, 1370 }, 1371 Status: schedulingv1beta1.QueueStatus{ 1372 State: schedulingv1beta1.QueueState(createQueueFlags.State), 1373 }, 1374 } 1375 1376 queueClient := versioned.NewForConfigOrDie(config) 1377 if _, err := queueClient.SchedulingV1beta1().Queues().Create(context.TODO(), queue, metav1.CreateOptions{}); err != nil </span><span class="cov0" title="0">{ 1378 return err 1379 }</span> 1380 1381 <span class="cov8" title="1">return nil</span> 1382 } 1383 </pre> 1384 1385 <pre class="file" id="file10" style="display: none">/* 1386 Copyright 2017 The Kubernetes Authors. 1387 1388 Licensed under the Apache License, Version 2.0 (the "License"); 1389 you may not use this file except in compliance with the License. 1390 You may obtain a copy of the License at 1391 1392 http://www.apache.org/licenses/LICENSE-2.0 1393 1394 Unless required by applicable law or agreed to in writing, software 1395 distributed under the License is distributed on an "AS IS" BASIS, 1396 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 1397 See the License for the specific language governing permissions and 1398 limitations under the License. 
1399 */ 1400 1401 package queue 1402 1403 import ( 1404 "context" 1405 "fmt" 1406 1407 "volcano.sh/apis/pkg/client/clientset/versioned" 1408 1409 "github.com/spf13/cobra" 1410 1411 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 1412 ) 1413 1414 type deleteFlags struct { 1415 commonFlags 1416 1417 // Name is name of queue 1418 Name string 1419 } 1420 1421 var deleteQueueFlags = &deleteFlags{} 1422 1423 // InitDeleteFlags is used to init all flags during queue deleting. 1424 func InitDeleteFlags(cmd *cobra.Command) <span class="cov8" title="1">{ 1425 initFlags(cmd, &deleteQueueFlags.commonFlags) 1426 1427 cmd.Flags().StringVarP(&deleteQueueFlags.Name, "name", "n", "", "the name of queue") 1428 }</span> 1429 1430 // DeleteQueue delete queue. 1431 func DeleteQueue() error <span class="cov8" title="1">{ 1432 config, err := buildConfig(deleteQueueFlags.Master, deleteQueueFlags.Kubeconfig) 1433 if err != nil </span><span class="cov0" title="0">{ 1434 return err 1435 }</span> 1436 1437 <span class="cov8" title="1">if len(deleteQueueFlags.Name) == 0 </span><span class="cov8" title="1">{ 1438 return fmt.Errorf("queue name must be specified") 1439 }</span> 1440 1441 <span class="cov8" title="1">queueClient := versioned.NewForConfigOrDie(config) 1442 return queueClient.SchedulingV1beta1().Queues().Delete(context.TODO(), deleteQueueFlags.Name, metav1.DeleteOptions{})</span> 1443 } 1444 </pre> 1445 1446 <pre class="file" id="file11" style="display: none">/* 1447 Copyright 2019 The Volcano Authors. 1448 1449 Licensed under the Apache License, Version 2.0 (the "License"); 1450 you may not use this file except in compliance with the License. 1451 You may obtain a copy of the License at 1452 1453 http://www.apache.org/licenses/LICENSE-2.0 1454 1455 Unless required by applicable law or agreed to in writing, software 1456 distributed under the License is distributed on an "AS IS" BASIS, 1457 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
1458 See the License for the specific language governing permissions and 1459 limitations under the License. 1460 */ 1461 1462 package queue 1463 1464 import ( 1465 "context" 1466 "fmt" 1467 "io" 1468 "os" 1469 1470 "github.com/spf13/cobra" 1471 1472 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 1473 1474 "volcano.sh/apis/pkg/apis/scheduling/v1beta1" 1475 "volcano.sh/apis/pkg/client/clientset/versioned" 1476 ) 1477 1478 type getFlags struct { 1479 commonFlags 1480 1481 Name string 1482 } 1483 1484 var getQueueFlags = &getFlags{} 1485 1486 // InitGetFlags is used to init all flags. 1487 func InitGetFlags(cmd *cobra.Command) <span class="cov8" title="1">{ 1488 initFlags(cmd, &getQueueFlags.commonFlags) 1489 1490 cmd.Flags().StringVarP(&getQueueFlags.Name, "name", "n", "", "the name of queue") 1491 }</span> 1492 1493 // GetQueue gets a queue. 1494 func GetQueue() error <span class="cov8" title="1">{ 1495 config, err := buildConfig(getQueueFlags.Master, getQueueFlags.Kubeconfig) 1496 if err != nil </span><span class="cov0" title="0">{ 1497 return err 1498 }</span> 1499 1500 <span class="cov8" title="1">if getQueueFlags.Name == "" </span><span class="cov8" title="1">{ 1501 err := fmt.Errorf("name is mandatory to get the particular queue details") 1502 return err 1503 }</span> 1504 1505 <span class="cov8" title="1">queueClient := versioned.NewForConfigOrDie(config) 1506 queue, err := queueClient.SchedulingV1beta1().Queues().Get(context.TODO(), getQueueFlags.Name, metav1.GetOptions{}) 1507 if err != nil </span><span class="cov0" title="0">{ 1508 return err 1509 }</span> 1510 1511 <span class="cov8" title="1">PrintQueue(queue, os.Stdout) 1512 1513 return nil</span> 1514 } 1515 1516 // PrintQueue prints queue information. 
1517 func PrintQueue(queue *v1beta1.Queue, writer io.Writer) <span class="cov8" title="1">{ 1518 _, err := fmt.Fprintf(writer, "%-25s%-8s%-8s%-8s%-8s%-8s%-8s\n", 1519 Name, Weight, State, Inqueue, Pending, Running, Unknown) 1520 if err != nil </span><span class="cov0" title="0">{ 1521 fmt.Printf("Failed to print queue command result: %s.\n", err) 1522 }</span> 1523 <span class="cov8" title="1">_, err = fmt.Fprintf(writer, "%-25s%-8d%-8s%-8d%-8d%-8d%-8d\n", 1524 queue.Name, queue.Spec.Weight, queue.Status.State, queue.Status.Inqueue, 1525 queue.Status.Pending, queue.Status.Running, queue.Status.Unknown) 1526 if err != nil </span><span class="cov0" title="0">{ 1527 fmt.Printf("Failed to print queue command result: %s.\n", err) 1528 }</span> 1529 } 1530 </pre> 1531 1532 <pre class="file" id="file12" style="display: none">/* 1533 Copyright 2019 The Volcano Authors. 1534 1535 Licensed under the Apache License, Version 2.0 (the "License"); 1536 you may not use this file except in compliance with the License. 1537 You may obtain a copy of the License at 1538 1539 http://www.apache.org/licenses/LICENSE-2.0 1540 1541 Unless required by applicable law or agreed to in writing, software 1542 distributed under the License is distributed on an "AS IS" BASIS, 1543 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 1544 See the License for the specific language governing permissions and 1545 limitations under the License. 
1546 */ 1547 1548 package queue 1549 1550 import ( 1551 "context" 1552 "fmt" 1553 "io" 1554 "os" 1555 1556 "github.com/spf13/cobra" 1557 1558 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 1559 1560 "volcano.sh/apis/pkg/apis/scheduling/v1beta1" 1561 "volcano.sh/apis/pkg/client/clientset/versioned" 1562 ) 1563 1564 type listFlags struct { 1565 commonFlags 1566 } 1567 1568 const ( 1569 // Weight of the queue 1570 Weight string = "Weight" 1571 1572 // Name of queue 1573 Name string = "Name" 1574 1575 // Pending status of the queue 1576 Pending string = "Pending" 1577 1578 // Running status of the queue 1579 Running string = "Running" 1580 1581 // Unknown status of the queue 1582 Unknown string = "Unknown" 1583 1584 // Inqueue status of queue 1585 Inqueue string = "Inqueue" 1586 1587 // State is state of queue 1588 State string = "State" 1589 ) 1590 1591 var listQueueFlags = &listFlags{} 1592 1593 // InitListFlags inits all flags. 1594 func InitListFlags(cmd *cobra.Command) <span class="cov8" title="1">{ 1595 initFlags(cmd, &listQueueFlags.commonFlags) 1596 }</span> 1597 1598 // ListQueue lists all the queue. 1599 func ListQueue() error <span class="cov8" title="1">{ 1600 config, err := buildConfig(listQueueFlags.Master, listQueueFlags.Kubeconfig) 1601 if err != nil </span><span class="cov0" title="0">{ 1602 return err 1603 }</span> 1604 1605 <span class="cov8" title="1">jobClient := versioned.NewForConfigOrDie(config) 1606 queues, err := jobClient.SchedulingV1beta1().Queues().List(context.TODO(), metav1.ListOptions{}) 1607 if err != nil </span><span class="cov0" title="0">{ 1608 return err 1609 }</span> 1610 1611 <span class="cov8" title="1">if len(queues.Items) == 0 </span><span class="cov8" title="1">{ 1612 fmt.Printf("No resources found\n") 1613 return nil 1614 }</span> 1615 <span class="cov8" title="1">PrintQueues(queues, os.Stdout) 1616 1617 return nil</span> 1618 } 1619 1620 // PrintQueues prints queue information. 
1621 func PrintQueues(queues *v1beta1.QueueList, writer io.Writer) <span class="cov8" title="1">{ 1622 _, err := fmt.Fprintf(writer, "%-25s%-8s%-8s%-8s%-8s%-8s%-8s\n", 1623 Name, Weight, State, Inqueue, Pending, Running, Unknown) 1624 if err != nil </span><span class="cov0" title="0">{ 1625 fmt.Printf("Failed to print queue command result: %s.\n", err) 1626 }</span> 1627 <span class="cov8" title="1">for _, queue := range queues.Items </span><span class="cov8" title="1">{ 1628 _, err = fmt.Fprintf(writer, "%-25s%-8d%-8s%-8d%-8d%-8d%-8d\n", 1629 queue.Name, queue.Spec.Weight, queue.Status.State, queue.Status.Inqueue, 1630 queue.Status.Pending, queue.Status.Running, queue.Status.Unknown) 1631 if err != nil </span><span class="cov0" title="0">{ 1632 fmt.Printf("Failed to print queue command result: %s.\n", err) 1633 }</span> 1634 } 1635 } 1636 </pre> 1637 1638 <pre class="file" id="file13" style="display: none">/* 1639 Copyright 2017 The Kubernetes Authors. 1640 1641 Licensed under the Apache License, Version 2.0 (the "License"); 1642 you may not use this file except in compliance with the License. 1643 You may obtain a copy of the License at 1644 1645 http://www.apache.org/licenses/LICENSE-2.0 1646 1647 Unless required by applicable law or agreed to in writing, software 1648 distributed under the License is distributed on an "AS IS" BASIS, 1649 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 1650 See the License for the specific language governing permissions and 1651 limitations under the License. 
1652 */ 1653 1654 package queue 1655 1656 import ( 1657 "context" 1658 "fmt" 1659 1660 "github.com/spf13/cobra" 1661 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 1662 1663 "k8s.io/apimachinery/pkg/types" 1664 1665 "volcano.sh/apis/pkg/apis/bus/v1alpha1" 1666 "volcano.sh/apis/pkg/client/clientset/versioned" 1667 ) 1668 1669 const ( 1670 // ActionOpen is `open` action 1671 ActionOpen = "open" 1672 // ActionClose is `close` action 1673 ActionClose = "close" 1674 // ActionUpdate is `update` action 1675 ActionUpdate = "update" 1676 ) 1677 1678 type operateFlags struct { 1679 commonFlags 1680 1681 // Name is name of queue 1682 Name string 1683 // Weight is weight of queue 1684 Weight int32 1685 // Action is operation action of queue 1686 Action string 1687 } 1688 1689 var operateQueueFlags = &operateFlags{} 1690 1691 // InitOperateFlags is used to init all flags during queue operating 1692 func InitOperateFlags(cmd *cobra.Command) <span class="cov8" title="1">{ 1693 initFlags(cmd, &operateQueueFlags.commonFlags) 1694 1695 cmd.Flags().StringVarP(&operateQueueFlags.Name, "name", "n", "", "the name of queue") 1696 cmd.Flags().Int32VarP(&operateQueueFlags.Weight, "weight", "w", 0, "the weight of the queue") 1697 cmd.Flags().StringVarP(&operateQueueFlags.Action, "action", "a", "", 1698 "operate action to queue, valid actions are open, close, update") 1699 }</span> 1700 1701 // OperateQueue operates queue 1702 func OperateQueue() error <span class="cov8" title="1">{ 1703 config, err := buildConfig(operateQueueFlags.Master, operateQueueFlags.Kubeconfig) 1704 if err != nil </span><span class="cov0" title="0">{ 1705 return err 1706 }</span> 1707 1708 <span class="cov8" title="1">if len(operateQueueFlags.Name) == 0 </span><span class="cov8" title="1">{ 1709 return fmt.Errorf("queue name must be specified") 1710 }</span> 1711 1712 <span class="cov8" title="1">var action v1alpha1.Action 1713 1714 switch operateQueueFlags.Action </span>{ 1715 case ActionOpen:<span class="cov8" 
title="1"> 1716 action = v1alpha1.OpenQueueAction</span> 1717 case ActionClose:<span class="cov8" title="1"> 1718 action = v1alpha1.CloseQueueAction</span> 1719 case ActionUpdate:<span class="cov8" title="1"> 1720 if operateQueueFlags.Weight == 0 </span><span class="cov8" title="1">{ 1721 return fmt.Errorf("when %s queue %s, weight must be specified, "+ 1722 "the value must be greater than 0", ActionUpdate, operateQueueFlags.Name) 1723 }</span> 1724 1725 <span class="cov8" title="1">queueClient := versioned.NewForConfigOrDie(config) 1726 patchBytes := []byte(fmt.Sprintf(`{"spec":{"weight":%d}}`, operateQueueFlags.Weight)) 1727 _, err := queueClient.SchedulingV1beta1().Queues().Patch(context.TODO(), 1728 operateQueueFlags.Name, types.MergePatchType, patchBytes, metav1.PatchOptions{}) 1729 1730 return err</span> 1731 case "":<span class="cov8" title="1"> 1732 return fmt.Errorf("action can not be null")</span> 1733 default:<span class="cov8" title="1"> 1734 return fmt.Errorf("action %s invalid, valid actions are %s, %s and %s", 1735 operateQueueFlags.Action, ActionOpen, ActionClose, ActionUpdate)</span> 1736 } 1737 1738 <span class="cov8" title="1">return createQueueCommand(config, action)</span> 1739 } 1740 </pre> 1741 1742 <pre class="file" id="file14" style="display: none">/* 1743 Copyright 2019 The Volcano Authors. 1744 1745 Licensed under the Apache License, Version 2.0 (the "License"); 1746 you may not use this file except in compliance with the License. 1747 You may obtain a copy of the License at 1748 1749 http://www.apache.org/licenses/LICENSE-2.0 1750 1751 Unless required by applicable law or agreed to in writing, software 1752 distributed under the License is distributed on an "AS IS" BASIS, 1753 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 1754 See the License for the specific language governing permissions and 1755 limitations under the License. 
1756 */ 1757 1758 package queue 1759 1760 import ( 1761 "context" 1762 "fmt" 1763 "os" 1764 "strings" 1765 1766 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 1767 // Initialize client auth plugin. 1768 _ "k8s.io/client-go/plugin/pkg/client/auth/gcp" 1769 "k8s.io/client-go/rest" 1770 "k8s.io/client-go/tools/clientcmd" 1771 1772 busv1alpha1 "volcano.sh/apis/pkg/apis/bus/v1alpha1" 1773 "volcano.sh/apis/pkg/apis/helpers" 1774 "volcano.sh/apis/pkg/client/clientset/versioned" 1775 ) 1776 1777 func homeDir() string <span class="cov8" title="1">{ 1778 if h := os.Getenv("HOME"); h != "" </span><span class="cov8" title="1">{ 1779 return h 1780 }</span> 1781 <span class="cov0" title="0">return os.Getenv("USERPROFILE")</span> // windows 1782 } 1783 1784 func buildConfig(master, kubeconfig string) (*rest.Config, error) <span class="cov8" title="1">{ 1785 return clientcmd.BuildConfigFromFlags(master, kubeconfig) 1786 }</span> 1787 1788 func createQueueCommand(config *rest.Config, action busv1alpha1.Action) error <span class="cov8" title="1">{ 1789 queueClient := versioned.NewForConfigOrDie(config) 1790 queue, err := queueClient.SchedulingV1beta1().Queues().Get(context.TODO(), operateQueueFlags.Name, metav1.GetOptions{}) 1791 if err != nil </span><span class="cov0" title="0">{ 1792 return err 1793 }</span> 1794 1795 <span class="cov8" title="1">ctrlRef := metav1.NewControllerRef(queue, helpers.V1beta1QueueKind) 1796 cmd := &busv1alpha1.Command{ 1797 ObjectMeta: metav1.ObjectMeta{ 1798 GenerateName: fmt.Sprintf("%s-%s-", 1799 queue.Name, strings.ToLower(string(action))), 1800 OwnerReferences: []metav1.OwnerReference{ 1801 *ctrlRef, 1802 }, 1803 }, 1804 TargetObject: ctrlRef, 1805 Action: string(action), 1806 } 1807 1808 if _, err := queueClient.BusV1alpha1().Commands("default").Create(context.TODO(), cmd, metav1.CreateOptions{}); err != nil </span><span class="cov0" title="0">{ 1809 return err 1810 }</span> 1811 1812 <span class="cov8" title="1">return nil</span> 1813 } 1814 
</pre> 1815 1816 <pre class="file" id="file15" style="display: none">/* 1817 Copyright 2019 The Volcano Authors. 1818 1819 Licensed under the Apache License, Version 2.0 (the "License"); 1820 you may not use this file except in compliance with the License. 1821 You may obtain a copy of the License at 1822 1823 http://www.apache.org/licenses/LICENSE-2.0 1824 1825 Unless required by applicable law or agreed to in writing, software 1826 distributed under the License is distributed on an "AS IS" BASIS, 1827 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 1828 See the License for the specific language governing permissions and 1829 limitations under the License. 1830 */ 1831 1832 package util 1833 1834 import ( 1835 "context" 1836 "fmt" 1837 "os" 1838 "path/filepath" 1839 "strings" 1840 "time" 1841 1842 "github.com/spf13/cobra" 1843 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 1844 1845 v1 "k8s.io/api/core/v1" 1846 "k8s.io/apimachinery/pkg/api/resource" 1847 "k8s.io/client-go/rest" 1848 "k8s.io/client-go/tools/clientcmd" 1849 1850 vcbus "volcano.sh/apis/pkg/apis/bus/v1alpha1" 1851 "volcano.sh/apis/pkg/apis/helpers" 1852 "volcano.sh/apis/pkg/client/clientset/versioned" 1853 ) 1854 1855 // CommonFlags are the flags that most command lines have. 1856 type CommonFlags struct { 1857 Master string 1858 Kubeconfig string 1859 } 1860 1861 // InitFlags initializes the common flags for most command lines. 
1862 func InitFlags(cmd *cobra.Command, cf *CommonFlags) <span class="cov0" title="0">{ 1863 cmd.Flags().StringVarP(&cf.Master, "master", "s", "", "the address of apiserver") 1864 1865 kubeConfFile := os.Getenv("KUBECONFIG") 1866 if kubeConfFile == "" </span><span class="cov0" title="0">{ 1867 if home := HomeDir(); home != "" </span><span class="cov0" title="0">{ 1868 kubeConfFile = filepath.Join(home, ".kube", "config") 1869 }</span> 1870 } 1871 <span class="cov0" title="0">cmd.Flags().StringVarP(&cf.Kubeconfig, "kubeconfig", "k", kubeConfFile, "(optional) absolute path to the kubeconfig file")</span> 1872 } 1873 1874 // HomeDir gets the env $HOME. 1875 func HomeDir() string <span class="cov0" title="0">{ 1876 if h := os.Getenv("HOME"); h != "" </span><span class="cov0" title="0">{ 1877 return h 1878 }</span> 1879 <span class="cov0" title="0">return os.Getenv("USERPROFILE")</span> // windows 1880 } 1881 1882 // BuildConfig builds the configure file for command lines. 1883 func BuildConfig(master, kubeconfig string) (*rest.Config, error) <span class="cov0" title="0">{ 1884 return clientcmd.BuildConfigFromFlags(master, kubeconfig) 1885 }</span> 1886 1887 // PopulateResourceListV1 takes strings of form <resourceName1>=<value1>,<resourceName1>=<value2> and returns ResourceList. 
1888 func PopulateResourceListV1(spec string) (v1.ResourceList, error) <span class="cov0" title="0">{ 1889 // empty input gets a nil response to preserve generator test expected behaviors 1890 if spec == "" </span><span class="cov0" title="0">{ 1891 return nil, nil 1892 }</span> 1893 1894 <span class="cov0" title="0">result := v1.ResourceList{} 1895 resourceStatements := strings.Split(spec, ",") 1896 for _, resourceStatement := range resourceStatements </span><span class="cov0" title="0">{ 1897 parts := strings.Split(resourceStatement, "=") 1898 if len(parts) != 2 </span><span class="cov0" title="0">{ 1899 return nil, fmt.Errorf("invalid argument syntax %v, expected <resource>=<value>", resourceStatement) 1900 }</span> 1901 <span class="cov0" title="0">resourceName := v1.ResourceName(parts[0]) 1902 resourceQuantity, err := resource.ParseQuantity(parts[1]) 1903 if err != nil </span><span class="cov0" title="0">{ 1904 return nil, err 1905 }</span> 1906 <span class="cov0" title="0">result[resourceName] = resourceQuantity</span> 1907 } 1908 <span class="cov0" title="0">return result, nil</span> 1909 } 1910 1911 // CreateQueueCommand executes a command such as open/close 1912 func CreateQueueCommand(vcClient *versioned.Clientset, ns, name string, action vcbus.Action) error <span class="cov0" title="0">{ 1913 queue, err := vcClient.SchedulingV1beta1().Queues().Get(context.TODO(), name, metav1.GetOptions{}) 1914 if err != nil </span><span class="cov0" title="0">{ 1915 return err 1916 }</span> 1917 <span class="cov0" title="0">ctrlRef := metav1.NewControllerRef(queue, helpers.V1beta1QueueKind) 1918 cmd := &vcbus.Command{ 1919 ObjectMeta: metav1.ObjectMeta{ 1920 GenerateName: fmt.Sprintf("%s-%s-", 1921 queue.Name, strings.ToLower(string(action))), 1922 Namespace: queue.Namespace, 1923 OwnerReferences: []metav1.OwnerReference{ 1924 *ctrlRef, 1925 }, 1926 }, 1927 TargetObject: ctrlRef, 1928 Action: string(action), 1929 } 1930 1931 if _, err := 
vcClient.BusV1alpha1().Commands(ns).Create(context.TODO(), cmd, metav1.CreateOptions{}); err != nil </span><span class="cov0" title="0">{ 1932 return err 1933 }</span> 1934 1935 <span class="cov0" title="0">return nil</span> 1936 } 1937 1938 // CreateJobCommand executes a command such as resume/suspend. 1939 func CreateJobCommand(config *rest.Config, ns, name string, action vcbus.Action) error <span class="cov0" title="0">{ 1940 jobClient := versioned.NewForConfigOrDie(config) 1941 job, err := jobClient.BatchV1alpha1().Jobs(ns).Get(context.TODO(), name, metav1.GetOptions{}) 1942 if err != nil </span><span class="cov0" title="0">{ 1943 return err 1944 }</span> 1945 1946 <span class="cov0" title="0">ctrlRef := metav1.NewControllerRef(job, helpers.JobKind) 1947 cmd := &vcbus.Command{ 1948 ObjectMeta: metav1.ObjectMeta{ 1949 GenerateName: fmt.Sprintf("%s-%s-", 1950 job.Name, strings.ToLower(string(action))), 1951 Namespace: job.Namespace, 1952 OwnerReferences: []metav1.OwnerReference{ 1953 *ctrlRef, 1954 }, 1955 }, 1956 TargetObject: ctrlRef, 1957 Action: string(action), 1958 } 1959 1960 if _, err := jobClient.BusV1alpha1().Commands(ns).Create(context.TODO(), cmd, metav1.CreateOptions{}); err != nil </span><span class="cov0" title="0">{ 1961 return err 1962 }</span> 1963 1964 <span class="cov0" title="0">return nil</span> 1965 } 1966 1967 // TranslateTimestampSince translates the time stamp. 1968 func TranslateTimestampSince(timestamp metav1.Time) string <span class="cov0" title="0">{ 1969 if timestamp.IsZero() </span><span class="cov0" title="0">{ 1970 return "<unknown>" 1971 }</span> 1972 <span class="cov0" title="0">return HumanDuration(time.Since(timestamp.Time))</span> 1973 } 1974 1975 // HumanDuration translate time.Duration to human readable time string. 
1976 func HumanDuration(d time.Duration) string <span class="cov8" title="1">{ 1977 // Allow deviation no more than 2 seconds(excluded) to tolerate machine time 1978 // inconsistence, it can be considered as almost now. 1979 if seconds := int(d.Seconds()); seconds < -1 </span><span class="cov8" title="1">{ 1980 return "<invalid>" 1981 }</span> else<span class="cov8" title="1"> if seconds < 0 </span><span class="cov0" title="0">{ 1982 return "0s" 1983 }</span> else<span class="cov8" title="1"> if seconds < 60*2 </span><span class="cov8" title="1">{ 1984 return fmt.Sprintf("%ds", seconds) 1985 }</span> 1986 <span class="cov8" title="1">minutes := int(d / time.Minute) 1987 if minutes < 10 </span><span class="cov8" title="1">{ 1988 s := int(d/time.Second) % 60 1989 if s == 0 </span><span class="cov8" title="1">{ 1990 return fmt.Sprintf("%dm", minutes) 1991 }</span> 1992 <span class="cov8" title="1">return fmt.Sprintf("%dm%ds", minutes, s)</span> 1993 } else<span class="cov8" title="1"> if minutes < 60*3 </span><span class="cov8" title="1">{ 1994 return fmt.Sprintf("%dm", minutes) 1995 }</span> 1996 <span class="cov8" title="1">hours := int(d / time.Hour) 1997 if hours < 8 </span><span class="cov8" title="1">{ 1998 m := int(d/time.Minute) % 60 1999 if m == 0 </span><span class="cov8" title="1">{ 2000 return fmt.Sprintf("%dh", hours) 2001 }</span> 2002 <span class="cov8" title="1">return fmt.Sprintf("%dh%dm", hours, m)</span> 2003 } else<span class="cov8" title="1"> if hours < 48 </span><span class="cov8" title="1">{ 2004 return fmt.Sprintf("%dh", hours) 2005 }</span> else<span class="cov8" title="1"> if hours < 24*8 </span><span class="cov8" title="1">{ 2006 h := hours % 24 2007 if h == 0 </span><span class="cov8" title="1">{ 2008 return fmt.Sprintf("%dd", hours/24) 2009 }</span> 2010 <span class="cov8" title="1">return fmt.Sprintf("%dd%dh", hours/24, h)</span> 2011 } else<span class="cov8" title="1"> if hours < 24*365*2 </span><span class="cov8" title="1">{ 2012 return 
fmt.Sprintf("%dd", hours/24) 2013 }</span> else<span class="cov8" title="1"> if hours < 24*365*8 </span><span class="cov8" title="1">{ 2014 return fmt.Sprintf("%dy%dd", hours/24/365, (hours/24)%365) 2015 }</span> 2016 <span class="cov8" title="1">return fmt.Sprintf("%dy", hours/24/365)</span> 2017 } 2018 </pre> 2019 2020 <pre class="file" id="file16" style="display: none">/* 2021 Copyright 2019 The Volcano Authors. 2022 2023 Licensed under the Apache License, Version 2.0 (the "License"); 2024 you may not use this file except in compliance with the License. 2025 You may obtain a copy of the License at 2026 2027 http://www.apache.org/licenses/LICENSE-2.0 2028 2029 Unless required by applicable law or agreed to in writing, software 2030 distributed under the License is distributed on an "AS IS" BASIS, 2031 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 2032 See the License for the specific language governing permissions and 2033 limitations under the License. 2034 */ 2035 2036 package vcancel 2037 2038 import ( 2039 "context" 2040 "fmt" 2041 2042 "github.com/spf13/cobra" 2043 2044 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 2045 2046 "volcano.sh/apis/pkg/client/clientset/versioned" 2047 "volcano.sh/volcano/pkg/cli/util" 2048 ) 2049 2050 type cancelFlags struct { 2051 util.CommonFlags 2052 2053 Namespace string 2054 JobName string 2055 } 2056 2057 var cancelJobFlags = &cancelFlags{} 2058 2059 // InitCancelFlags init the cancel command flags. 2060 func InitCancelFlags(cmd *cobra.Command) <span class="cov8" title="1">{ 2061 util.InitFlags(cmd, &cancelJobFlags.CommonFlags) 2062 2063 cmd.Flags().StringVarP(&cancelJobFlags.Namespace, "namespace", "N", "default", "the namespace of job") 2064 cmd.Flags().StringVarP(&cancelJobFlags.JobName, "name", "n", "", "the name of job") 2065 }</span> 2066 2067 // CancelJob cancel the job. 
2068 func CancelJob() error <span class="cov8" title="1">{ 2069 config, err := util.BuildConfig(cancelJobFlags.Master, cancelJobFlags.Kubeconfig) 2070 if err != nil </span><span class="cov0" title="0">{ 2071 return err 2072 }</span> 2073 2074 <span class="cov8" title="1">if cancelJobFlags.JobName == "" </span><span class="cov0" title="0">{ 2075 err := fmt.Errorf("job name is mandatory to cancel a particular job") 2076 return err 2077 }</span> 2078 2079 <span class="cov8" title="1">jobClient := versioned.NewForConfigOrDie(config) 2080 err = jobClient.BatchV1alpha1().Jobs(cancelJobFlags.Namespace).Delete(context.TODO(), cancelJobFlags.JobName, metav1.DeleteOptions{}) 2081 if err != nil </span><span class="cov0" title="0">{ 2082 return err 2083 }</span> 2084 <span class="cov8" title="1">fmt.Printf("cancel job %v successfully\n", cancelJobFlags.JobName) 2085 return nil</span> 2086 } 2087 </pre> 2088 2089 <pre class="file" id="file17" style="display: none">/* 2090 Copyright 2019 The Volcano Authors. 2091 2092 Licensed under the Apache License, Version 2.0 (the "License"); 2093 you may not use this file except in compliance with the License. 2094 You may obtain a copy of the License at 2095 2096 http://www.apache.org/licenses/LICENSE-2.0 2097 2098 Unless required by applicable law or agreed to in writing, software 2099 distributed under the License is distributed on an "AS IS" BASIS, 2100 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 2101 See the License for the specific language governing permissions and 2102 limitations under the License. 2103 */ 2104 2105 package vresume 2106 2107 import ( 2108 "fmt" 2109 2110 "github.com/spf13/cobra" 2111 2112 "volcano.sh/apis/pkg/apis/bus/v1alpha1" 2113 "volcano.sh/volcano/pkg/cli/util" 2114 ) 2115 2116 type resumeFlags struct { 2117 util.CommonFlags 2118 2119 Namespace string 2120 JobName string 2121 } 2122 2123 var resumeJobFlags = &resumeFlags{} 2124 2125 // InitResumeFlags init resume command flags. 
2126 func InitResumeFlags(cmd *cobra.Command) <span class="cov8" title="1">{ 2127 util.InitFlags(cmd, &resumeJobFlags.CommonFlags) 2128 2129 cmd.Flags().StringVarP(&resumeJobFlags.Namespace, "namespace", "N", "default", "the namespace of job") 2130 cmd.Flags().StringVarP(&resumeJobFlags.JobName, "name", "n", "", "the name of job") 2131 }</span> 2132 2133 // ResumeJob resumes the job. 2134 func ResumeJob() error <span class="cov8" title="1">{ 2135 config, err := util.BuildConfig(resumeJobFlags.Master, resumeJobFlags.Kubeconfig) 2136 if err != nil </span><span class="cov0" title="0">{ 2137 return err 2138 }</span> 2139 <span class="cov8" title="1">if resumeJobFlags.JobName == "" </span><span class="cov0" title="0">{ 2140 err := fmt.Errorf("job name is mandatory to resume a particular job") 2141 return err 2142 }</span> 2143 2144 <span class="cov8" title="1">return util.CreateJobCommand(config, 2145 resumeJobFlags.Namespace, resumeJobFlags.JobName, 2146 v1alpha1.ResumeJobAction)</span> 2147 } 2148 </pre> 2149 2150 <pre class="file" id="file18" style="display: none">/* 2151 Copyright 2019 The Volcano Authors. 2152 2153 Licensed under the Apache License, Version 2.0 (the "License"); 2154 you may not use this file except in compliance with the License. 2155 You may obtain a copy of the License at 2156 2157 http://www.apache.org/licenses/LICENSE-2.0 2158 2159 Unless required by applicable law or agreed to in writing, software 2160 distributed under the License is distributed on an "AS IS" BASIS, 2161 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 2162 See the License for the specific language governing permissions and 2163 limitations under the License. 
2164 */ 2165 2166 package vsuspend 2167 2168 import ( 2169 "fmt" 2170 2171 "github.com/spf13/cobra" 2172 2173 "volcano.sh/apis/pkg/apis/bus/v1alpha1" 2174 "volcano.sh/volcano/pkg/cli/util" 2175 ) 2176 2177 type suspendFlags struct { 2178 util.CommonFlags 2179 2180 Namespace string 2181 JobName string 2182 } 2183 2184 var suspendJobFlags = &suspendFlags{} 2185 2186 // InitSuspendFlags init suspend related flags. 2187 func InitSuspendFlags(cmd *cobra.Command) <span class="cov8" title="1">{ 2188 util.InitFlags(cmd, &suspendJobFlags.CommonFlags) 2189 2190 cmd.Flags().StringVarP(&suspendJobFlags.Namespace, "namespace", "N", "default", "the namespace of job") 2191 cmd.Flags().StringVarP(&suspendJobFlags.JobName, "name", "n", "", "the name of job") 2192 }</span> 2193 2194 // SuspendJob suspends the job. 2195 func SuspendJob() error <span class="cov8" title="1">{ 2196 config, err := util.BuildConfig(suspendJobFlags.Master, suspendJobFlags.Kubeconfig) 2197 if err != nil </span><span class="cov0" title="0">{ 2198 return err 2199 }</span> 2200 2201 <span class="cov8" title="1">if suspendJobFlags.JobName == "" </span><span class="cov0" title="0">{ 2202 err := fmt.Errorf("job name is mandatory to suspend a particular job") 2203 return err 2204 }</span> 2205 2206 <span class="cov8" title="1">return util.CreateJobCommand(config, 2207 suspendJobFlags.Namespace, suspendJobFlags.JobName, 2208 v1alpha1.AbortJobAction)</span> 2209 } 2210 </pre> 2211 2212 <pre class="file" id="file19" style="display: none">/* 2213 Copyright 2019 The Volcano Authors. 2214 2215 Licensed under the Apache License, Version 2.0 (the "License"); 2216 you may not use this file except in compliance with the License. 
2217 You may obtain a copy of the License at 2218 2219 http://www.apache.org/licenses/LICENSE-2.0 2220 2221 Unless required by applicable law or agreed to in writing, software 2222 distributed under the License is distributed on an "AS IS" BASIS, 2223 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 2224 See the License for the specific language governing permissions and 2225 limitations under the License. 2226 */ 2227 2228 package apis 2229 2230 import ( 2231 "fmt" 2232 2233 v1 "k8s.io/api/core/v1" 2234 2235 batch "volcano.sh/apis/pkg/apis/batch/v1alpha1" 2236 ) 2237 2238 //JobInfo struct. 2239 type JobInfo struct { 2240 Namespace string 2241 Name string 2242 2243 Job *batch.Job 2244 Pods map[string]map[string]*v1.Pod 2245 } 2246 2247 //Clone function clones the k8s pod values to the JobInfo struct. 2248 func (ji *JobInfo) Clone() *JobInfo <span class="cov8" title="1">{ 2249 job := &JobInfo{ 2250 Namespace: ji.Namespace, 2251 Name: ji.Name, 2252 Job: ji.Job, 2253 2254 Pods: make(map[string]map[string]*v1.Pod), 2255 } 2256 2257 for key, pods := range ji.Pods </span><span class="cov0" title="0">{ 2258 job.Pods[key] = make(map[string]*v1.Pod) 2259 for pn, pod := range pods </span><span class="cov0" title="0">{ 2260 job.Pods[key][pn] = pod 2261 }</span> 2262 } 2263 2264 <span class="cov8" title="1">return job</span> 2265 } 2266 2267 //SetJob sets the volcano jobs values to the JobInfo struct. 2268 func (ji *JobInfo) SetJob(job *batch.Job) <span class="cov8" title="1">{ 2269 ji.Name = job.Name 2270 ji.Namespace = job.Namespace 2271 ji.Job = job 2272 }</span> 2273 2274 //AddPod adds the k8s pod object values to the Pods field 2275 //of JobStruct if it doesn't exist. Otherwise it throws error. 
2276 func (ji *JobInfo) AddPod(pod *v1.Pod) error <span class="cov8" title="1">{ 2277 taskName, found := pod.Annotations[batch.TaskSpecKey] 2278 if !found </span><span class="cov0" title="0">{ 2279 return fmt.Errorf("failed to taskName of Pod <%s/%s>", 2280 pod.Namespace, pod.Name) 2281 }</span> 2282 2283 <span class="cov8" title="1">_, found = pod.Annotations[batch.JobVersion] 2284 if !found </span><span class="cov0" title="0">{ 2285 return fmt.Errorf("failed to find jobVersion of Pod <%s/%s>", 2286 pod.Namespace, pod.Name) 2287 }</span> 2288 2289 <span class="cov8" title="1">if _, found := ji.Pods[taskName]; !found </span><span class="cov8" title="1">{ 2290 ji.Pods[taskName] = make(map[string]*v1.Pod) 2291 }</span> 2292 <span class="cov8" title="1">if _, found := ji.Pods[taskName][pod.Name]; found </span><span class="cov8" title="1">{ 2293 return fmt.Errorf("duplicated pod") 2294 }</span> 2295 <span class="cov8" title="1">ji.Pods[taskName][pod.Name] = pod 2296 2297 return nil</span> 2298 } 2299 2300 //UpdatePod updates the k8s pod object values to the existing pod. 
2301 func (ji *JobInfo) UpdatePod(pod *v1.Pod) error <span class="cov8" title="1">{ 2302 taskName, found := pod.Annotations[batch.TaskSpecKey] 2303 if !found </span><span class="cov0" title="0">{ 2304 return fmt.Errorf("failed to find taskName of Pod <%s/%s>", 2305 pod.Namespace, pod.Name) 2306 }</span> 2307 <span class="cov8" title="1">_, found = pod.Annotations[batch.JobVersion] 2308 if !found </span><span class="cov0" title="0">{ 2309 return fmt.Errorf("failed to find jobVersion of Pod <%s/%s>", 2310 pod.Namespace, pod.Name) 2311 }</span> 2312 2313 <span class="cov8" title="1">if _, found := ji.Pods[taskName]; !found </span><span class="cov0" title="0">{ 2314 return fmt.Errorf("can not find task %s in cache", taskName) 2315 }</span> 2316 <span class="cov8" title="1">if _, found := ji.Pods[taskName][pod.Name]; !found </span><span class="cov0" title="0">{ 2317 return fmt.Errorf("can not find pod <%s/%s> in cache", 2318 pod.Namespace, pod.Name) 2319 }</span> 2320 <span class="cov8" title="1">ji.Pods[taskName][pod.Name] = pod 2321 2322 return nil</span> 2323 } 2324 2325 //DeletePod deletes the given k8s pod from the JobInfo struct. 
2326 func (ji *JobInfo) DeletePod(pod *v1.Pod) error <span class="cov8" title="1">{ 2327 taskName, found := pod.Annotations[batch.TaskSpecKey] 2328 if !found </span><span class="cov0" title="0">{ 2329 return fmt.Errorf("failed to find taskName of Pod <%s/%s>", 2330 pod.Namespace, pod.Name) 2331 }</span> 2332 <span class="cov8" title="1">_, found = pod.Annotations[batch.JobVersion] 2333 if !found </span><span class="cov0" title="0">{ 2334 return fmt.Errorf("failed to find jobVersion of Pod <%s/%s>", 2335 pod.Namespace, pod.Name) 2336 }</span> 2337 2338 <span class="cov8" title="1">if pods, found := ji.Pods[taskName]; found </span><span class="cov8" title="1">{ 2339 delete(pods, pod.Name) 2340 if len(pods) == 0 </span><span class="cov8" title="1">{ 2341 delete(ji.Pods, taskName) 2342 }</span> 2343 } 2344 2345 <span class="cov8" title="1">return nil</span> 2346 } 2347 </pre> 2348 2349 <pre class="file" id="file20" style="display: none">/* 2350 Copyright 2019 The Volcano Authors. 2351 2352 Licensed under the Apache License, Version 2.0 (the "License"); 2353 you may not use this file except in compliance with the License. 2354 You may obtain a copy of the License at 2355 2356 http://www.apache.org/licenses/LICENSE-2.0 2357 2358 Unless required by applicable law or agreed to in writing, software 2359 distributed under the License is distributed on an "AS IS" BASIS, 2360 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 2361 See the License for the specific language governing permissions and 2362 limitations under the License. 2363 */ 2364 2365 package apis 2366 2367 import ( 2368 "fmt" 2369 2370 "volcano.sh/apis/pkg/apis/bus/v1alpha1" 2371 ) 2372 2373 //Request struct. 
2374 type Request struct { 2375 Namespace string 2376 JobName string 2377 TaskName string 2378 QueueName string 2379 2380 Event v1alpha1.Event 2381 ExitCode int32 2382 Action v1alpha1.Action 2383 JobVersion int32 2384 } 2385 2386 // String function returns the request in string format. 2387 func (r Request) String() string <span class="cov8" title="1">{ 2388 return fmt.Sprintf( 2389 "Queue: %s, Job: %s/%s, Task:%s, Event:%s, ExitCode:%d, Action:%s, JobVersion: %d", 2390 r.QueueName, r.Namespace, r.JobName, r.TaskName, r.Event, r.ExitCode, r.Action, r.JobVersion) 2391 }</span> 2392 </pre> 2393 2394 <pre class="file" id="file21" style="display: none">/* 2395 Copyright 2019 The Volcano Authors. 2396 2397 Licensed under the Apache License, Version 2.0 (the "License"); 2398 you may not use this file except in compliance with the License. 2399 You may obtain a copy of the License at 2400 2401 http://www.apache.org/licenses/LICENSE-2.0 2402 2403 Unless required by applicable law or agreed to in writing, software 2404 distributed under the License is distributed on an "AS IS" BASIS, 2405 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 2406 See the License for the specific language governing permissions and 2407 limitations under the License. 2408 */ 2409 2410 package cache 2411 2412 import ( 2413 "fmt" 2414 "sync" 2415 "time" 2416 2417 "golang.org/x/time/rate" 2418 2419 v1 "k8s.io/api/core/v1" 2420 "k8s.io/apimachinery/pkg/util/wait" 2421 "k8s.io/client-go/util/workqueue" 2422 "k8s.io/klog" 2423 2424 "volcano.sh/apis/pkg/apis/batch/v1alpha1" 2425 "volcano.sh/volcano/pkg/controllers/apis" 2426 ) 2427 2428 type jobCache struct { 2429 sync.Mutex 2430 2431 jobs map[string]*apis.JobInfo 2432 deletedJobs workqueue.RateLimitingInterface 2433 } 2434 2435 func keyFn(ns, name string) string <span class="cov8" title="1">{ 2436 return fmt.Sprintf("%s/%s", ns, name) 2437 }</span> 2438 2439 //JobKeyByName gets the key for the job name. 
2440 func JobKeyByName(namespace string, name string) string <span class="cov0" title="0">{ 2441 return keyFn(namespace, name) 2442 }</span> 2443 2444 //JobKeyByReq gets the key for the job request. 2445 func JobKeyByReq(req *apis.Request) string <span class="cov0" title="0">{ 2446 return keyFn(req.Namespace, req.JobName) 2447 }</span> 2448 2449 //JobKey gets the "ns"/"name" format of the given job. 2450 func JobKey(job *v1alpha1.Job) string <span class="cov8" title="1">{ 2451 return keyFn(job.Namespace, job.Name) 2452 }</span> 2453 2454 func jobTerminated(job *apis.JobInfo) bool <span class="cov0" title="0">{ 2455 return job.Job == nil && len(job.Pods) == 0 2456 }</span> 2457 2458 func jobKeyOfPod(pod *v1.Pod) (string, error) <span class="cov8" title="1">{ 2459 jobName, found := pod.Annotations[v1alpha1.JobNameKey] 2460 if !found </span><span class="cov8" title="1">{ 2461 return "", fmt.Errorf("failed to find job name of pod <%s/%s>", 2462 pod.Namespace, pod.Name) 2463 }</span> 2464 2465 <span class="cov8" title="1">return keyFn(pod.Namespace, jobName), nil</span> 2466 } 2467 2468 // New gets the job Cache. 2469 func New() Cache <span class="cov8" title="1">{ 2470 queue := workqueue.NewMaxOfRateLimiter( 2471 workqueue.NewItemExponentialFailureRateLimiter(5*time.Millisecond, 180*time.Second), 2472 // 10 qps, 100 bucket size. 
This is only for retry speed and its only the overall factor (not per item) 2473 &workqueue.BucketRateLimiter{Limiter: rate.NewLimiter(rate.Limit(10), 100)}, 2474 ) 2475 2476 return &jobCache{ 2477 jobs: map[string]*apis.JobInfo{}, 2478 deletedJobs: workqueue.NewRateLimitingQueue(queue), 2479 } 2480 }</span> 2481 2482 func (jc *jobCache) Get(key string) (*apis.JobInfo, error) <span class="cov8" title="1">{ 2483 jc.Lock() 2484 defer jc.Unlock() 2485 2486 job, found := jc.jobs[key] 2487 if !found </span><span class="cov8" title="1">{ 2488 return nil, fmt.Errorf("failed to find job <%s>", key) 2489 }</span> 2490 2491 <span class="cov8" title="1">if job.Job == nil </span><span class="cov8" title="1">{ 2492 return nil, fmt.Errorf("job <%s> is not ready", key) 2493 }</span> 2494 2495 <span class="cov8" title="1">return job.Clone(), nil</span> 2496 } 2497 2498 func (jc *jobCache) GetStatus(key string) (*v1alpha1.JobStatus, error) <span class="cov8" title="1">{ 2499 jc.Lock() 2500 defer jc.Unlock() 2501 2502 job, found := jc.jobs[key] 2503 if !found </span><span class="cov8" title="1">{ 2504 return nil, fmt.Errorf("failed to find job <%s>", key) 2505 }</span> 2506 2507 <span class="cov8" title="1">if job.Job == nil </span><span class="cov0" title="0">{ 2508 return nil, fmt.Errorf("job <%s> is not ready", key) 2509 }</span> 2510 2511 <span class="cov8" title="1">status := job.Job.Status 2512 2513 return &status, nil</span> 2514 } 2515 2516 func (jc *jobCache) Add(job *v1alpha1.Job) error <span class="cov8" title="1">{ 2517 jc.Lock() 2518 defer jc.Unlock() 2519 key := JobKey(job) 2520 if jobInfo, found := jc.jobs[key]; found </span><span class="cov8" title="1">{ 2521 if jobInfo.Job == nil </span><span class="cov0" title="0">{ 2522 jobInfo.SetJob(job) 2523 2524 return nil 2525 }</span> 2526 <span class="cov8" title="1">return fmt.Errorf("duplicated jobInfo <%v>", key)</span> 2527 } 2528 2529 <span class="cov8" title="1">jc.jobs[key] = &apis.JobInfo{ 2530 Name: job.Name, 2531 
Namespace: job.Namespace, 2532 2533 Job: job, 2534 Pods: make(map[string]map[string]*v1.Pod), 2535 } 2536 2537 return nil</span> 2538 } 2539 2540 func (jc *jobCache) Update(obj *v1alpha1.Job) error <span class="cov8" title="1">{ 2541 jc.Lock() 2542 defer jc.Unlock() 2543 2544 key := JobKey(obj) 2545 job, found := jc.jobs[key] 2546 if !found </span><span class="cov8" title="1">{ 2547 return fmt.Errorf("failed to find job <%v>", key) 2548 }</span> 2549 <span class="cov8" title="1">job.Job = obj 2550 2551 return nil</span> 2552 } 2553 2554 func (jc *jobCache) Delete(obj *v1alpha1.Job) error <span class="cov8" title="1">{ 2555 jc.Lock() 2556 defer jc.Unlock() 2557 2558 key := JobKey(obj) 2559 jobInfo, found := jc.jobs[key] 2560 if !found </span><span class="cov8" title="1">{ 2561 return fmt.Errorf("failed to find job <%v>", key) 2562 }</span> 2563 <span class="cov8" title="1">jobInfo.Job = nil 2564 jc.deleteJob(jobInfo) 2565 2566 return nil</span> 2567 } 2568 2569 func (jc *jobCache) AddPod(pod *v1.Pod) error <span class="cov8" title="1">{ 2570 jc.Lock() 2571 defer jc.Unlock() 2572 2573 key, err := jobKeyOfPod(pod) 2574 if err != nil </span><span class="cov8" title="1">{ 2575 return err 2576 }</span> 2577 2578 <span class="cov8" title="1">job, found := jc.jobs[key] 2579 if !found </span><span class="cov0" title="0">{ 2580 job = &apis.JobInfo{ 2581 Pods: make(map[string]map[string]*v1.Pod), 2582 } 2583 jc.jobs[key] = job 2584 }</span> 2585 2586 <span class="cov8" title="1">return job.AddPod(pod)</span> 2587 } 2588 2589 func (jc *jobCache) UpdatePod(pod *v1.Pod) error <span class="cov8" title="1">{ 2590 jc.Lock() 2591 defer jc.Unlock() 2592 2593 key, err := jobKeyOfPod(pod) 2594 if err != nil </span><span class="cov0" title="0">{ 2595 return err 2596 }</span> 2597 2598 <span class="cov8" title="1">job, found := jc.jobs[key] 2599 if !found </span><span class="cov0" title="0">{ 2600 job = &apis.JobInfo{ 2601 Pods: make(map[string]map[string]*v1.Pod), 2602 } 2603 
jc.jobs[key] = job 2604 }</span> 2605 2606 <span class="cov8" title="1">return job.UpdatePod(pod)</span> 2607 } 2608 2609 func (jc *jobCache) DeletePod(pod *v1.Pod) error <span class="cov8" title="1">{ 2610 jc.Lock() 2611 defer jc.Unlock() 2612 2613 key, err := jobKeyOfPod(pod) 2614 if err != nil </span><span class="cov0" title="0">{ 2615 return err 2616 }</span> 2617 2618 <span class="cov8" title="1">job, found := jc.jobs[key] 2619 if !found </span><span class="cov0" title="0">{ 2620 job = &apis.JobInfo{ 2621 Pods: make(map[string]map[string]*v1.Pod), 2622 } 2623 jc.jobs[key] = job 2624 }</span> 2625 2626 <span class="cov8" title="1">if err := job.DeletePod(pod); err != nil </span><span class="cov0" title="0">{ 2627 return err 2628 }</span> 2629 2630 <span class="cov8" title="1">if jc.jobs[key].Job == nil </span><span class="cov0" title="0">{ 2631 jc.deleteJob(job) 2632 }</span> 2633 2634 <span class="cov8" title="1">return nil</span> 2635 } 2636 2637 func (jc *jobCache) Run(stopCh <-chan struct{}) <span class="cov0" title="0">{ 2638 wait.Until(jc.worker, 0, stopCh) 2639 }</span> 2640 2641 func (jc *jobCache) TaskCompleted(jobKey, taskName string) bool <span class="cov8" title="1">{ 2642 jc.Lock() 2643 defer jc.Unlock() 2644 2645 var taskReplicas, completed int32 2646 2647 jobInfo, found := jc.jobs[jobKey] 2648 if !found </span><span class="cov0" title="0">{ 2649 return false 2650 }</span> 2651 2652 <span class="cov8" title="1">taskPods, found := jobInfo.Pods[taskName] 2653 2654 if !found </span><span class="cov0" title="0">{ 2655 return false 2656 }</span> 2657 2658 <span class="cov8" title="1">if jobInfo.Job == nil </span><span class="cov0" title="0">{ 2659 return false 2660 }</span> 2661 2662 <span class="cov8" title="1">for _, task := range jobInfo.Job.Spec.Tasks </span><span class="cov8" title="1">{ 2663 if task.Name == taskName </span><span class="cov8" title="1">{ 2664 taskReplicas = task.Replicas 2665 break</span> 2666 } 2667 } 2668 <span class="cov8" 
title="1">if taskReplicas <= 0 </span><span class="cov0" title="0">{ 2669 return false 2670 }</span> 2671 2672 <span class="cov8" title="1">for _, pod := range taskPods </span><span class="cov8" title="1">{ 2673 if pod.Status.Phase == v1.PodSucceeded </span><span class="cov8" title="1">{ 2674 completed++ 2675 }</span> 2676 } 2677 <span class="cov8" title="1">return completed >= taskReplicas</span> 2678 } 2679 2680 func (jc *jobCache) TaskFailed(jobKey, taskName string) bool <span class="cov0" title="0">{ 2681 jc.Lock() 2682 defer jc.Unlock() 2683 2684 var taskReplicas, retried, maxRetry int32 2685 2686 jobInfo, found := jc.jobs[jobKey] 2687 if !found </span><span class="cov0" title="0">{ 2688 return false 2689 }</span> 2690 2691 <span class="cov0" title="0">taskPods, found := jobInfo.Pods[taskName] 2692 2693 if !found || jobInfo.Job == nil </span><span class="cov0" title="0">{ 2694 return false 2695 }</span> 2696 2697 <span class="cov0" title="0">for _, task := range jobInfo.Job.Spec.Tasks </span><span class="cov0" title="0">{ 2698 if task.Name == taskName </span><span class="cov0" title="0">{ 2699 maxRetry = task.MaxRetry 2700 taskReplicas = task.Replicas 2701 break</span> 2702 } 2703 } 2704 2705 // maxRetry == -1 means no limit 2706 <span class="cov0" title="0">if taskReplicas == 0 || maxRetry == -1 </span><span class="cov0" title="0">{ 2707 return false 2708 }</span> 2709 2710 // Compatible with existing job 2711 <span class="cov0" title="0">if maxRetry == 0 </span><span class="cov0" title="0">{ 2712 maxRetry = 3 2713 }</span> 2714 2715 <span class="cov0" title="0">for _, pod := range taskPods </span><span class="cov0" title="0">{ 2716 if pod.Status.Phase == v1.PodRunning || pod.Status.Phase == v1.PodPending </span><span class="cov0" title="0">{ 2717 for j := range pod.Status.InitContainerStatuses </span><span class="cov0" title="0">{ 2718 stat := pod.Status.InitContainerStatuses[j] 2719 retried += stat.RestartCount 2720 }</span> 2721 <span class="cov0" 
title="0">for j := range pod.Status.ContainerStatuses </span><span class="cov0" title="0">{ 2722 stat := pod.Status.ContainerStatuses[j] 2723 retried += stat.RestartCount 2724 }</span> 2725 } 2726 } 2727 <span class="cov0" title="0">return retried > maxRetry</span> 2728 } 2729 2730 func (jc *jobCache) worker() <span class="cov0" title="0">{ 2731 for jc.processCleanupJob() </span>{<span class="cov0" title="0"> 2732 }</span> 2733 } 2734 2735 func (jc *jobCache) processCleanupJob() bool <span class="cov0" title="0">{ 2736 obj, shutdown := jc.deletedJobs.Get() 2737 if shutdown </span><span class="cov0" title="0">{ 2738 return false 2739 }</span> 2740 <span class="cov0" title="0">defer jc.deletedJobs.Done(obj) 2741 2742 job, ok := obj.(*apis.JobInfo) 2743 if !ok </span><span class="cov0" title="0">{ 2744 klog.Errorf("failed to convert %v to *apis.JobInfo", obj) 2745 return true 2746 }</span> 2747 2748 <span class="cov0" title="0">jc.Mutex.Lock() 2749 defer jc.Mutex.Unlock() 2750 2751 if jobTerminated(job) </span><span class="cov0" title="0">{ 2752 jc.deletedJobs.Forget(obj) 2753 key := keyFn(job.Namespace, job.Name) 2754 delete(jc.jobs, key) 2755 klog.V(3).Infof("Job <%s> was deleted.", key) 2756 }</span> else<span class="cov0" title="0"> { 2757 // Retry 2758 jc.deleteJob(job) 2759 }</span> 2760 <span class="cov0" title="0">return true</span> 2761 } 2762 2763 func (jc *jobCache) deleteJob(job *apis.JobInfo) <span class="cov8" title="1">{ 2764 klog.V(3).Infof("Try to delete Job <%v/%v>", 2765 job.Namespace, job.Name) 2766 2767 jc.deletedJobs.AddRateLimited(job) 2768 }</span> 2769 </pre> 2770 2771 <pre class="file" id="file22" style="display: none">/* 2772 Copyright 2019 The Volcano Authors. 2773 2774 Licensed under the Apache License, Version 2.0 (the "License"); 2775 you may not use this file except in compliance with the License. 
2776 You may obtain a copy of the License at 2777 2778 http://www.apache.org/licenses/LICENSE-2.0 2779 2780 Unless required by applicable law or agreed to in writing, software 2781 distributed under the License is distributed on an "AS IS" BASIS, 2782 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 2783 See the License for the specific language governing permissions and 2784 limitations under the License. 2785 */ 2786 2787 package garbagecollector 2788 2789 import ( 2790 "context" 2791 "fmt" 2792 "time" 2793 2794 "k8s.io/apimachinery/pkg/api/errors" 2795 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 2796 "k8s.io/apimachinery/pkg/util/wait" 2797 "k8s.io/client-go/tools/cache" 2798 "k8s.io/client-go/util/workqueue" 2799 "k8s.io/klog" 2800 2801 "volcano.sh/apis/pkg/apis/batch/v1alpha1" 2802 vcclientset "volcano.sh/apis/pkg/client/clientset/versioned" 2803 informerfactory "volcano.sh/apis/pkg/client/informers/externalversions" 2804 batchinformers "volcano.sh/apis/pkg/client/informers/externalversions/batch/v1alpha1" 2805 batchlisters "volcano.sh/apis/pkg/client/listers/batch/v1alpha1" 2806 "volcano.sh/volcano/pkg/controllers/framework" 2807 ) 2808 2809 func init() <span class="cov8" title="1">{ 2810 framework.RegisterController(&gccontroller{}) 2811 }</span> 2812 2813 // gccontroller runs reflectors to watch for changes of managed API 2814 // objects. Currently it only watches Jobs. Triggered by Job creation 2815 // and updates, it enqueues Jobs that have non-nil `.spec.ttlSecondsAfterFinished` 2816 // to the `queue`. The gccontroller has workers who consume `queue`, check whether 2817 // the Job TTL has expired or not; if the Job TTL hasn't expired, it will add the 2818 // Job to the queue after the TTL is expected to expire; if the TTL has expired, the 2819 // worker will send requests to the API server to delete the Jobs accordingly. 
2820 // This is implemented outside of Job controller for separation of concerns, and 2821 // because it will be extended to handle other finishable resource types. 2822 type gccontroller struct { 2823 vcClient vcclientset.Interface 2824 2825 jobInformer batchinformers.JobInformer 2826 2827 // A store of jobs 2828 jobLister batchlisters.JobLister 2829 jobSynced func() bool 2830 2831 // queues that need to be updated. 2832 queue workqueue.RateLimitingInterface 2833 } 2834 2835 func (gc *gccontroller) Name() string <span class="cov8" title="1">{ 2836 return "gc-controller" 2837 }</span> 2838 2839 // Initialize creates an instance of gccontroller. 2840 func (gc *gccontroller) Initialize(opt *framework.ControllerOption) error <span class="cov8" title="1">{ 2841 gc.vcClient = opt.VolcanoClient 2842 jobInformer := informerfactory.NewSharedInformerFactory(gc.vcClient, 0).Batch().V1alpha1().Jobs() 2843 2844 gc.jobInformer = jobInformer 2845 gc.jobLister = jobInformer.Lister() 2846 gc.jobSynced = jobInformer.Informer().HasSynced 2847 gc.queue = workqueue.NewRateLimitingQueue(workqueue.DefaultControllerRateLimiter()) 2848 2849 jobInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ 2850 AddFunc: gc.addJob, 2851 UpdateFunc: gc.updateJob, 2852 }) 2853 2854 return nil 2855 }</span> 2856 2857 // Run starts the worker to clean up Jobs. 
2858 func (gc *gccontroller) Run(stopCh <-chan struct{}) <span class="cov0" title="0">{ 2859 defer gc.queue.ShutDown() 2860 2861 klog.Infof("Starting garbage collector") 2862 defer klog.Infof("Shutting down garbage collector") 2863 2864 go gc.jobInformer.Informer().Run(stopCh) 2865 if !cache.WaitForCacheSync(stopCh, gc.jobSynced) </span><span class="cov0" title="0">{ 2866 return 2867 }</span> 2868 2869 <span class="cov0" title="0">go wait.Until(gc.worker, time.Second, stopCh) 2870 2871 <-stopCh</span> 2872 } 2873 2874 func (gc *gccontroller) addJob(obj interface{}) <span class="cov0" title="0">{ 2875 job := obj.(*v1alpha1.Job) 2876 klog.V(4).Infof("Adding job %s/%s", job.Namespace, job.Name) 2877 2878 if job.DeletionTimestamp == nil && needsCleanup(job) </span><span class="cov0" title="0">{ 2879 gc.enqueue(job) 2880 }</span> 2881 } 2882 2883 func (gc *gccontroller) updateJob(old, cur interface{}) <span class="cov0" title="0">{ 2884 job := cur.(*v1alpha1.Job) 2885 klog.V(4).Infof("Updating job %s/%s", job.Namespace, job.Name) 2886 2887 if job.DeletionTimestamp == nil && needsCleanup(job) </span><span class="cov0" title="0">{ 2888 gc.enqueue(job) 2889 }</span> 2890 } 2891 2892 func (gc *gccontroller) enqueue(job *v1alpha1.Job) <span class="cov0" title="0">{ 2893 klog.V(4).Infof("Add job %s/%s to cleanup", job.Namespace, job.Name) 2894 key, err := cache.MetaNamespaceKeyFunc(job) 2895 if err != nil </span><span class="cov0" title="0">{ 2896 klog.Errorf("couldn't get key for object %#v: %v", job, err) 2897 return 2898 }</span> 2899 2900 <span class="cov0" title="0">gc.queue.Add(key)</span> 2901 } 2902 2903 func (gc *gccontroller) enqueueAfter(job *v1alpha1.Job, after time.Duration) <span class="cov8" title="1">{ 2904 key, err := cache.MetaNamespaceKeyFunc(job) 2905 if err != nil </span><span class="cov0" title="0">{ 2906 klog.Errorf("couldn't get key for object %#v: %v", job, err) 2907 return 2908 }</span> 2909 2910 <span class="cov8" title="1">gc.queue.AddAfter(key, 
after)</span> 2911 } 2912 2913 func (gc *gccontroller) worker() <span class="cov0" title="0">{ 2914 for gc.processNextWorkItem() </span>{<span class="cov0" title="0"> 2915 }</span> 2916 } 2917 2918 func (gc *gccontroller) processNextWorkItem() bool <span class="cov0" title="0">{ 2919 key, quit := gc.queue.Get() 2920 if quit </span><span class="cov0" title="0">{ 2921 return false 2922 }</span> 2923 <span class="cov0" title="0">defer gc.queue.Done(key) 2924 2925 err := gc.processJob(key.(string)) 2926 gc.handleErr(err, key) 2927 2928 return true</span> 2929 } 2930 2931 func (gc *gccontroller) handleErr(err error, key interface{}) <span class="cov0" title="0">{ 2932 if err == nil </span><span class="cov0" title="0">{ 2933 gc.queue.Forget(key) 2934 return 2935 }</span> 2936 2937 <span class="cov0" title="0">klog.Errorf("error cleaning up Job %v, will retry: %v", key, err) 2938 gc.queue.AddRateLimited(key)</span> 2939 } 2940 2941 // processJob will check the Job's state and TTL and delete the Job when it 2942 // finishes and its TTL after finished has expired. If the Job hasn't finished or 2943 // its TTL hasn't expired, it will be added to the queue after the TTL is expected 2944 // to expire. 2945 // This function is not meant to be invoked concurrently with the same key. 2946 func (gc *gccontroller) processJob(key string) error <span class="cov0" title="0">{ 2947 namespace, name, err := cache.SplitMetaNamespaceKey(key) 2948 if err != nil </span><span class="cov0" title="0">{ 2949 return err 2950 }</span> 2951 2952 <span class="cov0" title="0">klog.V(4).Infof("Checking if Job %s/%s is ready for cleanup", namespace, name) 2953 // Ignore the Jobs that are already deleted or being deleted, or the ones that don't need clean up. 
2954 job, err := gc.jobLister.Jobs(namespace).Get(name) 2955 if errors.IsNotFound(err) </span><span class="cov0" title="0">{ 2956 return nil 2957 }</span> 2958 <span class="cov0" title="0">if err != nil </span><span class="cov0" title="0">{ 2959 return err 2960 }</span> 2961 2962 <span class="cov0" title="0">if expired, err := gc.processTTL(job); err != nil </span><span class="cov0" title="0">{ 2963 return err 2964 }</span> else<span class="cov0" title="0"> if !expired </span><span class="cov0" title="0">{ 2965 return nil 2966 }</span> 2967 2968 // The Job's TTL is assumed to have expired, but the Job TTL might be stale. 2969 // Before deleting the Job, do a final sanity check. 2970 // If TTL is modified before we do this check, we cannot be sure if the TTL truly expires. 2971 // The latest Job may have a different UID, but it's fine because the checks will be run again. 2972 <span class="cov0" title="0">fresh, err := gc.vcClient.BatchV1alpha1().Jobs(namespace).Get(context.TODO(), name, metav1.GetOptions{}) 2973 if errors.IsNotFound(err) </span><span class="cov0" title="0">{ 2974 return nil 2975 }</span> 2976 <span class="cov0" title="0">if err != nil </span><span class="cov0" title="0">{ 2977 return err 2978 }</span> 2979 // Use the latest Job TTL to see if the TTL truly expires. 2980 <span class="cov0" title="0">if expired, err := gc.processTTL(fresh); err != nil </span><span class="cov0" title="0">{ 2981 return err 2982 }</span> else<span class="cov0" title="0"> if !expired </span><span class="cov0" title="0">{ 2983 return nil 2984 }</span> 2985 // Cascade deletes the Jobs if TTL truly expires. 
2986 <span class="cov0" title="0">policy := metav1.DeletePropagationForeground 2987 options := metav1.DeleteOptions{ 2988 PropagationPolicy: &policy, 2989 Preconditions: &metav1.Preconditions{UID: &fresh.UID}, 2990 } 2991 klog.V(4).Infof("Cleaning up Job %s/%s", namespace, name) 2992 return gc.vcClient.BatchV1alpha1().Jobs(fresh.Namespace).Delete(context.TODO(), fresh.Name, options)</span> 2993 } 2994 2995 // processTTL checks whether a given Job's TTL has expired, and add it to the queue after the TTL is expected to expire 2996 // if the TTL will expire later. 2997 func (gc *gccontroller) processTTL(job *v1alpha1.Job) (expired bool, err error) <span class="cov8" title="1">{ 2998 // We don't care about the Jobs that are going to be deleted, or the ones that don't need clean up. 2999 if job.DeletionTimestamp != nil || !needsCleanup(job) </span><span class="cov0" title="0">{ 3000 return false, nil 3001 }</span> 3002 3003 <span class="cov8" title="1">now := time.Now() 3004 t, err := timeLeft(job, &now) 3005 if err != nil </span><span class="cov0" title="0">{ 3006 return false, err 3007 }</span> 3008 3009 // TTL has expired 3010 <span class="cov8" title="1">if *t <= 0 </span><span class="cov8" title="1">{ 3011 return true, nil 3012 }</span> 3013 3014 <span class="cov8" title="1">gc.enqueueAfter(job, *t) 3015 return false, nil</span> 3016 } 3017 3018 // needsCleanup checks whether a Job has finished and has a TTL set. 
3019 func needsCleanup(j *v1alpha1.Job) bool <span class="cov8" title="1">{ 3020 return j.Spec.TTLSecondsAfterFinished != nil && isJobFinished(j) 3021 }</span> 3022 3023 func isJobFinished(job *v1alpha1.Job) bool <span class="cov8" title="1">{ 3024 return job.Status.State.Phase == v1alpha1.Completed || 3025 job.Status.State.Phase == v1alpha1.Failed || 3026 job.Status.State.Phase == v1alpha1.Terminated 3027 }</span> 3028 3029 func getFinishAndExpireTime(j *v1alpha1.Job) (*time.Time, *time.Time, error) <span class="cov8" title="1">{ 3030 if !needsCleanup(j) </span><span class="cov8" title="1">{ 3031 return nil, nil, fmt.Errorf("job %s/%s should not be cleaned up", j.Namespace, j.Name) 3032 }</span> 3033 <span class="cov8" title="1">finishAt, err := jobFinishTime(j) 3034 if err != nil </span><span class="cov0" title="0">{ 3035 return nil, nil, err 3036 }</span> 3037 <span class="cov8" title="1">finishAtUTC := finishAt.UTC() 3038 expireAtUTC := finishAtUTC.Add(time.Duration(*j.Spec.TTLSecondsAfterFinished) * time.Second) 3039 return &finishAtUTC, &expireAtUTC, nil</span> 3040 } 3041 3042 func timeLeft(j *v1alpha1.Job, since *time.Time) (*time.Duration, error) <span class="cov8" title="1">{ 3043 finishAt, expireAt, err := getFinishAndExpireTime(j) 3044 if err != nil </span><span class="cov8" title="1">{ 3045 return nil, err 3046 }</span> 3047 <span class="cov8" title="1">if finishAt.UTC().After(since.UTC()) </span><span class="cov0" title="0">{ 3048 klog.Warningf("Warning: Found Job %s/%s finished in the future. This is likely due to time skew in the cluster. 
Job cleanup will be deferred.", j.Namespace, j.Name) 3049 }</span> 3050 <span class="cov8" title="1">remaining := expireAt.UTC().Sub(since.UTC()) 3051 klog.V(4).Infof("Found Job %s/%s finished at %v, remaining TTL %v since %v, TTL will expire at %v", j.Namespace, j.Name, finishAt.UTC(), remaining, since.UTC(), expireAt.UTC()) 3052 return &remaining, nil</span> 3053 } 3054 3055 // jobFinishTime takes an already finished Job and returns the time it finishes. 3056 func jobFinishTime(finishedJob *v1alpha1.Job) (metav1.Time, error) <span class="cov8" title="1">{ 3057 if finishedJob.Status.State.LastTransitionTime.IsZero() </span><span class="cov8" title="1">{ 3058 return metav1.Time{}, fmt.Errorf("unable to find the time when the Job %s/%s finished", finishedJob.Namespace, finishedJob.Name) 3059 }</span> 3060 <span class="cov8" title="1">return finishedJob.Status.State.LastTransitionTime, nil</span> 3061 } 3062 </pre> 3063 3064 <pre class="file" id="file23" style="display: none">/* 3065 Copyright 2017 The Volcano Authors. 3066 3067 Licensed under the Apache License, Version 2.0 (the "License"); 3068 you may not use this file except in compliance with the License. 3069 You may obtain a copy of the License at 3070 3071 http://www.apache.org/licenses/LICENSE-2.0 3072 3073 Unless required by applicable law or agreed to in writing, software 3074 distributed under the License is distributed on an "AS IS" BASIS, 3075 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 3076 See the License for the specific language governing permissions and 3077 limitations under the License. 
3078 */ 3079 3080 package job 3081 3082 import ( 3083 "fmt" 3084 "hash" 3085 "hash/fnv" 3086 "time" 3087 3088 v1 "k8s.io/api/core/v1" 3089 "k8s.io/apimachinery/pkg/util/wait" 3090 coreinformers "k8s.io/client-go/informers/core/v1" 3091 kubeschedulinginformers "k8s.io/client-go/informers/scheduling/v1beta1" 3092 "k8s.io/client-go/kubernetes" 3093 corev1 "k8s.io/client-go/kubernetes/typed/core/v1" 3094 corelisters "k8s.io/client-go/listers/core/v1" 3095 kubeschedulinglisters "k8s.io/client-go/listers/scheduling/v1beta1" 3096 "k8s.io/client-go/tools/cache" 3097 "k8s.io/client-go/tools/record" 3098 "k8s.io/client-go/util/workqueue" 3099 "k8s.io/klog" 3100 3101 batchv1alpha1 "volcano.sh/apis/pkg/apis/batch/v1alpha1" 3102 busv1alpha1 "volcano.sh/apis/pkg/apis/bus/v1alpha1" 3103 vcclientset "volcano.sh/apis/pkg/client/clientset/versioned" 3104 vcscheme "volcano.sh/apis/pkg/client/clientset/versioned/scheme" 3105 informerfactory "volcano.sh/apis/pkg/client/informers/externalversions" 3106 batchinformer "volcano.sh/apis/pkg/client/informers/externalversions/batch/v1alpha1" 3107 businformer "volcano.sh/apis/pkg/client/informers/externalversions/bus/v1alpha1" 3108 schedulinginformers "volcano.sh/apis/pkg/client/informers/externalversions/scheduling/v1beta1" 3109 batchlister "volcano.sh/apis/pkg/client/listers/batch/v1alpha1" 3110 buslister "volcano.sh/apis/pkg/client/listers/bus/v1alpha1" 3111 schedulinglisters "volcano.sh/apis/pkg/client/listers/scheduling/v1beta1" 3112 "volcano.sh/volcano/pkg/controllers/apis" 3113 jobcache "volcano.sh/volcano/pkg/controllers/cache" 3114 "volcano.sh/volcano/pkg/controllers/framework" 3115 "volcano.sh/volcano/pkg/controllers/job/state" 3116 ) 3117 3118 func init() <span class="cov8" title="1">{ 3119 framework.RegisterController(&jobcontroller{}) 3120 }</span> 3121 3122 // jobcontroller the Job jobcontroller type. 
3123 type jobcontroller struct { 3124 kubeClient kubernetes.Interface 3125 vcClient vcclientset.Interface 3126 3127 jobInformer batchinformer.JobInformer 3128 podInformer coreinformers.PodInformer 3129 pvcInformer coreinformers.PersistentVolumeClaimInformer 3130 pgInformer schedulinginformers.PodGroupInformer 3131 svcInformer coreinformers.ServiceInformer 3132 cmdInformer businformer.CommandInformer 3133 pcInformer kubeschedulinginformers.PriorityClassInformer 3134 queueInformer schedulinginformers.QueueInformer 3135 3136 // A store of jobs 3137 jobLister batchlister.JobLister 3138 jobSynced func() bool 3139 3140 // A store of pods 3141 podLister corelisters.PodLister 3142 podSynced func() bool 3143 3144 pvcLister corelisters.PersistentVolumeClaimLister 3145 pvcSynced func() bool 3146 3147 // A store of podgroups 3148 pgLister schedulinglisters.PodGroupLister 3149 pgSynced func() bool 3150 3151 // A store of service 3152 svcLister corelisters.ServiceLister 3153 svcSynced func() bool 3154 3155 cmdLister buslister.CommandLister 3156 cmdSynced func() bool 3157 3158 pcLister kubeschedulinglisters.PriorityClassLister 3159 pcSynced func() bool 3160 3161 queueLister schedulinglisters.QueueLister 3162 queueSynced func() bool 3163 3164 // queue that need to sync up 3165 queueList []workqueue.RateLimitingInterface 3166 commandQueue workqueue.RateLimitingInterface 3167 cache jobcache.Cache 3168 // Job Event recorder 3169 recorder record.EventRecorder 3170 3171 errTasks workqueue.RateLimitingInterface 3172 workers uint32 3173 maxRequeueNum int 3174 } 3175 3176 func (cc *jobcontroller) Name() string <span class="cov8" title="1">{ 3177 return "job-controller" 3178 }</span> 3179 3180 // Initialize creates the new Job job controller. 
3181 func (cc *jobcontroller) Initialize(opt *framework.ControllerOption) error <span class="cov8" title="1">{ 3182 cc.kubeClient = opt.KubeClient 3183 cc.vcClient = opt.VolcanoClient 3184 3185 sharedInformers := opt.SharedInformerFactory 3186 workers := opt.WorkerNum 3187 // Initialize event client 3188 eventBroadcaster := record.NewBroadcaster() 3189 eventBroadcaster.StartLogging(klog.Infof) 3190 eventBroadcaster.StartRecordingToSink(&corev1.EventSinkImpl{Interface: cc.kubeClient.CoreV1().Events("")}) 3191 recorder := eventBroadcaster.NewRecorder(vcscheme.Scheme, v1.EventSource{Component: "vc-controller-manager"}) 3192 3193 cc.queueList = make([]workqueue.RateLimitingInterface, workers) 3194 cc.commandQueue = workqueue.NewRateLimitingQueue(workqueue.DefaultControllerRateLimiter()) 3195 cc.cache = jobcache.New() 3196 cc.errTasks = newRateLimitingQueue() 3197 cc.recorder = recorder 3198 cc.workers = workers 3199 cc.maxRequeueNum = opt.MaxRequeueNum 3200 if cc.maxRequeueNum < 0 </span><span class="cov0" title="0">{ 3201 cc.maxRequeueNum = -1 3202 }</span> 3203 3204 <span class="cov8" title="1">var i uint32 3205 for i = 0; i < workers; i++ </span><span class="cov8" title="1">{ 3206 cc.queueList[i] = workqueue.NewRateLimitingQueue(workqueue.DefaultControllerRateLimiter()) 3207 }</span> 3208 3209 <span class="cov8" title="1">cc.jobInformer = informerfactory.NewSharedInformerFactory(cc.vcClient, 0).Batch().V1alpha1().Jobs() 3210 cc.jobInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ 3211 AddFunc: cc.addJob, 3212 UpdateFunc: cc.updateJob, 3213 DeleteFunc: cc.deleteJob, 3214 }) 3215 cc.jobLister = cc.jobInformer.Lister() 3216 cc.jobSynced = cc.jobInformer.Informer().HasSynced 3217 3218 cc.cmdInformer = informerfactory.NewSharedInformerFactory(cc.vcClient, 0).Bus().V1alpha1().Commands() 3219 cc.cmdInformer.Informer().AddEventHandler( 3220 cache.FilteringResourceEventHandler{ 3221 FilterFunc: func(obj interface{}) bool </span><span class="cov0" 
title="0">{ 3222 switch v := obj.(type) </span>{ 3223 case *busv1alpha1.Command:<span class="cov0" title="0"> 3224 if v.TargetObject != nil && 3225 v.TargetObject.APIVersion == batchv1alpha1.SchemeGroupVersion.String() && 3226 v.TargetObject.Kind == "Job" </span><span class="cov0" title="0">{ 3227 return true 3228 }</span> 3229 3230 <span class="cov0" title="0">return false</span> 3231 default:<span class="cov0" title="0"> 3232 return false</span> 3233 } 3234 }, 3235 Handler: cache.ResourceEventHandlerFuncs{ 3236 AddFunc: cc.addCommand, 3237 }, 3238 }, 3239 ) 3240 <span class="cov8" title="1">cc.cmdLister = cc.cmdInformer.Lister() 3241 cc.cmdSynced = cc.cmdInformer.Informer().HasSynced 3242 3243 cc.podInformer = sharedInformers.Core().V1().Pods() 3244 cc.podInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ 3245 AddFunc: cc.addPod, 3246 UpdateFunc: cc.updatePod, 3247 DeleteFunc: cc.deletePod, 3248 }) 3249 3250 cc.podLister = cc.podInformer.Lister() 3251 cc.podSynced = cc.podInformer.Informer().HasSynced 3252 3253 cc.pvcInformer = sharedInformers.Core().V1().PersistentVolumeClaims() 3254 cc.pvcLister = cc.pvcInformer.Lister() 3255 cc.pvcSynced = cc.pvcInformer.Informer().HasSynced 3256 3257 cc.svcInformer = sharedInformers.Core().V1().Services() 3258 cc.svcLister = cc.svcInformer.Lister() 3259 cc.svcSynced = cc.svcInformer.Informer().HasSynced 3260 3261 cc.pgInformer = informerfactory.NewSharedInformerFactory(cc.vcClient, 0).Scheduling().V1beta1().PodGroups() 3262 cc.pgInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ 3263 UpdateFunc: cc.updatePodGroup, 3264 }) 3265 cc.pgLister = cc.pgInformer.Lister() 3266 cc.pgSynced = cc.pgInformer.Informer().HasSynced 3267 3268 cc.pcInformer = sharedInformers.Scheduling().V1beta1().PriorityClasses() 3269 cc.pcLister = cc.pcInformer.Lister() 3270 cc.pcSynced = cc.pcInformer.Informer().HasSynced 3271 3272 cc.queueInformer = informerfactory.NewSharedInformerFactory(cc.vcClient, 
0).Scheduling().V1beta1().Queues() 3273 cc.queueLister = cc.queueInformer.Lister() 3274 cc.queueSynced = cc.queueInformer.Informer().HasSynced 3275 3276 // Register actions 3277 state.SyncJob = cc.syncJob 3278 state.KillJob = cc.killJob 3279 3280 return nil</span> 3281 } 3282 3283 // Run start JobController. 3284 func (cc *jobcontroller) Run(stopCh <-chan struct{}) <span class="cov0" title="0">{ 3285 go cc.jobInformer.Informer().Run(stopCh) 3286 go cc.podInformer.Informer().Run(stopCh) 3287 go cc.pvcInformer.Informer().Run(stopCh) 3288 go cc.pgInformer.Informer().Run(stopCh) 3289 go cc.svcInformer.Informer().Run(stopCh) 3290 go cc.cmdInformer.Informer().Run(stopCh) 3291 go cc.pcInformer.Informer().Run(stopCh) 3292 go cc.queueInformer.Informer().Run(stopCh) 3293 3294 cache.WaitForCacheSync(stopCh, cc.jobSynced, cc.podSynced, cc.pgSynced, 3295 cc.svcSynced, cc.cmdSynced, cc.pvcSynced, cc.pcSynced, cc.queueSynced) 3296 3297 go wait.Until(cc.handleCommands, 0, stopCh) 3298 var i uint32 3299 for i = 0; i < cc.workers; i++ </span><span class="cov0" title="0">{ 3300 go func(num uint32) </span><span class="cov0" title="0">{ 3301 wait.Until( 3302 func() </span><span class="cov0" title="0">{ 3303 cc.worker(num) 3304 }</span>, 3305 time.Second, 3306 stopCh) 3307 }(i) 3308 } 3309 3310 <span class="cov0" title="0">go cc.cache.Run(stopCh) 3311 3312 // Re-sync error tasks. 3313 go wait.Until(cc.processResyncTask, 0, stopCh) 3314 3315 klog.Infof("JobController is running ...... ")</span> 3316 } 3317 3318 func (cc *jobcontroller) worker(i uint32) <span class="cov0" title="0">{ 3319 klog.Infof("worker %d start ...... 
", i) 3320 3321 for cc.processNextReq(i) </span>{<span class="cov0" title="0"> 3322 }</span> 3323 } 3324 3325 func (cc *jobcontroller) belongsToThisRoutine(key string, count uint32) bool <span class="cov0" title="0">{ 3326 var hashVal hash.Hash32 3327 var val uint32 3328 3329 hashVal = fnv.New32() 3330 hashVal.Write([]byte(key)) 3331 3332 val = hashVal.Sum32() 3333 3334 return val%cc.workers == count 3335 }</span> 3336 3337 func (cc *jobcontroller) getWorkerQueue(key string) workqueue.RateLimitingInterface <span class="cov8" title="1">{ 3338 var hashVal hash.Hash32 3339 var val uint32 3340 3341 hashVal = fnv.New32() 3342 hashVal.Write([]byte(key)) 3343 3344 val = hashVal.Sum32() 3345 3346 queue := cc.queueList[val%cc.workers] 3347 3348 return queue 3349 }</span> 3350 3351 func (cc *jobcontroller) processNextReq(count uint32) bool <span class="cov0" title="0">{ 3352 queue := cc.queueList[count] 3353 obj, shutdown := queue.Get() 3354 if shutdown </span><span class="cov0" title="0">{ 3355 klog.Errorf("Fail to pop item from queue") 3356 return false 3357 }</span> 3358 3359 <span class="cov0" title="0">req := obj.(apis.Request) 3360 defer queue.Done(req) 3361 3362 key := jobcache.JobKeyByReq(&req) 3363 if !cc.belongsToThisRoutine(key, count) </span><span class="cov0" title="0">{ 3364 klog.Errorf("should not occur The job does not belongs to this routine key:%s, worker:%d...... ", key, count) 3365 queueLocal := cc.getWorkerQueue(key) 3366 queueLocal.Add(req) 3367 return true 3368 }</span> 3369 3370 <span class="cov0" title="0">klog.V(3).Infof("Try to handle request <%v>", req) 3371 3372 jobInfo, err := cc.cache.Get(key) 3373 if err != nil </span><span class="cov0" title="0">{ 3374 // TODO(k82cn): ignore not-ready error. 
3375 klog.Errorf("Failed to get job by <%v> from cache: %v", req, err) 3376 return true 3377 }</span> 3378 3379 <span class="cov0" title="0">st := state.NewState(jobInfo) 3380 if st == nil </span><span class="cov0" title="0">{ 3381 klog.Errorf("Invalid state <%s> of Job <%v/%v>", 3382 jobInfo.Job.Status.State, jobInfo.Job.Namespace, jobInfo.Job.Name) 3383 return true 3384 }</span> 3385 3386 <span class="cov0" title="0">action := applyPolicies(jobInfo.Job, &req) 3387 klog.V(3).Infof("Execute <%v> on Job <%s/%s> in <%s> by <%T>.", 3388 action, req.Namespace, req.JobName, jobInfo.Job.Status.State.Phase, st) 3389 3390 if action != busv1alpha1.SyncJobAction </span><span class="cov0" title="0">{ 3391 cc.recordJobEvent(jobInfo.Job.Namespace, jobInfo.Job.Name, batchv1alpha1.ExecuteAction, fmt.Sprintf( 3392 "Start to execute action %s ", action)) 3393 }</span> 3394 3395 <span class="cov0" title="0">if err := st.Execute(action); err != nil </span><span class="cov0" title="0">{ 3396 if cc.maxRequeueNum == -1 || queue.NumRequeues(req) < cc.maxRequeueNum </span><span class="cov0" title="0">{ 3397 klog.V(2).Infof("Failed to handle Job <%s/%s>: %v", 3398 jobInfo.Job.Namespace, jobInfo.Job.Name, err) 3399 // If any error, requeue it. 
3400 queue.AddRateLimited(req) 3401 return true 3402 }</span> 3403 <span class="cov0" title="0">cc.recordJobEvent(jobInfo.Job.Namespace, jobInfo.Job.Name, batchv1alpha1.ExecuteAction, fmt.Sprintf( 3404 "Job failed on action %s for retry limit reached", action)) 3405 klog.Warningf("Terminating Job <%s/%s> and releasing resources", jobInfo.Job.Namespace, jobInfo.Job.Name) 3406 if err = st.Execute(busv1alpha1.TerminateJobAction); err != nil </span><span class="cov0" title="0">{ 3407 klog.Errorf("Failed to terminate Job<%s/%s>: %v", jobInfo.Job.Namespace, jobInfo.Job.Name, err) 3408 }</span> 3409 <span class="cov0" title="0">klog.Warningf("Dropping job<%s/%s> out of the queue: %v because max retries has reached", jobInfo.Job.Namespace, jobInfo.Job.Name, err)</span> 3410 } 3411 3412 // If no error, forget it. 3413 <span class="cov0" title="0">queue.Forget(req) 3414 3415 return true</span> 3416 } 3417 </pre> 3418 3419 <pre class="file" id="file24" style="display: none">/* 3420 Copyright 2019 The Volcano Authors. 3421 3422 Licensed under the Apache License, Version 2.0 (the "License"); 3423 you may not use this file except in compliance with the License. 3424 You may obtain a copy of the License at 3425 3426 http://www.apache.org/licenses/LICENSE-2.0 3427 3428 Unless required by applicable law or agreed to in writing, software 3429 distributed under the License is distributed on an "AS IS" BASIS, 3430 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 3431 See the License for the specific language governing permissions and 3432 limitations under the License. 
3433 */ 3434 3435 package job 3436 3437 import ( 3438 "context" 3439 "fmt" 3440 "reflect" 3441 "sort" 3442 "sync" 3443 "sync/atomic" 3444 "time" 3445 3446 v1 "k8s.io/api/core/v1" 3447 apierrors "k8s.io/apimachinery/pkg/api/errors" 3448 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 3449 "k8s.io/klog" 3450 3451 batch "volcano.sh/apis/pkg/apis/batch/v1alpha1" 3452 "volcano.sh/apis/pkg/apis/helpers" 3453 scheduling "volcano.sh/apis/pkg/apis/scheduling/v1beta1" 3454 "volcano.sh/volcano/pkg/controllers/apis" 3455 jobhelpers "volcano.sh/volcano/pkg/controllers/job/helpers" 3456 "volcano.sh/volcano/pkg/controllers/job/state" 3457 ) 3458 3459 var calMutex sync.Mutex 3460 3461 func (cc *jobcontroller) killJob(jobInfo *apis.JobInfo, podRetainPhase state.PhaseMap, updateStatus state.UpdateStatusFn) error <span class="cov8" title="1">{ 3462 job := jobInfo.Job 3463 klog.V(3).Infof("Killing Job <%s/%s>, current version %d", job.Namespace, job.Name, job.Status.Version) 3464 defer klog.V(3).Infof("Finished Job <%s/%s> killing, current version %d", job.Namespace, job.Name, job.Status.Version) 3465 3466 if job.DeletionTimestamp != nil </span><span class="cov0" title="0">{ 3467 klog.Infof("Job <%s/%s> is terminating, skip management process.", 3468 job.Namespace, job.Name) 3469 return nil 3470 }</span> 3471 3472 <span class="cov8" title="1">var pending, running, terminating, succeeded, failed, unknown int32 3473 taskStatusCount := make(map[string]batch.TaskState) 3474 3475 var errs []error 3476 var total int 3477 3478 for _, pods := range jobInfo.Pods </span><span class="cov8" title="1">{ 3479 for _, pod := range pods </span><span class="cov8" title="1">{ 3480 total++ 3481 3482 if pod.DeletionTimestamp != nil </span><span class="cov0" title="0">{ 3483 klog.Infof("Pod <%s/%s> is terminating", pod.Namespace, pod.Name) 3484 terminating++ 3485 continue</span> 3486 } 3487 3488 <span class="cov8" title="1">maxRetry := job.Spec.MaxRetry 3489 lastRetry := false 3490 if job.Status.RetryCount > 
maxRetry-1 </span><span class="cov8" title="1">{ 3491 lastRetry = true 3492 }</span> 3493 3494 <span class="cov8" title="1">_, retain := podRetainPhase[pod.Status.Phase] 3495 3496 if !retain && !lastRetry </span><span class="cov0" title="0">{ 3497 err := cc.deleteJobPod(job.Name, pod) 3498 if err == nil </span><span class="cov0" title="0">{ 3499 terminating++ 3500 continue</span> 3501 } 3502 // record the err, and then collect the pod info like retained pod 3503 <span class="cov0" title="0">errs = append(errs, err) 3504 cc.resyncTask(pod)</span> 3505 } 3506 3507 <span class="cov8" title="1">classifyAndAddUpPodBaseOnPhase(pod, &pending, &running, &succeeded, &failed, &unknown) 3508 calcPodStatus(pod, taskStatusCount)</span> 3509 } 3510 } 3511 3512 <span class="cov8" title="1">if len(errs) != 0 </span><span class="cov0" title="0">{ 3513 klog.Errorf("failed to kill pods for job %s/%s, with err %+v", job.Namespace, job.Name, errs) 3514 cc.recorder.Event(job, v1.EventTypeWarning, FailedDeletePodReason, 3515 fmt.Sprintf("Error deleting pods: %+v", errs)) 3516 return fmt.Errorf("failed to kill %d pods of %d", len(errs), total) 3517 }</span> 3518 3519 <span class="cov8" title="1">job = job.DeepCopy() 3520 // Job version is bumped only when job is killed 3521 job.Status.Version++ 3522 job.Status.Pending = pending 3523 job.Status.Running = running 3524 job.Status.Succeeded = succeeded 3525 job.Status.Failed = failed 3526 job.Status.Terminating = terminating 3527 job.Status.Unknown = unknown 3528 job.Status.TaskStatusCount = taskStatusCount 3529 3530 // Update running duration 3531 klog.V(3).Infof("Running duration is %s", metav1.Duration{Duration: time.Since(jobInfo.Job.CreationTimestamp.Time)}.ToUnstructured()) 3532 job.Status.RunningDuration = &metav1.Duration{Duration: time.Since(jobInfo.Job.CreationTimestamp.Time)} 3533 3534 if updateStatus != nil </span><span class="cov8" title="1">{ 3535 if updateStatus(&job.Status) </span><span class="cov8" title="1">{ 3536 
job.Status.State.LastTransitionTime = metav1.Now() 3537 }</span> 3538 } 3539 3540 // must be called before update job status 3541 <span class="cov8" title="1">if err := cc.pluginOnJobDelete(job); err != nil </span><span class="cov0" title="0">{ 3542 return err 3543 }</span> 3544 3545 // Update Job status 3546 <span class="cov8" title="1">newJob, err := cc.vcClient.BatchV1alpha1().Jobs(job.Namespace).UpdateStatus(context.TODO(), job, metav1.UpdateOptions{}) 3547 if err != nil </span><span class="cov0" title="0">{ 3548 klog.Errorf("Failed to update status of Job %v/%v: %v", 3549 job.Namespace, job.Name, err) 3550 return err 3551 }</span> 3552 <span class="cov8" title="1">if e := cc.cache.Update(newJob); e != nil </span><span class="cov0" title="0">{ 3553 klog.Errorf("KillJob - Failed to update Job %v/%v in cache: %v", 3554 newJob.Namespace, newJob.Name, e) 3555 return e 3556 }</span> 3557 3558 // Delete PodGroup 3559 <span class="cov8" title="1">if err := cc.vcClient.SchedulingV1beta1().PodGroups(job.Namespace).Delete(context.TODO(), job.Name, metav1.DeleteOptions{}); err != nil </span><span class="cov8" title="1">{ 3560 if !apierrors.IsNotFound(err) </span><span class="cov0" title="0">{ 3561 klog.Errorf("Failed to delete PodGroup of Job %v/%v: %v", 3562 job.Namespace, job.Name, err) 3563 return err 3564 }</span> 3565 } 3566 3567 // NOTE(k82cn): DO NOT delete input/output until job is deleted. 
3568 3569 <span class="cov8" title="1">return nil</span> 3570 } 3571 3572 func (cc *jobcontroller) initiateJob(job *batch.Job) (*batch.Job, error) <span class="cov8" title="1">{ 3573 klog.V(3).Infof("Starting to initiate Job <%s/%s>", job.Namespace, job.Name) 3574 jobInstance, err := cc.initJobStatus(job) 3575 if err != nil </span><span class="cov0" title="0">{ 3576 cc.recorder.Event(job, v1.EventTypeWarning, string(batch.JobStatusError), 3577 fmt.Sprintf("Failed to initialize job status, err: %v", err)) 3578 return nil, err 3579 }</span> 3580 3581 <span class="cov8" title="1">if err := cc.pluginOnJobAdd(jobInstance); err != nil </span><span class="cov0" title="0">{ 3582 cc.recorder.Event(job, v1.EventTypeWarning, string(batch.PluginError), 3583 fmt.Sprintf("Execute plugin when job add failed, err: %v", err)) 3584 return nil, err 3585 }</span> 3586 3587 <span class="cov8" title="1">newJob, err := cc.createJobIOIfNotExist(jobInstance) 3588 if err != nil </span><span class="cov0" title="0">{ 3589 cc.recorder.Event(job, v1.EventTypeWarning, string(batch.PVCError), 3590 fmt.Sprintf("Failed to create PVC, err: %v", err)) 3591 return nil, err 3592 }</span> 3593 3594 <span class="cov8" title="1">if err := cc.createOrUpdatePodGroup(newJob); err != nil </span><span class="cov0" title="0">{ 3595 cc.recorder.Event(job, v1.EventTypeWarning, string(batch.PodGroupError), 3596 fmt.Sprintf("Failed to create PodGroup, err: %v", err)) 3597 return nil, err 3598 }</span> 3599 3600 <span class="cov8" title="1">return newJob, nil</span> 3601 } 3602 3603 func (cc *jobcontroller) initOnJobUpdate(job *batch.Job) error <span class="cov8" title="1">{ 3604 klog.V(3).Infof("Starting to initiate Job <%s/%s> on update", job.Namespace, job.Name) 3605 3606 if err := cc.pluginOnJobUpdate(job); err != nil </span><span class="cov0" title="0">{ 3607 cc.recorder.Event(job, v1.EventTypeWarning, string(batch.PluginError), 3608 fmt.Sprintf("Execute plugin when job add failed, err: %v", err)) 3609 return 
err 3610 }</span> 3611 3612 <span class="cov8" title="1">if err := cc.createOrUpdatePodGroup(job); err != nil </span><span class="cov0" title="0">{ 3613 cc.recorder.Event(job, v1.EventTypeWarning, string(batch.PodGroupError), 3614 fmt.Sprintf("Failed to create PodGroup, err: %v", err)) 3615 return err 3616 }</span> 3617 3618 <span class="cov8" title="1">return nil</span> 3619 } 3620 3621 func (cc *jobcontroller) GetQueueInfo(queue string) (*scheduling.Queue, error) <span class="cov0" title="0">{ 3622 queueInfo, err := cc.queueLister.Get(queue) 3623 if err != nil </span><span class="cov0" title="0">{ 3624 klog.Errorf("Failed to get queue from queueLister, error: %s", err.Error()) 3625 }</span> 3626 3627 <span class="cov0" title="0">return queueInfo, err</span> 3628 } 3629 3630 func (cc *jobcontroller) syncJob(jobInfo *apis.JobInfo, updateStatus state.UpdateStatusFn) error <span class="cov8" title="1">{ 3631 job := jobInfo.Job 3632 klog.V(3).Infof("Starting to sync up Job <%s/%s>, current version %d", job.Namespace, job.Name, job.Status.Version) 3633 defer klog.V(3).Infof("Finished Job <%s/%s> sync up, current version %d", job.Namespace, job.Name, job.Status.Version) 3634 3635 if jobInfo.Job.DeletionTimestamp != nil </span><span class="cov0" title="0">{ 3636 klog.Infof("Job <%s/%s> is terminating, skip management process.", 3637 jobInfo.Job.Namespace, jobInfo.Job.Name) 3638 return nil 3639 }</span> 3640 3641 // deep copy job to prevent mutate it 3642 <span class="cov8" title="1">job = job.DeepCopy() 3643 3644 // Find queue that job belongs to, and check if the queue has forwarding metadata 3645 queueInfo, err := cc.GetQueueInfo(job.Spec.Queue) 3646 if err != nil </span><span class="cov0" title="0">{ 3647 return err 3648 }</span> 3649 3650 <span class="cov8" title="1">var jobForwarding bool 3651 if len(queueInfo.Spec.ExtendClusters) != 0 </span><span class="cov0" title="0">{ 3652 jobForwarding = true 3653 if len(job.Annotations) == 0 </span><span class="cov0" 
title="0">{ 3654 job.Annotations = make(map[string]string) 3655 }</span> 3656 <span class="cov0" title="0">job.Annotations[batch.JobForwardingKey] = "true" 3657 job, err = cc.vcClient.BatchV1alpha1().Jobs(job.Namespace).Update(context.TODO(), job, metav1.UpdateOptions{}) 3658 if err != nil </span><span class="cov0" title="0">{ 3659 klog.Errorf("failed to update job: %s/%s, error: %s", job.Namespace, job.Name, err.Error()) 3660 return err 3661 }</span> 3662 } 3663 3664 // Skip job initiation if job is already initiated 3665 <span class="cov8" title="1">if !isInitiated(job) </span><span class="cov8" title="1">{ 3666 if job, err = cc.initiateJob(job); err != nil </span><span class="cov0" title="0">{ 3667 return err 3668 }</span> 3669 } else<span class="cov8" title="1"> { 3670 // TODO: optimize this call it only when scale up/down 3671 if err = cc.initOnJobUpdate(job); err != nil </span><span class="cov0" title="0">{ 3672 return err 3673 }</span> 3674 } 3675 3676 <span class="cov8" title="1">if len(queueInfo.Spec.ExtendClusters) != 0 </span><span class="cov0" title="0">{ 3677 jobForwarding = true 3678 job.Annotations[batch.JobForwardingKey] = "true" 3679 _, err := cc.vcClient.BatchV1alpha1().Jobs(job.Namespace).Update(context.TODO(), job, metav1.UpdateOptions{}) 3680 if err != nil </span><span class="cov0" title="0">{ 3681 klog.Errorf("failed to update job: %s/%s, error: %s", job.Namespace, job.Name, err.Error()) 3682 return err 3683 }</span> 3684 } 3685 3686 <span class="cov8" title="1">var syncTask bool 3687 if pg, _ := cc.pgLister.PodGroups(job.Namespace).Get(job.Name); pg != nil </span><span class="cov8" title="1">{ 3688 if pg.Status.Phase != "" && pg.Status.Phase != scheduling.PodGroupPending </span><span class="cov8" title="1">{ 3689 syncTask = true 3690 }</span> 3691 3692 <span class="cov8" title="1">for _, condition := range pg.Status.Conditions </span><span class="cov0" title="0">{ 3693 if condition.Type == scheduling.PodGroupUnschedulableType </span><span 
class="cov0" title="0">{ 3694 cc.recorder.Eventf(job, v1.EventTypeWarning, string(batch.PodGroupPending), 3695 fmt.Sprintf("PodGroup %s:%s unschedule,reason: %s", job.Namespace, job.Name, condition.Message)) 3696 }</span> 3697 } 3698 } 3699 3700 <span class="cov8" title="1">if !syncTask </span><span class="cov8" title="1">{ 3701 if updateStatus != nil </span><span class="cov8" title="1">{ 3702 if updateStatus(&job.Status) </span><span class="cov8" title="1">{ 3703 job.Status.State.LastTransitionTime = metav1.Now() 3704 }</span> 3705 } 3706 <span class="cov8" title="1">newJob, err := cc.vcClient.BatchV1alpha1().Jobs(job.Namespace).UpdateStatus(context.TODO(), job, metav1.UpdateOptions{}) 3707 if err != nil </span><span class="cov0" title="0">{ 3708 klog.Errorf("Failed to update status of Job %v/%v: %v", 3709 job.Namespace, job.Name, err) 3710 return err 3711 }</span> 3712 <span class="cov8" title="1">if e := cc.cache.Update(newJob); e != nil </span><span class="cov0" title="0">{ 3713 klog.Errorf("SyncJob - Failed to update Job %v/%v in cache: %v", 3714 newJob.Namespace, newJob.Name, e) 3715 return e 3716 }</span> 3717 <span class="cov8" title="1">return nil</span> 3718 } 3719 3720 <span class="cov8" title="1">var running, pending, terminating, succeeded, failed, unknown int32 3721 taskStatusCount := make(map[string]batch.TaskState) 3722 3723 var podToCreate []*v1.Pod 3724 var podToDelete []*v1.Pod 3725 var creationErrs []error 3726 var deletionErrs []error 3727 appendMutex := sync.Mutex{} 3728 3729 appendError := func(container *[]error, err error) </span><span class="cov0" title="0">{ 3730 appendMutex.Lock() 3731 defer appendMutex.Unlock() 3732 *container = append(*container, err) 3733 }</span> 3734 3735 <span class="cov8" title="1">for _, ts := range job.Spec.Tasks </span><span class="cov8" title="1">{ 3736 ts.Template.Name = ts.Name 3737 tc := ts.Template.DeepCopy() 3738 name := ts.Template.Name 3739 3740 pods, found := jobInfo.Pods[name] 3741 if !found 
</span><span class="cov0" title="0">{ 3742 pods = map[string]*v1.Pod{} 3743 }</span> 3744 3745 <span class="cov8" title="1">for i := 0; i < int(ts.Replicas); i++ </span><span class="cov8" title="1">{ 3746 podName := fmt.Sprintf(jobhelpers.PodNameFmt, job.Name, name, i) 3747 if pod, found := pods[podName]; !found </span><span class="cov8" title="1">{ 3748 newPod := createJobPod(job, tc, ts.TopologyPolicy, i, jobForwarding) 3749 if err := cc.pluginOnPodCreate(job, newPod); err != nil </span><span class="cov0" title="0">{ 3750 return err 3751 }</span> 3752 <span class="cov8" title="1">podToCreate = append(podToCreate, newPod)</span> 3753 } else<span class="cov8" title="1"> { 3754 delete(pods, podName) 3755 if pod.DeletionTimestamp != nil </span><span class="cov0" title="0">{ 3756 klog.Infof("Pod <%s/%s> is terminating", pod.Namespace, pod.Name) 3757 atomic.AddInt32(&terminating, 1) 3758 continue</span> 3759 } 3760 3761 <span class="cov8" title="1">classifyAndAddUpPodBaseOnPhase(pod, &pending, &running, &succeeded, &failed, &unknown) 3762 calcPodStatus(pod, taskStatusCount)</span> 3763 } 3764 } 3765 3766 <span class="cov8" title="1">for _, pod := range pods </span><span class="cov0" title="0">{ 3767 podToDelete = append(podToDelete, pod) 3768 }</span> 3769 } 3770 3771 <span class="cov8" title="1">waitCreationGroup := sync.WaitGroup{} 3772 waitCreationGroup.Add(len(podToCreate)) 3773 for _, pod := range podToCreate </span><span class="cov8" title="1">{ 3774 go func(pod *v1.Pod) </span><span class="cov8" title="1">{ 3775 defer waitCreationGroup.Done() 3776 newPod, err := cc.kubeClient.CoreV1().Pods(pod.Namespace).Create(context.TODO(), pod, metav1.CreateOptions{}) 3777 if err != nil && !apierrors.IsAlreadyExists(err) </span><span class="cov0" title="0">{ 3778 // Failed to create Pod, waitCreationGroup a moment and then create it again 3779 // This is to ensure all podsMap under the same Job created 3780 // So gang-scheduling could schedule the Job successfully 3781 
klog.Errorf("Failed to create pod %s for Job %s, err %#v", 3782 pod.Name, job.Name, err) 3783 appendError(&creationErrs, fmt.Errorf("failed to create pod %s, err: %#v", pod.Name, err)) 3784 }</span> else<span class="cov8" title="1"> { 3785 classifyAndAddUpPodBaseOnPhase(newPod, &pending, &running, &succeeded, &failed, &unknown) 3786 calcPodStatus(pod, taskStatusCount) 3787 klog.V(3).Infof("Created Task <%s> of Job <%s/%s>", 3788 pod.Name, job.Namespace, job.Name) 3789 }</span> 3790 }(pod) 3791 } 3792 <span class="cov8" title="1">waitCreationGroup.Wait() 3793 3794 if len(creationErrs) != 0 </span><span class="cov0" title="0">{ 3795 cc.recorder.Event(job, v1.EventTypeWarning, FailedCreatePodReason, 3796 fmt.Sprintf("Error creating pods: %+v", creationErrs)) 3797 return fmt.Errorf("failed to create %d pods of %d", len(creationErrs), len(podToCreate)) 3798 }</span> 3799 3800 // Delete pods when scale down. 3801 <span class="cov8" title="1">waitDeletionGroup := sync.WaitGroup{} 3802 waitDeletionGroup.Add(len(podToDelete)) 3803 for _, pod := range podToDelete </span><span class="cov0" title="0">{ 3804 go func(pod *v1.Pod) </span><span class="cov0" title="0">{ 3805 defer waitDeletionGroup.Done() 3806 err := cc.deleteJobPod(job.Name, pod) 3807 if err != nil </span><span class="cov0" title="0">{ 3808 // Failed to delete Pod, waitCreationGroup a moment and then create it again 3809 // This is to ensure all podsMap under the same Job created 3810 // So gang-scheduling could schedule the Job successfully 3811 klog.Errorf("Failed to delete pod %s for Job %s, err %#v", 3812 pod.Name, job.Name, err) 3813 appendError(&deletionErrs, err) 3814 cc.resyncTask(pod) 3815 }</span> else<span class="cov0" title="0"> { 3816 klog.V(3).Infof("Deleted Task <%s> of Job <%s/%s>", 3817 pod.Name, job.Namespace, job.Name) 3818 atomic.AddInt32(&terminating, 1) 3819 }</span> 3820 }(pod) 3821 } 3822 <span class="cov8" title="1">waitDeletionGroup.Wait() 3823 3824 if len(deletionErrs) != 0 </span><span 
class="cov0" title="0">{ 3825 cc.recorder.Event(job, v1.EventTypeWarning, FailedDeletePodReason, 3826 fmt.Sprintf("Error deleting pods: %+v", deletionErrs)) 3827 return fmt.Errorf("failed to delete %d pods of %d", len(deletionErrs), len(podToDelete)) 3828 }</span> 3829 <span class="cov8" title="1">job.Status = batch.JobStatus{ 3830 State: job.Status.State, 3831 3832 Pending: pending, 3833 Running: running, 3834 Succeeded: succeeded, 3835 Failed: failed, 3836 Terminating: terminating, 3837 Unknown: unknown, 3838 Version: job.Status.Version, 3839 MinAvailable: job.Spec.MinAvailable, 3840 TaskStatusCount: taskStatusCount, 3841 ControlledResources: job.Status.ControlledResources, 3842 RetryCount: job.Status.RetryCount, 3843 } 3844 3845 if updateStatus != nil </span><span class="cov0" title="0">{ 3846 if updateStatus(&job.Status) </span><span class="cov0" title="0">{ 3847 job.Status.State.LastTransitionTime = metav1.Now() 3848 }</span> 3849 } 3850 <span class="cov8" title="1">newJob, err := cc.vcClient.BatchV1alpha1().Jobs(job.Namespace).UpdateStatus(context.TODO(), job, metav1.UpdateOptions{}) 3851 if err != nil </span><span class="cov0" title="0">{ 3852 klog.Errorf("Failed to update status of Job %v/%v: %v", 3853 job.Namespace, job.Name, err) 3854 return err 3855 }</span> 3856 <span class="cov8" title="1">if e := cc.cache.Update(newJob); e != nil </span><span class="cov0" title="0">{ 3857 klog.Errorf("SyncJob - Failed to update Job %v/%v in cache: %v", 3858 newJob.Namespace, newJob.Name, e) 3859 return e 3860 }</span> 3861 3862 <span class="cov8" title="1">return nil</span> 3863 } 3864 3865 func (cc *jobcontroller) createJobIOIfNotExist(job *batch.Job) (*batch.Job, error) <span class="cov8" title="1">{ 3866 // If PVC does not exist, create them for Job. 
3867 var needUpdate bool 3868 if job.Status.ControlledResources == nil </span><span class="cov8" title="1">{ 3869 job.Status.ControlledResources = make(map[string]string) 3870 }</span> 3871 <span class="cov8" title="1">for index, volume := range job.Spec.Volumes </span><span class="cov8" title="1">{ 3872 vcName := volume.VolumeClaimName 3873 if len(vcName) == 0 </span><span class="cov0" title="0">{ 3874 // NOTE(k82cn): Ensure never have duplicated generated names. 3875 for </span><span class="cov0" title="0">{ 3876 vcName = jobhelpers.GenPVCName(job.Name) 3877 exist, err := cc.checkPVCExist(job, vcName) 3878 if err != nil </span><span class="cov0" title="0">{ 3879 return job, err 3880 }</span> 3881 <span class="cov0" title="0">if exist </span><span class="cov0" title="0">{ 3882 continue</span> 3883 } 3884 <span class="cov0" title="0">job.Spec.Volumes[index].VolumeClaimName = vcName 3885 needUpdate = true 3886 break</span> 3887 } 3888 // TODO: check VolumeClaim must be set if VolumeClaimName is empty 3889 <span class="cov0" title="0">if volume.VolumeClaim != nil </span><span class="cov0" title="0">{ 3890 if err := cc.createPVC(job, vcName, volume.VolumeClaim); err != nil </span><span class="cov0" title="0">{ 3891 return job, err 3892 }</span> 3893 } 3894 } else<span class="cov8" title="1"> { 3895 exist, err := cc.checkPVCExist(job, vcName) 3896 if err != nil </span><span class="cov0" title="0">{ 3897 return job, err 3898 }</span> 3899 <span class="cov8" title="1">if !exist </span><span class="cov8" title="1">{ 3900 return job, fmt.Errorf("pvc %s is not found, the job will be in the Pending state until the PVC is created", vcName) 3901 }</span> 3902 } 3903 <span class="cov0" title="0">job.Status.ControlledResources["volume-pvc-"+vcName] = vcName</span> 3904 } 3905 <span class="cov8" title="1">if needUpdate </span><span class="cov0" title="0">{ 3906 newJob, err := cc.vcClient.BatchV1alpha1().Jobs(job.Namespace).Update(context.TODO(), job, metav1.UpdateOptions{}) 3907 
if err != nil </span><span class="cov0" title="0">{ 3908 klog.Errorf("Failed to update Job %v/%v for volume claim name: %v ", 3909 job.Namespace, job.Name, err) 3910 return job, err 3911 }</span> 3912 3913 <span class="cov0" title="0">newJob.Status = job.Status 3914 return newJob, err</span> 3915 } 3916 <span class="cov8" title="1">return job, nil</span> 3917 } 3918 3919 func (cc *jobcontroller) checkPVCExist(job *batch.Job, pvc string) (bool, error) <span class="cov8" title="1">{ 3920 if _, err := cc.pvcLister.PersistentVolumeClaims(job.Namespace).Get(pvc); err != nil </span><span class="cov8" title="1">{ 3921 if apierrors.IsNotFound(err) </span><span class="cov8" title="1">{ 3922 return false, nil 3923 }</span> 3924 <span class="cov0" title="0">klog.V(3).Infof("Failed to get PVC %s for job <%s/%s>: %v", 3925 pvc, job.Namespace, job.Name, err) 3926 return false, err</span> 3927 } 3928 <span class="cov0" title="0">return true, nil</span> 3929 } 3930 3931 func (cc *jobcontroller) createPVC(job *batch.Job, vcName string, volumeClaim *v1.PersistentVolumeClaimSpec) error <span class="cov8" title="1">{ 3932 pvc := &v1.PersistentVolumeClaim{ 3933 ObjectMeta: metav1.ObjectMeta{ 3934 Namespace: job.Namespace, 3935 Name: vcName, 3936 OwnerReferences: []metav1.OwnerReference{ 3937 *metav1.NewControllerRef(job, helpers.JobKind), 3938 }, 3939 }, 3940 Spec: *volumeClaim, 3941 } 3942 3943 klog.V(3).Infof("Try to create PVC: %v", pvc) 3944 3945 if _, e := cc.kubeClient.CoreV1().PersistentVolumeClaims(job.Namespace).Create(context.TODO(), pvc, metav1.CreateOptions{}); e != nil </span><span class="cov0" title="0">{ 3946 klog.V(3).Infof("Failed to create PVC for Job <%s/%s>: %v", 3947 job.Namespace, job.Name, e) 3948 return e 3949 }</span> 3950 <span class="cov8" title="1">return nil</span> 3951 } 3952 3953 func (cc *jobcontroller) createOrUpdatePodGroup(job *batch.Job) error <span class="cov8" title="1">{ 3954 // If PodGroup does not exist, create one for Job. 
3955 pg, err := cc.pgLister.PodGroups(job.Namespace).Get(job.Name) 3956 if err != nil </span><span class="cov8" title="1">{ 3957 if !apierrors.IsNotFound(err) </span><span class="cov0" title="0">{ 3958 klog.Errorf("Failed to get PodGroup for Job <%s/%s>: %v", 3959 job.Namespace, job.Name, err) 3960 return err 3961 }</span> 3962 3963 <span class="cov8" title="1">minTaskMember := map[string]int32{} 3964 for _, task := range job.Spec.Tasks </span><span class="cov8" title="1">{ 3965 if task.MinAvailable != nil </span><span class="cov0" title="0">{ 3966 minTaskMember[task.Name] = *task.MinAvailable 3967 }</span> else<span class="cov8" title="1"> { 3968 minTaskMember[task.Name] = task.Replicas 3969 }</span> 3970 } 3971 3972 <span class="cov8" title="1">pg := &scheduling.PodGroup{ 3973 ObjectMeta: metav1.ObjectMeta{ 3974 Namespace: job.Namespace, 3975 Name: job.Name, 3976 Annotations: job.Annotations, 3977 Labels: job.Labels, 3978 OwnerReferences: []metav1.OwnerReference{ 3979 *metav1.NewControllerRef(job, helpers.JobKind), 3980 }, 3981 }, 3982 Spec: scheduling.PodGroupSpec{ 3983 MinMember: job.Spec.MinAvailable, 3984 MinTaskMember: minTaskMember, 3985 Queue: job.Spec.Queue, 3986 MinResources: cc.calcPGMinResources(job), 3987 PriorityClassName: job.Spec.PriorityClassName, 3988 }, 3989 } 3990 3991 if _, err = cc.vcClient.SchedulingV1beta1().PodGroups(job.Namespace).Create(context.TODO(), pg, metav1.CreateOptions{}); err != nil </span><span class="cov0" title="0">{ 3992 if !apierrors.IsAlreadyExists(err) </span><span class="cov0" title="0">{ 3993 klog.Errorf("Failed to create PodGroup for Job <%s/%s>: %v", 3994 job.Namespace, job.Name, err) 3995 return err 3996 }</span> 3997 } 3998 <span class="cov8" title="1">return nil</span> 3999 } 4000 4001 <span class="cov8" title="1">pgShouldUpdate := false 4002 if pg.Spec.PriorityClassName != job.Spec.PriorityClassName </span><span class="cov8" title="1">{ 4003 pg.Spec.PriorityClassName = job.Spec.PriorityClassName 4004 
pgShouldUpdate = true 4005 }</span> 4006 4007 <span class="cov8" title="1">minResources := cc.calcPGMinResources(job) 4008 if pg.Spec.MinMember != job.Spec.MinAvailable || !reflect.DeepEqual(pg.Spec.MinResources, minResources) </span><span class="cov0" title="0">{ 4009 pg.Spec.MinMember = job.Spec.MinAvailable 4010 pg.Spec.MinResources = minResources 4011 pgShouldUpdate = true 4012 }</span> 4013 4014 <span class="cov8" title="1">if pg.Spec.MinTaskMember == nil </span><span class="cov8" title="1">{ 4015 pgShouldUpdate = true 4016 pg.Spec.MinTaskMember = make(map[string]int32) 4017 }</span> 4018 4019 <span class="cov8" title="1">for _, task := range job.Spec.Tasks </span><span class="cov8" title="1">{ 4020 if task.MinAvailable == nil </span><span class="cov8" title="1">{ 4021 continue</span> 4022 } 4023 4024 <span class="cov0" title="0">if taskMember, ok := pg.Spec.MinTaskMember[task.Name]; !ok </span><span class="cov0" title="0">{ 4025 pgShouldUpdate = true 4026 pg.Spec.MinTaskMember[task.Name] = *task.MinAvailable 4027 }</span> else<span class="cov0" title="0"> { 4028 if taskMember == *task.MinAvailable </span><span class="cov0" title="0">{ 4029 continue</span> 4030 } 4031 4032 <span class="cov0" title="0">pgShouldUpdate = true 4033 pg.Spec.MinTaskMember[task.Name] = *task.MinAvailable</span> 4034 } 4035 } 4036 4037 <span class="cov8" title="1">if !pgShouldUpdate </span><span class="cov8" title="1">{ 4038 return nil 4039 }</span> 4040 4041 <span class="cov8" title="1">_, err = cc.vcClient.SchedulingV1beta1().PodGroups(job.Namespace).Update(context.TODO(), pg, metav1.UpdateOptions{}) 4042 if err != nil </span><span class="cov0" title="0">{ 4043 klog.V(3).Infof("Failed to update PodGroup for Job <%s/%s>: %v", 4044 job.Namespace, job.Name, err) 4045 }</span> 4046 <span class="cov8" title="1">return err</span> 4047 } 4048 4049 func (cc *jobcontroller) deleteJobPod(jobName string, pod *v1.Pod) error <span class="cov8" title="1">{ 4050 err := 
cc.kubeClient.CoreV1().Pods(pod.Namespace).Delete(context.TODO(), pod.Name, metav1.DeleteOptions{}) 4051 if err != nil && !apierrors.IsNotFound(err) </span><span class="cov0" title="0">{ 4052 klog.Errorf("Failed to delete pod %s/%s for Job %s, err %#v", 4053 pod.Namespace, pod.Name, jobName, err) 4054 4055 return fmt.Errorf("failed to delete pod %s, err %#v", pod.Name, err) 4056 }</span> 4057 4058 <span class="cov8" title="1">return nil</span> 4059 } 4060 4061 func (cc *jobcontroller) calcPGMinResources(job *batch.Job) *v1.ResourceList <span class="cov8" title="1">{ 4062 // sort task by priorityClasses 4063 var tasksPriority TasksPriority 4064 for _, task := range job.Spec.Tasks </span><span class="cov8" title="1">{ 4065 tp := TaskPriority{0, task} 4066 pc := task.Template.Spec.PriorityClassName 4067 4068 priorityClass, err := cc.pcLister.Get(pc) 4069 if err != nil || priorityClass == nil </span><span class="cov8" title="1">{ 4070 klog.Warningf("Ignore task %s priority class %s: %v", task.Name, pc, err) 4071 }</span> else<span class="cov0" title="0"> { 4072 tp.priority = priorityClass.Value 4073 }</span> 4074 4075 <span class="cov8" title="1">tasksPriority = append(tasksPriority, tp)</span> 4076 } 4077 4078 <span class="cov8" title="1">sort.Sort(tasksPriority) 4079 4080 minAvailableTasksRes := v1.ResourceList{} 4081 podCnt := int32(0) 4082 for _, task := range tasksPriority </span><span class="cov8" title="1">{ 4083 for i := int32(0); i < task.Replicas; i++ </span><span class="cov8" title="1">{ 4084 if podCnt >= job.Spec.MinAvailable </span><span class="cov8" title="1">{ 4085 break</span> 4086 } 4087 <span class="cov0" title="0">podCnt++ 4088 for _, c := range task.Template.Spec.Containers </span><span class="cov0" title="0">{ 4089 addResourceList(minAvailableTasksRes, c.Resources.Requests, c.Resources.Limits) 4090 }</span> 4091 } 4092 } 4093 4094 <span class="cov8" title="1">return &minAvailableTasksRes</span> 4095 } 4096 4097 func (cc *jobcontroller) 
initJobStatus(job *batch.Job) (*batch.Job, error) <span class="cov8" title="1">{ 4098 if job.Status.State.Phase != "" </span><span class="cov8" title="1">{ 4099 return job, nil 4100 }</span> 4101 4102 <span class="cov0" title="0">job.Status.State.LastTransitionTime = metav1.Now() 4103 job.Status.State.Phase = batch.Pending 4104 job.Status.State.LastTransitionTime = metav1.Now() 4105 job.Status.MinAvailable = job.Spec.MinAvailable 4106 newJob, err := cc.vcClient.BatchV1alpha1().Jobs(job.Namespace).UpdateStatus(context.TODO(), job, metav1.UpdateOptions{}) 4107 if err != nil </span><span class="cov0" title="0">{ 4108 klog.Errorf("Failed to update status of Job %v/%v: %v", 4109 job.Namespace, job.Name, err) 4110 return nil, err 4111 }</span> 4112 <span class="cov0" title="0">if err := cc.cache.Update(newJob); err != nil </span><span class="cov0" title="0">{ 4113 klog.Errorf("CreateJob - Failed to update Job %v/%v in cache: %v", 4114 newJob.Namespace, newJob.Name, err) 4115 return nil, err 4116 }</span> 4117 4118 <span class="cov0" title="0">return newJob, nil</span> 4119 } 4120 4121 func classifyAndAddUpPodBaseOnPhase(pod *v1.Pod, pending, running, succeeded, failed, unknown *int32) <span class="cov8" title="1">{ 4122 switch pod.Status.Phase </span>{ 4123 case v1.PodPending:<span class="cov8" title="1"> 4124 atomic.AddInt32(pending, 1)</span> 4125 case v1.PodRunning:<span class="cov8" title="1"> 4126 atomic.AddInt32(running, 1)</span> 4127 case v1.PodSucceeded:<span class="cov0" title="0"> 4128 atomic.AddInt32(succeeded, 1)</span> 4129 case v1.PodFailed:<span class="cov0" title="0"> 4130 atomic.AddInt32(failed, 1)</span> 4131 default:<span class="cov8" title="1"> 4132 atomic.AddInt32(unknown, 1)</span> 4133 } 4134 } 4135 4136 func calcPodStatus(pod *v1.Pod, taskStatusCount map[string]batch.TaskState) <span class="cov8" title="1">{ 4137 taskName, found := pod.Annotations[batch.TaskSpecKey] 4138 if !found </span><span class="cov8" title="1">{ 4139 return 4140 }</span> 
4141 4142 <span class="cov8" title="1">calMutex.Lock() 4143 defer calMutex.Unlock() 4144 if _, ok := taskStatusCount[taskName]; !ok </span><span class="cov8" title="1">{ 4145 taskStatusCount[taskName] = batch.TaskState{ 4146 Phase: make(map[v1.PodPhase]int32), 4147 } 4148 }</span> 4149 4150 <span class="cov8" title="1">switch pod.Status.Phase </span>{ 4151 case v1.PodPending:<span class="cov0" title="0"> 4152 taskStatusCount[taskName].Phase[v1.PodPending]++</span> 4153 case v1.PodRunning:<span class="cov0" title="0"> 4154 taskStatusCount[taskName].Phase[v1.PodRunning]++</span> 4155 case v1.PodSucceeded:<span class="cov0" title="0"> 4156 taskStatusCount[taskName].Phase[v1.PodSucceeded]++</span> 4157 case v1.PodFailed:<span class="cov0" title="0"> 4158 taskStatusCount[taskName].Phase[v1.PodFailed]++</span> 4159 default:<span class="cov8" title="1"> 4160 taskStatusCount[taskName].Phase[v1.PodUnknown]++</span> 4161 } 4162 } 4163 4164 func isInitiated(job *batch.Job) bool <span class="cov8" title="1">{ 4165 if job.Status.State.Phase == "" || job.Status.State.Phase == batch.Pending </span><span class="cov8" title="1">{ 4166 return false 4167 }</span> 4168 4169 <span class="cov8" title="1">return true</span> 4170 } 4171 </pre> 4172 4173 <pre class="file" id="file25" style="display: none">/* 4174 Copyright 2017 The Volcano Authors. 4175 4176 Licensed under the Apache License, Version 2.0 (the "License"); 4177 you may not use this file except in compliance with the License. 4178 You may obtain a copy of the License at 4179 4180 http://www.apache.org/licenses/LICENSE-2.0 4181 4182 Unless required by applicable law or agreed to in writing, software 4183 distributed under the License is distributed on an "AS IS" BASIS, 4184 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 4185 See the License for the specific language governing permissions and 4186 limitations under the License. 
4187 */ 4188 4189 package job 4190 4191 import ( 4192 "context" 4193 "fmt" 4194 "reflect" 4195 "strconv" 4196 4197 v1 "k8s.io/api/core/v1" 4198 apierrors "k8s.io/apimachinery/pkg/api/errors" 4199 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 4200 "k8s.io/client-go/tools/cache" 4201 "k8s.io/klog" 4202 4203 batch "volcano.sh/apis/pkg/apis/batch/v1alpha1" 4204 bus "volcano.sh/apis/pkg/apis/bus/v1alpha1" 4205 "volcano.sh/apis/pkg/apis/helpers" 4206 scheduling "volcano.sh/apis/pkg/apis/scheduling/v1beta1" 4207 "volcano.sh/volcano/pkg/controllers/apis" 4208 jobcache "volcano.sh/volcano/pkg/controllers/cache" 4209 jobhelpers "volcano.sh/volcano/pkg/controllers/job/helpers" 4210 ) 4211 4212 func (cc *jobcontroller) addCommand(obj interface{}) <span class="cov8" title="1">{ 4213 cmd, ok := obj.(*bus.Command) 4214 if !ok </span><span class="cov8" title="1">{ 4215 klog.Errorf("obj is not Command") 4216 return 4217 }</span> 4218 4219 <span class="cov8" title="1">cc.commandQueue.Add(cmd)</span> 4220 } 4221 4222 func (cc *jobcontroller) addJob(obj interface{}) <span class="cov8" title="1">{ 4223 job, ok := obj.(*batch.Job) 4224 if !ok </span><span class="cov0" title="0">{ 4225 klog.Errorf("obj is not Job") 4226 return 4227 }</span> 4228 4229 <span class="cov8" title="1">req := apis.Request{ 4230 Namespace: job.Namespace, 4231 JobName: job.Name, 4232 4233 Event: bus.OutOfSyncEvent, 4234 } 4235 4236 // TODO(k82cn): if failed to add job, the cache should be refresh 4237 if err := cc.cache.Add(job); err != nil </span><span class="cov0" title="0">{ 4238 klog.Errorf("Failed to add job <%s/%s>: %v in cache", 4239 job.Namespace, job.Name, err) 4240 }</span> 4241 <span class="cov8" title="1">key := jobhelpers.GetJobKeyByReq(&req) 4242 queue := cc.getWorkerQueue(key) 4243 queue.Add(req)</span> 4244 } 4245 4246 func (cc *jobcontroller) updateJob(oldObj, newObj interface{}) <span class="cov8" title="1">{ 4247 newJob, ok := newObj.(*batch.Job) 4248 if !ok </span><span class="cov0" 
title="0">{ 4249 klog.Errorf("newObj is not Job") 4250 return 4251 }</span> 4252 4253 <span class="cov8" title="1">oldJob, ok := oldObj.(*batch.Job) 4254 if !ok </span><span class="cov0" title="0">{ 4255 klog.Errorf("oldJob is not Job") 4256 return 4257 }</span> 4258 4259 // No need to update if ResourceVersion is not changed 4260 <span class="cov8" title="1">if newJob.ResourceVersion == oldJob.ResourceVersion </span><span class="cov8" title="1">{ 4261 klog.V(6).Infof("No need to update because job is not modified.") 4262 return 4263 }</span> 4264 4265 <span class="cov8" title="1">if err := cc.cache.Update(newJob); err != nil </span><span class="cov0" title="0">{ 4266 klog.Errorf("UpdateJob - Failed to update job <%s/%s>: %v in cache", 4267 newJob.Namespace, newJob.Name, err) 4268 }</span> 4269 4270 // NOTE: Since we only reconcile job based on Spec, we will ignore other attributes 4271 // For Job status, it's used internally and always been updated via our controller. 4272 <span class="cov8" title="1">if reflect.DeepEqual(newJob.Spec, oldJob.Spec) && newJob.Status.State.Phase == oldJob.Status.State.Phase </span><span class="cov0" title="0">{ 4273 klog.V(6).Infof("Job update event is ignored since no update in 'Spec'.") 4274 return 4275 }</span> 4276 4277 <span class="cov8" title="1">req := apis.Request{ 4278 Namespace: newJob.Namespace, 4279 JobName: newJob.Name, 4280 Event: bus.OutOfSyncEvent, 4281 } 4282 key := jobhelpers.GetJobKeyByReq(&req) 4283 queue := cc.getWorkerQueue(key) 4284 queue.Add(req)</span> 4285 } 4286 4287 func (cc *jobcontroller) deleteJob(obj interface{}) <span class="cov0" title="0">{ 4288 job, ok := obj.(*batch.Job) 4289 if !ok </span><span class="cov0" title="0">{ 4290 // If we reached here it means the Job was deleted but its final state is unrecorded. 
4291 tombstone, ok := obj.(cache.DeletedFinalStateUnknown) 4292 if !ok </span><span class="cov0" title="0">{ 4293 klog.Errorf("Couldn't get object from tombstone %#v", obj) 4294 return 4295 }</span> 4296 <span class="cov0" title="0">job, ok = tombstone.Obj.(*batch.Job) 4297 if !ok </span><span class="cov0" title="0">{ 4298 klog.Errorf("Tombstone contained object that is not a volcano Job: %#v", obj) 4299 return 4300 }</span> 4301 } 4302 4303 <span class="cov0" title="0">if err := cc.cache.Delete(job); err != nil </span><span class="cov0" title="0">{ 4304 klog.Errorf("Failed to delete job <%s/%s>: %v in cache", 4305 job.Namespace, job.Name, err) 4306 }</span> 4307 } 4308 4309 func (cc *jobcontroller) addPod(obj interface{}) <span class="cov8" title="1">{ 4310 pod, ok := obj.(*v1.Pod) 4311 if !ok </span><span class="cov0" title="0">{ 4312 klog.Errorf("Failed to convert %v to v1.Pod", obj) 4313 return 4314 }</span> 4315 // Filter out pods that are not created from volcano job 4316 <span class="cov8" title="1">if !isControlledBy(pod, helpers.JobKind) </span><span class="cov0" title="0">{ 4317 return 4318 }</span> 4319 4320 <span class="cov8" title="1">jobName, found := pod.Annotations[batch.JobNameKey] 4321 if !found </span><span class="cov0" title="0">{ 4322 klog.Infof("Failed to find jobName of Pod <%s/%s>, skipping", 4323 pod.Namespace, pod.Name) 4324 return 4325 }</span> 4326 4327 <span class="cov8" title="1">version, found := pod.Annotations[batch.JobVersion] 4328 if !found </span><span class="cov0" title="0">{ 4329 klog.Infof("Failed to find jobVersion of Pod <%s/%s>, skipping", 4330 pod.Namespace, pod.Name) 4331 return 4332 }</span> 4333 4334 <span class="cov8" title="1">dVersion, err := strconv.Atoi(version) 4335 if err != nil </span><span class="cov0" title="0">{ 4336 klog.Infof("Failed to convert jobVersion of Pod <%s/%s> into number, skipping", 4337 pod.Namespace, pod.Name) 4338 return 4339 }</span> 4340 4341 <span class="cov8" title="1">if 
pod.DeletionTimestamp != nil </span><span class="cov0" title="0">{ 4342 cc.deletePod(pod) 4343 return 4344 }</span> 4345 4346 <span class="cov8" title="1">req := apis.Request{ 4347 Namespace: pod.Namespace, 4348 JobName: jobName, 4349 4350 Event: bus.OutOfSyncEvent, 4351 JobVersion: int32(dVersion), 4352 } 4353 4354 if err := cc.cache.AddPod(pod); err != nil </span><span class="cov8" title="1">{ 4355 klog.Errorf("Failed to add Pod <%s/%s>: %v to cache", 4356 pod.Namespace, pod.Name, err) 4357 }</span> 4358 <span class="cov8" title="1">key := jobhelpers.GetJobKeyByReq(&req) 4359 queue := cc.getWorkerQueue(key) 4360 queue.Add(req)</span> 4361 } 4362 4363 func (cc *jobcontroller) updatePod(oldObj, newObj interface{}) <span class="cov8" title="1">{ 4364 oldPod, ok := oldObj.(*v1.Pod) 4365 if !ok </span><span class="cov0" title="0">{ 4366 klog.Errorf("Failed to convert %v to v1.Pod", oldObj) 4367 return 4368 }</span> 4369 4370 <span class="cov8" title="1">newPod, ok := newObj.(*v1.Pod) 4371 if !ok </span><span class="cov0" title="0">{ 4372 klog.Errorf("Failed to convert %v to v1.Pod", newObj) 4373 return 4374 }</span> 4375 4376 // Filter out pods that are not created from volcano job 4377 <span class="cov8" title="1">if !isControlledBy(newPod, helpers.JobKind) </span><span class="cov0" title="0">{ 4378 return 4379 }</span> 4380 4381 <span class="cov8" title="1">if newPod.ResourceVersion == oldPod.ResourceVersion </span><span class="cov0" title="0">{ 4382 return 4383 }</span> 4384 4385 <span class="cov8" title="1">if newPod.DeletionTimestamp != nil </span><span class="cov0" title="0">{ 4386 cc.deletePod(newObj) 4387 return 4388 }</span> 4389 4390 <span class="cov8" title="1">taskName, found := newPod.Annotations[batch.TaskSpecKey] 4391 if !found </span><span class="cov0" title="0">{ 4392 klog.Infof("Failed to find taskName of Pod <%s/%s>, skipping", 4393 newPod.Namespace, newPod.Name) 4394 return 4395 }</span> 4396 4397 <span class="cov8" title="1">jobName, found := 
newPod.Annotations[batch.JobNameKey] 4398 if !found </span><span class="cov0" title="0">{ 4399 klog.Infof("Failed to find jobName of Pod <%s/%s>, skipping", 4400 newPod.Namespace, newPod.Name) 4401 return 4402 }</span> 4403 4404 <span class="cov8" title="1">version, found := newPod.Annotations[batch.JobVersion] 4405 if !found </span><span class="cov0" title="0">{ 4406 klog.Infof("Failed to find jobVersion of Pod <%s/%s>, skipping", 4407 newPod.Namespace, newPod.Name) 4408 return 4409 }</span> 4410 4411 <span class="cov8" title="1">dVersion, err := strconv.Atoi(version) 4412 if err != nil </span><span class="cov0" title="0">{ 4413 klog.Infof("Failed to convert jobVersion of Pod into number <%s/%s>, skipping", 4414 newPod.Namespace, newPod.Name) 4415 return 4416 }</span> 4417 4418 <span class="cov8" title="1">if err := cc.cache.UpdatePod(newPod); err != nil </span><span class="cov0" title="0">{ 4419 klog.Errorf("Failed to update Pod <%s/%s>: %v in cache", 4420 newPod.Namespace, newPod.Name, err) 4421 }</span> 4422 4423 <span class="cov8" title="1">event := bus.OutOfSyncEvent 4424 var exitCode int32 4425 4426 switch newPod.Status.Phase </span>{ 4427 case v1.PodFailed:<span class="cov8" title="1"> 4428 if oldPod.Status.Phase != v1.PodFailed </span><span class="cov8" title="1">{ 4429 event = bus.PodFailedEvent 4430 // TODO: currently only one container pod is supported by volcano 4431 // Once multi containers pod is supported, update accordingly. 
4432 if len(newPod.Status.ContainerStatuses) > 0 && newPod.Status.ContainerStatuses[0].State.Terminated != nil </span><span class="cov0" title="0">{ 4433 exitCode = newPod.Status.ContainerStatuses[0].State.Terminated.ExitCode 4434 }</span> 4435 } 4436 case v1.PodSucceeded:<span class="cov0" title="0"> 4437 if oldPod.Status.Phase != v1.PodSucceeded && 4438 cc.cache.TaskCompleted(jobcache.JobKeyByName(newPod.Namespace, jobName), taskName) </span><span class="cov0" title="0">{ 4439 event = bus.TaskCompletedEvent 4440 }</span> 4441 case v1.PodPending, v1.PodRunning:<span class="cov8" title="1"> 4442 if cc.cache.TaskFailed(jobcache.JobKeyByName(newPod.Namespace, jobName), taskName) </span><span class="cov0" title="0">{ 4443 event = bus.TaskFailedEvent 4444 }</span> 4445 } 4446 4447 <span class="cov8" title="1">req := apis.Request{ 4448 Namespace: newPod.Namespace, 4449 JobName: jobName, 4450 TaskName: taskName, 4451 4452 Event: event, 4453 ExitCode: exitCode, 4454 JobVersion: int32(dVersion), 4455 } 4456 4457 key := jobhelpers.GetJobKeyByReq(&req) 4458 queue := cc.getWorkerQueue(key) 4459 queue.Add(req)</span> 4460 } 4461 4462 func (cc *jobcontroller) deletePod(obj interface{}) <span class="cov8" title="1">{ 4463 pod, ok := obj.(*v1.Pod) 4464 if !ok </span><span class="cov0" title="0">{ 4465 // If we reached here it means the pod was deleted but its final state is unrecorded. 
4466 tombstone, ok := obj.(cache.DeletedFinalStateUnknown) 4467 if !ok </span><span class="cov0" title="0">{ 4468 klog.Errorf("Couldn't get object from tombstone %#v", obj) 4469 return 4470 }</span> 4471 <span class="cov0" title="0">pod, ok = tombstone.Obj.(*v1.Pod) 4472 if !ok </span><span class="cov0" title="0">{ 4473 klog.Errorf("Tombstone contained object that is not a Pod: %#v", obj) 4474 return 4475 }</span> 4476 } 4477 4478 // Filter out pods that are not created from volcano job 4479 <span class="cov8" title="1">if !isControlledBy(pod, helpers.JobKind) </span><span class="cov0" title="0">{ 4480 return 4481 }</span> 4482 4483 <span class="cov8" title="1">taskName, found := pod.Annotations[batch.TaskSpecKey] 4484 if !found </span><span class="cov0" title="0">{ 4485 klog.Infof("Failed to find taskName of Pod <%s/%s>, skipping", 4486 pod.Namespace, pod.Name) 4487 return 4488 }</span> 4489 4490 <span class="cov8" title="1">jobName, found := pod.Annotations[batch.JobNameKey] 4491 if !found </span><span class="cov0" title="0">{ 4492 klog.Infof("Failed to find jobName of Pod <%s/%s>, skipping", 4493 pod.Namespace, pod.Name) 4494 return 4495 }</span> 4496 4497 <span class="cov8" title="1">version, found := pod.Annotations[batch.JobVersion] 4498 if !found </span><span class="cov0" title="0">{ 4499 klog.Infof("Failed to find jobVersion of Pod <%s/%s>, skipping", 4500 pod.Namespace, pod.Name) 4501 return 4502 }</span> 4503 4504 <span class="cov8" title="1">dVersion, err := strconv.Atoi(version) 4505 if err != nil </span><span class="cov0" title="0">{ 4506 klog.Infof("Failed to convert jobVersion of Pod <%s/%s> into number, skipping", 4507 pod.Namespace, pod.Name) 4508 return 4509 }</span> 4510 4511 <span class="cov8" title="1">req := apis.Request{ 4512 Namespace: pod.Namespace, 4513 JobName: jobName, 4514 TaskName: taskName, 4515 4516 Event: bus.PodEvictedEvent, 4517 JobVersion: int32(dVersion), 4518 } 4519 4520 if err := cc.cache.DeletePod(pod); err != nil 
</span><span class="cov0" title="0">{ 4521 klog.Errorf("Failed to delete Pod <%s/%s>: %v in cache", 4522 pod.Namespace, pod.Name, err) 4523 }</span> 4524 4525 <span class="cov8" title="1">key := jobhelpers.GetJobKeyByReq(&req) 4526 queue := cc.getWorkerQueue(key) 4527 queue.Add(req)</span> 4528 } 4529 4530 func (cc *jobcontroller) recordJobEvent(namespace, name string, event batch.JobEvent, message string) <span class="cov0" title="0">{ 4531 job, err := cc.cache.Get(jobcache.JobKeyByName(namespace, name)) 4532 if err != nil </span><span class="cov0" title="0">{ 4533 klog.Warningf("Failed to find job in cache when reporting job event <%s/%s>: %v", 4534 namespace, name, err) 4535 return 4536 }</span> 4537 <span class="cov0" title="0">cc.recorder.Event(job.Job, v1.EventTypeNormal, string(event), message)</span> 4538 } 4539 4540 func (cc *jobcontroller) handleCommands() <span class="cov0" title="0">{ 4541 for cc.processNextCommand() </span>{<span class="cov0" title="0"> 4542 }</span> 4543 } 4544 4545 func (cc *jobcontroller) processNextCommand() bool <span class="cov0" title="0">{ 4546 obj, shutdown := cc.commandQueue.Get() 4547 if shutdown </span><span class="cov0" title="0">{ 4548 return false 4549 }</span> 4550 <span class="cov0" title="0">cmd := obj.(*bus.Command) 4551 defer cc.commandQueue.Done(cmd) 4552 4553 if err := cc.vcClient.BusV1alpha1().Commands(cmd.Namespace).Delete(context.TODO(), cmd.Name, metav1.DeleteOptions{}); err != nil </span><span class="cov0" title="0">{ 4554 if !apierrors.IsNotFound(err) </span><span class="cov0" title="0">{ 4555 klog.Errorf("Failed to delete Command <%s/%s>.", cmd.Namespace, cmd.Name) 4556 cc.commandQueue.AddRateLimited(cmd) 4557 }</span> 4558 <span class="cov0" title="0">return true</span> 4559 } 4560 <span class="cov0" title="0">cc.recordJobEvent(cmd.Namespace, cmd.TargetObject.Name, 4561 batch.CommandIssued, 4562 fmt.Sprintf( 4563 "Start to execute command %s, and clean it up to make sure executed not more than once.", 
cmd.Action)) 4564 req := apis.Request{ 4565 Namespace: cmd.Namespace, 4566 JobName: cmd.TargetObject.Name, 4567 Event: bus.CommandIssuedEvent, 4568 Action: bus.Action(cmd.Action), 4569 } 4570 4571 key := jobhelpers.GetJobKeyByReq(&req) 4572 queue := cc.getWorkerQueue(key) 4573 queue.Add(req) 4574 4575 return true</span> 4576 } 4577 4578 func (cc *jobcontroller) updatePodGroup(oldObj, newObj interface{}) <span class="cov8" title="1">{ 4579 oldPG, ok := oldObj.(*scheduling.PodGroup) 4580 if !ok </span><span class="cov0" title="0">{ 4581 klog.Errorf("Failed to convert %v to PodGroup", newObj) 4582 return 4583 }</span> 4584 4585 <span class="cov8" title="1">newPG, ok := newObj.(*scheduling.PodGroup) 4586 if !ok </span><span class="cov0" title="0">{ 4587 klog.Errorf("Failed to convert %v to PodGroup", newObj) 4588 return 4589 }</span> 4590 4591 <span class="cov8" title="1">_, err := cc.cache.Get(jobcache.JobKeyByName(newPG.Namespace, newPG.Name)) 4592 if err != nil && newPG.Annotations != nil </span><span class="cov0" title="0">{ 4593 klog.Warningf( 4594 "Failed to find job in cache by PodGroup, this may not be a PodGroup for volcano job.") 4595 }</span> 4596 4597 <span class="cov8" title="1">if newPG.Status.Phase != oldPG.Status.Phase </span><span class="cov8" title="1">{ 4598 req := apis.Request{ 4599 Namespace: newPG.Namespace, 4600 JobName: newPG.Name, 4601 } 4602 switch newPG.Status.Phase </span>{ 4603 case scheduling.PodGroupUnknown:<span class="cov0" title="0"> 4604 req.Event = bus.JobUnknownEvent</span> 4605 } 4606 <span class="cov8" title="1">key := jobhelpers.GetJobKeyByReq(&req) 4607 queue := cc.getWorkerQueue(key) 4608 queue.Add(req)</span> 4609 } 4610 } 4611 4612 // TODO(k82cn): add handler for PodGroup unschedulable event. 4613 </pre> 4614 4615 <pre class="file" id="file26" style="display: none">/* 4616 Copyright 2019 The Volcano Authors. 
4617 4618 Licensed under the Apache License, Version 2.0 (the "License"); 4619 you may not use this file except in compliance with the License. 4620 You may obtain a copy of the License at 4621 4622 http://www.apache.org/licenses/LICENSE-2.0 4623 4624 Unless required by applicable law or agreed to in writing, software 4625 distributed under the License is distributed on an "AS IS" BASIS, 4626 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 4627 See the License for the specific language governing permissions and 4628 limitations under the License. 4629 */ 4630 4631 package job 4632 4633 import ( 4634 "fmt" 4635 4636 v1 "k8s.io/api/core/v1" 4637 "k8s.io/klog" 4638 4639 batch "volcano.sh/apis/pkg/apis/batch/v1alpha1" 4640 "volcano.sh/volcano/pkg/controllers/job/plugins" 4641 pluginsinterface "volcano.sh/volcano/pkg/controllers/job/plugins/interface" 4642 ) 4643 4644 func (cc *jobcontroller) pluginOnPodCreate(job *batch.Job, pod *v1.Pod) error <span class="cov8" title="1">{ 4645 client := pluginsinterface.PluginClientset{KubeClients: cc.kubeClient} 4646 for name, args := range job.Spec.Plugins </span><span class="cov8" title="1">{ 4647 pb, found := plugins.GetPluginBuilder(name) 4648 if !found </span><span class="cov8" title="1">{ 4649 err := fmt.Errorf("failed to get plugin %s", name) 4650 klog.Error(err) 4651 return err 4652 }</span> 4653 <span class="cov8" title="1">klog.Infof("Starting to execute plugin at <pluginOnPodCreate>: %s on job: <%s/%s>", name, job.Namespace, job.Name) 4654 if err := pb(client, args).OnPodCreate(pod, job); err != nil </span><span class="cov0" title="0">{ 4655 klog.Errorf("Failed to process on pod create plugin %s, err %v.", name, err) 4656 return err 4657 }</span> 4658 } 4659 <span class="cov8" title="1">return nil</span> 4660 } 4661 4662 func (cc *jobcontroller) pluginOnJobAdd(job *batch.Job) error <span class="cov8" title="1">{ 4663 client := pluginsinterface.PluginClientset{KubeClients: cc.kubeClient} 4664 if 
job.Status.ControlledResources == nil </span><span class="cov8" title="1">{ 4665 job.Status.ControlledResources = make(map[string]string) 4666 }</span> 4667 <span class="cov8" title="1">for name, args := range job.Spec.Plugins </span><span class="cov8" title="1">{ 4668 pb, found := plugins.GetPluginBuilder(name) 4669 if !found </span><span class="cov8" title="1">{ 4670 err := fmt.Errorf("failed to get plugin %s", name) 4671 klog.Error(err) 4672 return err 4673 }</span> 4674 <span class="cov8" title="1">klog.Infof("Starting to execute plugin at <pluginOnJobAdd>: %s on job: <%s/%s>", name, job.Namespace, job.Name) 4675 if err := pb(client, args).OnJobAdd(job); err != nil </span><span class="cov0" title="0">{ 4676 klog.Errorf("Failed to process on job add plugin %s, err %v.", name, err) 4677 return err 4678 }</span> 4679 } 4680 4681 <span class="cov8" title="1">return nil</span> 4682 } 4683 4684 func (cc *jobcontroller) pluginOnJobDelete(job *batch.Job) error <span class="cov8" title="1">{ 4685 if job.Status.ControlledResources == nil </span><span class="cov8" title="1">{ 4686 job.Status.ControlledResources = make(map[string]string) 4687 }</span> 4688 <span class="cov8" title="1">client := pluginsinterface.PluginClientset{KubeClients: cc.kubeClient} 4689 for name, args := range job.Spec.Plugins </span><span class="cov8" title="1">{ 4690 pb, found := plugins.GetPluginBuilder(name) 4691 if !found </span><span class="cov8" title="1">{ 4692 err := fmt.Errorf("failed to get plugin %s", name) 4693 klog.Error(err) 4694 return err 4695 }</span> 4696 <span class="cov8" title="1">klog.Infof("Starting to execute plugin at <pluginOnJobDelete>: %s on job: <%s/%s>", name, job.Namespace, job.Name) 4697 if err := pb(client, args).OnJobDelete(job); err != nil </span><span class="cov0" title="0">{ 4698 klog.Errorf("failed to process on job delete plugin %s, err %v.", name, err) 4699 return err 4700 }</span> 4701 } 4702 4703 <span class="cov8" title="1">return nil</span> 4704 } 4705 
4706 func (cc *jobcontroller) pluginOnJobUpdate(job *batch.Job) error <span class="cov8" title="1">{ 4707 client := pluginsinterface.PluginClientset{KubeClients: cc.kubeClient} 4708 if job.Status.ControlledResources == nil </span><span class="cov8" title="1">{ 4709 job.Status.ControlledResources = make(map[string]string) 4710 }</span> 4711 <span class="cov8" title="1">for name, args := range job.Spec.Plugins </span><span class="cov0" title="0">{ 4712 pb, found := plugins.GetPluginBuilder(name) 4713 if !found </span><span class="cov0" title="0">{ 4714 err := fmt.Errorf("failed to get plugin %s", name) 4715 klog.Error(err) 4716 return err 4717 }</span> 4718 <span class="cov0" title="0">klog.Infof("Starting to execute plugin at <pluginOnJobUpdate>: %s on job: <%s/%s>", name, job.Namespace, job.Name) 4719 if err := pb(client, args).OnJobUpdate(job); err != nil </span><span class="cov0" title="0">{ 4720 klog.Errorf("Failed to process on job update plugin %s, err %v.", name, err) 4721 return err 4722 }</span> 4723 } 4724 4725 <span class="cov8" title="1">return nil</span> 4726 } 4727 </pre> 4728 4729 <pre class="file" id="file27" style="display: none">/* 4730 Copyright 2019 The Volcano Authors. 4731 4732 Licensed under the Apache License, Version 2.0 (the "License"); 4733 you may not use this file except in compliance with the License. 4734 You may obtain a copy of the License at 4735 4736 http://www.apache.org/licenses/LICENSE-2.0 4737 4738 Unless required by applicable law or agreed to in writing, software 4739 distributed under the License is distributed on an "AS IS" BASIS, 4740 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 4741 See the License for the specific language governing permissions and 4742 limitations under the License. 
4743 */ 4744 4745 package job 4746 4747 import ( 4748 "context" 4749 "fmt" 4750 "time" 4751 4752 "golang.org/x/time/rate" 4753 v1 "k8s.io/api/core/v1" 4754 "k8s.io/apimachinery/pkg/api/errors" 4755 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 4756 "k8s.io/client-go/util/workqueue" 4757 "k8s.io/klog" 4758 ) 4759 4760 func newRateLimitingQueue() workqueue.RateLimitingInterface <span class="cov8" title="1">{ 4761 return workqueue.NewRateLimitingQueue(workqueue.NewMaxOfRateLimiter( 4762 workqueue.NewItemExponentialFailureRateLimiter(5*time.Millisecond, 180*time.Second), 4763 // 10 qps, 100 bucket size. This is only for retry speed and its only the overall factor (not per item) 4764 &workqueue.BucketRateLimiter{Limiter: rate.NewLimiter(rate.Limit(10), 100)}, 4765 )) 4766 }</span> 4767 4768 func (cc *jobcontroller) processResyncTask() <span class="cov0" title="0">{ 4769 obj, shutdown := cc.errTasks.Get() 4770 if shutdown </span><span class="cov0" title="0">{ 4771 return 4772 }</span> 4773 4774 // one task only resync 10 times 4775 <span class="cov0" title="0">if cc.errTasks.NumRequeues(obj) > 10 </span><span class="cov0" title="0">{ 4776 cc.errTasks.Forget(obj) 4777 return 4778 }</span> 4779 4780 <span class="cov0" title="0">defer cc.errTasks.Done(obj) 4781 4782 task, ok := obj.(*v1.Pod) 4783 if !ok </span><span class="cov0" title="0">{ 4784 klog.Errorf("failed to convert %v to *v1.Pod", obj) 4785 return 4786 }</span> 4787 4788 <span class="cov0" title="0">if err := cc.syncTask(task); err != nil </span><span class="cov0" title="0">{ 4789 klog.Errorf("Failed to sync pod <%v/%v>, retry it, err %v", task.Namespace, task.Name, err) 4790 cc.resyncTask(task) 4791 }</span> 4792 } 4793 4794 func (cc *jobcontroller) syncTask(oldTask *v1.Pod) error <span class="cov0" title="0">{ 4795 newPod, err := cc.kubeClient.CoreV1().Pods(oldTask.Namespace).Get(context.TODO(), oldTask.Name, metav1.GetOptions{}) 4796 if err != nil </span><span class="cov0" title="0">{ 4797 if 
errors.IsNotFound(err) </span><span class="cov0" title="0">{ 4798 if err := cc.cache.DeletePod(oldTask); err != nil </span><span class="cov0" title="0">{ 4799 klog.Errorf("failed to delete cache pod <%v/%v>, err %v.", oldTask.Namespace, oldTask.Name, err) 4800 return err 4801 }</span> 4802 <span class="cov0" title="0">klog.V(3).Infof("Pod <%v/%v> was deleted, removed from cache.", oldTask.Namespace, oldTask.Name) 4803 4804 return nil</span> 4805 } 4806 <span class="cov0" title="0">return fmt.Errorf("failed to get Pod <%v/%v>: err %v", oldTask.Namespace, oldTask.Name, err)</span> 4807 } 4808 4809 <span class="cov0" title="0">return cc.cache.UpdatePod(newPod)</span> 4810 } 4811 4812 func (cc *jobcontroller) resyncTask(task *v1.Pod) <span class="cov0" title="0">{ 4813 cc.errTasks.AddRateLimited(task) 4814 }</span> 4815 </pre> 4816 4817 <pre class="file" id="file28" style="display: none">/* 4818 Copyright 2017 The Volcano Authors. 4819 4820 Licensed under the Apache License, Version 2.0 (the "License"); 4821 you may not use this file except in compliance with the License. 4822 You may obtain a copy of the License at 4823 4824 http://www.apache.org/licenses/LICENSE-2.0 4825 4826 Unless required by applicable law or agreed to in writing, software 4827 distributed under the License is distributed on an "AS IS" BASIS, 4828 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 4829 See the License for the specific language governing permissions and 4830 limitations under the License. 
4831 */ 4832 4833 package job 4834 4835 import ( 4836 "fmt" 4837 4838 v1 "k8s.io/api/core/v1" 4839 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 4840 "k8s.io/apimachinery/pkg/runtime/schema" 4841 "k8s.io/klog" 4842 4843 batch "volcano.sh/apis/pkg/apis/batch/v1alpha1" 4844 "volcano.sh/apis/pkg/apis/bus/v1alpha1" 4845 "volcano.sh/apis/pkg/apis/helpers" 4846 schedulingv2 "volcano.sh/apis/pkg/apis/scheduling/v1beta1" 4847 "volcano.sh/volcano/pkg/controllers/apis" 4848 jobhelpers "volcano.sh/volcano/pkg/controllers/job/helpers" 4849 ) 4850 4851 // MakePodName append podname,jobname,taskName and index and returns the string. 4852 func MakePodName(jobName string, taskName string, index int) string <span class="cov8" title="1">{ 4853 return fmt.Sprintf(jobhelpers.PodNameFmt, jobName, taskName, index) 4854 }</span> 4855 4856 func createJobPod(job *batch.Job, template *v1.PodTemplateSpec, topologyPolicy batch.NumaPolicy, ix int, jobForwarding bool) *v1.Pod <span class="cov8" title="1">{ 4857 templateCopy := template.DeepCopy() 4858 4859 pod := &v1.Pod{ 4860 ObjectMeta: metav1.ObjectMeta{ 4861 Name: jobhelpers.MakePodName(job.Name, template.Name, ix), 4862 Namespace: job.Namespace, 4863 OwnerReferences: []metav1.OwnerReference{ 4864 *metav1.NewControllerRef(job, helpers.JobKind), 4865 }, 4866 Labels: templateCopy.Labels, 4867 Annotations: templateCopy.Annotations, 4868 }, 4869 Spec: templateCopy.Spec, 4870 } 4871 4872 // If no scheduler name in Pod, use scheduler name from Job. 
4873 if len(pod.Spec.SchedulerName) == 0 </span><span class="cov8" title="1">{ 4874 pod.Spec.SchedulerName = job.Spec.SchedulerName 4875 }</span> 4876 4877 <span class="cov8" title="1">volumeMap := make(map[string]string) 4878 for _, volume := range job.Spec.Volumes </span><span class="cov8" title="1">{ 4879 vcName := volume.VolumeClaimName 4880 name := fmt.Sprintf("%s-%s", job.Name, jobhelpers.GenRandomStr(12)) 4881 if _, ok := volumeMap[vcName]; !ok </span><span class="cov8" title="1">{ 4882 volume := v1.Volume{ 4883 Name: name, 4884 VolumeSource: v1.VolumeSource{ 4885 PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{ 4886 ClaimName: vcName, 4887 }, 4888 }, 4889 } 4890 pod.Spec.Volumes = append(pod.Spec.Volumes, volume) 4891 volumeMap[vcName] = name 4892 }</span> else<span class="cov0" title="0"> { 4893 // duplicate volumes, should be prevented 4894 continue</span> 4895 } 4896 4897 <span class="cov8" title="1">for i, c := range pod.Spec.Containers </span><span class="cov8" title="1">{ 4898 vm := v1.VolumeMount{ 4899 MountPath: volume.MountPath, 4900 Name: name, 4901 } 4902 pod.Spec.Containers[i].VolumeMounts = append(c.VolumeMounts, vm) 4903 }</span> 4904 } 4905 4906 <span class="cov8" title="1">tsKey := templateCopy.Name 4907 if len(tsKey) == 0 </span><span class="cov8" title="1">{ 4908 tsKey = batch.DefaultTaskSpec 4909 }</span> 4910 4911 <span class="cov8" title="1">if len(pod.Annotations) == 0 </span><span class="cov8" title="1">{ 4912 pod.Annotations = make(map[string]string) 4913 }</span> 4914 4915 <span class="cov8" title="1">pod.Annotations[batch.TaskSpecKey] = tsKey 4916 pod.Annotations[schedulingv2.KubeGroupNameAnnotationKey] = job.Name 4917 pod.Annotations[batch.JobNameKey] = job.Name 4918 pod.Annotations[batch.QueueNameKey] = job.Spec.Queue 4919 pod.Annotations[batch.JobVersion] = fmt.Sprintf("%d", job.Status.Version) 4920 pod.Annotations[batch.PodTemplateKey] = fmt.Sprintf("%s-%s", job.Name, template.Name) 4921 4922 if topologyPolicy != 
"" </span><span class="cov0" title="0">{ 4923 pod.Annotations[schedulingv2.NumaPolicyKey] = string(topologyPolicy) 4924 }</span> 4925 4926 <span class="cov8" title="1">if len(job.Annotations) > 0 </span><span class="cov0" title="0">{ 4927 if value, found := job.Annotations[schedulingv2.PodPreemptable]; found </span><span class="cov0" title="0">{ 4928 pod.Annotations[schedulingv2.PodPreemptable] = value 4929 }</span> 4930 <span class="cov0" title="0">if value, found := job.Annotations[schedulingv2.RevocableZone]; found </span><span class="cov0" title="0">{ 4931 pod.Annotations[schedulingv2.RevocableZone] = value 4932 }</span> 4933 4934 <span class="cov0" title="0">if value, found := job.Annotations[schedulingv2.JDBMinAvailable]; found </span><span class="cov0" title="0">{ 4935 pod.Annotations[schedulingv2.JDBMinAvailable] = value 4936 }</span> else<span class="cov0" title="0"> if value, found := job.Annotations[schedulingv2.JDBMaxUnavailable]; found </span><span class="cov0" title="0">{ 4937 pod.Annotations[schedulingv2.JDBMaxUnavailable] = value 4938 }</span> 4939 } 4940 4941 <span class="cov8" title="1">if len(pod.Labels) == 0 </span><span class="cov8" title="1">{ 4942 pod.Labels = make(map[string]string) 4943 }</span> 4944 4945 // Set pod labels for Service. 
4946 <span class="cov8" title="1">pod.Labels[batch.JobNameKey] = job.Name 4947 pod.Labels[batch.TaskSpecKey] = tsKey 4948 pod.Labels[batch.JobNamespaceKey] = job.Namespace 4949 pod.Labels[batch.QueueNameKey] = job.Spec.Queue 4950 if len(job.Labels) > 0 </span><span class="cov0" title="0">{ 4951 if value, found := job.Labels[schedulingv2.PodPreemptable]; found </span><span class="cov0" title="0">{ 4952 pod.Labels[schedulingv2.PodPreemptable] = value 4953 }</span> 4954 } 4955 4956 <span class="cov8" title="1">if jobForwarding </span><span class="cov0" title="0">{ 4957 pod.Annotations[batch.JobForwardingKey] = "true" 4958 pod.Labels[batch.JobForwardingKey] = "true" 4959 }</span> 4960 4961 <span class="cov8" title="1">return pod</span> 4962 } 4963 4964 func applyPolicies(job *batch.Job, req *apis.Request) v1alpha1.Action <span class="cov8" title="1">{ 4965 if len(req.Action) != 0 </span><span class="cov8" title="1">{ 4966 return req.Action 4967 }</span> 4968 4969 <span class="cov8" title="1">if req.Event == v1alpha1.OutOfSyncEvent </span><span class="cov8" title="1">{ 4970 return v1alpha1.SyncJobAction 4971 }</span> 4972 4973 // For all the requests triggered from discarded job resources will perform sync action instead 4974 <span class="cov8" title="1">if req.JobVersion < job.Status.Version </span><span class="cov0" title="0">{ 4975 klog.Infof("Request %s is outdated, will perform sync instead.", req) 4976 return v1alpha1.SyncJobAction 4977 }</span> 4978 4979 // Overwrite Job level policies 4980 <span class="cov8" title="1">if len(req.TaskName) != 0 </span><span class="cov8" title="1">{ 4981 // Parse task level policies 4982 for _, task := range job.Spec.Tasks </span><span class="cov8" title="1">{ 4983 if task.Name == req.TaskName </span><span class="cov8" title="1">{ 4984 for _, policy := range task.Policies </span><span class="cov8" title="1">{ 4985 policyEvents := getEventlist(policy) 4986 4987 if len(policyEvents) > 0 && len(req.Event) > 0 </span><span 
class="cov8" title="1">{ 4988 if checkEventExist(policyEvents, req.Event) || checkEventExist(policyEvents, v1alpha1.AnyEvent) </span><span class="cov8" title="1">{ 4989 return policy.Action 4990 }</span> 4991 } 4992 4993 // 0 is not an error code, is prevented in validation admission controller 4994 <span class="cov8" title="1">if policy.ExitCode != nil && *policy.ExitCode == req.ExitCode </span><span class="cov8" title="1">{ 4995 return policy.Action 4996 }</span> 4997 } 4998 <span class="cov8" title="1">break</span> 4999 } 5000 } 5001 } 5002 5003 // Parse Job level policies 5004 <span class="cov8" title="1">for _, policy := range job.Spec.Policies </span><span class="cov8" title="1">{ 5005 policyEvents := getEventlist(policy) 5006 5007 if len(policyEvents) > 0 && len(req.Event) > 0 </span><span class="cov8" title="1">{ 5008 if checkEventExist(policyEvents, req.Event) || checkEventExist(policyEvents, v1alpha1.AnyEvent) </span><span class="cov8" title="1">{ 5009 return policy.Action 5010 }</span> 5011 } 5012 5013 // 0 is not an error code, is prevented in validation admission controller 5014 <span class="cov8" title="1">if policy.ExitCode != nil && *policy.ExitCode == req.ExitCode </span><span class="cov8" title="1">{ 5015 return policy.Action 5016 }</span> 5017 } 5018 5019 <span class="cov8" title="1">return v1alpha1.SyncJobAction</span> 5020 } 5021 5022 func getEventlist(policy batch.LifecyclePolicy) []v1alpha1.Event <span class="cov8" title="1">{ 5023 policyEventsList := policy.Events 5024 if len(policy.Event) > 0 </span><span class="cov8" title="1">{ 5025 policyEventsList = append(policyEventsList, policy.Event) 5026 }</span> 5027 <span class="cov8" title="1">return policyEventsList</span> 5028 } 5029 5030 func checkEventExist(policyEvents []v1alpha1.Event, reqEvent v1alpha1.Event) bool <span class="cov8" title="1">{ 5031 for _, event := range policyEvents </span><span class="cov8" title="1">{ 5032 if event == reqEvent </span><span class="cov8" title="1">{ 5033 
return true 5034 }</span> 5035 } 5036 <span class="cov0" title="0">return false</span> 5037 } 5038 5039 func addResourceList(list, req, limit v1.ResourceList) <span class="cov8" title="1">{ 5040 for name, quantity := range req </span><span class="cov8" title="1">{ 5041 if value, ok := list[name]; !ok </span><span class="cov8" title="1">{ 5042 list[name] = quantity.DeepCopy() 5043 }</span> else<span class="cov8" title="1"> { 5044 value.Add(quantity) 5045 list[name] = value 5046 }</span> 5047 } 5048 5049 <span class="cov8" title="1">if req != nil </span><span class="cov8" title="1">{ 5050 return 5051 }</span> 5052 5053 // If Requests is omitted for a container, 5054 // it defaults to Limits if that is explicitly specified. 5055 <span class="cov8" title="1">for name, quantity := range limit </span><span class="cov8" title="1">{ 5056 if value, ok := list[name]; !ok </span><span class="cov8" title="1">{ 5057 list[name] = quantity.DeepCopy() 5058 }</span> else<span class="cov8" title="1"> { 5059 value.Add(quantity) 5060 list[name] = value 5061 }</span> 5062 } 5063 } 5064 5065 // TaskPriority structure. 5066 type TaskPriority struct { 5067 priority int32 5068 5069 batch.TaskSpec 5070 } 5071 5072 // TasksPriority is a slice of TaskPriority. 
5073 type TasksPriority []TaskPriority 5074 5075 func (p TasksPriority) Len() int <span class="cov8" title="1">{ return len(p) }</span> 5076 5077 func (p TasksPriority) Less(i, j int) bool <span class="cov8" title="1">{ 5078 return p[i].priority > p[j].priority 5079 }</span> 5080 5081 func (p TasksPriority) Swap(i, j int) <span class="cov8" title="1">{ p[i], p[j] = p[j], p[i] }</span> 5082 5083 func isControlledBy(obj metav1.Object, gvk schema.GroupVersionKind) bool <span class="cov8" title="1">{ 5084 controllerRef := metav1.GetControllerOf(obj) 5085 if controllerRef == nil </span><span class="cov0" title="0">{ 5086 return false 5087 }</span> 5088 <span class="cov8" title="1">if controllerRef.APIVersion == gvk.GroupVersion().String() && controllerRef.Kind == gvk.Kind </span><span class="cov8" title="1">{ 5089 return true 5090 }</span> 5091 <span class="cov0" title="0">return false</span> 5092 } 5093 </pre> 5094 5095 <pre class="file" id="file29" style="display: none">/* 5096 Copyright 2019 The Volcano Authors. 5097 5098 Licensed under the Apache License, Version 2.0 (the "License"); 5099 you may not use this file except in compliance with the License. 5100 You may obtain a copy of the License at 5101 5102 http://www.apache.org/licenses/LICENSE-2.0 5103 5104 Unless required by applicable law or agreed to in writing, software 5105 distributed under the License is distributed on an "AS IS" BASIS, 5106 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 5107 See the License for the specific language governing permissions and 5108 limitations under the License. 
5109 */ 5110 5111 package ssh 5112 5113 import ( 5114 "crypto/rand" 5115 "crypto/rsa" 5116 "crypto/x509" 5117 "encoding/pem" 5118 "flag" 5119 "fmt" 5120 5121 "golang.org/x/crypto/ssh" 5122 v1 "k8s.io/api/core/v1" 5123 "k8s.io/klog" 5124 5125 batch "volcano.sh/apis/pkg/apis/batch/v1alpha1" 5126 "volcano.sh/apis/pkg/apis/helpers" 5127 jobhelpers "volcano.sh/volcano/pkg/controllers/job/helpers" 5128 pluginsinterface "volcano.sh/volcano/pkg/controllers/job/plugins/interface" 5129 ) 5130 5131 type sshPlugin struct { 5132 // Arguments given for the plugin 5133 pluginArguments []string 5134 5135 client pluginsinterface.PluginClientset 5136 5137 // flag parse args 5138 sshKeyFilePath string 5139 5140 // private key string 5141 sshPrivateKey string 5142 5143 // public key string 5144 sshPublicKey string 5145 } 5146 5147 // New creates ssh plugin 5148 func New(client pluginsinterface.PluginClientset, arguments []string) pluginsinterface.PluginInterface <span class="cov8" title="1">{ 5149 p := sshPlugin{ 5150 pluginArguments: arguments, 5151 client: client, 5152 sshKeyFilePath: SSHAbsolutePath, 5153 } 5154 5155 p.addFlags() 5156 5157 return &p 5158 }</span> 5159 5160 func (sp *sshPlugin) Name() string <span class="cov8" title="1">{ 5161 return "ssh" 5162 }</span> 5163 5164 func (sp *sshPlugin) OnPodCreate(pod *v1.Pod, job *batch.Job) error <span class="cov0" title="0">{ 5165 sp.mountRsaKey(pod, job) 5166 5167 return nil 5168 }</span> 5169 5170 func (sp *sshPlugin) OnJobAdd(job *batch.Job) error <span class="cov0" title="0">{ 5171 if job.Status.ControlledResources["plugin-"+sp.Name()] == sp.Name() </span><span class="cov0" title="0">{ 5172 return nil 5173 }</span> 5174 5175 <span class="cov0" title="0">var data map[string][]byte 5176 var err error 5177 if len(sp.sshPrivateKey) > 0 </span><span class="cov0" title="0">{ 5178 data, err = withUserProvidedRsaKey(job, sp.sshPrivateKey, sp.sshPublicKey) 5179 }</span> else<span class="cov0" title="0"> { 5180 data, err = 
generateRsaKey(job) 5181 }</span> 5182 <span class="cov0" title="0">if err != nil </span><span class="cov0" title="0">{ 5183 return err 5184 }</span> 5185 5186 <span class="cov0" title="0">if err := helpers.CreateOrUpdateSecret(job, sp.client.KubeClients, data, sp.secretName(job)); err != nil </span><span class="cov0" title="0">{ 5187 return fmt.Errorf("create secret for job <%s/%s> with ssh plugin failed for %v", 5188 job.Namespace, job.Name, err) 5189 }</span> 5190 5191 <span class="cov0" title="0">job.Status.ControlledResources["plugin-"+sp.Name()] = sp.Name() 5192 5193 return nil</span> 5194 } 5195 5196 func (sp *sshPlugin) OnJobDelete(job *batch.Job) error <span class="cov0" title="0">{ 5197 if job.Status.ControlledResources["plugin-"+sp.Name()] != sp.Name() </span><span class="cov0" title="0">{ 5198 return nil 5199 }</span> 5200 <span class="cov0" title="0">if err := helpers.DeleteSecret(job, sp.client.KubeClients, sp.secretName(job)); err != nil </span><span class="cov0" title="0">{ 5201 return err 5202 }</span> 5203 <span class="cov0" title="0">delete(job.Status.ControlledResources, "plugin-"+sp.Name()) 5204 5205 return nil</span> 5206 } 5207 5208 // TODO: currently a container using a Secret as a subPath volume mount will not receive Secret updates. 5209 // we may not update the job secret due to the above reason now. 
5210 // related issue: https://github.com/volcano-sh/volcano/issues/1420 5211 func (sp *sshPlugin) OnJobUpdate(job *batch.Job) error <span class="cov0" title="0">{ 5212 //data, err := generateRsaKey(job) 5213 //if err != nil { 5214 // return err 5215 //} 5216 // 5217 //if err := helpers.CreateOrUpdateSecret(job, sp.client.KubeClients, data, sp.secretName(job)); err != nil { 5218 // return fmt.Errorf("update secret for job <%s/%s> with ssh plugin failed for %v", 5219 // job.Namespace, job.Name, err) 5220 //} 5221 5222 return nil 5223 }</span> 5224 5225 func (sp *sshPlugin) mountRsaKey(pod *v1.Pod, job *batch.Job) <span class="cov0" title="0">{ 5226 secretName := sp.secretName(job) 5227 5228 sshVolume := v1.Volume{ 5229 Name: secretName, 5230 } 5231 5232 var mode int32 = 0600 5233 sshVolume.Secret = &v1.SecretVolumeSource{ 5234 SecretName: secretName, 5235 Items: []v1.KeyToPath{ 5236 { 5237 Key: SSHPrivateKey, 5238 Path: SSHRelativePath + "/" + SSHPrivateKey, 5239 }, 5240 { 5241 Key: SSHPublicKey, 5242 Path: SSHRelativePath + "/" + SSHPublicKey, 5243 }, 5244 { 5245 Key: SSHAuthorizedKeys, 5246 Path: SSHRelativePath + "/" + SSHAuthorizedKeys, 5247 }, 5248 { 5249 Key: SSHConfig, 5250 Path: SSHRelativePath + "/" + SSHConfig, 5251 }, 5252 }, 5253 DefaultMode: &mode, 5254 } 5255 5256 if sp.sshKeyFilePath != SSHAbsolutePath </span><span class="cov0" title="0">{ 5257 var noRootMode int32 = 0600 5258 sshVolume.Secret.DefaultMode = &noRootMode 5259 }</span> 5260 5261 <span class="cov0" title="0">pod.Spec.Volumes = append(pod.Spec.Volumes, sshVolume) 5262 5263 for i, c := range pod.Spec.Containers </span><span class="cov0" title="0">{ 5264 vm := v1.VolumeMount{ 5265 MountPath: sp.sshKeyFilePath, 5266 SubPath: SSHRelativePath, 5267 Name: secretName, 5268 } 5269 5270 pod.Spec.Containers[i].VolumeMounts = append(c.VolumeMounts, vm) 5271 }</span> 5272 <span class="cov0" title="0">for i, c := range pod.Spec.InitContainers </span><span class="cov0" title="0">{ 5273 vm := 
v1.VolumeMount{ 5274 MountPath: sp.sshKeyFilePath, 5275 SubPath: SSHRelativePath, 5276 Name: secretName, 5277 } 5278 5279 pod.Spec.InitContainers[i].VolumeMounts = append(c.VolumeMounts, vm) 5280 }</span> 5281 } 5282 5283 func generateRsaKey(job *batch.Job) (map[string][]byte, error) <span class="cov0" title="0">{ 5284 bitSize := 1024 5285 5286 privateKey, err := rsa.GenerateKey(rand.Reader, bitSize) 5287 if err != nil </span><span class="cov0" title="0">{ 5288 klog.Errorf("rsa generateKey err: %v", err) 5289 return nil, err 5290 }</span> 5291 5292 // id_rsa 5293 <span class="cov0" title="0">privBlock := pem.Block{ 5294 Type: "RSA PRIVATE KEY", 5295 Bytes: x509.MarshalPKCS1PrivateKey(privateKey), 5296 } 5297 privateKeyBytes := pem.EncodeToMemory(&privBlock) 5298 5299 // id_rsa.pub 5300 publicRsaKey, err := ssh.NewPublicKey(&privateKey.PublicKey) 5301 if err != nil </span><span class="cov0" title="0">{ 5302 klog.Errorf("ssh newPublicKey err: %v", err) 5303 return nil, err 5304 }</span> 5305 <span class="cov0" title="0">publicKeyBytes := ssh.MarshalAuthorizedKey(publicRsaKey) 5306 5307 data := make(map[string][]byte) 5308 data[SSHPrivateKey] = privateKeyBytes 5309 data[SSHPublicKey] = publicKeyBytes 5310 data[SSHAuthorizedKeys] = publicKeyBytes 5311 data[SSHConfig] = []byte(generateSSHConfig(job)) 5312 5313 return data, nil</span> 5314 } 5315 5316 func withUserProvidedRsaKey(job *batch.Job, sshPrivateKey string, sshPublicKey string) (map[string][]byte, error) <span class="cov0" title="0">{ 5317 data := make(map[string][]byte) 5318 data[SSHPrivateKey] = []byte(sshPrivateKey) 5319 data[SSHPublicKey] = []byte(sshPublicKey) 5320 data[SSHAuthorizedKeys] = []byte(sshPublicKey) 5321 data[SSHConfig] = []byte(generateSSHConfig(job)) 5322 5323 return data, nil 5324 }</span> 5325 5326 func (sp *sshPlugin) secretName(job *batch.Job) string <span class="cov0" title="0">{ 5327 return fmt.Sprintf("%s-%s", job.Name, sp.Name()) 5328 }</span> 5329 5330 func (sp *sshPlugin) addFlags() 
<span class="cov8" title="1">{ 5331 flagSet := flag.NewFlagSet(sp.Name(), flag.ContinueOnError) 5332 flagSet.StringVar(&sp.sshKeyFilePath, "ssh-key-file-path", sp.sshKeyFilePath, "The path used to store "+ 5333 "ssh private and public keys, it is `/root/.ssh` by default.") 5334 flagSet.StringVar(&sp.sshPrivateKey, "ssh-private-key", sp.sshPrivateKey, "The input string of the private key") 5335 flagSet.StringVar(&sp.sshPublicKey, "ssh-public-key", sp.sshPublicKey, "The input string of the public key") 5336 5337 if err := flagSet.Parse(sp.pluginArguments); err != nil </span><span class="cov0" title="0">{ 5338 klog.Errorf("plugin %s flagset parse failed, err: %v", sp.Name(), err) 5339 }</span> 5340 } 5341 5342 func generateSSHConfig(job *batch.Job) string <span class="cov0" title="0">{ 5343 config := "StrictHostKeyChecking no\nUserKnownHostsFile /dev/null\n" 5344 5345 for _, ts := range job.Spec.Tasks </span><span class="cov0" title="0">{ 5346 for i := 0; i < int(ts.Replicas); i++ </span><span class="cov0" title="0">{ 5347 hostName := ts.Template.Spec.Hostname 5348 subdomain := ts.Template.Spec.Subdomain 5349 if len(hostName) == 0 </span><span class="cov0" title="0">{ 5350 hostName = jobhelpers.MakePodName(job.Name, ts.Name, i) 5351 }</span> 5352 <span class="cov0" title="0">if len(subdomain) == 0 </span><span class="cov0" title="0">{ 5353 subdomain = job.Name 5354 }</span> 5355 5356 <span class="cov0" title="0">config += "Host " + hostName + "\n" 5357 config += " HostName " + hostName + "." + subdomain + "\n" 5358 if len(ts.Template.Spec.Hostname) != 0 </span><span class="cov0" title="0">{ 5359 break</span> 5360 } 5361 } 5362 } 5363 5364 <span class="cov0" title="0">return config</span> 5365 } 5366 </pre> 5367 5368 <pre class="file" id="file30" style="display: none">/* 5369 Copyright 2019 The Volcano Authors. 5370 5371 Licensed under the Apache License, Version 2.0 (the "License"); 5372 you may not use this file except in compliance with the License. 
5373 You may obtain a copy of the License at 5374 5375 http://www.apache.org/licenses/LICENSE-2.0 5376 5377 Unless required by applicable law or agreed to in writing, software 5378 distributed under the License is distributed on an "AS IS" BASIS, 5379 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 5380 See the License for the specific language governing permissions and 5381 limitations under the License. 5382 */ 5383 5384 package podgroup 5385 5386 import ( 5387 v1 "k8s.io/api/core/v1" 5388 "k8s.io/apimachinery/pkg/util/wait" 5389 coreinformers "k8s.io/client-go/informers/core/v1" 5390 "k8s.io/client-go/kubernetes" 5391 corelisters "k8s.io/client-go/listers/core/v1" 5392 "k8s.io/client-go/tools/cache" 5393 "k8s.io/client-go/util/workqueue" 5394 "k8s.io/klog" 5395 5396 scheduling "volcano.sh/apis/pkg/apis/scheduling/v1beta1" 5397 vcclientset "volcano.sh/apis/pkg/client/clientset/versioned" 5398 informerfactory "volcano.sh/apis/pkg/client/informers/externalversions" 5399 schedulinginformer "volcano.sh/apis/pkg/client/informers/externalversions/scheduling/v1beta1" 5400 schedulinglister "volcano.sh/apis/pkg/client/listers/scheduling/v1beta1" 5401 "volcano.sh/volcano/pkg/controllers/framework" 5402 ) 5403 5404 func init() <span class="cov8" title="1">{ 5405 framework.RegisterController(&pgcontroller{}) 5406 }</span> 5407 5408 // pgcontroller the Podgroup pgcontroller type. 
5409 type pgcontroller struct { 5410 kubeClient kubernetes.Interface 5411 vcClient vcclientset.Interface 5412 5413 podInformer coreinformers.PodInformer 5414 pgInformer schedulinginformer.PodGroupInformer 5415 5416 // A store of pods 5417 podLister corelisters.PodLister 5418 podSynced func() bool 5419 5420 // A store of podgroups 5421 pgLister schedulinglister.PodGroupLister 5422 pgSynced func() bool 5423 5424 queue workqueue.RateLimitingInterface 5425 } 5426 5427 func (pg *pgcontroller) Name() string <span class="cov8" title="1">{ 5428 return "pg-controller" 5429 }</span> 5430 5431 // Initialize create new Podgroup Controller. 5432 func (pg *pgcontroller) Initialize(opt *framework.ControllerOption) error <span class="cov8" title="1">{ 5433 pg.kubeClient = opt.KubeClient 5434 pg.vcClient = opt.VolcanoClient 5435 5436 pg.queue = workqueue.NewRateLimitingQueue(workqueue.DefaultControllerRateLimiter()) 5437 5438 pg.podInformer = opt.SharedInformerFactory.Core().V1().Pods() 5439 pg.podLister = pg.podInformer.Lister() 5440 pg.podSynced = pg.podInformer.Informer().HasSynced 5441 pg.podInformer.Informer().AddEventHandler( 5442 cache.FilteringResourceEventHandler{ 5443 FilterFunc: func(obj interface{}) bool </span><span class="cov0" title="0">{ 5444 switch v := obj.(type) </span>{ 5445 case *v1.Pod:<span class="cov0" title="0"> 5446 if v.Spec.SchedulerName == opt.SchedulerName && 5447 (v.Annotations == nil || v.Annotations[scheduling.KubeGroupNameAnnotationKey] == "") </span><span class="cov0" title="0">{ 5448 return true 5449 }</span> 5450 <span class="cov0" title="0">return false</span> 5451 default:<span class="cov0" title="0"> 5452 return false</span> 5453 } 5454 }, 5455 Handler: cache.ResourceEventHandlerFuncs{ 5456 AddFunc: pg.addPod, 5457 }, 5458 }) 5459 5460 <span class="cov8" title="1">pg.pgInformer = informerfactory.NewSharedInformerFactory(pg.vcClient, 0).Scheduling().V1beta1().PodGroups() 5461 pg.pgLister = pg.pgInformer.Lister() 5462 pg.pgSynced = 
pg.pgInformer.Informer().HasSynced 5463 5464 return nil</span> 5465 } 5466 5467 // Run start NewPodgroupController. 5468 func (pg *pgcontroller) Run(stopCh <-chan struct{}) <span class="cov0" title="0">{ 5469 go pg.podInformer.Informer().Run(stopCh) 5470 go pg.pgInformer.Informer().Run(stopCh) 5471 5472 cache.WaitForCacheSync(stopCh, pg.podSynced, pg.pgSynced) 5473 5474 go wait.Until(pg.worker, 0, stopCh) 5475 5476 klog.Infof("PodgroupController is running ...... ") 5477 }</span> 5478 5479 func (pg *pgcontroller) worker() <span class="cov0" title="0">{ 5480 for pg.processNextReq() </span>{<span class="cov0" title="0"> 5481 }</span> 5482 } 5483 5484 func (pg *pgcontroller) processNextReq() bool <span class="cov0" title="0">{ 5485 obj, shutdown := pg.queue.Get() 5486 if shutdown </span><span class="cov0" title="0">{ 5487 klog.Errorf("Fail to pop item from queue") 5488 return false 5489 }</span> 5490 5491 <span class="cov0" title="0">req := obj.(podRequest) 5492 defer pg.queue.Done(req) 5493 5494 pod, err := pg.podLister.Pods(req.podNamespace).Get(req.podName) 5495 if err != nil </span><span class="cov0" title="0">{ 5496 klog.Errorf("Failed to get pod by <%v> from cache: %v", req, err) 5497 return true 5498 }</span> 5499 5500 // normal pod use volcano 5501 <span class="cov0" title="0">if err := pg.createNormalPodPGIfNotExist(pod); err != nil </span><span class="cov0" title="0">{ 5502 klog.Errorf("Failed to handle Pod <%s/%s>: %v", pod.Namespace, pod.Name, err) 5503 pg.queue.AddRateLimited(req) 5504 return true 5505 }</span> 5506 5507 // If no error, forget it. 5508 <span class="cov0" title="0">pg.queue.Forget(req) 5509 5510 return true</span> 5511 } 5512 </pre> 5513 5514 <pre class="file" id="file31" style="display: none">/* 5515 Copyright 2019 The Volcano Authors. 5516 5517 Licensed under the Apache License, Version 2.0 (the "License"); 5518 you may not use this file except in compliance with the License. 
5519 You may obtain a copy of the License at 5520 5521 http://www.apache.org/licenses/LICENSE-2.0 5522 5523 Unless required by applicable law or agreed to in writing, software 5524 distributed under the License is distributed on an "AS IS" BASIS, 5525 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 5526 See the License for the specific language governing permissions and 5527 limitations under the License. 5528 */ 5529 5530 package podgroup 5531 5532 import ( 5533 "context" 5534 5535 v1 "k8s.io/api/core/v1" 5536 apierrors "k8s.io/apimachinery/pkg/api/errors" 5537 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 5538 "k8s.io/apimachinery/pkg/runtime/schema" 5539 "k8s.io/klog" 5540 5541 "volcano.sh/apis/pkg/apis/helpers" 5542 scheduling "volcano.sh/apis/pkg/apis/scheduling/v1beta1" 5543 ) 5544 5545 type podRequest struct { 5546 podName string 5547 podNamespace string 5548 } 5549 5550 func (pg *pgcontroller) addPod(obj interface{}) <span class="cov8" title="1">{ 5551 pod, ok := obj.(*v1.Pod) 5552 if !ok </span><span class="cov0" title="0">{ 5553 klog.Errorf("Failed to convert %v to v1.Pod", obj) 5554 return 5555 }</span> 5556 5557 <span class="cov8" title="1">req := podRequest{ 5558 podName: pod.Name, 5559 podNamespace: pod.Namespace, 5560 } 5561 5562 pg.queue.Add(req)</span> 5563 } 5564 5565 func (pg *pgcontroller) updatePodAnnotations(pod *v1.Pod, pgName string) error <span class="cov8" title="1">{ 5566 if pod.Annotations == nil </span><span class="cov8" title="1">{ 5567 pod.Annotations = make(map[string]string) 5568 }</span> 5569 <span class="cov8" title="1">if pod.Annotations[scheduling.KubeGroupNameAnnotationKey] == "" </span><span class="cov8" title="1">{ 5570 pod.Annotations[scheduling.KubeGroupNameAnnotationKey] = pgName 5571 }</span> else<span class="cov0" title="0"> { 5572 if pod.Annotations[scheduling.KubeGroupNameAnnotationKey] != pgName </span><span class="cov0" title="0">{ 5573 klog.Errorf("normal pod %s/%s annotations %s value is 
not %s, but %s", pod.Namespace, pod.Name, 5574 scheduling.KubeGroupNameAnnotationKey, pgName, pod.Annotations[scheduling.KubeGroupNameAnnotationKey]) 5575 }</span> 5576 <span class="cov0" title="0">return nil</span> 5577 } 5578 5579 <span class="cov8" title="1">if _, err := pg.kubeClient.CoreV1().Pods(pod.Namespace).Update(context.TODO(), pod, metav1.UpdateOptions{}); err != nil </span><span class="cov0" title="0">{ 5580 klog.Errorf("Failed to update pod <%s/%s>: %v", pod.Namespace, pod.Name, err) 5581 return err 5582 }</span> 5583 5584 <span class="cov8" title="1">return nil</span> 5585 } 5586 5587 func (pg *pgcontroller) createNormalPodPGIfNotExist(pod *v1.Pod) error <span class="cov8" title="1">{ 5588 pgName := helpers.GeneratePodgroupName(pod) 5589 5590 if _, err := pg.pgLister.PodGroups(pod.Namespace).Get(pgName); err != nil </span><span class="cov8" title="1">{ 5591 if !apierrors.IsNotFound(err) </span><span class="cov0" title="0">{ 5592 klog.Errorf("Failed to get normal PodGroup for Pod <%s/%s>: %v", 5593 pod.Namespace, pod.Name, err) 5594 return err 5595 }</span> 5596 5597 <span class="cov8" title="1">obj := &scheduling.PodGroup{ 5598 ObjectMeta: metav1.ObjectMeta{ 5599 Namespace: pod.Namespace, 5600 Name: pgName, 5601 OwnerReferences: newPGOwnerReferences(pod), 5602 Annotations: map[string]string{}, 5603 Labels: map[string]string{}, 5604 }, 5605 Spec: scheduling.PodGroupSpec{ 5606 MinMember: 1, 5607 PriorityClassName: pod.Spec.PriorityClassName, 5608 MinResources: calcPGMinResources(pod), 5609 }, 5610 } 5611 if queueName, ok := pod.Annotations[scheduling.QueueNameAnnotationKey]; ok </span><span class="cov0" title="0">{ 5612 obj.Spec.Queue = queueName 5613 }</span> 5614 5615 <span class="cov8" title="1">if value, ok := pod.Annotations[scheduling.PodPreemptable]; ok </span><span class="cov0" title="0">{ 5616 obj.Annotations[scheduling.PodPreemptable] = value 5617 }</span> 5618 <span class="cov8" title="1">if value, ok := 
pod.Annotations[scheduling.RevocableZone]; ok </span><span class="cov0" title="0">{ 5619 obj.Annotations[scheduling.RevocableZone] = value 5620 }</span> 5621 <span class="cov8" title="1">if value, ok := pod.Labels[scheduling.PodPreemptable]; ok </span><span class="cov0" title="0">{ 5622 obj.Labels[scheduling.PodPreemptable] = value 5623 }</span> 5624 5625 <span class="cov8" title="1">if value, found := pod.Annotations[scheduling.JDBMinAvailable]; found </span><span class="cov0" title="0">{ 5626 obj.Annotations[scheduling.JDBMinAvailable] = value 5627 }</span> else<span class="cov8" title="1"> if value, found := pod.Annotations[scheduling.JDBMaxUnavailable]; found </span><span class="cov0" title="0">{ 5628 obj.Annotations[scheduling.JDBMaxUnavailable] = value 5629 }</span> 5630 5631 <span class="cov8" title="1">if _, err := pg.vcClient.SchedulingV1beta1().PodGroups(pod.Namespace).Create(context.TODO(), obj, metav1.CreateOptions{}); err != nil </span><span class="cov0" title="0">{ 5632 klog.Errorf("Failed to create normal PodGroup for Pod <%s/%s>: %v", 5633 pod.Namespace, pod.Name, err) 5634 return err 5635 }</span> 5636 } 5637 5638 <span class="cov8" title="1">return pg.updatePodAnnotations(pod, pgName)</span> 5639 } 5640 5641 func newPGOwnerReferences(pod *v1.Pod) []metav1.OwnerReference <span class="cov8" title="1">{ 5642 if len(pod.OwnerReferences) != 0 </span><span class="cov8" title="1">{ 5643 for _, ownerReference := range pod.OwnerReferences </span><span class="cov8" title="1">{ 5644 if ownerReference.Controller != nil && *ownerReference.Controller </span><span class="cov8" title="1">{ 5645 return pod.OwnerReferences 5646 }</span> 5647 } 5648 } 5649 5650 <span class="cov8" title="1">gvk := schema.GroupVersionKind{ 5651 Group: v1.SchemeGroupVersion.Group, 5652 Version: v1.SchemeGroupVersion.Version, 5653 Kind: "Pod", 5654 } 5655 ref := metav1.NewControllerRef(pod, gvk) 5656 return []metav1.OwnerReference{*ref}</span> 5657 } 5658 5659 // addResourceList add 
list resource quantity 5660 func addResourceList(list, req, limit v1.ResourceList) <span class="cov0" title="0">{ 5661 for name, quantity := range req </span><span class="cov0" title="0">{ 5662 if value, ok := list[name]; !ok </span><span class="cov0" title="0">{ 5663 list[name] = quantity.DeepCopy() 5664 }</span> else<span class="cov0" title="0"> { 5665 value.Add(quantity) 5666 list[name] = value 5667 }</span> 5668 } 5669 5670 <span class="cov0" title="0">if req != nil </span><span class="cov0" title="0">{ 5671 return 5672 }</span> 5673 5674 // If Requests is omitted for a container, 5675 // it defaults to Limits if that is explicitly specified. 5676 <span class="cov0" title="0">for name, quantity := range limit </span><span class="cov0" title="0">{ 5677 if value, ok := list[name]; !ok </span><span class="cov0" title="0">{ 5678 list[name] = quantity.DeepCopy() 5679 }</span> else<span class="cov0" title="0"> { 5680 value.Add(quantity) 5681 list[name] = value 5682 }</span> 5683 } 5684 } 5685 5686 // calcPGMinResources calculate podgroup minimum resource 5687 func calcPGMinResources(pod *v1.Pod) *v1.ResourceList <span class="cov8" title="1">{ 5688 pgMinRes := v1.ResourceList{} 5689 5690 for _, c := range pod.Spec.Containers </span><span class="cov0" title="0">{ 5691 addResourceList(pgMinRes, c.Resources.Requests, c.Resources.Limits) 5692 }</span> 5693 5694 <span class="cov8" title="1">return &pgMinRes</span> 5695 } 5696 </pre> 5697 5698 <pre class="file" id="file32" style="display: none">/* 5699 Copyright 2019 The Volcano Authors. 5700 5701 Licensed under the Apache License, Version 2.0 (the "License"); 5702 you may not use this file except in compliance with the License. 
5703 You may obtain a copy of the License at 5704 5705 http://www.apache.org/licenses/LICENSE-2.0 5706 5707 Unless required by applicable law or agreed to in writing, software 5708 distributed under the License is distributed on an "AS IS" BASIS, 5709 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 5710 See the License for the specific language governing permissions and 5711 limitations under the License. 5712 */ 5713 5714 package queue 5715 5716 import ( 5717 "context" 5718 "fmt" 5719 "sync" 5720 "time" 5721 5722 v1 "k8s.io/api/core/v1" 5723 apierrors "k8s.io/apimachinery/pkg/api/errors" 5724 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 5725 utilruntime "k8s.io/apimachinery/pkg/util/runtime" 5726 "k8s.io/apimachinery/pkg/util/wait" 5727 "k8s.io/client-go/kubernetes" 5728 corev1 "k8s.io/client-go/kubernetes/typed/core/v1" 5729 "k8s.io/client-go/tools/cache" 5730 "k8s.io/client-go/tools/record" 5731 "k8s.io/client-go/util/workqueue" 5732 "k8s.io/klog" 5733 5734 busv1alpha1 "volcano.sh/apis/pkg/apis/bus/v1alpha1" 5735 vcclientset "volcano.sh/apis/pkg/client/clientset/versioned" 5736 versionedscheme "volcano.sh/apis/pkg/client/clientset/versioned/scheme" 5737 informerfactory "volcano.sh/apis/pkg/client/informers/externalversions" 5738 busv1alpha1informer "volcano.sh/apis/pkg/client/informers/externalversions/bus/v1alpha1" 5739 schedulinginformer "volcano.sh/apis/pkg/client/informers/externalversions/scheduling/v1beta1" 5740 busv1alpha1lister "volcano.sh/apis/pkg/client/listers/bus/v1alpha1" 5741 schedulinglister "volcano.sh/apis/pkg/client/listers/scheduling/v1beta1" 5742 "volcano.sh/volcano/pkg/controllers/apis" 5743 "volcano.sh/volcano/pkg/controllers/framework" 5744 queuestate "volcano.sh/volcano/pkg/controllers/queue/state" 5745 ) 5746 5747 func init() <span class="cov8" title="1">{ 5748 framework.RegisterController(&queuecontroller{}) 5749 }</span> 5750 5751 // queuecontroller manages queue status. 
5752 type queuecontroller struct { 5753 kubeClient kubernetes.Interface 5754 vcClient vcclientset.Interface 5755 5756 // informer 5757 queueInformer schedulinginformer.QueueInformer 5758 pgInformer schedulinginformer.PodGroupInformer 5759 5760 // queueLister 5761 queueLister schedulinglister.QueueLister 5762 queueSynced cache.InformerSynced 5763 5764 // podGroup lister 5765 pgLister schedulinglister.PodGroupLister 5766 pgSynced cache.InformerSynced 5767 5768 cmdInformer busv1alpha1informer.CommandInformer 5769 cmdLister busv1alpha1lister.CommandLister 5770 cmdSynced cache.InformerSynced 5771 5772 // queues that need to be updated. 5773 queue workqueue.RateLimitingInterface 5774 commandQueue workqueue.RateLimitingInterface 5775 5776 pgMutex sync.RWMutex 5777 // queue name -> podgroup namespace/name 5778 podGroups map[string]map[string]struct{} 5779 5780 syncHandler func(req *apis.Request) error 5781 syncCommandHandler func(cmd *busv1alpha1.Command) error 5782 5783 enqueueQueue func(req *apis.Request) 5784 5785 recorder record.EventRecorder 5786 maxRequeueNum int 5787 } 5788 5789 func (c *queuecontroller) Name() string <span class="cov8" title="1">{ 5790 return "queue-controller" 5791 }</span> 5792 5793 // NewQueueController creates a QueueController. 
5794 func (c *queuecontroller) Initialize(opt *framework.ControllerOption) error <span class="cov8" title="1">{ 5795 c.vcClient = opt.VolcanoClient 5796 c.kubeClient = opt.KubeClient 5797 5798 factory := informerfactory.NewSharedInformerFactory(c.vcClient, 0) 5799 queueInformer := factory.Scheduling().V1beta1().Queues() 5800 pgInformer := factory.Scheduling().V1beta1().PodGroups() 5801 5802 eventBroadcaster := record.NewBroadcaster() 5803 eventBroadcaster.StartLogging(klog.Infof) 5804 eventBroadcaster.StartRecordingToSink(&corev1.EventSinkImpl{Interface: c.kubeClient.CoreV1().Events("")}) 5805 5806 c.queueInformer = queueInformer 5807 c.pgInformer = pgInformer 5808 c.queueLister = queueInformer.Lister() 5809 c.queueSynced = queueInformer.Informer().HasSynced 5810 c.pgLister = pgInformer.Lister() 5811 c.pgSynced = pgInformer.Informer().HasSynced 5812 c.queue = workqueue.NewRateLimitingQueue(workqueue.DefaultControllerRateLimiter()) 5813 c.commandQueue = workqueue.NewRateLimitingQueue(workqueue.DefaultControllerRateLimiter()) 5814 c.podGroups = make(map[string]map[string]struct{}) 5815 c.recorder = eventBroadcaster.NewRecorder(versionedscheme.Scheme, v1.EventSource{Component: "vc-controller-manager"}) 5816 c.maxRequeueNum = opt.MaxRequeueNum 5817 if c.maxRequeueNum < 0 </span><span class="cov0" title="0">{ 5818 c.maxRequeueNum = -1 5819 }</span> 5820 5821 <span class="cov8" title="1">queueInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ 5822 AddFunc: c.addQueue, 5823 UpdateFunc: c.updateQueue, 5824 DeleteFunc: c.deleteQueue, 5825 }) 5826 5827 pgInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ 5828 AddFunc: c.addPodGroup, 5829 UpdateFunc: c.updatePodGroup, 5830 DeleteFunc: c.deletePodGroup, 5831 }) 5832 5833 c.cmdInformer = informerfactory.NewSharedInformerFactory(c.vcClient, 0).Bus().V1alpha1().Commands() 5834 c.cmdInformer.Informer().AddEventHandler(cache.FilteringResourceEventHandler{ 5835 FilterFunc: func(obj interface{}) 
bool </span><span class="cov0" title="0">{ 5836 switch v := obj.(type) </span>{ 5837 case *busv1alpha1.Command:<span class="cov0" title="0"> 5838 return IsQueueReference(v.TargetObject)</span> 5839 default:<span class="cov0" title="0"> 5840 return false</span> 5841 } 5842 }, 5843 Handler: cache.ResourceEventHandlerFuncs{ 5844 AddFunc: c.addCommand, 5845 }, 5846 }) 5847 <span class="cov8" title="1">c.cmdLister = c.cmdInformer.Lister() 5848 c.cmdSynced = c.cmdInformer.Informer().HasSynced 5849 5850 queuestate.SyncQueue = c.syncQueue 5851 queuestate.OpenQueue = c.openQueue 5852 queuestate.CloseQueue = c.closeQueue 5853 5854 c.syncHandler = c.handleQueue 5855 c.syncCommandHandler = c.handleCommand 5856 5857 c.enqueueQueue = c.enqueue 5858 5859 return nil</span> 5860 } 5861 5862 // Run starts QueueController. 5863 func (c *queuecontroller) Run(stopCh <-chan struct{}) <span class="cov0" title="0">{ 5864 defer utilruntime.HandleCrash() 5865 defer c.queue.ShutDown() 5866 defer c.commandQueue.ShutDown() 5867 5868 klog.Infof("Starting queue controller.") 5869 defer klog.Infof("Shutting down queue controller.") 5870 5871 go c.queueInformer.Informer().Run(stopCh) 5872 go c.pgInformer.Informer().Run(stopCh) 5873 go c.cmdInformer.Informer().Run(stopCh) 5874 5875 if !cache.WaitForCacheSync(stopCh, c.queueSynced, c.pgSynced, c.cmdSynced) </span><span class="cov0" title="0">{ 5876 klog.Errorf("unable to sync caches for queue controller.") 5877 return 5878 }</span> 5879 5880 <span class="cov0" title="0">go wait.Until(c.worker, 0, stopCh) 5881 go wait.Until(c.commandWorker, 0, stopCh) 5882 5883 <-stopCh</span> 5884 } 5885 5886 // worker runs a worker thread that just dequeues items, processes them, and 5887 // marks them done. You may run as many of these in parallel as you wish; the 5888 // workqueue guarantees that they will not end up processing the same `queue` 5889 // at the same time. 
5890 func (c *queuecontroller) worker() <span class="cov0" title="0">{ 5891 for c.processNextWorkItem() </span>{<span class="cov0" title="0"> 5892 }</span> 5893 } 5894 5895 func (c *queuecontroller) processNextWorkItem() bool <span class="cov8" title="1">{ 5896 obj, shutdown := c.queue.Get() 5897 if shutdown </span><span class="cov0" title="0">{ 5898 return false 5899 }</span> 5900 <span class="cov8" title="1">defer c.queue.Done(obj) 5901 5902 req, ok := obj.(*apis.Request) 5903 if !ok </span><span class="cov8" title="1">{ 5904 klog.Errorf("%v is not a valid queue request struct.", obj) 5905 return true 5906 }</span> 5907 5908 <span class="cov0" title="0">err := c.syncHandler(req) 5909 c.handleQueueErr(err, obj) 5910 5911 return true</span> 5912 } 5913 5914 func (c *queuecontroller) handleQueue(req *apis.Request) error <span class="cov0" title="0">{ 5915 startTime := time.Now() 5916 defer func() </span><span class="cov0" title="0">{ 5917 klog.V(4).Infof("Finished syncing queue %s (%v).", req.QueueName, time.Since(startTime)) 5918 }</span>() 5919 5920 <span class="cov0" title="0">queue, err := c.queueLister.Get(req.QueueName) 5921 if err != nil </span><span class="cov0" title="0">{ 5922 if apierrors.IsNotFound(err) </span><span class="cov0" title="0">{ 5923 klog.V(4).Infof("Queue %s has been deleted.", req.QueueName) 5924 return nil 5925 }</span> 5926 5927 <span class="cov0" title="0">return fmt.Errorf("get queue %s failed for %v", req.QueueName, err)</span> 5928 } 5929 5930 <span class="cov0" title="0">queueState := queuestate.NewState(queue) 5931 if queueState == nil </span><span class="cov0" title="0">{ 5932 return fmt.Errorf("queue %s state %s is invalid", queue.Name, queue.Status.State) 5933 }</span> 5934 5935 <span class="cov0" title="0">klog.V(4).Infof("Begin execute %s action for queue %s, current status %s", req.Action, req.QueueName, queue.Status.State) 5936 if err := queueState.Execute(req.Action); err != nil </span><span class="cov0" title="0">{ 5937 
return fmt.Errorf("sync queue %s failed for %v, event is %v, action is %s", 5938 req.QueueName, err, req.Event, req.Action) 5939 }</span> 5940 5941 <span class="cov0" title="0">return nil</span> 5942 } 5943 5944 func (c *queuecontroller) handleQueueErr(err error, obj interface{}) <span class="cov0" title="0">{ 5945 if err == nil </span><span class="cov0" title="0">{ 5946 c.queue.Forget(obj) 5947 return 5948 }</span> 5949 5950 <span class="cov0" title="0">if c.maxRequeueNum == -1 || c.queue.NumRequeues(obj) < c.maxRequeueNum </span><span class="cov0" title="0">{ 5951 klog.V(4).Infof("Error syncing queue request %v for %v.", obj, err) 5952 c.queue.AddRateLimited(obj) 5953 return 5954 }</span> 5955 5956 <span class="cov0" title="0">req, _ := obj.(*apis.Request) 5957 c.recordEventsForQueue(req.QueueName, v1.EventTypeWarning, string(req.Action), 5958 fmt.Sprintf("%v queue failed for %v", req.Action, err)) 5959 klog.V(2).Infof("Dropping queue request %v out of the queue for %v.", obj, err) 5960 c.queue.Forget(obj)</span> 5961 } 5962 5963 func (c *queuecontroller) commandWorker() <span class="cov0" title="0">{ 5964 for c.processNextCommand() </span>{<span class="cov0" title="0"> 5965 }</span> 5966 } 5967 5968 func (c *queuecontroller) processNextCommand() bool <span class="cov0" title="0">{ 5969 obj, shutdown := c.commandQueue.Get() 5970 if shutdown </span><span class="cov0" title="0">{ 5971 return false 5972 }</span> 5973 <span class="cov0" title="0">defer c.commandQueue.Done(obj) 5974 5975 cmd, ok := obj.(*busv1alpha1.Command) 5976 if !ok </span><span class="cov0" title="0">{ 5977 klog.Errorf("%v is not a valid Command struct.", obj) 5978 return true 5979 }</span> 5980 5981 <span class="cov0" title="0">err := c.syncCommandHandler(cmd) 5982 c.handleCommandErr(err, obj) 5983 5984 return true</span> 5985 } 5986 5987 func (c *queuecontroller) handleCommand(cmd *busv1alpha1.Command) error <span class="cov0" title="0">{ 5988 startTime := time.Now() 5989 defer func() 
</span><span class="cov0" title="0">{ 5990 klog.V(4).Infof("Finished syncing command %s/%s (%v).", cmd.Namespace, cmd.Name, time.Since(startTime)) 5991 }</span>() 5992 5993 <span class="cov0" title="0">err := c.vcClient.BusV1alpha1().Commands(cmd.Namespace).Delete(context.TODO(), cmd.Name, metav1.DeleteOptions{}) 5994 if err != nil </span><span class="cov0" title="0">{ 5995 if apierrors.IsNotFound(err) </span><span class="cov0" title="0">{ 5996 return nil 5997 }</span> 5998 5999 <span class="cov0" title="0">return fmt.Errorf("failed to delete command <%s/%s> for %v", cmd.Namespace, cmd.Name, err)</span> 6000 } 6001 6002 <span class="cov0" title="0">req := &apis.Request{ 6003 QueueName: cmd.TargetObject.Name, 6004 Event: busv1alpha1.CommandIssuedEvent, 6005 Action: busv1alpha1.Action(cmd.Action), 6006 } 6007 6008 c.enqueueQueue(req) 6009 6010 return nil</span> 6011 } 6012 6013 func (c *queuecontroller) handleCommandErr(err error, obj interface{}) <span class="cov0" title="0">{ 6014 if err == nil </span><span class="cov0" title="0">{ 6015 c.commandQueue.Forget(obj) 6016 return 6017 }</span> 6018 6019 <span class="cov0" title="0">if c.maxRequeueNum == -1 || c.commandQueue.NumRequeues(obj) < c.maxRequeueNum </span><span class="cov0" title="0">{ 6020 klog.V(4).Infof("Error syncing command %v for %v.", obj, err) 6021 c.commandQueue.AddRateLimited(obj) 6022 return 6023 }</span> 6024 6025 <span class="cov0" title="0">klog.V(2).Infof("Dropping command %v out of the queue for %v.", obj, err) 6026 c.commandQueue.Forget(obj)</span> 6027 } 6028 </pre> 6029 6030 <pre class="file" id="file33" style="display: none">/* 6031 Copyright 2019 The Volcano Authors. 6032 6033 Licensed under the Apache License, Version 2.0 (the "License"); 6034 you may not use this file except in compliance with the License. 
6035 You may obtain a copy of the License at 6036 6037 http://www.apache.org/licenses/LICENSE-2.0 6038 6039 Unless required by applicable law or agreed to in writing, software 6040 distributed under the License is distributed on an "AS IS" BASIS, 6041 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 6042 See the License for the specific language governing permissions and 6043 limitations under the License. 6044 */ 6045 6046 package queue 6047 6048 import ( 6049 "context" 6050 "fmt" 6051 "reflect" 6052 6053 "volcano.sh/apis/pkg/apis/bus/v1alpha1" 6054 schedulingv1beta1 "volcano.sh/apis/pkg/apis/scheduling/v1beta1" 6055 "volcano.sh/volcano/pkg/controllers/queue/state" 6056 6057 v1 "k8s.io/api/core/v1" 6058 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 6059 "k8s.io/client-go/tools/cache" 6060 6061 "k8s.io/klog" 6062 ) 6063 6064 func (c *queuecontroller) syncQueue(queue *schedulingv1beta1.Queue, updateStateFn state.UpdateQueueStatusFn) error <span class="cov8" title="1">{ 6065 klog.V(4).Infof("Begin to sync queue %s.", queue.Name) 6066 defer klog.V(4).Infof("End sync queue %s.", queue.Name) 6067 6068 podGroups := c.getPodGroups(queue.Name) 6069 queueStatus := schedulingv1beta1.QueueStatus{} 6070 6071 for _, pgKey := range podGroups </span><span class="cov8" title="1">{ 6072 // Ignore error here, tt can not occur. 6073 ns, name, _ := cache.SplitMetaNamespaceKey(pgKey) 6074 6075 // TODO: check NotFound error and sync local cache. 
6076 pg, err := c.pgLister.PodGroups(ns).Get(name) 6077 if err != nil </span><span class="cov0" title="0">{ 6078 return err 6079 }</span> 6080 6081 <span class="cov8" title="1">switch pg.Status.Phase </span>{ 6082 case schedulingv1beta1.PodGroupPending:<span class="cov8" title="1"> 6083 queueStatus.Pending++</span> 6084 case schedulingv1beta1.PodGroupRunning:<span class="cov0" title="0"> 6085 queueStatus.Running++</span> 6086 case schedulingv1beta1.PodGroupUnknown:<span class="cov0" title="0"> 6087 queueStatus.Unknown++</span> 6088 case schedulingv1beta1.PodGroupInqueue:<span class="cov0" title="0"> 6089 queueStatus.Inqueue++</span> 6090 } 6091 } 6092 6093 <span class="cov8" title="1">if updateStateFn != nil </span><span class="cov0" title="0">{ 6094 updateStateFn(&queueStatus, podGroups) 6095 }</span> else<span class="cov8" title="1"> { 6096 queueStatus.State = queue.Status.State 6097 }</span> 6098 6099 // ignore update when status does not change 6100 <span class="cov8" title="1">if reflect.DeepEqual(queueStatus, queue.Status) </span><span class="cov0" title="0">{ 6101 return nil 6102 }</span> 6103 6104 <span class="cov8" title="1">newQueue := queue.DeepCopy() 6105 newQueue.Status = queueStatus 6106 if _, err := c.vcClient.SchedulingV1beta1().Queues().UpdateStatus(context.TODO(), newQueue, metav1.UpdateOptions{}); err != nil </span><span class="cov0" title="0">{ 6107 klog.Errorf("Failed to update status of Queue %s: %v.", newQueue.Name, err) 6108 return err 6109 }</span> 6110 6111 <span class="cov8" title="1">return nil</span> 6112 } 6113 6114 func (c *queuecontroller) openQueue(queue *schedulingv1beta1.Queue, updateStateFn state.UpdateQueueStatusFn) error <span class="cov0" title="0">{ 6115 klog.V(4).Infof("Begin to open queue %s.", queue.Name) 6116 6117 newQueue := queue.DeepCopy() 6118 newQueue.Status.State = schedulingv1beta1.QueueStateOpen 6119 6120 if queue.Status.State != newQueue.Status.State </span><span class="cov0" title="0">{ 6121 if _, err := 
c.vcClient.SchedulingV1beta1().Queues().Update(context.TODO(), newQueue, metav1.UpdateOptions{}); err != nil </span><span class="cov0" title="0">{ 6122 c.recorder.Event(newQueue, v1.EventTypeWarning, string(v1alpha1.OpenQueueAction), 6123 fmt.Sprintf("Open queue failed for %v", err)) 6124 return err 6125 }</span> 6126 6127 <span class="cov0" title="0">c.recorder.Event(newQueue, v1.EventTypeNormal, string(v1alpha1.OpenQueueAction), "Open queue succeed")</span> 6128 } else<span class="cov0" title="0"> { 6129 return nil 6130 }</span> 6131 6132 <span class="cov0" title="0">q, err := c.vcClient.SchedulingV1beta1().Queues().Get(context.TODO(), newQueue.Name, metav1.GetOptions{}) 6133 if err != nil </span><span class="cov0" title="0">{ 6134 return err 6135 }</span> 6136 6137 <span class="cov0" title="0">newQueue = q.DeepCopy() 6138 if updateStateFn != nil </span><span class="cov0" title="0">{ 6139 updateStateFn(&newQueue.Status, nil) 6140 }</span> else<span class="cov0" title="0"> { 6141 return fmt.Errorf("internal error, update state function should be provided") 6142 }</span> 6143 6144 <span class="cov0" title="0">if queue.Status.State != newQueue.Status.State </span><span class="cov0" title="0">{ 6145 if _, err := c.vcClient.SchedulingV1beta1().Queues().UpdateStatus(context.TODO(), newQueue, metav1.UpdateOptions{}); err != nil </span><span class="cov0" title="0">{ 6146 c.recorder.Event(newQueue, v1.EventTypeWarning, string(v1alpha1.OpenQueueAction), 6147 fmt.Sprintf("Update queue status from %s to %s failed for %v", 6148 queue.Status.State, newQueue.Status.State, err)) 6149 return err 6150 }</span> 6151 } 6152 6153 <span class="cov0" title="0">return nil</span> 6154 } 6155 6156 func (c *queuecontroller) closeQueue(queue *schedulingv1beta1.Queue, updateStateFn state.UpdateQueueStatusFn) error <span class="cov0" title="0">{ 6157 klog.V(4).Infof("Begin to close queue %s.", queue.Name) 6158 6159 newQueue := queue.DeepCopy() 6160 newQueue.Status.State = 
schedulingv1beta1.QueueStateClosed 6161 6162 if queue.Status.State != newQueue.Status.State </span><span class="cov0" title="0">{ 6163 if _, err := c.vcClient.SchedulingV1beta1().Queues().Update(context.TODO(), newQueue, metav1.UpdateOptions{}); err != nil </span><span class="cov0" title="0">{ 6164 c.recorder.Event(newQueue, v1.EventTypeWarning, string(v1alpha1.CloseQueueAction), 6165 fmt.Sprintf("Close queue failed for %v", err)) 6166 return err 6167 }</span> 6168 6169 <span class="cov0" title="0">c.recorder.Event(newQueue, v1.EventTypeNormal, string(v1alpha1.CloseQueueAction), "Close queue succeed")</span> 6170 } else<span class="cov0" title="0"> { 6171 return nil 6172 }</span> 6173 6174 <span class="cov0" title="0">q, err := c.vcClient.SchedulingV1beta1().Queues().Get(context.TODO(), newQueue.Name, metav1.GetOptions{}) 6175 if err != nil </span><span class="cov0" title="0">{ 6176 return err 6177 }</span> 6178 6179 <span class="cov0" title="0">newQueue = q.DeepCopy() 6180 podGroups := c.getPodGroups(newQueue.Name) 6181 if updateStateFn != nil </span><span class="cov0" title="0">{ 6182 updateStateFn(&newQueue.Status, podGroups) 6183 }</span> else<span class="cov0" title="0"> { 6184 return fmt.Errorf("internal error, update state function should be provided") 6185 }</span> 6186 6187 <span class="cov0" title="0">if queue.Status.State != newQueue.Status.State </span><span class="cov0" title="0">{ 6188 if _, err := c.vcClient.SchedulingV1beta1().Queues().UpdateStatus(context.TODO(), newQueue, metav1.UpdateOptions{}); err != nil </span><span class="cov0" title="0">{ 6189 c.recorder.Event(newQueue, v1.EventTypeWarning, string(v1alpha1.CloseQueueAction), 6190 fmt.Sprintf("Update queue status from %s to %s failed for %v", 6191 queue.Status.State, newQueue.Status.State, err)) 6192 return err 6193 }</span> 6194 } 6195 6196 <span class="cov0" title="0">return nil</span> 6197 } 6198 </pre> 6199 6200 <pre class="file" id="file34" style="display: none">/* 6201 Copyright 2019 
The Volcano Authors. 6202 6203 Licensed under the Apache License, Version 2.0 (the "License"); 6204 you may not use this file except in compliance with the License. 6205 You may obtain a copy of the License at 6206 6207 http://www.apache.org/licenses/LICENSE-2.0 6208 6209 Unless required by applicable law or agreed to in writing, software 6210 distributed under the License is distributed on an "AS IS" BASIS, 6211 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 6212 See the License for the specific language governing permissions and 6213 limitations under the License. 6214 */ 6215 6216 package queue 6217 6218 import ( 6219 "k8s.io/client-go/tools/cache" 6220 "k8s.io/klog" 6221 6222 busv1alpha1 "volcano.sh/apis/pkg/apis/bus/v1alpha1" 6223 schedulingv1beta1 "volcano.sh/apis/pkg/apis/scheduling/v1beta1" 6224 "volcano.sh/volcano/pkg/controllers/apis" 6225 ) 6226 6227 func (c *queuecontroller) enqueue(req *apis.Request) <span class="cov8" title="1">{ 6228 c.queue.Add(req) 6229 }</span> 6230 6231 func (c *queuecontroller) addQueue(obj interface{}) <span class="cov8" title="1">{ 6232 queue := obj.(*schedulingv1beta1.Queue) 6233 6234 req := &apis.Request{ 6235 QueueName: queue.Name, 6236 6237 Event: busv1alpha1.OutOfSyncEvent, 6238 Action: busv1alpha1.SyncQueueAction, 6239 } 6240 6241 c.enqueue(req) 6242 }</span> 6243 6244 func (c *queuecontroller) deleteQueue(obj interface{}) <span class="cov8" title="1">{ 6245 queue, ok := obj.(*schedulingv1beta1.Queue) 6246 if !ok </span><span class="cov0" title="0">{ 6247 tombstone, ok := obj.(cache.DeletedFinalStateUnknown) 6248 if !ok </span><span class="cov0" title="0">{ 6249 klog.Errorf("Couldn't get object from tombstone %#v.", obj) 6250 return 6251 }</span> 6252 <span class="cov0" title="0">queue, ok = tombstone.Obj.(*schedulingv1beta1.Queue) 6253 if !ok </span><span class="cov0" title="0">{ 6254 klog.Errorf("Tombstone contained object that is not a Queue: %#v.", obj) 6255 return 6256 }</span> 6257 } 6258 
6259 <span class="cov8" title="1">c.pgMutex.Lock() 6260 defer c.pgMutex.Unlock() 6261 delete(c.podGroups, queue.Name)</span> 6262 } 6263 6264 func (c *queuecontroller) updateQueue(_, _ interface{}) {<span class="cov0" title="0"> 6265 // currently do not care about queue update 6266 }</span> 6267 6268 func (c *queuecontroller) addPodGroup(obj interface{}) <span class="cov8" title="1">{ 6269 pg := obj.(*schedulingv1beta1.PodGroup) 6270 key, _ := cache.MetaNamespaceKeyFunc(obj) 6271 6272 c.pgMutex.Lock() 6273 defer c.pgMutex.Unlock() 6274 6275 if c.podGroups[pg.Spec.Queue] == nil </span><span class="cov8" title="1">{ 6276 c.podGroups[pg.Spec.Queue] = make(map[string]struct{}) 6277 }</span> 6278 <span class="cov8" title="1">c.podGroups[pg.Spec.Queue][key] = struct{}{} 6279 6280 req := &apis.Request{ 6281 QueueName: pg.Spec.Queue, 6282 6283 Event: busv1alpha1.OutOfSyncEvent, 6284 Action: busv1alpha1.SyncQueueAction, 6285 } 6286 6287 c.enqueue(req)</span> 6288 } 6289 6290 func (c *queuecontroller) updatePodGroup(old, new interface{}) <span class="cov8" title="1">{ 6291 oldPG := old.(*schedulingv1beta1.PodGroup) 6292 newPG := new.(*schedulingv1beta1.PodGroup) 6293 6294 // Note: we have no use case update PodGroup.Spec.Queue 6295 // So do not consider it here. 
6296 if oldPG.Status.Phase != newPG.Status.Phase </span><span class="cov8" title="1">{ 6297 c.addPodGroup(newPG) 6298 }</span> 6299 } 6300 6301 func (c *queuecontroller) deletePodGroup(obj interface{}) <span class="cov8" title="1">{ 6302 pg, ok := obj.(*schedulingv1beta1.PodGroup) 6303 if !ok </span><span class="cov0" title="0">{ 6304 tombstone, ok := obj.(cache.DeletedFinalStateUnknown) 6305 if !ok </span><span class="cov0" title="0">{ 6306 klog.Errorf("Couldn't get object from tombstone %#v.", obj) 6307 return 6308 }</span> 6309 <span class="cov0" title="0">pg, ok = tombstone.Obj.(*schedulingv1beta1.PodGroup) 6310 if !ok </span><span class="cov0" title="0">{ 6311 klog.Errorf("Tombstone contained object that is not a PodGroup: %#v.", obj) 6312 return 6313 }</span> 6314 } 6315 6316 <span class="cov8" title="1">key, _ := cache.MetaNamespaceKeyFunc(obj) 6317 6318 c.pgMutex.Lock() 6319 defer c.pgMutex.Unlock() 6320 6321 delete(c.podGroups[pg.Spec.Queue], key) 6322 6323 req := &apis.Request{ 6324 QueueName: pg.Spec.Queue, 6325 6326 Event: busv1alpha1.OutOfSyncEvent, 6327 Action: busv1alpha1.SyncQueueAction, 6328 } 6329 6330 c.enqueue(req)</span> 6331 } 6332 6333 func (c *queuecontroller) addCommand(obj interface{}) <span class="cov0" title="0">{ 6334 cmd, ok := obj.(*busv1alpha1.Command) 6335 if !ok </span><span class="cov0" title="0">{ 6336 klog.Errorf("Obj %v is not command.", obj) 6337 return 6338 }</span> 6339 6340 <span class="cov0" title="0">c.commandQueue.Add(cmd)</span> 6341 } 6342 6343 func (c *queuecontroller) getPodGroups(key string) []string <span class="cov8" title="1">{ 6344 c.pgMutex.RLock() 6345 defer c.pgMutex.RUnlock() 6346 6347 if c.podGroups[key] == nil </span><span class="cov0" title="0">{ 6348 return nil 6349 }</span> 6350 <span class="cov8" title="1">podGroups := make([]string, 0, len(c.podGroups[key])) 6351 for pgKey := range c.podGroups[key] </span><span class="cov8" title="1">{ 6352 podGroups = append(podGroups, pgKey) 6353 }</span> 6354 6355 
<span class="cov8" title="1">return podGroups</span> 6356 } 6357 6358 func (c *queuecontroller) recordEventsForQueue(name, eventType, reason, message string) <span class="cov0" title="0">{ 6359 queue, err := c.queueLister.Get(name) 6360 if err != nil </span><span class="cov0" title="0">{ 6361 klog.Errorf("Get queue %s failed for %v.", name, err) 6362 return 6363 }</span> 6364 6365 <span class="cov0" title="0">c.recorder.Event(queue, eventType, reason, message)</span> 6366 } 6367 </pre> 6368 6369 <pre class="file" id="file35" style="display: none">/* 6370 Copyright 2019 The Volcano Authors. 6371 6372 Licensed under the Apache License, Version 2.0 (the "License"); 6373 you may not use this file except in compliance with the License. 6374 You may obtain a copy of the License at 6375 6376 http://www.apache.org/licenses/LICENSE-2.0 6377 6378 Unless required by applicable law or agreed to in writing, software 6379 distributed under the License is distributed on an "AS IS" BASIS, 6380 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 6381 See the License for the specific language governing permissions and 6382 limitations under the License. 6383 */ 6384 6385 package queue 6386 6387 import ( 6388 schedulingv1beta1 "volcano.sh/apis/pkg/apis/scheduling/v1beta1" 6389 6390 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 6391 ) 6392 6393 // IsQueueReference return if ownerReference is Queue Kind. 
6394 func IsQueueReference(ref *metav1.OwnerReference) bool <span class="cov0" title="0">{ 6395 if ref == nil </span><span class="cov0" title="0">{ 6396 return false 6397 }</span> 6398 6399 <span class="cov0" title="0">if ref.APIVersion != schedulingv1beta1.SchemeGroupVersion.String() </span><span class="cov0" title="0">{ 6400 return false 6401 }</span> 6402 6403 <span class="cov0" title="0">if ref.Kind != "Queue" </span><span class="cov0" title="0">{ 6404 return false 6405 }</span> 6406 6407 <span class="cov0" title="0">return true</span> 6408 } 6409 </pre> 6410 6411 <pre class="file" id="file36" style="display: none">/* 6412 Copyright 2021 The Volcano Authors. 6413 6414 Licensed under the Apache License, Version 2.0 (the "License"); 6415 you may not use this file except in compliance with the License. 6416 You may obtain a copy of the License at 6417 6418 http://www.apache.org/licenses/LICENSE-2.0 6419 6420 Unless required by applicable law or agreed to in writing, software 6421 distributed under the License is distributed on an "AS IS" BASIS, 6422 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 6423 See the License for the specific language governing permissions and 6424 limitations under the License. 
6425 */ 6426 6427 package allocate 6428 6429 import ( 6430 "k8s.io/klog" 6431 6432 "volcano.sh/volcano/pkg/scheduler/api" 6433 "volcano.sh/volcano/pkg/scheduler/framework" 6434 "volcano.sh/volcano/pkg/scheduler/metrics" 6435 "volcano.sh/volcano/pkg/scheduler/util" 6436 ) 6437 6438 var targetJob = util.Reservation.TargetJob 6439 6440 type Action struct{} 6441 6442 func New() *Action <span class="cov8" title="1">{ 6443 return &Action{} 6444 }</span> 6445 6446 func (alloc *Action) Name() string <span class="cov0" title="0">{ 6447 return "allocate" 6448 }</span> 6449 6450 func (alloc *Action) Initialize() {<span class="cov0" title="0">}</span> 6451 6452 func (alloc *Action) Execute(ssn *framework.Session) <span class="cov8" title="1">{ 6453 klog.V(3).Infof("Enter Allocate ...") 6454 defer klog.V(3).Infof("Leaving Allocate ...") 6455 6456 // the allocation for pod may have many stages 6457 // 1. pick a namespace named N (using ssn.NamespaceOrderFn) 6458 // 2. pick a queue named Q from N (using ssn.QueueOrderFn) 6459 // 3. pick a job named J from Q (using ssn.JobOrderFn) 6460 // 4. pick a task T from J (using ssn.TaskOrderFn) 6461 // 5. use predicateFn to filter out node that T can not be allocated on. 6462 // 6. 
use ssn.NodeOrderFn to judge the best node and assign it to T 6463 6464 namespaces := util.NewPriorityQueue(ssn.NamespaceOrderFn) 6465 6466 // jobsMap is map[api.NamespaceName]map[api.QueueID]PriorityQueue(*api.JobInfo) 6467 // used to find job with highest priority in given queue and namespace 6468 jobsMap := map[api.NamespaceName]map[api.QueueID]*util.PriorityQueue{} 6469 6470 for _, job := range ssn.Jobs </span><span class="cov8" title="1">{ 6471 if job.IsPending() </span><span class="cov0" title="0">{ 6472 klog.V(4).Infof("Job <%s/%s> Queue <%s> skip allocate, reason: job status is pending.", 6473 job.Namespace, job.Name, job.Queue) 6474 continue</span> 6475 } 6476 <span class="cov8" title="1">if vr := ssn.JobValid(job); vr != nil && !vr.Pass </span><span class="cov0" title="0">{ 6477 klog.V(4).Infof("Job <%s/%s> Queue <%s> skip allocate, reason: %v, message %v", job.Namespace, job.Name, job.Queue, vr.Reason, vr.Message) 6478 continue</span> 6479 } 6480 6481 <span class="cov8" title="1">if _, found := ssn.Queues[job.Queue]; !found </span><span class="cov0" title="0">{ 6482 klog.Warningf("Skip adding Job <%s/%s> because its queue %s is not found", 6483 job.Namespace, job.Name, job.Queue) 6484 continue</span> 6485 } 6486 6487 <span class="cov8" title="1">namespace := api.NamespaceName(job.Namespace) 6488 queueMap, found := jobsMap[namespace] 6489 if !found </span><span class="cov8" title="1">{ 6490 namespaces.Push(namespace) 6491 6492 queueMap = make(map[api.QueueID]*util.PriorityQueue) 6493 jobsMap[namespace] = queueMap 6494 }</span> 6495 6496 <span class="cov8" title="1">jobs, found := queueMap[job.Queue] 6497 if !found </span><span class="cov8" title="1">{ 6498 jobs = util.NewPriorityQueue(ssn.JobOrderFn) 6499 queueMap[job.Queue] = jobs 6500 }</span> 6501 6502 <span class="cov8" title="1">klog.V(4).Infof("Added Job <%s/%s> into Queue <%s>", job.Namespace, job.Name, job.Queue) 6503 jobs.Push(job)</span> 6504 } 6505 6506 <span class="cov8" 
title="1">klog.V(3).Infof("Try to allocate resource to %d Namespaces", len(jobsMap)) 6507 6508 pendingTasks := map[api.JobID]*util.PriorityQueue{} 6509 6510 allNodes := ssn.NodeList 6511 unlockedNodes := allNodes 6512 if targetJob != nil && len(util.Reservation.LockedNodes) != 0 </span><span class="cov0" title="0">{ 6513 unlockedNodes = unlockedNodes[0:0] 6514 for _, node := range allNodes </span><span class="cov0" title="0">{ 6515 if _, exist := util.Reservation.LockedNodes[node.Name]; !exist </span><span class="cov0" title="0">{ 6516 unlockedNodes = append(unlockedNodes, node) 6517 }</span> 6518 } 6519 } 6520 <span class="cov8" title="1">for _, unlockedNode := range unlockedNodes </span><span class="cov8" title="1">{ 6521 klog.V(4).Infof("unlockedNode ID: %s, Name: %s", unlockedNode.Node.UID, unlockedNode.Node.Name) 6522 }</span> 6523 <span class="cov8" title="1">predicateFn := func(task *api.TaskInfo, node *api.NodeInfo) error </span><span class="cov8" title="1">{ 6524 // Check for Resource Predicate 6525 if !task.InitResreq.LessEqual(node.FutureIdle(), api.Zero) </span><span class="cov8" title="1">{ 6526 return api.NewFitError(task, node, api.NodeResourceFitFailed) 6527 }</span> 6528 6529 <span class="cov8" title="1">return ssn.PredicateFn(task, node)</span> 6530 } 6531 6532 // To pick <namespace, queue> tuple for job, we choose to pick namespace firstly. 6533 // Because we believe that number of queues would less than namespaces in most case. 6534 // And, this action would make the resource usage among namespace balanced. 
6535 <span class="cov8" title="1">for </span><span class="cov8" title="1">{ 6536 if namespaces.Empty() </span><span class="cov8" title="1">{ 6537 break</span> 6538 } 6539 6540 // pick namespace from namespaces PriorityQueue 6541 <span class="cov8" title="1">namespace := namespaces.Pop().(api.NamespaceName) 6542 6543 queueInNamespace := jobsMap[namespace] 6544 6545 // pick queue for given namespace 6546 // 6547 // This block use a algorithm with time complex O(n). 6548 // But at least PriorityQueue could not be used here, 6549 // because the allocation of job would change the priority of queue among all namespaces, 6550 // and the PriorityQueue have no ability to update priority for a special queue. 6551 var queue *api.QueueInfo 6552 for queueID := range queueInNamespace </span><span class="cov8" title="1">{ 6553 currentQueue := ssn.Queues[queueID] 6554 if ssn.Overused(currentQueue) </span><span class="cov8" title="1">{ 6555 klog.V(3).Infof("Namespace <%s> Queue <%s> is overused, ignore it.", namespace, currentQueue.Name) 6556 delete(queueInNamespace, queueID) 6557 continue</span> 6558 } 6559 <span class="cov8" title="1">if jobs, found := queueInNamespace[currentQueue.UID]; found && jobs.Empty() </span><span class="cov8" title="1">{ 6560 continue</span> 6561 } 6562 6563 <span class="cov8" title="1">if queue == nil || ssn.QueueOrderFn(currentQueue, queue) </span><span class="cov8" title="1">{ 6564 queue = currentQueue 6565 }</span> 6566 } 6567 6568 <span class="cov8" title="1">if queue == nil </span><span class="cov8" title="1">{ 6569 klog.V(3).Infof("Namespace <%s> have no queue, skip it", namespace) 6570 continue</span> 6571 } 6572 6573 <span class="cov8" title="1">klog.V(3).Infof("Try to allocate resource to Jobs in Namespace <%s> Queue <%v>", namespace, queue.Name) 6574 6575 jobs, found := queueInNamespace[queue.UID] 6576 if !found || jobs.Empty() </span><span class="cov0" title="0">{ 6577 delete(queueInNamespace, queue.UID) 6578 namespaces.Push(namespace) 6579 
klog.V(4).Infof("Can not find jobs for queue %s.", queue.Name) 6580 continue</span> 6581 } 6582 6583 <span class="cov8" title="1">job := jobs.Pop().(*api.JobInfo) 6584 var nodes []*api.NodeInfo 6585 if targetJob != nil && job.UID == targetJob.UID </span><span class="cov0" title="0">{ 6586 klog.V(4).Infof("Try to allocate resource to target job: %s", job.Name) 6587 nodes = allNodes 6588 }</span> else<span class="cov8" title="1"> { 6589 nodes = unlockedNodes 6590 }</span> 6591 <span class="cov8" title="1">if _, found = pendingTasks[job.UID]; !found </span><span class="cov8" title="1">{ 6592 tasks := util.NewPriorityQueue(ssn.TaskOrderFn) 6593 for _, task := range job.TaskStatusIndex[api.Pending] </span><span class="cov8" title="1">{ 6594 // Skip BestEffort task in 'allocate' action. 6595 if task.Resreq.IsEmpty() </span><span class="cov0" title="0">{ 6596 klog.V(4).Infof("Task <%v/%v> is BestEffort task, skip it.", 6597 task.Namespace, task.Name) 6598 continue</span> 6599 } 6600 6601 <span class="cov8" title="1">tasks.Push(task)</span> 6602 } 6603 <span class="cov8" title="1">pendingTasks[job.UID] = tasks</span> 6604 } 6605 <span class="cov8" title="1">tasks := pendingTasks[job.UID] 6606 6607 klog.V(3).Infof("Try to allocate resource to %d tasks of Job <%v/%v>", 6608 tasks.Len(), job.Namespace, job.Name) 6609 6610 stmt := framework.NewStatement(ssn) 6611 6612 for !tasks.Empty() </span><span class="cov8" title="1">{ 6613 task := tasks.Pop().(*api.TaskInfo) 6614 6615 // Check whether the queue is overused on dimension that the task requested 6616 taskRequest := task.Resreq.ResourceNames() 6617 if underusedResources := ssn.UnderusedResources(queue); underusedResources != nil && !underusedResources.Contains(taskRequest) </span><span class="cov8" title="1">{ 6618 klog.V(3).Infof("Queue <%s> is overused when considering task <%s>, ignore it.", queue.Name, task.Name) 6619 continue</span> 6620 } 6621 6622 <span class="cov8" title="1">klog.V(3).Infof("There are <%d> nodes for 
Job <%v/%v>", len(nodes), job.Namespace, job.Name) 6623 6624 predicateNodes, fitErrors := util.PredicateNodes(task, nodes, predicateFn) 6625 if len(predicateNodes) == 0 </span><span class="cov8" title="1">{ 6626 job.NodesFitErrors[task.UID] = fitErrors 6627 break</span> 6628 } 6629 6630 <span class="cov8" title="1">var candidateNodes []*api.NodeInfo 6631 for _, n := range predicateNodes </span><span class="cov8" title="1">{ 6632 if task.InitResreq.LessEqual(n.Idle, api.Zero) || task.InitResreq.LessEqual(n.FutureIdle(), api.Zero) </span><span class="cov8" title="1">{ 6633 candidateNodes = append(candidateNodes, n) 6634 }</span> 6635 } 6636 6637 // If not candidate nodes for this task, skip it. 6638 <span class="cov8" title="1">if len(candidateNodes) == 0 </span><span class="cov0" title="0">{ 6639 continue</span> 6640 } 6641 6642 <span class="cov8" title="1">nodeScores := util.PrioritizeNodes(task, candidateNodes, ssn.BatchNodeOrderFn, ssn.NodeOrderMapFn, ssn.NodeOrderReduceFn) 6643 6644 node := ssn.BestNodeFn(task, nodeScores) 6645 if node == nil </span><span class="cov8" title="1">{ 6646 node = util.SelectBestNode(nodeScores) 6647 }</span> 6648 6649 // Allocate idle resource to the task. 
6650 <span class="cov8" title="1">if task.InitResreq.LessEqual(node.Idle, api.Zero) </span><span class="cov8" title="1">{ 6651 klog.V(3).Infof("Binding Task <%v/%v> to node <%v>", 6652 task.Namespace, task.Name, node.Name) 6653 if err := stmt.Allocate(task, node); err != nil </span><span class="cov0" title="0">{ 6654 klog.Errorf("Failed to bind Task %v on %v in Session %v, err: %v", 6655 task.UID, node.Name, ssn.UID, err) 6656 }</span> else<span class="cov8" title="1"> { 6657 metrics.UpdateE2eSchedulingDurationByJob(job.Name, string(job.Queue), job.Namespace, metrics.Duration(job.CreationTimestamp.Time)) 6658 }</span> 6659 } else<span class="cov0" title="0"> { 6660 klog.V(3).Infof("Predicates failed for task <%s/%s> on node <%s> with limited resources", 6661 task.Namespace, task.Name, node.Name) 6662 6663 // Allocate releasing resource to the task if any. 6664 if task.InitResreq.LessEqual(node.FutureIdle(), api.Zero) </span><span class="cov0" title="0">{ 6665 klog.V(3).Infof("Pipelining Task <%v/%v> to node <%v> for <%v> on <%v>", 6666 task.Namespace, task.Name, node.Name, task.InitResreq, node.Releasing) 6667 if err := stmt.Pipeline(task, node.Name); err != nil </span><span class="cov0" title="0">{ 6668 klog.Errorf("Failed to pipeline Task %v on %v in Session %v for %v.", 6669 task.UID, node.Name, ssn.UID, err) 6670 }</span> else<span class="cov0" title="0"> { 6671 metrics.UpdateE2eSchedulingDurationByJob(job.Name, string(job.Queue), job.Namespace, metrics.Duration(job.CreationTimestamp.Time)) 6672 }</span> 6673 } 6674 } 6675 6676 <span class="cov8" title="1">if ssn.JobReady(job) && !tasks.Empty() </span><span class="cov8" title="1">{ 6677 jobs.Push(job) 6678 break</span> 6679 } 6680 } 6681 6682 <span class="cov8" title="1">if ssn.JobReady(job) </span><span class="cov8" title="1">{ 6683 stmt.Commit() 6684 }</span> else<span class="cov0" title="0"> { 6685 if !ssn.JobPipelined(job) </span><span class="cov0" title="0">{ 6686 stmt.Discard() 6687 }</span> 6688 } 6689 
6690 // Added Namespace back until no job in Namespace. 6691 <span class="cov8" title="1">namespaces.Push(namespace)</span> 6692 } 6693 } 6694 6695 func (alloc *Action) UnInitialize() {<span class="cov0" title="0">}</span> 6696 </pre> 6697 6698 <pre class="file" id="file37" style="display: none">/* 6699 Copyright 2018 The Kubernetes Authors. 6700 6701 Licensed under the Apache License, Version 2.0 (the "License"); 6702 you may not use this file except in compliance with the License. 6703 You may obtain a copy of the License at 6704 6705 http://www.apache.org/licenses/LICENSE-2.0 6706 6707 Unless required by applicable law or agreed to in writing, software 6708 distributed under the License is distributed on an "AS IS" BASIS, 6709 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 6710 See the License for the specific language governing permissions and 6711 limitations under the License. 6712 */ 6713 6714 package preempt 6715 6716 import ( 6717 "k8s.io/klog" 6718 6719 "volcano.sh/volcano/pkg/scheduler/api" 6720 "volcano.sh/volcano/pkg/scheduler/framework" 6721 "volcano.sh/volcano/pkg/scheduler/metrics" 6722 "volcano.sh/volcano/pkg/scheduler/util" 6723 ) 6724 6725 type Action struct{} 6726 6727 func New() *Action <span class="cov8" title="1">{ 6728 return &Action{} 6729 }</span> 6730 6731 func (alloc *Action) Name() string <span class="cov0" title="0">{ 6732 return "preempt" 6733 }</span> 6734 6735 func (alloc *Action) Initialize() {<span class="cov0" title="0">}</span> 6736 6737 func (alloc *Action) Execute(ssn *framework.Session) <span class="cov8" title="1">{ 6738 klog.V(3).Infof("Enter Preempt ...") 6739 defer klog.V(3).Infof("Leaving Preempt ...") 6740 6741 preemptorsMap := map[api.QueueID]*util.PriorityQueue{} 6742 preemptorTasks := map[api.JobID]*util.PriorityQueue{} 6743 6744 var underRequest []*api.JobInfo 6745 queues := map[api.QueueID]*api.QueueInfo{} 6746 6747 for _, job := range ssn.Jobs </span><span class="cov8" title="1">{ 6748 if 
job.IsPending() </span><span class="cov0" title="0">{ 6749 continue</span> 6750 } 6751 6752 <span class="cov8" title="1">if vr := ssn.JobValid(job); vr != nil && !vr.Pass </span><span class="cov0" title="0">{ 6753 klog.V(4).Infof("Job <%s/%s> Queue <%s> skip preemption, reason: %v, message %v", job.Namespace, job.Name, job.Queue, vr.Reason, vr.Message) 6754 continue</span> 6755 } 6756 6757 <span class="cov8" title="1">if queue, found := ssn.Queues[job.Queue]; !found </span><span class="cov0" title="0">{ 6758 continue</span> 6759 } else<span class="cov8" title="1"> if _, existed := queues[queue.UID]; !existed </span><span class="cov8" title="1">{ 6760 klog.V(3).Infof("Added Queue <%s> for Job <%s/%s>", 6761 queue.Name, job.Namespace, job.Name) 6762 queues[queue.UID] = queue 6763 }</span> 6764 6765 // check job if starting for more resources. 6766 <span class="cov8" title="1">if ssn.JobStarving(job) </span><span class="cov8" title="1">{ 6767 if _, found := preemptorsMap[job.Queue]; !found </span><span class="cov8" title="1">{ 6768 preemptorsMap[job.Queue] = util.NewPriorityQueue(ssn.JobOrderFn) 6769 }</span> 6770 <span class="cov8" title="1">preemptorsMap[job.Queue].Push(job) 6771 underRequest = append(underRequest, job) 6772 preemptorTasks[job.UID] = util.NewPriorityQueue(ssn.TaskOrderFn) 6773 for _, task := range job.TaskStatusIndex[api.Pending] </span><span class="cov8" title="1">{ 6774 preemptorTasks[job.UID].Push(task) 6775 }</span> 6776 } 6777 } 6778 6779 // Preemption between Jobs within Queue. 6780 <span class="cov8" title="1">for _, queue := range queues </span><span class="cov8" title="1">{ 6781 for </span><span class="cov8" title="1">{ 6782 preemptors := preemptorsMap[queue.UID] 6783 6784 // If no preemptors, no preemption. 
6785 if preemptors == nil || preemptors.Empty() </span><span class="cov8" title="1">{ 6786 klog.V(4).Infof("No preemptors in Queue <%s>, break.", queue.Name) 6787 break</span> 6788 } 6789 6790 <span class="cov8" title="1">preemptorJob := preemptors.Pop().(*api.JobInfo) 6791 6792 stmt := framework.NewStatement(ssn) 6793 assigned := false 6794 for </span><span class="cov8" title="1">{ 6795 // If job is not request more resource, then stop preempting. 6796 if !ssn.JobStarving(preemptorJob) </span><span class="cov8" title="1">{ 6797 break</span> 6798 } 6799 6800 // If not preemptor tasks, next job. 6801 <span class="cov8" title="1">if preemptorTasks[preemptorJob.UID].Empty() </span><span class="cov8" title="1">{ 6802 klog.V(3).Infof("No preemptor task in job <%s/%s>.", 6803 preemptorJob.Namespace, preemptorJob.Name) 6804 break</span> 6805 } 6806 6807 <span class="cov8" title="1">preemptor := preemptorTasks[preemptorJob.UID].Pop().(*api.TaskInfo) 6808 6809 if preempted, _ := preempt(ssn, stmt, preemptor, func(task *api.TaskInfo) bool </span><span class="cov8" title="1">{ 6810 // Ignore non running task. 6811 if task.Status != api.Running </span><span class="cov0" title="0">{ 6812 return false 6813 }</span> 6814 // Ignore task with empty resource request. 6815 <span class="cov8" title="1">if task.Resreq.IsEmpty() </span><span class="cov0" title="0">{ 6816 return false 6817 }</span> 6818 <span class="cov8" title="1">job, found := ssn.Jobs[task.Job] 6819 if !found </span><span class="cov0" title="0">{ 6820 return false 6821 }</span> 6822 // Preempt other jobs within queue 6823 <span class="cov8" title="1">return job.Queue == preemptorJob.Queue && preemptor.Job != task.Job</span> 6824 }); preempted <span class="cov8" title="1">{ 6825 assigned = true 6826 }</span> 6827 } 6828 6829 // Commit changes only if job is pipelined, otherwise try next job. 
6830 <span class="cov8" title="1">if ssn.JobPipelined(preemptorJob) </span><span class="cov8" title="1">{ 6831 stmt.Commit() 6832 }</span> else<span class="cov8" title="1"> { 6833 stmt.Discard() 6834 continue</span> 6835 } 6836 6837 <span class="cov8" title="1">if assigned </span><span class="cov8" title="1">{ 6838 preemptors.Push(preemptorJob) 6839 }</span> 6840 } 6841 6842 // Preemption between Task within Job. 6843 <span class="cov8" title="1">for _, job := range underRequest </span><span class="cov8" title="1">{ 6844 // Fix: preemptor numbers lose when in same job 6845 preemptorTasks[job.UID] = util.NewPriorityQueue(ssn.TaskOrderFn) 6846 for _, task := range job.TaskStatusIndex[api.Pending] </span><span class="cov8" title="1">{ 6847 preemptorTasks[job.UID].Push(task) 6848 }</span> 6849 <span class="cov8" title="1">for </span><span class="cov8" title="1">{ 6850 if _, found := preemptorTasks[job.UID]; !found </span><span class="cov0" title="0">{ 6851 break</span> 6852 } 6853 6854 <span class="cov8" title="1">if preemptorTasks[job.UID].Empty() </span><span class="cov8" title="1">{ 6855 break</span> 6856 } 6857 6858 <span class="cov8" title="1">preemptor := preemptorTasks[job.UID].Pop().(*api.TaskInfo) 6859 6860 stmt := framework.NewStatement(ssn) 6861 assigned, _ := preempt(ssn, stmt, preemptor, func(task *api.TaskInfo) bool </span><span class="cov8" title="1">{ 6862 // Ignore non running task. 6863 if task.Status != api.Running </span><span class="cov8" title="1">{ 6864 return false 6865 }</span> 6866 // Ignore task with empty resource request. 6867 <span class="cov8" title="1">if task.Resreq.IsEmpty() </span><span class="cov0" title="0">{ 6868 return false 6869 }</span> 6870 // Preempt tasks within job. 6871 <span class="cov8" title="1">return preemptor.Job == task.Job</span> 6872 }) 6873 <span class="cov8" title="1">stmt.Commit() 6874 6875 // If no preemption, next job. 
6876 if !assigned </span><span class="cov8" title="1">{ 6877 break</span> 6878 } 6879 } 6880 } 6881 } 6882 6883 // call victimTasksFn to evict tasks 6884 <span class="cov8" title="1">victimTasks(ssn)</span> 6885 } 6886 6887 func (alloc *Action) UnInitialize() {<span class="cov0" title="0">}</span> 6888 6889 func preempt( 6890 ssn *framework.Session, 6891 stmt *framework.Statement, 6892 preemptor *api.TaskInfo, 6893 filter func(*api.TaskInfo) bool, 6894 ) (bool, error) <span class="cov8" title="1">{ 6895 assigned := false 6896 6897 allNodes := ssn.NodeList 6898 6899 predicateNodes, _ := util.PredicateNodes(preemptor, allNodes, ssn.PredicateFn) 6900 6901 nodeScores := util.PrioritizeNodes(preemptor, predicateNodes, ssn.BatchNodeOrderFn, ssn.NodeOrderMapFn, ssn.NodeOrderReduceFn) 6902 6903 selectedNodes := util.SortNodes(nodeScores) 6904 for _, node := range selectedNodes </span><span class="cov8" title="1">{ 6905 klog.V(3).Infof("Considering Task <%s/%s> on Node <%s>.", 6906 preemptor.Namespace, preemptor.Name, node.Name) 6907 6908 var preemptees []*api.TaskInfo 6909 for _, task := range node.Tasks </span><span class="cov8" title="1">{ 6910 if filter == nil </span><span class="cov0" title="0">{ 6911 preemptees = append(preemptees, task.Clone()) 6912 }</span> else<span class="cov8" title="1"> if filter(task) </span><span class="cov8" title="1">{ 6913 preemptees = append(preemptees, task.Clone()) 6914 }</span> 6915 } 6916 <span class="cov8" title="1">victims := ssn.Preemptable(preemptor, preemptees) 6917 metrics.UpdatePreemptionVictimsCount(len(victims)) 6918 6919 if err := util.ValidateVictims(preemptor, node, victims); err != nil </span><span class="cov8" title="1">{ 6920 klog.V(3).Infof("No validated victims on Node <%s>: %v", node.Name, err) 6921 continue</span> 6922 } 6923 6924 <span class="cov8" title="1">victimsQueue := util.NewPriorityQueue(func(l, r interface{}) bool </span><span class="cov8" title="1">{ 6925 return !ssn.TaskOrderFn(l, r) 6926 }</span>) 6927 
<span class="cov8" title="1">for _, victim := range victims </span><span class="cov8" title="1">{ 6928 victimsQueue.Push(victim) 6929 }</span> 6930 // Preempt victims for tasks, pick lowest priority task first. 6931 <span class="cov8" title="1">preempted := api.EmptyResource() 6932 6933 for !victimsQueue.Empty() </span><span class="cov8" title="1">{ 6934 // If reclaimed enough resources, break loop to avoid Sub panic. 6935 if preemptor.InitResreq.LessEqual(node.FutureIdle(), api.Zero) </span><span class="cov0" title="0">{ 6936 break</span> 6937 } 6938 <span class="cov8" title="1">preemptee := victimsQueue.Pop().(*api.TaskInfo) 6939 klog.V(3).Infof("Try to preempt Task <%s/%s> for Tasks <%s/%s>", 6940 preemptee.Namespace, preemptee.Name, preemptor.Namespace, preemptor.Name) 6941 if err := stmt.Evict(preemptee, "preempt"); err != nil </span><span class="cov0" title="0">{ 6942 klog.Errorf("Failed to preempt Task <%s/%s> for Tasks <%s/%s>: %v", 6943 preemptee.Namespace, preemptee.Name, preemptor.Namespace, preemptor.Name, err) 6944 continue</span> 6945 } 6946 <span class="cov8" title="1">preempted.Add(preemptee.Resreq)</span> 6947 } 6948 6949 <span class="cov8" title="1">metrics.RegisterPreemptionAttempts() 6950 klog.V(3).Infof("Preempted <%v> for Task <%s/%s> requested <%v>.", 6951 preempted, preemptor.Namespace, preemptor.Name, preemptor.InitResreq) 6952 6953 if preemptor.InitResreq.LessEqual(node.FutureIdle(), api.Zero) </span><span class="cov8" title="1">{ 6954 if err := stmt.Pipeline(preemptor, node.Name); err != nil </span><span class="cov0" title="0">{ 6955 klog.Errorf("Failed to pipeline Task <%s/%s> on Node <%s>", 6956 preemptor.Namespace, preemptor.Name, node.Name) 6957 }</span> 6958 6959 // Ignore pipeline error, will be corrected in next scheduling loop. 
6960 <span class="cov8" title="1">assigned = true 6961 6962 break</span> 6963 } 6964 } 6965 6966 <span class="cov8" title="1">return assigned, nil</span> 6967 } 6968 6969 func victimTasks(ssn *framework.Session) <span class="cov8" title="1">{ 6970 stmt := framework.NewStatement(ssn) 6971 victimTasks := ssn.VictimTasks() 6972 for _, victim := range victimTasks </span><span class="cov0" title="0">{ 6973 if err := stmt.Evict(victim.Clone(), "evict"); err != nil </span><span class="cov0" title="0">{ 6974 klog.Errorf("Failed to evict Task <%s/%s>: %v", 6975 victim.Namespace, victim.Name, err) 6976 continue</span> 6977 } 6978 } 6979 <span class="cov8" title="1">stmt.Commit()</span> 6980 } 6981 </pre> 6982 6983 <pre class="file" id="file38" style="display: none">/* 6984 Copyright 2018 The Kubernetes Authors. 6985 6986 Licensed under the Apache License, Version 2.0 (the "License"); 6987 you may not use this file except in compliance with the License. 6988 You may obtain a copy of the License at 6989 6990 http://www.apache.org/licenses/LICENSE-2.0 6991 6992 Unless required by applicable law or agreed to in writing, software 6993 distributed under the License is distributed on an "AS IS" BASIS, 6994 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 6995 See the License for the specific language governing permissions and 6996 limitations under the License. 
6997 */ 6998 6999 package reclaim 7000 7001 import ( 7002 "k8s.io/klog" 7003 7004 "volcano.sh/volcano/pkg/scheduler/api" 7005 "volcano.sh/volcano/pkg/scheduler/framework" 7006 "volcano.sh/volcano/pkg/scheduler/util" 7007 ) 7008 7009 type Action struct{} 7010 7011 func New() *Action <span class="cov8" title="1">{ 7012 return &Action{} 7013 }</span> 7014 7015 func (ra *Action) Name() string <span class="cov0" title="0">{ 7016 return "reclaim" 7017 }</span> 7018 7019 func (ra *Action) Initialize() {<span class="cov0" title="0">}</span> 7020 7021 func (ra *Action) Execute(ssn *framework.Session) <span class="cov8" title="1">{ 7022 klog.V(3).Infof("Enter Reclaim ...") 7023 defer klog.V(3).Infof("Leaving Reclaim ...") 7024 7025 queues := util.NewPriorityQueue(ssn.QueueOrderFn) 7026 queueMap := map[api.QueueID]*api.QueueInfo{} 7027 7028 preemptorsMap := map[api.QueueID]*util.PriorityQueue{} 7029 preemptorTasks := map[api.JobID]*util.PriorityQueue{} 7030 7031 klog.V(3).Infof("There are <%d> Jobs and <%d> Queues in total for scheduling.", 7032 len(ssn.Jobs), len(ssn.Queues)) 7033 7034 for _, job := range ssn.Jobs </span><span class="cov8" title="1">{ 7035 if job.IsPending() </span><span class="cov0" title="0">{ 7036 continue</span> 7037 } 7038 7039 <span class="cov8" title="1">if vr := ssn.JobValid(job); vr != nil && !vr.Pass </span><span class="cov0" title="0">{ 7040 klog.V(4).Infof("Job <%s/%s> Queue <%s> skip reclaim, reason: %v, message %v", job.Namespace, job.Name, job.Queue, vr.Reason, vr.Message) 7041 continue</span> 7042 } 7043 7044 <span class="cov8" title="1">if queue, found := ssn.Queues[job.Queue]; !found </span><span class="cov0" title="0">{ 7045 klog.Errorf("Failed to find Queue <%s> for Job <%s/%s>", 7046 job.Queue, job.Namespace, job.Name) 7047 continue</span> 7048 } else<span class="cov8" title="1"> if _, existed := queueMap[queue.UID]; !existed </span><span class="cov8" title="1">{ 7049 klog.V(4).Infof("Added Queue <%s> for Job <%s/%s>", queue.Name, 
job.Namespace, job.Name) 7050 queueMap[queue.UID] = queue 7051 queues.Push(queue) 7052 }</span> 7053 7054 <span class="cov8" title="1">if len(job.TaskStatusIndex[api.Pending]) != 0 </span><span class="cov8" title="1">{ 7055 if _, found := preemptorsMap[job.Queue]; !found </span><span class="cov8" title="1">{ 7056 preemptorsMap[job.Queue] = util.NewPriorityQueue(ssn.JobOrderFn) 7057 }</span> 7058 <span class="cov8" title="1">preemptorsMap[job.Queue].Push(job) 7059 preemptorTasks[job.UID] = util.NewPriorityQueue(ssn.TaskOrderFn) 7060 for _, task := range job.TaskStatusIndex[api.Pending] </span><span class="cov8" title="1">{ 7061 preemptorTasks[job.UID].Push(task) 7062 }</span> 7063 } 7064 } 7065 7066 <span class="cov8" title="1">for </span><span class="cov8" title="1">{ 7067 // If no queues, break 7068 if queues.Empty() </span><span class="cov8" title="1">{ 7069 break</span> 7070 } 7071 7072 <span class="cov8" title="1">var job *api.JobInfo 7073 var task *api.TaskInfo 7074 7075 queue := queues.Pop().(*api.QueueInfo) 7076 if ssn.Overused(queue) </span><span class="cov8" title="1">{ 7077 klog.V(3).Infof("Queue <%s> is overused, ignore it.", queue.Name) 7078 continue</span> 7079 } 7080 7081 // Found "high" priority job 7082 <span class="cov8" title="1">jobs, found := preemptorsMap[queue.UID] 7083 if !found || jobs.Empty() </span><span class="cov0" title="0">{ 7084 continue</span> 7085 } else<span class="cov8" title="1"> { 7086 job = jobs.Pop().(*api.JobInfo) 7087 }</span> 7088 7089 // Found "high" priority task to reclaim others 7090 <span class="cov8" title="1">if tasks, found := preemptorTasks[job.UID]; !found || tasks.Empty() </span><span class="cov0" title="0">{ 7091 continue</span> 7092 } else<span class="cov8" title="1"> { 7093 task = tasks.Pop().(*api.TaskInfo) 7094 }</span> 7095 7096 // Check whether the queue is overused on dimension that the task requested 7097 <span class="cov8" title="1">taskRequest := task.Resreq.ResourceNames() 7098 if underusedResources 
:= ssn.UnderusedResources(queue); underusedResources != nil && !underusedResources.Contains(taskRequest) </span><span class="cov0" title="0">{ 7099 klog.V(3).Infof("Queue <%s> is overused when considering task <%s>, ignore it.", queue.Name, task.Name) 7100 continue</span> 7101 } 7102 7103 <span class="cov8" title="1">assigned := false 7104 for _, n := range ssn.Nodes </span><span class="cov8" title="1">{ 7105 // If predicates failed, next node. 7106 if err := ssn.PredicateFn(task, n); err != nil </span><span class="cov0" title="0">{ 7107 continue</span> 7108 } 7109 7110 <span class="cov8" title="1">klog.V(3).Infof("Considering Task <%s/%s> on Node <%s>.", 7111 task.Namespace, task.Name, n.Name) 7112 7113 var reclaimees []*api.TaskInfo 7114 for _, task := range n.Tasks </span><span class="cov8" title="1">{ 7115 // Ignore non running task. 7116 if task.Status != api.Running </span><span class="cov0" title="0">{ 7117 continue</span> 7118 } 7119 7120 <span class="cov8" title="1">if j, found := ssn.Jobs[task.Job]; !found </span><span class="cov0" title="0">{ 7121 continue</span> 7122 } else<span class="cov8" title="1"> if j.Queue != job.Queue </span><span class="cov8" title="1">{ 7123 q := ssn.Queues[j.Queue] 7124 if !q.Reclaimable() </span><span class="cov0" title="0">{ 7125 continue</span> 7126 } 7127 // Clone task to avoid modify Task's status on node. 7128 <span class="cov8" title="1">reclaimees = append(reclaimees, task.Clone())</span> 7129 } 7130 } 7131 <span class="cov8" title="1">victims := ssn.Reclaimable(task, reclaimees) 7132 7133 if err := util.ValidateVictims(task, n, victims); err != nil </span><span class="cov0" title="0">{ 7134 klog.V(3).Infof("No validated victims on Node <%s>: %v", n.Name, err) 7135 continue</span> 7136 } 7137 7138 <span class="cov8" title="1">resreq := task.InitResreq.Clone() 7139 reclaimed := api.EmptyResource() 7140 7141 // Reclaim victims for tasks. 
7142 for _, reclaimee := range victims </span><span class="cov8" title="1">{ 7143 klog.Errorf("Try to reclaim Task <%s/%s> for Tasks <%s/%s>", 7144 reclaimee.Namespace, reclaimee.Name, task.Namespace, task.Name) 7145 if err := ssn.Evict(reclaimee, "reclaim"); err != nil </span><span class="cov0" title="0">{ 7146 klog.Errorf("Failed to reclaim Task <%s/%s> for Tasks <%s/%s>: %v", 7147 reclaimee.Namespace, reclaimee.Name, task.Namespace, task.Name, err) 7148 continue</span> 7149 } 7150 <span class="cov8" title="1">reclaimed.Add(reclaimee.Resreq) 7151 // If reclaimed enough resources, break loop to avoid Sub panic. 7152 if resreq.LessEqual(reclaimed, api.Zero) </span><span class="cov8" title="1">{ 7153 break</span> 7154 } 7155 } 7156 7157 <span class="cov8" title="1">klog.V(3).Infof("Reclaimed <%v> for task <%s/%s> requested <%v>.", 7158 reclaimed, task.Namespace, task.Name, task.InitResreq) 7159 7160 if task.InitResreq.LessEqual(reclaimed, api.Zero) </span><span class="cov8" title="1">{ 7161 if err := ssn.Pipeline(task, n.Name); err != nil </span><span class="cov0" title="0">{ 7162 klog.Errorf("Failed to pipeline Task <%s/%s> on Node <%s>", 7163 task.Namespace, task.Name, n.Name) 7164 }</span> 7165 7166 // Ignore error of pipeline, will be corrected in next scheduling loop. 7167 <span class="cov8" title="1">assigned = true 7168 7169 break</span> 7170 } 7171 } 7172 7173 <span class="cov8" title="1">if assigned </span><span class="cov8" title="1">{ 7174 jobs.Push(job) 7175 }</span> 7176 <span class="cov8" title="1">queues.Push(queue)</span> 7177 } 7178 } 7179 7180 func (ra *Action) UnInitialize() {<span class="cov0" title="0"> 7181 }</span> 7182 </pre> 7183 7184 <pre class="file" id="file39" style="display: none">/* 7185 Copyright 2017 The Kubernetes Authors. 7186 7187 Licensed under the Apache License, Version 2.0 (the "License"); 7188 you may not use this file except in compliance with the License. 
7189 You may obtain a copy of the License at 7190 7191 http://www.apache.org/licenses/LICENSE-2.0 7192 7193 Unless required by applicable law or agreed to in writing, software 7194 distributed under the License is distributed on an "AS IS" BASIS, 7195 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 7196 See the License for the specific language governing permissions and 7197 limitations under the License. 7198 */ 7199 7200 package api 7201 7202 import ( 7203 "fmt" 7204 ) 7205 7206 // ClusterInfo is a snapshot of cluster by cache. 7207 type ClusterInfo struct { 7208 Jobs map[JobID]*JobInfo 7209 Nodes map[string]*NodeInfo 7210 Queues map[QueueID]*QueueInfo 7211 NamespaceInfo map[NamespaceName]*NamespaceInfo 7212 RevocableNodes map[string]*NodeInfo 7213 NodeList []string 7214 } 7215 7216 func (ci ClusterInfo) String() string <span class="cov0" title="0">{ 7217 str := "Cache:\n" 7218 7219 if len(ci.Nodes) != 0 </span><span class="cov0" title="0">{ 7220 str += "Nodes:\n" 7221 for _, n := range ci.Nodes </span><span class="cov0" title="0">{ 7222 str += fmt.Sprintf("\t %s: idle(%v) used(%v) allocatable(%v) pods(%d)\n", 7223 n.Name, n.Idle, n.Used, n.Allocatable, len(n.Tasks)) 7224 7225 i := 0 7226 for _, p := range n.Tasks </span><span class="cov0" title="0">{ 7227 str += fmt.Sprintf("\t\t %d: %v\n", i, p) 7228 i++ 7229 }</span> 7230 } 7231 } 7232 7233 <span class="cov0" title="0">if len(ci.Jobs) != 0 </span><span class="cov0" title="0">{ 7234 str += "Jobs:\n" 7235 for _, job := range ci.Jobs </span><span class="cov0" title="0">{ 7236 str += fmt.Sprintf("\t Job(%s) name(%s) minAvailable(%v)\n", 7237 job.UID, job.Name, job.MinAvailable) 7238 7239 i := 0 7240 for _, task := range job.Tasks </span><span class="cov0" title="0">{ 7241 str += fmt.Sprintf("\t\t %d: %v\n", i, task) 7242 i++ 7243 }</span> 7244 } 7245 } 7246 7247 <span class="cov0" title="0">if len(ci.NamespaceInfo) != 0 </span><span class="cov0" title="0">{ 7248 str += "Namespaces:\n" 7249 
for _, ns := range ci.NamespaceInfo </span><span class="cov0" title="0">{ 7250 str += fmt.Sprintf("\t Namespace(%s) Weight(%v)\n", 7251 ns.Name, ns.Weight) 7252 }</span> 7253 } 7254 7255 <span class="cov0" title="0">if len(ci.NodeList) != 0 </span><span class="cov0" title="0">{ 7256 str += fmt.Sprintf("NodeList: %v\n", ci.NodeList) 7257 }</span> 7258 7259 <span class="cov0" title="0">return str</span> 7260 } 7261 </pre> 7262 7263 <pre class="file" id="file40" style="display: none">/* 7264 Copyright 2020 The Volcano Authors. 7265 7266 Licensed under the Apache License, Version 2.0 (the "License"); 7267 you may not use this file except in compliance with the License. 7268 You may obtain a copy of the License at 7269 7270 http://www.apache.org/licenses/LICENSE-2.0 7271 7272 Unless required by applicable law or agreed to in writing, software 7273 distributed under the License is distributed on an "AS IS" BASIS, 7274 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 7275 See the License for the specific language governing permissions and 7276 limitations under the License. 7277 */ 7278 7279 package api 7280 7281 import ( 7282 v1 "k8s.io/api/core/v1" 7283 ) 7284 7285 // GPUDevice include gpu id, memory and the pods that are sharing it. 7286 type GPUDevice struct { 7287 // GPU ID 7288 ID int 7289 // The pods that are sharing this GPU 7290 PodMap map[string]*v1.Pod 7291 // memory per card 7292 Memory uint 7293 } 7294 7295 // NewGPUDevice creates a device 7296 func NewGPUDevice(id int, mem uint) *GPUDevice <span class="cov0" title="0">{ 7297 return &GPUDevice{ 7298 ID: id, 7299 Memory: mem, 7300 PodMap: map[string]*v1.Pod{}, 7301 } 7302 }</span> 7303 7304 // getUsedGPUMemory calculates the used memory of the device. 
7305 func (g *GPUDevice) getUsedGPUMemory() uint <span class="cov0" title="0">{ 7306 res := uint(0) 7307 for _, pod := range g.PodMap </span><span class="cov0" title="0">{ 7308 if pod.Status.Phase == v1.PodSucceeded || pod.Status.Phase == v1.PodFailed </span><span class="cov0" title="0">{ 7309 continue</span> 7310 } else<span class="cov0" title="0"> { 7311 gpuRequest := GetGPUResourceOfPod(pod) 7312 res += gpuRequest 7313 }</span> 7314 } 7315 <span class="cov0" title="0">return res</span> 7316 } 7317 7318 // GetGPUResourceOfPod returns the GPU resource required by the pod. 7319 func GetGPUResourceOfPod(pod *v1.Pod) uint <span class="cov8" title="1">{ 7320 var mem uint 7321 for _, container := range pod.Spec.Containers </span><span class="cov8" title="1">{ 7322 mem += getGPUResourceOfContainer(&container) 7323 }</span> 7324 <span class="cov8" title="1">return mem</span> 7325 } 7326 7327 // getGPUResourceOfPod returns the GPU resource required by the container. 7328 func getGPUResourceOfContainer(container *v1.Container) uint <span class="cov8" title="1">{ 7329 var mem uint 7330 if val, ok := container.Resources.Limits[VolcanoGPUResource]; ok </span><span class="cov0" title="0">{ 7331 mem = uint(val.Value()) 7332 }</span> 7333 <span class="cov8" title="1">return mem</span> 7334 } 7335 </pre> 7336 7337 <pre class="file" id="file41" style="display: none">/* 7338 Copyright 2017 The Kubernetes Authors. 7339 7340 Licensed under the Apache License, Version 2.0 (the "License"); 7341 you may not use this file except in compliance with the License. 7342 You may obtain a copy of the License at 7343 7344 http://www.apache.org/licenses/LICENSE-2.0 7345 7346 Unless required by applicable law or agreed to in writing, software 7347 distributed under the License is distributed on an "AS IS" BASIS, 7348 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 7349 See the License for the specific language governing permissions and 7350 limitations under the License. 
7351 */ 7352 7353 package api 7354 7355 import ( 7356 "fmt" 7357 7358 v1 "k8s.io/api/core/v1" 7359 clientcache "k8s.io/client-go/tools/cache" 7360 ) 7361 7362 // PodKey returns the string key of a pod. 7363 func PodKey(pod *v1.Pod) TaskID <span class="cov8" title="1">{ 7364 key, err := clientcache.MetaNamespaceKeyFunc(pod) 7365 if err != nil </span><span class="cov0" title="0">{ 7366 return TaskID(fmt.Sprintf("%v/%v", pod.Namespace, pod.Name)) 7367 }</span> 7368 <span class="cov8" title="1">return TaskID(key)</span> 7369 } 7370 7371 func getTaskStatus(pod *v1.Pod) TaskStatus <span class="cov8" title="1">{ 7372 switch pod.Status.Phase </span>{ 7373 case v1.PodRunning:<span class="cov8" title="1"> 7374 if pod.DeletionTimestamp != nil </span><span class="cov0" title="0">{ 7375 return Releasing 7376 }</span> 7377 7378 <span class="cov8" title="1">return Running</span> 7379 case v1.PodPending:<span class="cov8" title="1"> 7380 if pod.DeletionTimestamp != nil </span><span class="cov0" title="0">{ 7381 return Releasing 7382 }</span> 7383 7384 <span class="cov8" title="1">if len(pod.Spec.NodeName) == 0 </span><span class="cov8" title="1">{ 7385 return Pending 7386 }</span> 7387 <span class="cov8" title="1">return Bound</span> 7388 case v1.PodUnknown:<span class="cov8" title="1"> 7389 return Unknown</span> 7390 case v1.PodSucceeded:<span class="cov0" title="0"> 7391 return Succeeded</span> 7392 case v1.PodFailed:<span class="cov0" title="0"> 7393 return Failed</span> 7394 } 7395 7396 <span class="cov0" title="0">return Unknown</span> 7397 } 7398 7399 // AllocatedStatus checks whether the tasks has AllocatedStatus 7400 func AllocatedStatus(status TaskStatus) bool <span class="cov8" title="1">{ 7401 switch status </span>{ 7402 case Bound, Binding, Running, Allocated:<span class="cov8" title="1"> 7403 return true</span> 7404 default:<span class="cov8" title="1"> 7405 return false</span> 7406 } 7407 } 7408 7409 // MergeErrors is used to merge multiple errors into single error 
7410 func MergeErrors(errs ...error) error <span class="cov0" title="0">{ 7411 msg := "errors: " 7412 7413 foundErr := false 7414 i := 1 7415 7416 for _, e := range errs </span><span class="cov0" title="0">{ 7417 if e != nil </span><span class="cov0" title="0">{ 7418 if foundErr </span><span class="cov0" title="0">{ 7419 msg = fmt.Sprintf("%s, %d: ", msg, i) 7420 }</span> else<span class="cov0" title="0"> { 7421 msg = fmt.Sprintf("%s %d: ", msg, i) 7422 }</span> 7423 7424 <span class="cov0" title="0">msg = fmt.Sprintf("%s%v", msg, e) 7425 foundErr = true 7426 i++</span> 7427 } 7428 } 7429 7430 <span class="cov0" title="0">if foundErr </span><span class="cov0" title="0">{ 7431 return fmt.Errorf("%s", msg) 7432 }</span> 7433 7434 <span class="cov0" title="0">return nil</span> 7435 } 7436 7437 // JobTerminated checks whether job was terminated. 7438 func JobTerminated(job *JobInfo) bool <span class="cov0" title="0">{ 7439 return job.PodGroup == nil && len(job.Tasks) == 0 7440 }</span> 7441 </pre> 7442 7443 <pre class="file" id="file42" style="display: none">/* 7444 Copyright 2017 The Kubernetes Authors. 7445 7446 Licensed under the Apache License, Version 2.0 (the "License"); 7447 you may not use this file except in compliance with the License. 7448 You may obtain a copy of the License at 7449 7450 http://www.apache.org/licenses/LICENSE-2.0 7451 7452 Unless required by applicable law or agreed to in writing, software 7453 distributed under the License is distributed on an "AS IS" BASIS, 7454 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 7455 See the License for the specific language governing permissions and 7456 limitations under the License. 
7457 */ 7458 7459 package api 7460 7461 import ( 7462 "errors" 7463 "fmt" 7464 "sort" 7465 "strconv" 7466 "strings" 7467 "time" 7468 7469 v1 "k8s.io/api/core/v1" 7470 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 7471 "k8s.io/apimachinery/pkg/types" 7472 "k8s.io/klog" 7473 volumescheduling "k8s.io/kubernetes/pkg/controller/volume/scheduling" 7474 7475 batch "volcano.sh/apis/pkg/apis/batch/v1alpha1" 7476 "volcano.sh/apis/pkg/apis/scheduling" 7477 "volcano.sh/apis/pkg/apis/scheduling/v1beta1" 7478 ) 7479 7480 // DisruptionBudget define job min pod available and max pod unvailable value 7481 type DisruptionBudget struct { 7482 MinAvailable string 7483 MaxUnavilable string 7484 } 7485 7486 // NewDisruptionBudget create disruption budget for job 7487 func NewDisruptionBudget(minAvailable, maxUnavilable string) *DisruptionBudget <span class="cov8" title="1">{ 7488 disruptionBudget := &DisruptionBudget{ 7489 MinAvailable: minAvailable, 7490 MaxUnavilable: maxUnavilable, 7491 } 7492 return disruptionBudget 7493 }</span> 7494 7495 // Clone return a clone of DisruptionBudget 7496 func (db *DisruptionBudget) Clone() *DisruptionBudget <span class="cov0" title="0">{ 7497 return &DisruptionBudget{ 7498 MinAvailable: db.MinAvailable, 7499 MaxUnavilable: db.MaxUnavilable, 7500 } 7501 }</span> 7502 7503 // JobWaitingTime is maximum waiting time that a job could stay Pending in service level agreement 7504 // when job waits longer than waiting time, it should be inqueue at once, and cluster should reserve resources for it 7505 const JobWaitingTime = "sla-waiting-time" 7506 7507 // TaskID is UID type for Task 7508 type TaskID types.UID 7509 7510 // TransactionContext holds all the fields that needed by scheduling transaction 7511 type TransactionContext struct { 7512 NodeName string 7513 Status TaskStatus 7514 } 7515 7516 // Clone return a clone of TransactionContext 7517 func (ctx *TransactionContext) Clone() *TransactionContext <span class="cov8" title="1">{ 7518 if ctx == nil 
</span><span class="cov8" title="1">{ 7519 return nil 7520 }</span> 7521 <span class="cov0" title="0">clone := *ctx 7522 return &clone</span> 7523 } 7524 7525 // TaskInfo will have all infos about the task 7526 type TaskInfo struct { 7527 UID TaskID 7528 Job JobID 7529 7530 Name string 7531 Namespace string 7532 7533 // Resreq is the resource that used when task running. 7534 Resreq *Resource 7535 // InitResreq is the resource that used to launch a task. 7536 InitResreq *Resource 7537 7538 TransactionContext 7539 // LastTransaction holds the context of last scheduling transaction 7540 LastTransaction *TransactionContext 7541 7542 Priority int32 7543 VolumeReady bool 7544 Preemptable bool 7545 BestEffort bool 7546 7547 // RevocableZone support set volcano.sh/revocable-zone annotaion or label for pod/podgroup 7548 // we only support empty value or * value for this version and we will support specify revocable zone name for futrue release 7549 // empty value means workload can not use revocable node 7550 // * value means workload can use all the revocable node for during node active revocable time. 7551 RevocableZone string 7552 7553 TopologyPolicy string 7554 PodVolumes *volumescheduling.PodVolumes 7555 Pod *v1.Pod 7556 } 7557 7558 func getJobID(pod *v1.Pod) JobID <span class="cov8" title="1">{ 7559 if gn, found := pod.Annotations[v1beta1.KubeGroupNameAnnotationKey]; found && len(gn) != 0 </span><span class="cov8" title="1">{ 7560 // Make sure Pod and PodGroup belong to the same namespace. 
7561 jobID := fmt.Sprintf("%s/%s", pod.Namespace, gn) 7562 return JobID(jobID) 7563 }</span> 7564 7565 <span class="cov8" title="1">return ""</span> 7566 } 7567 7568 func getTaskID(pod *v1.Pod) TaskID <span class="cov0" title="0">{ 7569 if ts, found := pod.Annotations[batch.TaskSpecKey]; found && len(ts) != 0 </span><span class="cov0" title="0">{ 7570 return TaskID(ts) 7571 }</span> 7572 7573 <span class="cov0" title="0">return ""</span> 7574 } 7575 7576 // NewTaskInfo creates new taskInfo object for a Pod 7577 func NewTaskInfo(pod *v1.Pod) *TaskInfo <span class="cov8" title="1">{ 7578 initResReq := GetPodResourceRequest(pod) 7579 resReq := initResReq 7580 bestEffort := initResReq.IsEmpty() 7581 preemptable := GetPodPreemptable(pod) 7582 revocableZone := GetPodRevocableZone(pod) 7583 topologyPolicy := GetPodTopologyPolicy(pod) 7584 7585 jobID := getJobID(pod) 7586 7587 ti := &TaskInfo{ 7588 UID: TaskID(pod.UID), 7589 Job: jobID, 7590 Name: pod.Name, 7591 Namespace: pod.Namespace, 7592 Priority: 1, 7593 Pod: pod, 7594 Resreq: resReq, 7595 InitResreq: initResReq, 7596 Preemptable: preemptable, 7597 BestEffort: bestEffort, 7598 RevocableZone: revocableZone, 7599 TopologyPolicy: topologyPolicy, 7600 7601 TransactionContext: TransactionContext{ 7602 NodeName: pod.Spec.NodeName, 7603 Status: getTaskStatus(pod), 7604 }, 7605 } 7606 7607 if pod.Spec.Priority != nil </span><span class="cov0" title="0">{ 7608 ti.Priority = *pod.Spec.Priority 7609 }</span> 7610 7611 <span class="cov8" title="1">return ti</span> 7612 } 7613 7614 // GetTransactionContext get transaction context of a task 7615 func (ti *TaskInfo) GetTransactionContext() TransactionContext <span class="cov8" title="1">{ 7616 return ti.TransactionContext 7617 }</span> 7618 7619 // GenerateLastTxContext generate and set context of last transaction for a task 7620 func (ti *TaskInfo) GenerateLastTxContext() <span class="cov0" title="0">{ 7621 ctx := ti.GetTransactionContext() 7622 ti.LastTransaction = &ctx 7623 
}</span> 7624 7625 // ClearLastTxContext clear context of last transaction for a task 7626 func (ti *TaskInfo) ClearLastTxContext() <span class="cov0" title="0">{ 7627 ti.LastTransaction = nil 7628 }</span> 7629 7630 // Clone is used for cloning a task 7631 func (ti *TaskInfo) Clone() *TaskInfo <span class="cov8" title="1">{ 7632 return &TaskInfo{ 7633 UID: ti.UID, 7634 Job: ti.Job, 7635 Name: ti.Name, 7636 Namespace: ti.Namespace, 7637 Priority: ti.Priority, 7638 PodVolumes: ti.PodVolumes, 7639 Pod: ti.Pod, 7640 Resreq: ti.Resreq.Clone(), 7641 InitResreq: ti.InitResreq.Clone(), 7642 VolumeReady: ti.VolumeReady, 7643 Preemptable: ti.Preemptable, 7644 BestEffort: ti.BestEffort, 7645 RevocableZone: ti.RevocableZone, 7646 TopologyPolicy: ti.TopologyPolicy, 7647 7648 TransactionContext: TransactionContext{ 7649 NodeName: ti.NodeName, 7650 Status: ti.Status, 7651 }, 7652 LastTransaction: ti.LastTransaction.Clone(), 7653 } 7654 }</span> 7655 7656 // String returns the taskInfo details in a string 7657 func (ti TaskInfo) String() string <span class="cov0" title="0">{ 7658 return fmt.Sprintf("Task (%v:%v/%v): job %v, status %v, pri %v"+ 7659 "resreq %v, preemptable %v, revocableZone %v, TopologyPolicy %v", 7660 ti.UID, ti.Namespace, ti.Name, ti.Job, ti.Status, ti.Priority, 7661 ti.Resreq, ti.Preemptable, ti.RevocableZone, ti.TopologyPolicy) 7662 }</span> 7663 7664 // JobID is the type of JobInfo's ID. 7665 type JobID types.UID 7666 7667 type tasksMap map[TaskID]*TaskInfo 7668 7669 // NodeResourceMap stores resource in a node 7670 type NodeResourceMap map[string]*Resource 7671 7672 // JobInfo will have all info of a Job 7673 type JobInfo struct { 7674 UID JobID 7675 7676 Name string 7677 Namespace string 7678 7679 Queue QueueID 7680 7681 Priority int32 7682 7683 MinAvailable int32 7684 7685 WaitingTime *time.Duration 7686 7687 JobFitErrors string 7688 NodesFitErrors map[TaskID]*FitErrors 7689 7690 // All tasks of the Job. 
7691 TaskStatusIndex map[TaskStatus]tasksMap 7692 Tasks tasksMap 7693 TaskMinAvailable map[TaskID]int32 7694 TaskMinAvailableTotal int32 7695 7696 Allocated *Resource 7697 TotalRequest *Resource 7698 7699 CreationTimestamp metav1.Time 7700 PodGroup *PodGroup 7701 7702 ScheduleStartTimestamp metav1.Time 7703 7704 Preemptable bool 7705 7706 // RevocableZone support set volcano.sh/revocable-zone annotaion or label for pod/podgroup 7707 // we only support empty value or * value for this version and we will support specify revocable zone name for futrue release 7708 // empty value means workload can not use revocable node 7709 // * value means workload can use all the revocable node for during node active revocable time. 7710 RevocableZone string 7711 Budget *DisruptionBudget 7712 } 7713 7714 // NewJobInfo creates a new jobInfo for set of tasks 7715 func NewJobInfo(uid JobID, tasks ...*TaskInfo) *JobInfo <span class="cov8" title="1">{ 7716 job := &JobInfo{ 7717 UID: uid, 7718 MinAvailable: 0, 7719 NodesFitErrors: make(map[TaskID]*FitErrors), 7720 Allocated: EmptyResource(), 7721 TotalRequest: EmptyResource(), 7722 TaskStatusIndex: map[TaskStatus]tasksMap{}, 7723 Tasks: tasksMap{}, 7724 TaskMinAvailable: map[TaskID]int32{}, 7725 } 7726 7727 for _, task := range tasks </span><span class="cov0" title="0">{ 7728 job.AddTaskInfo(task) 7729 }</span> 7730 7731 <span class="cov8" title="1">return job</span> 7732 } 7733 7734 // UnsetPodGroup removes podGroup details from a job 7735 func (ji *JobInfo) UnsetPodGroup() <span class="cov0" title="0">{ 7736 ji.PodGroup = nil 7737 }</span> 7738 7739 // SetPodGroup sets podGroup details to a job 7740 func (ji *JobInfo) SetPodGroup(pg *PodGroup) <span class="cov8" title="1">{ 7741 ji.Name = pg.Name 7742 ji.Namespace = pg.Namespace 7743 ji.MinAvailable = pg.Spec.MinMember 7744 ji.Queue = QueueID(pg.Spec.Queue) 7745 ji.CreationTimestamp = pg.GetCreationTimestamp() 7746 7747 var err error 7748 ji.WaitingTime, err = ji.extractWaitingTime(pg) 
7749 if err != nil </span><span class="cov0" title="0">{ 7750 klog.Warningf("Error occurs in parsing waiting time for job <%s/%s>, err: %s.", 7751 pg.Namespace, pg.Name, err.Error()) 7752 ji.WaitingTime = nil 7753 }</span> 7754 7755 <span class="cov8" title="1">ji.Preemptable = ji.extractPreemptable(pg) 7756 ji.RevocableZone = ji.extractRevocableZone(pg) 7757 ji.Budget = ji.extractBudget(pg) 7758 7759 taskMinAvailableTotal := int32(0) 7760 for task, member := range pg.Spec.MinTaskMember </span><span class="cov0" title="0">{ 7761 ji.TaskMinAvailable[TaskID(task)] = member 7762 taskMinAvailableTotal += member 7763 }</span> 7764 <span class="cov8" title="1">ji.TaskMinAvailableTotal = taskMinAvailableTotal 7765 7766 ji.PodGroup = pg</span> 7767 } 7768 7769 // extractWaitingTime reads sla waiting time for job from podgroup annotations 7770 // TODO: should also read from given field in volcano job spec 7771 func (ji *JobInfo) extractWaitingTime(pg *PodGroup) (*time.Duration, error) <span class="cov8" title="1">{ 7772 if _, exist := pg.Annotations[JobWaitingTime]; !exist </span><span class="cov8" title="1">{ 7773 return nil, nil 7774 }</span> 7775 7776 <span class="cov0" title="0">jobWaitingTime, err := time.ParseDuration(pg.Annotations[JobWaitingTime]) 7777 if err != nil </span><span class="cov0" title="0">{ 7778 return nil, err 7779 }</span> 7780 7781 <span class="cov0" title="0">if jobWaitingTime <= 0 </span><span class="cov0" title="0">{ 7782 return nil, errors.New("invalid sla waiting time") 7783 }</span> 7784 7785 <span class="cov0" title="0">return &jobWaitingTime, nil</span> 7786 } 7787 7788 // extractPreemptable return volcano.sh/preemptable value for job 7789 func (ji *JobInfo) extractPreemptable(pg *PodGroup) bool <span class="cov8" title="1">{ 7790 // check annotaion first 7791 if len(pg.Annotations) > 0 </span><span class="cov0" title="0">{ 7792 if value, found := pg.Annotations[v1beta1.PodPreemptable]; found </span><span class="cov0" title="0">{ 7793 b, err 
:= strconv.ParseBool(value) 7794 if err != nil </span><span class="cov0" title="0">{ 7795 klog.Warningf("invalid %s=%s", v1beta1.PodPreemptable, value) 7796 return false 7797 }</span> 7798 <span class="cov0" title="0">return b</span> 7799 } 7800 } 7801 7802 // it annotation does not exit, check label 7803 <span class="cov8" title="1">if len(pg.Labels) > 0 </span><span class="cov0" title="0">{ 7804 if value, found := pg.Labels[v1beta1.PodPreemptable]; found </span><span class="cov0" title="0">{ 7805 b, err := strconv.ParseBool(value) 7806 if err != nil </span><span class="cov0" title="0">{ 7807 klog.Warningf("invalid %s=%s", v1beta1.PodPreemptable, value) 7808 return false 7809 }</span> 7810 <span class="cov0" title="0">return b</span> 7811 } 7812 } 7813 7814 <span class="cov8" title="1">return false</span> 7815 } 7816 7817 // extractRevocableZone return volcano.sh/revocable-zone value for pod/podgroup 7818 func (ji *JobInfo) extractRevocableZone(pg *PodGroup) string <span class="cov8" title="1">{ 7819 // check annotaion first 7820 if len(pg.Annotations) > 0 </span><span class="cov0" title="0">{ 7821 if value, found := pg.Annotations[v1beta1.RevocableZone]; found </span><span class="cov0" title="0">{ 7822 if value != "*" </span><span class="cov0" title="0">{ 7823 return "" 7824 }</span> 7825 <span class="cov0" title="0">return value</span> 7826 } 7827 7828 <span class="cov0" title="0">if value, found := pg.Annotations[v1beta1.PodPreemptable]; found </span><span class="cov0" title="0">{ 7829 if b, err := strconv.ParseBool(value); err == nil && b </span><span class="cov0" title="0">{ 7830 return "*" 7831 }</span> 7832 } 7833 } 7834 7835 <span class="cov8" title="1">return ""</span> 7836 } 7837 7838 // extractBudget return budget value for job 7839 func (ji *JobInfo) extractBudget(pg *PodGroup) *DisruptionBudget <span class="cov8" title="1">{ 7840 if len(pg.Annotations) > 0 </span><span class="cov0" title="0">{ 7841 if value, found := 
pg.Annotations[v1beta1.JDBMinAvailable]; found </span><span class="cov0" title="0">{ 7842 return NewDisruptionBudget(value, "") 7843 }</span> else<span class="cov0" title="0"> if value, found := pg.Annotations[v1beta1.JDBMaxUnavailable]; found </span><span class="cov0" title="0">{ 7844 return NewDisruptionBudget("", value) 7845 }</span> 7846 } 7847 7848 <span class="cov8" title="1">return NewDisruptionBudget("", "")</span> 7849 } 7850 7851 // GetMinResources return the min resources of podgroup. 7852 func (ji *JobInfo) GetMinResources() *Resource <span class="cov0" title="0">{ 7853 if ji.PodGroup.Spec.MinResources == nil </span><span class="cov0" title="0">{ 7854 return EmptyResource() 7855 }</span> 7856 7857 <span class="cov0" title="0">return NewResource(*ji.PodGroup.Spec.MinResources)</span> 7858 } 7859 7860 func (ji *JobInfo) addTaskIndex(ti *TaskInfo) <span class="cov8" title="1">{ 7861 if _, found := ji.TaskStatusIndex[ti.Status]; !found </span><span class="cov8" title="1">{ 7862 ji.TaskStatusIndex[ti.Status] = tasksMap{} 7863 }</span> 7864 <span class="cov8" title="1">ji.TaskStatusIndex[ti.Status][ti.UID] = ti</span> 7865 } 7866 7867 // AddTaskInfo is used to add a task to a job 7868 func (ji *JobInfo) AddTaskInfo(ti *TaskInfo) <span class="cov8" title="1">{ 7869 ji.Tasks[ti.UID] = ti 7870 ji.addTaskIndex(ti) 7871 ji.TotalRequest.Add(ti.Resreq) 7872 if AllocatedStatus(ti.Status) </span><span class="cov8" title="1">{ 7873 ji.Allocated.Add(ti.Resreq) 7874 }</span> 7875 } 7876 7877 // UpdateTaskStatus is used to update task's status in a job. 7878 // If error occurs both task and job are guaranteed to be in the original state. 7879 func (ji *JobInfo) UpdateTaskStatus(task *TaskInfo, status TaskStatus) error <span class="cov0" title="0">{ 7880 if err := validateStatusUpdate(task.Status, status); err != nil </span><span class="cov0" title="0">{ 7881 return err 7882 }</span> 7883 7884 // First remove the task (if exist) from the task list. 
7885 <span class="cov0" title="0">if _, found := ji.Tasks[task.UID]; found </span><span class="cov0" title="0">{ 7886 if err := ji.DeleteTaskInfo(task); err != nil </span><span class="cov0" title="0">{ 7887 return err 7888 }</span> 7889 } 7890 7891 // Update task's status to the target status once task addition is guaranteed to succeed. 7892 <span class="cov0" title="0">task.Status = status 7893 ji.AddTaskInfo(task) 7894 7895 return nil</span> 7896 } 7897 7898 func (ji *JobInfo) deleteTaskIndex(ti *TaskInfo) <span class="cov8" title="1">{ 7899 if tasks, found := ji.TaskStatusIndex[ti.Status]; found </span><span class="cov8" title="1">{ 7900 delete(tasks, ti.UID) 7901 7902 if len(tasks) == 0 </span><span class="cov8" title="1">{ 7903 delete(ji.TaskStatusIndex, ti.Status) 7904 }</span> 7905 } 7906 } 7907 7908 // DeleteTaskInfo is used to delete a task from a job 7909 func (ji *JobInfo) DeleteTaskInfo(ti *TaskInfo) error <span class="cov8" title="1">{ 7910 if task, found := ji.Tasks[ti.UID]; found </span><span class="cov8" title="1">{ 7911 ji.TotalRequest.Sub(task.Resreq) 7912 if AllocatedStatus(task.Status) </span><span class="cov8" title="1">{ 7913 ji.Allocated.Sub(task.Resreq) 7914 }</span> 7915 <span class="cov8" title="1">delete(ji.Tasks, task.UID) 7916 ji.deleteTaskIndex(task) 7917 return nil</span> 7918 } 7919 7920 <span class="cov0" title="0">return fmt.Errorf("failed to find task <%v/%v> in job <%v/%v>", 7921 ti.Namespace, ti.Name, ji.Namespace, ji.Name)</span> 7922 } 7923 7924 // Clone is used to clone a jobInfo object 7925 func (ji *JobInfo) Clone() *JobInfo <span class="cov0" title="0">{ 7926 info := &JobInfo{ 7927 UID: ji.UID, 7928 Name: ji.Name, 7929 Namespace: ji.Namespace, 7930 Queue: ji.Queue, 7931 Priority: ji.Priority, 7932 7933 MinAvailable: ji.MinAvailable, 7934 WaitingTime: ji.WaitingTime, 7935 JobFitErrors: ji.JobFitErrors, 7936 NodesFitErrors: make(map[TaskID]*FitErrors), 7937 Allocated: EmptyResource(), 7938 TotalRequest: EmptyResource(), 7939 
7940 PodGroup: ji.PodGroup, 7941 7942 TaskStatusIndex: map[TaskStatus]tasksMap{}, 7943 TaskMinAvailable: ji.TaskMinAvailable, 7944 TaskMinAvailableTotal: ji.TaskMinAvailableTotal, 7945 Tasks: tasksMap{}, 7946 Preemptable: ji.Preemptable, 7947 RevocableZone: ji.RevocableZone, 7948 Budget: ji.Budget.Clone(), 7949 } 7950 7951 ji.CreationTimestamp.DeepCopyInto(&info.CreationTimestamp) 7952 7953 for _, task := range ji.Tasks </span><span class="cov0" title="0">{ 7954 info.AddTaskInfo(task.Clone()) 7955 }</span> 7956 7957 <span class="cov0" title="0">return info</span> 7958 } 7959 7960 // String returns a jobInfo object in string format 7961 func (ji JobInfo) String() string <span class="cov0" title="0">{ 7962 res := "" 7963 7964 i := 0 7965 for _, task := range ji.Tasks </span><span class="cov0" title="0">{ 7966 res += fmt.Sprintf("\n\t %d: %v", i, task) 7967 i++ 7968 }</span> 7969 7970 <span class="cov0" title="0">return fmt.Sprintf("Job (%v): namespace %v (%v), name %v, minAvailable %d, podGroup %+v, preemptable %+v, revocableZone %+v, minAvailable %+v, maxAvailable %+v", 7971 ji.UID, ji.Namespace, ji.Queue, ji.Name, ji.MinAvailable, ji.PodGroup, ji.Preemptable, ji.RevocableZone, ji.Budget.MinAvailable, ji.Budget.MaxUnavilable) + res</span> 7972 } 7973 7974 // FitError returns detailed information on why a job's task failed to fit on 7975 // each available node 7976 func (ji *JobInfo) FitError() string <span class="cov8" title="1">{ 7977 sortReasonsHistogram := func(reasons map[string]int) []string </span><span class="cov8" title="1">{ 7978 reasonStrings := []string{} 7979 for k, v := range reasons </span><span class="cov8" title="1">{ 7980 reasonStrings = append(reasonStrings, fmt.Sprintf("%v %v", v, k)) 7981 }</span> 7982 <span class="cov8" title="1">sort.Strings(reasonStrings) 7983 return reasonStrings</span> 7984 } 7985 7986 // Stat histogram for all tasks of the job 7987 <span class="cov8" title="1">reasons := make(map[string]int) 7988 for status, taskMap := 
range ji.TaskStatusIndex </span><span class="cov8" title="1">{ 7989 reasons[status.String()] += len(taskMap) 7990 }</span> 7991 <span class="cov8" title="1">reasons["minAvailable"] = int(ji.MinAvailable) 7992 reasonMsg := fmt.Sprintf("%v, %v", scheduling.PodGroupNotReady, strings.Join(sortReasonsHistogram(reasons), ", ")) 7993 7994 // Stat histogram for pending tasks only 7995 reasons = make(map[string]int) 7996 for uid := range ji.TaskStatusIndex[Pending] </span><span class="cov8" title="1">{ 7997 reason, _ := ji.TaskSchedulingReason(uid) 7998 reasons[reason]++ 7999 }</span> 8000 <span class="cov8" title="1">if len(reasons) > 0 </span><span class="cov8" title="1">{ 8001 reasonMsg += "; " + fmt.Sprintf("%s: %s", Pending.String(), strings.Join(sortReasonsHistogram(reasons), ", ")) 8002 }</span> 8003 <span class="cov8" title="1">return reasonMsg</span> 8004 } 8005 8006 // TaskSchedulingReason get detailed reason and message of the given task 8007 // It returns detailed reason and message for tasks based on last scheduling transaction. 
8008 func (ji *JobInfo) TaskSchedulingReason(tid TaskID) (reason string, msg string) <span class="cov8" title="1">{ 8009 taskInfo, exists := ji.Tasks[tid] 8010 if !exists </span><span class="cov0" title="0">{ 8011 return "", "" 8012 }</span> 8013 8014 // Get detailed scheduling reason based on LastTransaction 8015 <span class="cov8" title="1">ctx := taskInfo.GetTransactionContext() 8016 if taskInfo.LastTransaction != nil </span><span class="cov8" title="1">{ 8017 ctx = *taskInfo.LastTransaction 8018 }</span> 8019 8020 <span class="cov8" title="1">msg = ji.JobFitErrors 8021 switch status := ctx.Status; status </span>{ 8022 case Allocated, Pipelined:<span class="cov8" title="1"> 8023 // Pod is schedulable 8024 msg = fmt.Sprintf("Pod %s/%s can possibly be assigned to %s", taskInfo.Namespace, taskInfo.Name, ctx.NodeName) 8025 if status == Pipelined </span><span class="cov0" title="0">{ 8026 msg += " once resource is released" 8027 }</span> 8028 <span class="cov8" title="1">return PodReasonSchedulable, msg</span> 8029 case Pending:<span class="cov8" title="1"> 8030 if fe := ji.NodesFitErrors[tid]; fe != nil </span><span class="cov8" title="1">{ 8031 // Pod is not schedulable 8032 return PodReasonUnschedulable, fe.Error() 8033 }</span> 8034 // Pod is not scheduled yet 8035 <span class="cov8" title="1">return PodReasonUndetermined, msg</span> 8036 default:<span class="cov0" title="0"> 8037 return status.String(), msg</span> 8038 } 8039 } 8040 8041 // ReadyTaskNum returns the number of tasks that are ready or that is best-effort. 
8042 func (ji *JobInfo) ReadyTaskNum() int32 <span class="cov0" title="0">{ 8043 occupied := 0 8044 occupied += len(ji.TaskStatusIndex[Bound]) 8045 occupied += len(ji.TaskStatusIndex[Binding]) 8046 occupied += len(ji.TaskStatusIndex[Running]) 8047 occupied += len(ji.TaskStatusIndex[Allocated]) 8048 occupied += len(ji.TaskStatusIndex[Succeeded]) 8049 8050 if tasks, found := ji.TaskStatusIndex[Pending]; found </span><span class="cov0" title="0">{ 8051 for _, task := range tasks </span><span class="cov0" title="0">{ 8052 if task.BestEffort </span><span class="cov0" title="0">{ 8053 occupied++ 8054 }</span> 8055 } 8056 } 8057 8058 <span class="cov0" title="0">return int32(occupied)</span> 8059 } 8060 8061 // WaitingTaskNum returns the number of tasks that are pipelined. 8062 func (ji *JobInfo) WaitingTaskNum() int32 <span class="cov0" title="0">{ 8063 return int32(len(ji.TaskStatusIndex[Pipelined])) 8064 }</span> 8065 8066 // CheckTaskMinAvailable returns whether each task of job is valid. 8067 func (ji *JobInfo) CheckTaskMinAvailable() bool <span class="cov0" title="0">{ 8068 // if job minAvailable is less than sumof(task minAvailable), skip this check 8069 if ji.MinAvailable < ji.TaskMinAvailableTotal </span><span class="cov0" title="0">{ 8070 return true 8071 }</span> 8072 8073 <span class="cov0" title="0">actual := map[TaskID]int32{} 8074 for status, tasks := range ji.TaskStatusIndex </span><span class="cov0" title="0">{ 8075 if AllocatedStatus(status) || 8076 status == Succeeded || 8077 status == Pipelined || 8078 status == Pending </span><span class="cov0" title="0">{ 8079 for _, task := range tasks </span><span class="cov0" title="0">{ 8080 actual[getTaskID(task.Pod)]++ 8081 }</span> 8082 } 8083 } 8084 8085 <span class="cov0" title="0">klog.V(4).Infof("job %s/%s actual: %+v, ji.TaskMinAvailable: %+v", ji.Name, ji.Namespace, actual, ji.TaskMinAvailable) 8086 for task, minAvailable := range ji.TaskMinAvailable </span><span class="cov0" title="0">{ 8087 if act, ok 
:= actual[task]; !ok || act < minAvailable </span><span class="cov0" title="0">{ 8088 return false 8089 }</span> 8090 } 8091 8092 <span class="cov0" title="0">return true</span> 8093 } 8094 8095 // ValidTaskNum returns the number of tasks that are valid. 8096 func (ji *JobInfo) ValidTaskNum() int32 <span class="cov0" title="0">{ 8097 occupied := 0 8098 for status, tasks := range ji.TaskStatusIndex </span><span class="cov0" title="0">{ 8099 if AllocatedStatus(status) || 8100 status == Succeeded || 8101 status == Pipelined || 8102 status == Pending </span><span class="cov0" title="0">{ 8103 occupied += len(tasks) 8104 }</span> 8105 } 8106 8107 <span class="cov0" title="0">return int32(occupied)</span> 8108 } 8109 8110 // Ready returns whether job is ready for run 8111 func (ji *JobInfo) Ready() bool <span class="cov0" title="0">{ 8112 occupied := ji.ReadyTaskNum() 8113 8114 return occupied >= ji.MinAvailable 8115 }</span> 8116 8117 // IsPending returns whether job is in pending status 8118 func (ji *JobInfo) IsPending() bool <span class="cov0" title="0">{ 8119 if ji.PodGroup == nil || ji.PodGroup.Status.Phase == scheduling.PodGroupPending || ji.PodGroup.Status.Phase == "" </span><span class="cov0" title="0">{ 8120 return true 8121 }</span> 8122 8123 <span class="cov0" title="0">return false</span> 8124 } 8125 </pre> 8126 8127 <pre class="file" id="file43" style="display: none">/* 8128 Copyright 2018 The Volcano Authors. 8129 8130 Licensed under the Apache License, Version 2.0 (the "License"); 8131 you may not use this file except in compliance with the License. 8132 You may obtain a copy of the License at 8133 8134 http://www.apache.org/licenses/LICENSE-2.0 8135 8136 Unless required by applicable law or agreed to in writing, software 8137 distributed under the License is distributed on an "AS IS" BASIS, 8138 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
8139 See the License for the specific language governing permissions and 8140 limitations under the License. 8141 */ 8142 8143 package api 8144 8145 import ( 8146 "fmt" 8147 8148 v1 "k8s.io/api/core/v1" 8149 "k8s.io/client-go/tools/cache" 8150 "k8s.io/klog" 8151 ) 8152 8153 // NamespaceName is name of namespace 8154 type NamespaceName string 8155 8156 const ( 8157 // NamespaceWeightKey is the key in ResourceQuota.spec.hard indicating the weight of this namespace 8158 NamespaceWeightKey = "volcano.sh/namespace.weight" 8159 // DefaultNamespaceWeight is the default weight of namespace 8160 DefaultNamespaceWeight = 1 8161 ) 8162 8163 // NamespaceInfo records information of namespace 8164 type NamespaceInfo struct { 8165 // Name is the name of this namespace 8166 Name NamespaceName 8167 // Weight is the highest weight among many ResourceQuota. 8168 Weight int64 8169 } 8170 8171 // GetWeight returns weight of a namespace, any invalid case would get default value 8172 func (n *NamespaceInfo) GetWeight() int64 <span class="cov0" title="0">{ 8173 if n == nil || n.Weight == 0 </span><span class="cov0" title="0">{ 8174 return DefaultNamespaceWeight 8175 }</span> 8176 <span class="cov0" title="0">return n.Weight</span> 8177 } 8178 8179 type quotaItem struct { 8180 name string 8181 weight int64 8182 } 8183 8184 func quotaItemKeyFunc(obj interface{}) (string, error) <span class="cov8" title="1">{ 8185 item, ok := obj.(*quotaItem) 8186 if !ok </span><span class="cov0" title="0">{ 8187 return "", fmt.Errorf("obj with type %T could not parse", obj) 8188 }</span> 8189 <span class="cov8" title="1">return item.name, nil</span> 8190 } 8191 8192 // for big root heap 8193 func quotaItemLessFunc(a interface{}, b interface{}) bool <span class="cov8" title="1">{ 8194 A := a.(*quotaItem) 8195 B := b.(*quotaItem) 8196 return A.weight > B.weight 8197 }</span> 8198 8199 // NamespaceCollection will record all details about namespace 8200 type NamespaceCollection struct { 8201 Name string 8202 
8203 quotaWeight *cache.Heap 8204 } 8205 8206 // NewNamespaceCollection creates new NamespaceCollection object to record all information about a namespace 8207 func NewNamespaceCollection(name string) *NamespaceCollection <span class="cov8" title="1">{ 8208 n := &NamespaceCollection{ 8209 Name: name, 8210 quotaWeight: cache.NewHeap(quotaItemKeyFunc, quotaItemLessFunc), 8211 } 8212 // add at least one item into quotaWeight. 8213 // Because cache.Heap.Pop would be blocked until queue is not empty 8214 n.updateWeight(&quotaItem{ 8215 name: NamespaceWeightKey, 8216 weight: DefaultNamespaceWeight, 8217 }) 8218 return n 8219 }</span> 8220 8221 func (n *NamespaceCollection) deleteWeight(q *quotaItem) <span class="cov8" title="1">{ 8222 n.quotaWeight.Delete(q) 8223 }</span> 8224 8225 func (n *NamespaceCollection) updateWeight(q *quotaItem) <span class="cov8" title="1">{ 8226 n.quotaWeight.Update(q) 8227 }</span> 8228 8229 func itemFromQuota(quota *v1.ResourceQuota) *quotaItem <span class="cov8" title="1">{ 8230 var weight int64 = DefaultNamespaceWeight 8231 8232 quotaWeight, ok := quota.Spec.Hard[NamespaceWeightKey] 8233 if ok </span><span class="cov8" title="1">{ 8234 weight = quotaWeight.Value() 8235 }</span> 8236 8237 <span class="cov8" title="1">item := &quotaItem{ 8238 name: quota.Name, 8239 weight: weight, 8240 } 8241 return item</span> 8242 } 8243 8244 // Update modify the registered information according quota object 8245 func (n *NamespaceCollection) Update(quota *v1.ResourceQuota) <span class="cov8" title="1">{ 8246 n.updateWeight(itemFromQuota(quota)) 8247 }</span> 8248 8249 // Delete remove the registered information according quota object 8250 func (n *NamespaceCollection) Delete(quota *v1.ResourceQuota) <span class="cov8" title="1">{ 8251 n.deleteWeight(itemFromQuota(quota)) 8252 }</span> 8253 8254 // Snapshot will clone a NamespaceInfo without Heap according NamespaceCollection 8255 func (n *NamespaceCollection) Snapshot() *NamespaceInfo <span class="cov8" 
title="1">{ 8256 var weight int64 = DefaultNamespaceWeight 8257 8258 obj, err := n.quotaWeight.Pop() 8259 if err != nil </span><span class="cov0" title="0">{ 8260 klog.Warningf("namespace %s, quota weight meets error %v when pop", n.Name, err) 8261 }</span> else<span class="cov8" title="1"> { 8262 item := obj.(*quotaItem) 8263 weight = item.weight 8264 n.quotaWeight.Add(item) 8265 }</span> 8266 8267 <span class="cov8" title="1">return &NamespaceInfo{ 8268 Name: NamespaceName(n.Name), 8269 Weight: weight, 8270 }</span> 8271 } 8272 </pre> 8273 8274 <pre class="file" id="file44" style="display: none">/* 8275 Copyright 2021 The Volcano Authors. 8276 8277 Licensed under the Apache License, Version 2.0 (the "License"); 8278 you may not use this file except in compliance with the License. 8279 You may obtain a copy of the License at 8280 8281 http://www.apache.org/licenses/LICENSE-2.0 8282 8283 Unless required by applicable law or agreed to in writing, software 8284 distributed under the License is distributed on an "AS IS" BASIS, 8285 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 8286 See the License for the specific language governing permissions and 8287 limitations under the License. 8288 */ 8289 8290 package api 8291 8292 import ( 8293 "fmt" 8294 "strconv" 8295 8296 v1 "k8s.io/api/core/v1" 8297 "k8s.io/klog" 8298 8299 "volcano.sh/apis/pkg/apis/scheduling/v1beta1" 8300 ) 8301 8302 // NodeInfo is node level aggregated information. 
8303 type NodeInfo struct { 8304 Name string 8305 Node *v1.Node 8306 8307 // The state of node 8308 State NodeState 8309 8310 // The releasing resource on that node 8311 Releasing *Resource 8312 // The pipelined resource on that node 8313 Pipelined *Resource 8314 // The idle resource on that node 8315 Idle *Resource 8316 // The used resource on that node, including running and terminating 8317 // pods 8318 Used *Resource 8319 8320 Allocatable *Resource 8321 Capability *Resource 8322 8323 Tasks map[TaskID]*TaskInfo 8324 NumaInfo *NumatopoInfo 8325 NumaChgFlag NumaChgFlag 8326 NumaSchedulerInfo *NumatopoInfo 8327 RevocableZone string 8328 8329 // Used to store custom information 8330 Others map[string]interface{} 8331 GPUDevices map[int]*GPUDevice 8332 8333 // enable node resource oversubscription 8334 OversubscriptionNode bool 8335 // OfflineJobEvicting true means node resource usage too high then dispatched pod can not use oversubscription resource 8336 OfflineJobEvicting bool 8337 8338 // Resource Oversubscription feature: the Oversubscription Resource reported in annotation 8339 OversubscriptionResource *Resource 8340 } 8341 8342 // FutureIdle returns resources that will be idle in the future: 8343 // 8344 // That is current idle resources plus released resources minus pipelined resources. 8345 func (ni *NodeInfo) FutureIdle() *Resource <span class="cov0" title="0">{ 8346 return ni.Idle.Clone().Add(ni.Releasing).Sub(ni.Pipelined) 8347 }</span> 8348 8349 // GetNodeAllocatable return node Allocatable without OversubscriptionResource resource 8350 func (ni *NodeInfo) GetNodeAllocatable() *Resource <span class="cov0" title="0">{ 8351 return NewResource(ni.Node.Status.Allocatable) 8352 }</span> 8353 8354 // NodeState defines the current state of node. 
8355 type NodeState struct { 8356 Phase NodePhase 8357 Reason string 8358 } 8359 8360 // NewNodeInfo is used to create new nodeInfo object 8361 func NewNodeInfo(node *v1.Node) *NodeInfo <span class="cov8" title="1">{ 8362 nodeInfo := &NodeInfo{ 8363 Releasing: EmptyResource(), 8364 Pipelined: EmptyResource(), 8365 Idle: EmptyResource(), 8366 Used: EmptyResource(), 8367 8368 Allocatable: EmptyResource(), 8369 Capability: EmptyResource(), 8370 8371 OversubscriptionResource: EmptyResource(), 8372 Tasks: make(map[TaskID]*TaskInfo), 8373 8374 GPUDevices: make(map[int]*GPUDevice), 8375 } 8376 8377 nodeInfo.setOversubscription(node) 8378 8379 if node != nil </span><span class="cov8" title="1">{ 8380 nodeInfo.Name = node.Name 8381 nodeInfo.Node = node 8382 nodeInfo.Idle = NewResource(node.Status.Allocatable).Add(nodeInfo.OversubscriptionResource) 8383 nodeInfo.Allocatable = NewResource(node.Status.Allocatable).Add(nodeInfo.OversubscriptionResource) 8384 nodeInfo.Capability = NewResource(node.Status.Capacity).Add(nodeInfo.OversubscriptionResource) 8385 }</span> 8386 <span class="cov8" title="1">nodeInfo.setNodeGPUInfo(node) 8387 nodeInfo.setNodeState(node) 8388 nodeInfo.setRevocableZone(node) 8389 8390 return nodeInfo</span> 8391 } 8392 8393 // RefreshNumaSchedulerInfoByCrd used to update scheduler numa information based the CRD numatopo 8394 func (ni *NodeInfo) RefreshNumaSchedulerInfoByCrd() <span class="cov0" title="0">{ 8395 if ni.NumaInfo == nil </span><span class="cov0" title="0">{ 8396 ni.NumaSchedulerInfo = nil 8397 return 8398 }</span> 8399 8400 <span class="cov0" title="0">tmp := ni.NumaInfo.DeepCopy() 8401 if ni.NumaChgFlag == NumaInfoMoreFlag </span><span class="cov0" title="0">{ 8402 ni.NumaSchedulerInfo = tmp 8403 }</span> else<span class="cov0" title="0"> if ni.NumaChgFlag == NumaInfoLessFlag </span><span class="cov0" title="0">{ 8404 numaResMap := ni.NumaSchedulerInfo.NumaResMap 8405 for resName, resInfo := range tmp.NumaResMap </span><span class="cov0" 
title="0">{ 8406 klog.V(5).Infof("resource %s Allocatable : current %v new %v on node %s", 8407 resName, numaResMap[resName], resInfo, ni.Name) 8408 if numaResMap[resName].Allocatable.Size() >= resInfo.Allocatable.Size() </span><span class="cov0" title="0">{ 8409 numaResMap[resName].Allocatable = resInfo.Allocatable.Clone() 8410 numaResMap[resName].Capacity = resInfo.Capacity 8411 }</span> 8412 } 8413 } 8414 8415 <span class="cov0" title="0">ni.NumaChgFlag = NumaInfoResetFlag</span> 8416 } 8417 8418 // Clone used to clone nodeInfo Object 8419 func (ni *NodeInfo) Clone() *NodeInfo <span class="cov0" title="0">{ 8420 res := NewNodeInfo(ni.Node) 8421 8422 for _, p := range ni.Tasks </span><span class="cov0" title="0">{ 8423 res.AddTask(p) 8424 }</span> 8425 8426 <span class="cov0" title="0">if ni.NumaSchedulerInfo != nil </span><span class="cov0" title="0">{ 8427 res.NumaSchedulerInfo = ni.NumaSchedulerInfo.DeepCopy() 8428 klog.V(5).Infof("node[%s]", ni.Name) 8429 for resName, resInfo := range res.NumaSchedulerInfo.NumaResMap </span><span class="cov0" title="0">{ 8430 klog.V(5).Infof("current resource %s : %v", resName, resInfo) 8431 }</span> 8432 8433 <span class="cov0" title="0">klog.V(5).Infof("current Policies : %v", res.NumaSchedulerInfo.Policies)</span> 8434 } 8435 8436 <span class="cov0" title="0">res.Others = ni.Others 8437 return res</span> 8438 } 8439 8440 // Ready returns whether node is ready for scheduling 8441 func (ni *NodeInfo) Ready() bool <span class="cov0" title="0">{ 8442 return ni.State.Phase == Ready 8443 }</span> 8444 8445 func (ni *NodeInfo) setRevocableZone(node *v1.Node) <span class="cov8" title="1">{ 8446 if node == nil </span><span class="cov0" title="0">{ 8447 klog.Warningf("the argument node is null.") 8448 return 8449 }</span> 8450 8451 <span class="cov8" title="1">revocableZone := "" 8452 if len(node.Labels) > 0 </span><span class="cov0" title="0">{ 8453 if value, found := node.Labels[v1beta1.RevocableZone]; found </span><span 
class="cov0" title="0">{ 8454 revocableZone = value 8455 }</span> 8456 } 8457 <span class="cov8" title="1">ni.RevocableZone = revocableZone</span> 8458 } 8459 8460 // Check node if enable Oversubscription and set Oversubscription resources 8461 // Only support oversubscription cpu and memory resource for this version 8462 func (ni *NodeInfo) setOversubscription(node *v1.Node) <span class="cov8" title="1">{ 8463 if node == nil </span><span class="cov0" title="0">{ 8464 return 8465 }</span> 8466 8467 <span class="cov8" title="1">ni.OversubscriptionNode = false 8468 ni.OfflineJobEvicting = false 8469 if len(node.Labels) > 0 </span><span class="cov0" title="0">{ 8470 if value, found := node.Labels[OversubscriptionNode]; found </span><span class="cov0" title="0">{ 8471 b, err := strconv.ParseBool(value) 8472 if err == nil </span><span class="cov0" title="0">{ 8473 ni.OversubscriptionNode = b 8474 }</span> else<span class="cov0" title="0"> { 8475 ni.OversubscriptionNode = false 8476 }</span> 8477 <span class="cov0" title="0">klog.V(5).Infof("Set node %s Oversubscription to %v", node.Name, ni.OversubscriptionNode)</span> 8478 } 8479 } 8480 8481 <span class="cov8" title="1">if len(node.Annotations) > 0 </span><span class="cov0" title="0">{ 8482 if value, found := node.Annotations[OfflineJobEvicting]; found </span><span class="cov0" title="0">{ 8483 b, err := strconv.ParseBool(value) 8484 if err == nil </span><span class="cov0" title="0">{ 8485 ni.OfflineJobEvicting = b 8486 }</span> else<span class="cov0" title="0"> { 8487 ni.OfflineJobEvicting = false 8488 }</span> 8489 <span class="cov0" title="0">klog.V(5).Infof("Set node %s OfflineJobEvicting to %v", node.Name, ni.OfflineJobEvicting)</span> 8490 } 8491 <span class="cov0" title="0">if value, found := node.Annotations[OversubscriptionCPU]; found </span><span class="cov0" title="0">{ 8492 ni.OversubscriptionResource.MilliCPU, _ = strconv.ParseFloat(value, 64) 8493 klog.V(5).Infof("Set node %s Oversubscription CPU to %v", 
node.Name, ni.OversubscriptionResource.MilliCPU) 8494 }</span> 8495 <span class="cov0" title="0">if value, found := node.Annotations[OversubscriptionMemory]; found </span><span class="cov0" title="0">{ 8496 ni.OversubscriptionResource.Memory, _ = strconv.ParseFloat(value, 64) 8497 klog.V(5).Infof("Set node %s Oversubscription Memory to %v", node.Name, ni.OversubscriptionResource.Memory) 8498 }</span> 8499 } 8500 } 8501 8502 func (ni *NodeInfo) setNodeState(node *v1.Node) <span class="cov8" title="1">{ 8503 // If node is nil, the node is un-initialized in cache 8504 if node == nil </span><span class="cov0" title="0">{ 8505 ni.State = NodeState{ 8506 Phase: NotReady, 8507 Reason: "UnInitialized", 8508 } 8509 klog.Warningf("set the node %s status to %s for the reason UnInitialized.", node.Name, NotReady.String()) 8510 return 8511 }</span> 8512 8513 // set NodeState according to resources 8514 <span class="cov8" title="1">if !ni.Used.LessEqual(ni.Allocatable, Zero) </span><span class="cov0" title="0">{ 8515 ni.State = NodeState{ 8516 Phase: NotReady, 8517 Reason: "OutOfSync", 8518 } 8519 return 8520 }</span> 8521 8522 // If node not ready, e.g. 
power off 8523 <span class="cov8" title="1">for _, cond := range node.Status.Conditions </span><span class="cov0" title="0">{ 8524 if cond.Type == v1.NodeReady && cond.Status != v1.ConditionTrue </span><span class="cov0" title="0">{ 8525 ni.State = NodeState{ 8526 Phase: NotReady, 8527 Reason: "NotReady", 8528 } 8529 klog.Warningf("set the node %s status to %s.", node.Name, NotReady.String()) 8530 return 8531 }</span> 8532 } 8533 8534 // Node is ready (ignore node conditions because of taint/toleration) 8535 <span class="cov8" title="1">ni.State = NodeState{ 8536 Phase: Ready, 8537 Reason: "", 8538 } 8539 8540 klog.V(4).Infof("set the node %s status to %s.", node.Name, Ready.String())</span> 8541 } 8542 8543 func (ni *NodeInfo) setNodeGPUInfo(node *v1.Node) <span class="cov8" title="1">{ 8544 if node == nil </span><span class="cov0" title="0">{ 8545 return 8546 }</span> 8547 <span class="cov8" title="1">memory, ok := node.Status.Capacity[VolcanoGPUResource] 8548 if !ok </span><span class="cov8" title="1">{ 8549 return 8550 }</span> 8551 <span class="cov0" title="0">totalMemory := memory.Value() 8552 8553 res, ok := node.Status.Capacity[VolcanoGPUNumber] 8554 if !ok </span><span class="cov0" title="0">{ 8555 return 8556 }</span> 8557 <span class="cov0" title="0">gpuNumber := res.Value() 8558 if gpuNumber == 0 </span><span class="cov0" title="0">{ 8559 klog.Warningf("invalid %s=%s", VolcanoGPUNumber, res.String()) 8560 return 8561 }</span> 8562 8563 <span class="cov0" title="0">memoryPerCard := uint(totalMemory / gpuNumber) 8564 for i := 0; i < int(gpuNumber); i++ </span><span class="cov0" title="0">{ 8565 ni.GPUDevices[i] = NewGPUDevice(i, memoryPerCard) 8566 }</span> 8567 } 8568 8569 // SetNode sets kubernetes node object to nodeInfo object 8570 func (ni *NodeInfo) SetNode(node *v1.Node) <span class="cov0" title="0">{ 8571 ni.setOversubscription(node) 8572 ni.setNodeState(node) 8573 ni.setNodeGPUInfo(node) 8574 ni.setRevocableZone(node) 8575 8576 if !ni.Ready() 
</span><span class="cov0" title="0">{ 8577 klog.Warningf("Failed to set node info, phase: %s, reason: %s", 8578 ni.State.Phase, ni.State.Reason) 8579 return 8580 }</span> 8581 8582 <span class="cov0" title="0">ni.Name = node.Name 8583 ni.Node = node 8584 8585 ni.Allocatable = NewResource(node.Status.Allocatable).Add(ni.OversubscriptionResource) 8586 ni.Capability = NewResource(node.Status.Capacity).Add(ni.OversubscriptionResource) 8587 ni.Releasing = EmptyResource() 8588 ni.Pipelined = EmptyResource() 8589 ni.Idle = NewResource(node.Status.Allocatable).Add(ni.OversubscriptionResource) 8590 ni.Used = EmptyResource() 8591 8592 for _, ti := range ni.Tasks </span><span class="cov0" title="0">{ 8593 switch ti.Status </span>{ 8594 case Releasing:<span class="cov0" title="0"> 8595 ni.Idle.Sub(ti.Resreq) 8596 ni.Releasing.Add(ti.Resreq) 8597 ni.Used.Add(ti.Resreq) 8598 ni.AddGPUResource(ti.Pod)</span> 8599 case Pipelined:<span class="cov0" title="0"> 8600 ni.Pipelined.Add(ti.Resreq)</span> 8601 default:<span class="cov0" title="0"> 8602 ni.Idle.Sub(ti.Resreq) 8603 ni.Used.Add(ti.Resreq) 8604 ni.AddGPUResource(ti.Pod)</span> 8605 } 8606 } 8607 } 8608 8609 func (ni *NodeInfo) allocateIdleResource(ti *TaskInfo) error <span class="cov8" title="1">{ 8610 if ti.Resreq.LessEqual(ni.Idle, Zero) </span><span class="cov8" title="1">{ 8611 ni.Idle.Sub(ti.Resreq) 8612 return nil 8613 }</span> 8614 8615 <span class="cov8" title="1">return fmt.Errorf("selected node NotReady")</span> 8616 } 8617 8618 // AddTask is used to add a task in nodeInfo object 8619 // 8620 // If error occurs both task and node are guaranteed to be in the original state. 
8621 func (ni *NodeInfo) AddTask(task *TaskInfo) error <span class="cov8" title="1">{ 8622 if len(task.NodeName) > 0 && len(ni.Name) > 0 && task.NodeName != ni.Name </span><span class="cov0" title="0">{ 8623 return fmt.Errorf("task <%v/%v> already on different node <%v>", 8624 task.Namespace, task.Name, task.NodeName) 8625 }</span> 8626 8627 <span class="cov8" title="1">key := PodKey(task.Pod) 8628 if _, found := ni.Tasks[key]; found </span><span class="cov0" title="0">{ 8629 return fmt.Errorf("task <%v/%v> already on node <%v>", 8630 task.Namespace, task.Name, ni.Name) 8631 }</span> 8632 8633 // Node will hold a copy of task to make sure the status 8634 // change will not impact resource in node. 8635 <span class="cov8" title="1">ti := task.Clone() 8636 8637 if ni.Node != nil </span><span class="cov8" title="1">{ 8638 switch ti.Status </span>{ 8639 case Releasing:<span class="cov0" title="0"> 8640 if err := ni.allocateIdleResource(ti); err != nil </span><span class="cov0" title="0">{ 8641 return err 8642 }</span> 8643 <span class="cov0" title="0">ni.Releasing.Add(ti.Resreq) 8644 ni.Used.Add(ti.Resreq) 8645 ni.AddGPUResource(ti.Pod)</span> 8646 case Pipelined:<span class="cov0" title="0"> 8647 ni.Pipelined.Add(ti.Resreq)</span> 8648 default:<span class="cov8" title="1"> 8649 if err := ni.allocateIdleResource(ti); err != nil </span><span class="cov8" title="1">{ 8650 return err 8651 }</span> 8652 <span class="cov8" title="1">ni.Used.Add(ti.Resreq) 8653 ni.AddGPUResource(ti.Pod)</span> 8654 } 8655 } 8656 8657 // Update task node name upon successful task addition. 8658 <span class="cov8" title="1">task.NodeName = ni.Name 8659 ti.NodeName = ni.Name 8660 ni.Tasks[key] = ti 8661 8662 return nil</span> 8663 } 8664 8665 // RemoveTask used to remove a task from nodeInfo object. 8666 // 8667 // If error occurs both task and node are guaranteed to be in the original state. 
8668 func (ni *NodeInfo) RemoveTask(ti *TaskInfo) error <span class="cov8" title="1">{ 8669 key := PodKey(ti.Pod) 8670 8671 task, found := ni.Tasks[key] 8672 if !found </span><span class="cov0" title="0">{ 8673 klog.Warningf("failed to find task <%v/%v> on host <%v>", 8674 ti.Namespace, ti.Name, ni.Name) 8675 return nil 8676 }</span> 8677 8678 <span class="cov8" title="1">if ni.Node != nil </span><span class="cov8" title="1">{ 8679 switch task.Status </span>{ 8680 case Releasing:<span class="cov0" title="0"> 8681 ni.Releasing.Sub(task.Resreq) 8682 ni.Idle.Add(task.Resreq) 8683 ni.Used.Sub(task.Resreq) 8684 ni.SubGPUResource(ti.Pod)</span> 8685 case Pipelined:<span class="cov0" title="0"> 8686 ni.Pipelined.Sub(task.Resreq)</span> 8687 default:<span class="cov8" title="1"> 8688 ni.Idle.Add(task.Resreq) 8689 ni.Used.Sub(task.Resreq) 8690 ni.SubGPUResource(ti.Pod)</span> 8691 } 8692 } 8693 8694 <span class="cov8" title="1">delete(ni.Tasks, key) 8695 8696 return nil</span> 8697 } 8698 8699 // UpdateTask is used to update a task in nodeInfo object. 8700 // 8701 // If error occurs both task and node are guaranteed to be in the original state. 8702 func (ni *NodeInfo) UpdateTask(ti *TaskInfo) error <span class="cov0" title="0">{ 8703 if err := ni.RemoveTask(ti); err != nil </span><span class="cov0" title="0">{ 8704 return err 8705 }</span> 8706 8707 <span class="cov0" title="0">if err := ni.AddTask(ti); err != nil </span><span class="cov0" title="0">{ 8708 // This should never happen if task removal was successful, 8709 // because only possible error during task addition is when task is still on a node. 
8710 klog.Fatalf("Failed to add Task <%s,%s> to Node <%s> during task update", 8711 ti.Namespace, ti.Name, ni.Name) 8712 }</span> 8713 <span class="cov0" title="0">return nil</span> 8714 } 8715 8716 // String returns nodeInfo details in string format 8717 func (ni NodeInfo) String() string <span class="cov0" title="0">{ 8718 tasks := "" 8719 8720 i := 0 8721 for _, task := range ni.Tasks </span><span class="cov0" title="0">{ 8722 tasks += fmt.Sprintf("\n\t %d: %v", i, task) 8723 i++ 8724 }</span> 8725 8726 <span class="cov0" title="0">return fmt.Sprintf("Node (%s): allocatable<%v> idle <%v>, used <%v>, releasing <%v>, oversubscribution <%v>, "+ 8727 "state <phase %s, reaseon %s>, oversubscributionNode <%v>, offlineJobEvicting <%v>,taints <%v>%s", 8728 ni.Name, ni.Allocatable, ni.Idle, ni.Used, ni.Releasing, ni.OversubscriptionResource, ni.State.Phase, ni.State.Reason, ni.OversubscriptionNode, ni.OfflineJobEvicting, ni.Node.Spec.Taints, tasks)</span> 8729 } 8730 8731 // Pods returns all pods running in that node 8732 func (ni *NodeInfo) Pods() (pods []*v1.Pod) <span class="cov0" title="0">{ 8733 for _, t := range ni.Tasks </span><span class="cov0" title="0">{ 8734 pods = append(pods, t.Pod) 8735 }</span> 8736 8737 <span class="cov0" title="0">return</span> 8738 } 8739 8740 // GetDevicesIdleGPUMemory returns all the idle GPU memory by gpu card. 
8741 func (ni *NodeInfo) GetDevicesIdleGPUMemory() map[int]uint <span class="cov0" title="0">{ 8742 devicesAllGPUMemory := ni.getDevicesAllGPUMemory() 8743 devicesUsedGPUMemory := ni.getDevicesUsedGPUMemory() 8744 res := map[int]uint{} 8745 for id, allMemory := range devicesAllGPUMemory </span><span class="cov0" title="0">{ 8746 if usedMemory, found := devicesUsedGPUMemory[id]; found </span><span class="cov0" title="0">{ 8747 res[id] = allMemory - usedMemory 8748 }</span> else<span class="cov0" title="0"> { 8749 res[id] = allMemory 8750 }</span> 8751 } 8752 <span class="cov0" title="0">return res</span> 8753 } 8754 8755 func (ni *NodeInfo) getDevicesUsedGPUMemory() map[int]uint <span class="cov0" title="0">{ 8756 res := map[int]uint{} 8757 for _, device := range ni.GPUDevices </span><span class="cov0" title="0">{ 8758 res[device.ID] = device.getUsedGPUMemory() 8759 }</span> 8760 <span class="cov0" title="0">return res</span> 8761 } 8762 8763 func (ni *NodeInfo) getDevicesAllGPUMemory() map[int]uint <span class="cov0" title="0">{ 8764 res := map[int]uint{} 8765 for _, device := range ni.GPUDevices </span><span class="cov0" title="0">{ 8766 res[device.ID] = device.Memory 8767 }</span> 8768 <span class="cov0" title="0">return res</span> 8769 } 8770 8771 // AddGPUResource adds the pod to GPU pool if it is assigned 8772 func (ni *NodeInfo) AddGPUResource(pod *v1.Pod) <span class="cov8" title="1">{ 8773 gpuRes := GetGPUResourceOfPod(pod) 8774 if gpuRes > 0 </span><span class="cov0" title="0">{ 8775 id := GetGPUIndex(pod) 8776 if dev := ni.GPUDevices[id]; dev != nil </span><span class="cov0" title="0">{ 8777 dev.PodMap[string(pod.UID)] = pod 8778 }</span> 8779 } 8780 } 8781 8782 // SubGPUResource frees the gpu hold by the pod 8783 func (ni *NodeInfo) SubGPUResource(pod *v1.Pod) <span class="cov8" title="1">{ 8784 gpuRes := GetGPUResourceOfPod(pod) 8785 if gpuRes > 0 </span><span class="cov0" title="0">{ 8786 id := GetGPUIndex(pod) 8787 if dev := ni.GPUDevices[id]; dev != 
nil </span><span class="cov0" title="0">{ 8788 delete(dev.PodMap, string(pod.UID)) 8789 }</span> 8790 } 8791 } 8792 </pre> 8793 8794 <pre class="file" id="file45" style="display: none">/* 8795 Copyright 2021 The Volcano Authors. 8796 8797 Licensed under the Apache License, Version 2.0 (the "License"); 8798 you may not use this file except in compliance with the License. 8799 You may obtain a copy of the License at 8800 8801 http://www.apache.org/licenses/LICENSE-2.0 8802 8803 Unless required by applicable law or agreed to in writing, software 8804 distributed under the License is distributed on an "AS IS" BASIS, 8805 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 8806 See the License for the specific language governing permissions and 8807 limitations under the License. 8808 */ 8809 8810 package api 8811 8812 import ( 8813 v1 "k8s.io/api/core/v1" 8814 "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/topology" 8815 "k8s.io/kubernetes/pkg/kubelet/cm/cpuset" 8816 8817 nodeinfov1alpha1 "volcano.sh/apis/pkg/apis/nodeinfo/v1alpha1" 8818 ) 8819 8820 // NumaChgFlag indicate node numainfo changed status 8821 type NumaChgFlag int 8822 8823 const ( 8824 // NumaInfoResetFlag indicate reset operate 8825 NumaInfoResetFlag NumaChgFlag = 0b00 8826 // NumaInfoMoreFlag indicate the received allocatable resource is getting more 8827 NumaInfoMoreFlag NumaChgFlag = 0b11 8828 // NumaInfoLessFlag indicate the received allocatable resource is getting less 8829 NumaInfoLessFlag NumaChgFlag = 0b10 8830 ) 8831 8832 // ResourceInfo is the allocatable information for the resource 8833 type ResourceInfo struct { 8834 Allocatable cpuset.CPUSet 8835 Capacity int 8836 } 8837 8838 // NumatopoInfo is the information about topology manager on the node 8839 type NumatopoInfo struct { 8840 Namespace string 8841 Name string 8842 Policies map[nodeinfov1alpha1.PolicyName]string 8843 NumaResMap map[string]*ResourceInfo 8844 CPUDetail topology.CPUDetails 8845 ResReserved 
v1.ResourceList 8846 } 8847 8848 // DeepCopy used to copy NumatopoInfo 8849 func (info *NumatopoInfo) DeepCopy() *NumatopoInfo <span class="cov0" title="0">{ 8850 numaInfo := &NumatopoInfo{ 8851 Namespace: info.Namespace, 8852 Name: info.Name, 8853 Policies: make(map[nodeinfov1alpha1.PolicyName]string), 8854 NumaResMap: make(map[string]*ResourceInfo), 8855 CPUDetail: topology.CPUDetails{}, 8856 ResReserved: make(v1.ResourceList), 8857 } 8858 8859 policies := info.Policies 8860 for name, policy := range policies </span><span class="cov0" title="0">{ 8861 numaInfo.Policies[name] = policy 8862 }</span> 8863 8864 <span class="cov0" title="0">for resName, resInfo := range info.NumaResMap </span><span class="cov0" title="0">{ 8865 var tmpInfo ResourceInfo 8866 tmpInfo.Capacity = resInfo.Capacity 8867 tmpInfo.Allocatable = resInfo.Allocatable.Clone() 8868 numaInfo.NumaResMap[resName] = &tmpInfo 8869 }</span> 8870 8871 <span class="cov0" title="0">cpuDetail := info.CPUDetail 8872 for cpuID, detail := range cpuDetail </span><span class="cov0" title="0">{ 8873 numaInfo.CPUDetail[cpuID] = detail 8874 }</span> 8875 8876 <span class="cov0" title="0">resReserved := info.ResReserved 8877 for resName, res := range resReserved </span><span class="cov0" title="0">{ 8878 numaInfo.ResReserved[resName] = res 8879 }</span> 8880 8881 <span class="cov0" title="0">return numaInfo</span> 8882 } 8883 8884 // Compare is the function to show the change of the resource on kubelet 8885 // return val: 8886 // - true : the resource on kubelet is getting more or no change 8887 // - false : the resource on kubelet is getting less 8888 func (info *NumatopoInfo) Compare(newInfo *NumatopoInfo) bool <span class="cov0" title="0">{ 8889 for resName := range info.NumaResMap </span><span class="cov0" title="0">{ 8890 oldSize := info.NumaResMap[resName].Allocatable.Size() 8891 newSize := newInfo.NumaResMap[resName].Allocatable.Size() 8892 if oldSize <= newSize </span><span class="cov0" title="0">{ 8893 
return true 8894 }</span> 8895 } 8896 8897 <span class="cov0" title="0">return false</span> 8898 } 8899 8900 // Allocate is the function to remove the allocated resource 8901 func (info *NumatopoInfo) Allocate(resSets ResNumaSets) <span class="cov0" title="0">{ 8902 for resName := range resSets </span><span class="cov0" title="0">{ 8903 info.NumaResMap[resName].Allocatable = info.NumaResMap[resName].Allocatable.Difference(resSets[resName]) 8904 }</span> 8905 } 8906 8907 // Release is the function to reclaim the allocated resource 8908 func (info *NumatopoInfo) Release(resSets ResNumaSets) <span class="cov0" title="0">{ 8909 for resName := range resSets </span><span class="cov0" title="0">{ 8910 info.NumaResMap[resName].Allocatable = info.NumaResMap[resName].Allocatable.Union(resSets[resName]) 8911 }</span> 8912 } 8913 8914 // GenerateNodeResNumaSets return the idle resource sets of all node 8915 func GenerateNodeResNumaSets(nodes map[string]*NodeInfo) map[string]ResNumaSets <span class="cov0" title="0">{ 8916 nodeSlice := make(map[string]ResNumaSets) 8917 for _, node := range nodes </span><span class="cov0" title="0">{ 8918 if node.NumaSchedulerInfo == nil </span><span class="cov0" title="0">{ 8919 continue</span> 8920 } 8921 8922 <span class="cov0" title="0">resMaps := make(ResNumaSets) 8923 for resName, resMap := range node.NumaSchedulerInfo.NumaResMap </span><span class="cov0" title="0">{ 8924 resMaps[resName] = resMap.Allocatable.Clone() 8925 }</span> 8926 8927 <span class="cov0" title="0">nodeSlice[node.Name] = resMaps</span> 8928 } 8929 8930 <span class="cov0" title="0">return nodeSlice</span> 8931 } 8932 8933 // GenerateNumaNodes return the numa IDs of all node 8934 func GenerateNumaNodes(nodes map[string]*NodeInfo) map[string][]int <span class="cov0" title="0">{ 8935 nodeNumaMap := make(map[string][]int) 8936 8937 for _, node := range nodes </span><span class="cov0" title="0">{ 8938 if node.NumaSchedulerInfo == nil </span><span class="cov0" title="0">{ 8939 
continue</span> 8940 } 8941 8942 <span class="cov0" title="0">nodeNumaMap[node.Name] = node.NumaSchedulerInfo.CPUDetail.NUMANodes().ToSlice()</span> 8943 } 8944 8945 <span class="cov0" title="0">return nodeNumaMap</span> 8946 } 8947 8948 // ResNumaSets is the set map of the resource 8949 type ResNumaSets map[string]cpuset.CPUSet 8950 8951 // Allocate is to remove the allocated resource which is assigned to task 8952 func (resSets ResNumaSets) Allocate(taskSets ResNumaSets) <span class="cov0" title="0">{ 8953 for resName := range taskSets </span><span class="cov0" title="0">{ 8954 if _, ok := resSets[resName]; !ok </span><span class="cov0" title="0">{ 8955 continue</span> 8956 } 8957 <span class="cov0" title="0">resSets[resName] = resSets[resName].Difference(taskSets[resName])</span> 8958 } 8959 } 8960 8961 // Release is to reclaim the allocated resource which is assigned to task 8962 func (resSets ResNumaSets) Release(taskSets ResNumaSets) <span class="cov0" title="0">{ 8963 for resName := range taskSets </span><span class="cov0" title="0">{ 8964 if _, ok := resSets[resName]; !ok </span><span class="cov0" title="0">{ 8965 continue</span> 8966 } 8967 <span class="cov0" title="0">resSets[resName] = resSets[resName].Union(taskSets[resName])</span> 8968 } 8969 } 8970 8971 // Clone is the copy action 8972 func (resSets ResNumaSets) Clone() ResNumaSets <span class="cov0" title="0">{ 8973 newSets := make(ResNumaSets) 8974 for resName := range resSets </span><span class="cov0" title="0">{ 8975 newSets[resName] = resSets[resName].Clone() 8976 }</span> 8977 8978 <span class="cov0" title="0">return newSets</span> 8979 } 8980 </pre> 8981 8982 <pre class="file" id="file46" style="display: none">/* 8983 Copyright 2019 The Kubernetes Authors. 8984 8985 Licensed under the Apache License, Version 2.0 (the "License"); 8986 you may not use this file except in compliance with the License. 
8987 You may obtain a copy of the License at 8988 8989 http://www.apache.org/licenses/LICENSE-2.0 8990 8991 Unless required by applicable law or agreed to in writing, software 8992 distributed under the License is distributed on an "AS IS" BASIS, 8993 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 8994 See the License for the specific language governing permissions and 8995 limitations under the License. 8996 */ 8997 8998 package api 8999 9000 import ( 9001 "fmt" 9002 "strconv" 9003 "strings" 9004 "time" 9005 9006 v1 "k8s.io/api/core/v1" 9007 "k8s.io/klog" 9008 9009 "volcano.sh/apis/pkg/apis/scheduling/v1beta1" 9010 ) 9011 9012 // Refer k8s.io/kubernetes/pkg/scheduler/algorithm/predicates/predicates.go#GetResourceRequest. 9013 // 9014 // GetResourceRequest returns a *Resource that covers the largest width in each resource dimension. 9015 // Because init-containers run sequentially, we collect the max in each dimension iteratively. 9016 // In contrast, we sum the resource vectors for regular containers since they run simultaneously. 9017 // 9018 // To be consistent with kubernetes default scheduler, it is only used for predicates of actions(e.g. 9019 // allocate, backfill, preempt, reclaim), please use GetPodResourceWithoutInitContainers for other cases. 
9020 // 9021 // Example: 9022 // 9023 // Pod: 9024 // InitContainers 9025 // IC1: 9026 // CPU: 2 9027 // Memory: 1G 9028 // IC2: 9029 // CPU: 2 9030 // Memory: 3G 9031 // Containers 9032 // C1: 9033 // CPU: 2 9034 // Memory: 1G 9035 // C2: 9036 // CPU: 1 9037 // Memory: 1G 9038 // 9039 // Result: CPU: 3, Memory: 3G 9040 9041 // GetPodResourceRequest returns all the resource required for that pod 9042 func GetPodResourceRequest(pod *v1.Pod) *Resource <span class="cov8" title="1">{ 9043 result := GetPodResourceWithoutInitContainers(pod) 9044 9045 // take max_resource(sum_pod, any_init_container) 9046 for _, container := range pod.Spec.InitContainers </span><span class="cov8" title="1">{ 9047 result.SetMaxResource(NewResource(container.Resources.Requests)) 9048 }</span> 9049 9050 <span class="cov8" title="1">return result</span> 9051 } 9052 9053 // GetPodPreemptable return volcano.sh/preemptable value for pod 9054 func GetPodPreemptable(pod *v1.Pod) bool <span class="cov8" title="1">{ 9055 // check annotaion first 9056 if len(pod.Annotations) > 0 </span><span class="cov8" title="1">{ 9057 if value, found := pod.Annotations[v1beta1.PodPreemptable]; found </span><span class="cov0" title="0">{ 9058 b, err := strconv.ParseBool(value) 9059 if err != nil </span><span class="cov0" title="0">{ 9060 klog.Warningf("invalid %s=%s", v1beta1.PodPreemptable, value) 9061 return false 9062 }</span> 9063 <span class="cov0" title="0">return b</span> 9064 } 9065 } 9066 9067 // it annotation does not exit, check label 9068 <span class="cov8" title="1">if len(pod.Labels) > 0 </span><span class="cov0" title="0">{ 9069 if value, found := pod.Labels[v1beta1.PodPreemptable]; found </span><span class="cov0" title="0">{ 9070 b, err := strconv.ParseBool(value) 9071 if err != nil </span><span class="cov0" title="0">{ 9072 klog.Warningf("invalid %s=%s", v1beta1.PodPreemptable, value) 9073 return false 9074 }</span> 9075 <span class="cov0" title="0">return b</span> 9076 } 9077 } 9078 9079 <span 
class="cov8" title="1">return false</span> 9080 } 9081 9082 // GetPodRevocableZone return volcano.sh/revocable-zone value for pod/podgroup 9083 func GetPodRevocableZone(pod *v1.Pod) string <span class="cov8" title="1">{ 9084 if len(pod.Annotations) > 0 </span><span class="cov8" title="1">{ 9085 if value, found := pod.Annotations[v1beta1.RevocableZone]; found </span><span class="cov0" title="0">{ 9086 if value != "*" </span><span class="cov0" title="0">{ 9087 return "" 9088 }</span> 9089 <span class="cov0" title="0">return value</span> 9090 } 9091 9092 <span class="cov8" title="1">if value, found := pod.Annotations[v1beta1.PodPreemptable]; found </span><span class="cov0" title="0">{ 9093 if b, err := strconv.ParseBool(value); err == nil && b </span><span class="cov0" title="0">{ 9094 return "*" 9095 }</span> 9096 } 9097 } 9098 <span class="cov8" title="1">return ""</span> 9099 } 9100 9101 // GetPodTopologyPolicy return volcano.sh/numa-topology-policy value for pod 9102 func GetPodTopologyPolicy(pod *v1.Pod) string <span class="cov8" title="1">{ 9103 if len(pod.Annotations) > 0 </span><span class="cov8" title="1">{ 9104 if value, found := pod.Annotations[v1beta1.NumaPolicyKey]; found </span><span class="cov0" title="0">{ 9105 return value 9106 }</span> 9107 } 9108 <span class="cov8" title="1">return ""</span> 9109 } 9110 9111 // GetPodResourceWithoutInitContainers returns Pod's resource request, it does not contain 9112 // init containers' resource request. 
9113 func GetPodResourceWithoutInitContainers(pod *v1.Pod) *Resource <span class="cov8" title="1">{ 9114 result := EmptyResource() 9115 for _, container := range pod.Spec.Containers </span><span class="cov8" title="1">{ 9116 result.Add(NewResource(container.Resources.Requests)) 9117 }</span> 9118 9119 <span class="cov8" title="1">return result</span> 9120 } 9121 9122 // GetGPUIndex returns the ID of the GPU 9123 func GetGPUIndex(pod *v1.Pod) int <span class="cov0" title="0">{ 9124 if len(pod.Annotations) > 0 </span><span class="cov0" title="0">{ 9125 value, found := pod.Annotations[GPUIndex] 9126 if found </span><span class="cov0" title="0">{ 9127 id, err := strconv.Atoi(value) 9128 if err != nil </span><span class="cov0" title="0">{ 9129 klog.Errorf("invalid %s=%s", GPUIndex, value) 9130 return -1 9131 }</span> 9132 <span class="cov0" title="0">return id</span> 9133 } 9134 } 9135 9136 <span class="cov0" title="0">return -1</span> 9137 } 9138 9139 func escapeJSONPointer(p string) string <span class="cov0" title="0">{ 9140 // Escaping reference name using https://tools.ietf.org/html/rfc6901 9141 p = strings.Replace(p, "~", "~0", -1) 9142 p = strings.Replace(p, "/", "~1", -1) 9143 return p 9144 }</span> 9145 9146 // AddGPUIndexPatch returns the patch adding GPU index 9147 func AddGPUIndexPatch(id int) string <span class="cov0" title="0">{ 9148 return fmt.Sprintf(`[{"op": "add", "path": "/metadata/annotations/%s", "value":"%d"},`+ 9149 `{"op": "add", "path": "/metadata/annotations/%s", "value": "%d"}]`, 9150 escapeJSONPointer(PredicateTime), time.Now().UnixNano(), 9151 escapeJSONPointer(GPUIndex), id) 9152 }</span> 9153 9154 // RemoveGPUIndexPatch returns the patch removing GPU index 9155 func RemoveGPUIndexPatch() string <span class="cov0" title="0">{ 9156 return fmt.Sprintf(`[{"op": "remove", "path": "/metadata/annotations/%s"},`+ 9157 `{"op": "remove", "path": "/metadata/annotations/%s"]`, escapeJSONPointer(PredicateTime), escapeJSONPointer(GPUIndex)) 9158 }</span> 
9159 </pre> 9160 9161 <pre class="file" id="file47" style="display: none">/* 9162 Copyright 2018 The Kubernetes Authors. 9163 9164 Licensed under the Apache License, Version 2.0 (the "License"); 9165 you may not use this file except in compliance with the License. 9166 You may obtain a copy of the License at 9167 9168 http://www.apache.org/licenses/LICENSE-2.0 9169 9170 Unless required by applicable law or agreed to in writing, software 9171 distributed under the License is distributed on an "AS IS" BASIS, 9172 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9173 See the License for the specific language governing permissions and 9174 limitations under the License. 9175 */ 9176 9177 package api 9178 9179 import ( 9180 "k8s.io/apimachinery/pkg/types" 9181 9182 "volcano.sh/apis/pkg/apis/scheduling" 9183 "volcano.sh/apis/pkg/apis/scheduling/v1beta1" 9184 ) 9185 9186 // QueueID is UID type, serves as unique ID for each queue 9187 type QueueID types.UID 9188 9189 // QueueInfo will have all details about queue 9190 type QueueInfo struct { 9191 UID QueueID 9192 Name string 9193 9194 Weight int32 9195 9196 // Weights is a list of slash sperated float numbers. 9197 // Each of them is a weight corresponding the 9198 // hierarchy level. 9199 Weights string 9200 // Hierarchy is a list of node name along the 9201 // path from the root to the node itself. 
9202 Hierarchy string 9203 9204 Queue *scheduling.Queue 9205 } 9206 9207 // NewQueueInfo creates new queueInfo object 9208 func NewQueueInfo(queue *scheduling.Queue) *QueueInfo <span class="cov0" title="0">{ 9209 return &QueueInfo{ 9210 UID: QueueID(queue.Name), 9211 Name: queue.Name, 9212 9213 Weight: queue.Spec.Weight, 9214 Hierarchy: queue.Annotations[v1beta1.KubeHierarchyAnnotationKey], 9215 Weights: queue.Annotations[v1beta1.KubeHierarchyWeightAnnotationKey], 9216 9217 Queue: queue, 9218 } 9219 }</span> 9220 9221 // Clone is used to clone queueInfo object 9222 func (q *QueueInfo) Clone() *QueueInfo <span class="cov0" title="0">{ 9223 return &QueueInfo{ 9224 UID: q.UID, 9225 Name: q.Name, 9226 Weight: q.Weight, 9227 Hierarchy: q.Hierarchy, 9228 Weights: q.Weights, 9229 Queue: q.Queue, 9230 } 9231 }</span> 9232 9233 // Reclaimable return whether queue is reclaimable 9234 func (q *QueueInfo) Reclaimable() bool <span class="cov0" title="0">{ 9235 if q == nil </span><span class="cov0" title="0">{ 9236 return false 9237 }</span> 9238 9239 <span class="cov0" title="0">if q.Queue == nil </span><span class="cov0" title="0">{ 9240 return false 9241 }</span> 9242 9243 <span class="cov0" title="0">if q.Queue.Spec.Reclaimable == nil </span><span class="cov0" title="0">{ 9244 return true 9245 }</span> 9246 9247 <span class="cov0" title="0">return *q.Queue.Spec.Reclaimable</span> 9248 } 9249 </pre> 9250 9251 <pre class="file" id="file48" style="display: none">/* 9252 Copyright 2017 The Kubernetes Authors. 9253 9254 Licensed under the Apache License, Version 2.0 (the "License"); 9255 you may not use this file except in compliance with the License. 9256 You may obtain a copy of the License at 9257 9258 http://www.apache.org/licenses/LICENSE-2.0 9259 9260 Unless required by applicable law or agreed to in writing, software 9261 distributed under the License is distributed on an "AS IS" BASIS, 9262 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
9263 See the License for the specific language governing permissions and 9264 limitations under the License. 9265 */ 9266 9267 package api 9268 9269 import ( 9270 "fmt" 9271 "math" 9272 9273 v1 "k8s.io/api/core/v1" 9274 "k8s.io/apimachinery/pkg/api/resource" 9275 v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper" 9276 9277 "volcano.sh/volcano/pkg/scheduler/util/assert" 9278 ) 9279 9280 const ( 9281 // GPUResourceName need to follow https://github.com/NVIDIA/k8s-device-plugin/blob/66a35b71ac4b5cbfb04714678b548bd77e5ba719/server.go#L20 9282 GPUResourceName = "nvidia.com/gpu" 9283 ) 9284 9285 const ( 9286 minResource float64 = 0.1 9287 ) 9288 9289 // DimensionDefaultValue means default value for black resource dimension 9290 type DimensionDefaultValue string 9291 9292 const ( 9293 // Zero means resource dimension not defined will be treated as zero 9294 Zero DimensionDefaultValue = "Zero" 9295 // Infinity means resource dimension not defined will be treated as infinity 9296 Infinity DimensionDefaultValue = "Infinity" 9297 ) 9298 9299 // Resource struct defines all the resource type 9300 type Resource struct { 9301 MilliCPU float64 9302 Memory float64 9303 9304 // ScalarResources 9305 ScalarResources map[v1.ResourceName]float64 9306 9307 // MaxTaskNum is only used by predicates; it should NOT 9308 // be accounted in other operators, e.g. Add. 
9309 MaxTaskNum int 9310 } 9311 9312 // EmptyResource creates a empty resource object and returns 9313 func EmptyResource() *Resource <span class="cov8" title="1">{ 9314 return &Resource{} 9315 }</span> 9316 9317 // NewResource creates a new resource object from resource list 9318 func NewResource(rl v1.ResourceList) *Resource <span class="cov8" title="1">{ 9319 r := EmptyResource() 9320 for rName, rQuant := range rl </span><span class="cov8" title="1">{ 9321 switch rName </span>{ 9322 case v1.ResourceCPU:<span class="cov8" title="1"> 9323 r.MilliCPU += float64(rQuant.MilliValue())</span> 9324 case v1.ResourceMemory:<span class="cov8" title="1"> 9325 r.Memory += float64(rQuant.Value())</span> 9326 case v1.ResourcePods:<span class="cov0" title="0"> 9327 r.MaxTaskNum += int(rQuant.Value())</span> 9328 default:<span class="cov8" title="1"> 9329 //NOTE: When converting this back to k8s resource, we need record the format as well as / 1000 9330 if v1helper.IsScalarResourceName(rName) </span><span class="cov8" title="1">{ 9331 r.AddScalar(rName, float64(rQuant.MilliValue())) 9332 }</span> 9333 } 9334 } 9335 <span class="cov8" title="1">return r</span> 9336 } 9337 9338 // Clone is used to clone a resource type, which is a deep copy function. 
9339 func (r *Resource) Clone() *Resource <span class="cov8" title="1">{ 9340 clone := &Resource{ 9341 MilliCPU: r.MilliCPU, 9342 Memory: r.Memory, 9343 MaxTaskNum: r.MaxTaskNum, 9344 } 9345 9346 if r.ScalarResources != nil </span><span class="cov8" title="1">{ 9347 clone.ScalarResources = make(map[v1.ResourceName]float64) 9348 for k, v := range r.ScalarResources </span><span class="cov8" title="1">{ 9349 clone.ScalarResources[k] = v 9350 }</span> 9351 } 9352 9353 <span class="cov8" title="1">return clone</span> 9354 } 9355 9356 // String returns resource details in string format 9357 func (r *Resource) String() string <span class="cov0" title="0">{ 9358 str := fmt.Sprintf("cpu %0.2f, memory %0.2f", r.MilliCPU, r.Memory) 9359 for rName, rQuant := range r.ScalarResources </span><span class="cov0" title="0">{ 9360 str = fmt.Sprintf("%s, %s %0.2f", str, rName, rQuant) 9361 }</span> 9362 <span class="cov0" title="0">return str</span> 9363 } 9364 9365 // ResourceNames returns all resource types 9366 func (r *Resource) ResourceNames() ResourceNameList <span class="cov0" title="0">{ 9367 resNames := ResourceNameList{} 9368 9369 if r.MilliCPU >= minResource </span><span class="cov0" title="0">{ 9370 resNames = append(resNames, v1.ResourceCPU) 9371 }</span> 9372 9373 <span class="cov0" title="0">if r.Memory >= minResource </span><span class="cov0" title="0">{ 9374 resNames = append(resNames, v1.ResourceMemory) 9375 }</span> 9376 9377 <span class="cov0" title="0">for rName, rMount := range r.ScalarResources </span><span class="cov0" title="0">{ 9378 if rMount >= minResource </span><span class="cov0" title="0">{ 9379 resNames = append(resNames, rName) 9380 }</span> 9381 } 9382 9383 <span class="cov0" title="0">return resNames</span> 9384 } 9385 9386 // Get returns the resource value for that particular resource type 9387 func (r *Resource) Get(rn v1.ResourceName) float64 <span class="cov0" title="0">{ 9388 switch rn </span>{ 9389 case v1.ResourceCPU:<span class="cov0" 
title="0"> 9390 return r.MilliCPU</span> 9391 case v1.ResourceMemory:<span class="cov0" title="0"> 9392 return r.Memory</span> 9393 default:<span class="cov0" title="0"> 9394 if r.ScalarResources == nil </span><span class="cov0" title="0">{ 9395 return 0 9396 }</span> 9397 <span class="cov0" title="0">return r.ScalarResources[rn]</span> 9398 } 9399 } 9400 9401 // IsEmpty returns false if any kind of resource is not less than min value, otherwise returns true 9402 func (r *Resource) IsEmpty() bool <span class="cov8" title="1">{ 9403 if !(r.MilliCPU < minResource && r.Memory < minResource) </span><span class="cov8" title="1">{ 9404 return false 9405 }</span> 9406 9407 <span class="cov0" title="0">for _, rQuant := range r.ScalarResources </span><span class="cov0" title="0">{ 9408 if rQuant >= minResource </span><span class="cov0" title="0">{ 9409 return false 9410 }</span> 9411 } 9412 9413 <span class="cov0" title="0">return true</span> 9414 } 9415 9416 // IsZero returns false if the given kind of resource is not less than min value 9417 func (r *Resource) IsZero(rn v1.ResourceName) bool <span class="cov8" title="1">{ 9418 switch rn </span>{ 9419 case v1.ResourceCPU:<span class="cov8" title="1"> 9420 return r.MilliCPU < minResource</span> 9421 case v1.ResourceMemory:<span class="cov0" title="0"> 9422 return r.Memory < minResource</span> 9423 default:<span class="cov8" title="1"> 9424 if r.ScalarResources == nil </span><span class="cov0" title="0">{ 9425 return true 9426 }</span> 9427 9428 <span class="cov8" title="1">_, found := r.ScalarResources[rn] 9429 assert.Assertf(found, "unknown resource %s", rn) 9430 9431 return r.ScalarResources[rn] < minResource</span> 9432 } 9433 } 9434 9435 // Add is used to add two given resources 9436 func (r *Resource) Add(rr *Resource) *Resource <span class="cov8" title="1">{ 9437 r.MilliCPU += rr.MilliCPU 9438 r.Memory += rr.Memory 9439 9440 for rName, rQuant := range rr.ScalarResources </span><span class="cov8" title="1">{ 9441 if 
r.ScalarResources == nil </span><span class="cov8" title="1">{ 9442 r.ScalarResources = map[v1.ResourceName]float64{} 9443 }</span> 9444 <span class="cov8" title="1">r.ScalarResources[rName] += rQuant</span> 9445 } 9446 9447 <span class="cov8" title="1">return r</span> 9448 } 9449 9450 //Sub subtracts two Resource objects. 9451 func (r *Resource) Sub(rr *Resource) *Resource <span class="cov8" title="1">{ 9452 assert.Assertf(rr.LessEqual(r, Zero), "resource is not sufficient to do operation: <%v> sub <%v>", r, rr) 9453 9454 r.MilliCPU -= rr.MilliCPU 9455 r.Memory -= rr.Memory 9456 9457 if r.ScalarResources == nil </span><span class="cov8" title="1">{ 9458 return r 9459 }</span> 9460 <span class="cov8" title="1">for rrName, rrQuant := range rr.ScalarResources </span><span class="cov8" title="1">{ 9461 r.ScalarResources[rrName] -= rrQuant 9462 }</span> 9463 9464 <span class="cov8" title="1">return r</span> 9465 } 9466 9467 // Multi multiples the resource with ratio provided 9468 func (r *Resource) Multi(ratio float64) *Resource <span class="cov0" title="0">{ 9469 r.MilliCPU *= ratio 9470 r.Memory *= ratio 9471 for rName, rQuant := range r.ScalarResources </span><span class="cov0" title="0">{ 9472 r.ScalarResources[rName] = rQuant * ratio 9473 }</span> 9474 <span class="cov0" title="0">return r</span> 9475 } 9476 9477 // SetMaxResource compares with ResourceList and takes max value for each Resource. 
9478 func (r *Resource) SetMaxResource(rr *Resource) <span class="cov8" title="1">{ 9479 if r == nil || rr == nil </span><span class="cov0" title="0">{ 9480 return 9481 }</span> 9482 9483 <span class="cov8" title="1">if rr.MilliCPU > r.MilliCPU </span><span class="cov8" title="1">{ 9484 r.MilliCPU = rr.MilliCPU 9485 }</span> 9486 <span class="cov8" title="1">if rr.Memory > r.Memory </span><span class="cov8" title="1">{ 9487 r.Memory = rr.Memory 9488 }</span> 9489 9490 <span class="cov8" title="1">for rrName, rrQuant := range rr.ScalarResources </span><span class="cov8" title="1">{ 9491 if r.ScalarResources == nil </span><span class="cov8" title="1">{ 9492 r.ScalarResources = make(map[v1.ResourceName]float64) 9493 for k, v := range rr.ScalarResources </span><span class="cov8" title="1">{ 9494 r.ScalarResources[k] = v 9495 }</span> 9496 <span class="cov8" title="1">return</span> 9497 } 9498 <span class="cov8" title="1">_, ok := r.ScalarResources[rrName] 9499 if !ok || rrQuant > r.ScalarResources[rrName] </span><span class="cov8" title="1">{ 9500 r.ScalarResources[rrName] = rrQuant 9501 }</span> 9502 } 9503 } 9504 9505 //FitDelta Computes the delta between a resource object representing available 9506 //resources an operand representing resources being requested. Any 9507 //field that is less than 0 after the operation represents an 9508 //insufficient resource. 
9509 func (r *Resource) FitDelta(rr *Resource) *Resource <span class="cov0" title="0">{ 9510 if rr.MilliCPU > 0 </span><span class="cov0" title="0">{ 9511 r.MilliCPU -= rr.MilliCPU + minResource 9512 }</span> 9513 9514 <span class="cov0" title="0">if rr.Memory > 0 </span><span class="cov0" title="0">{ 9515 r.Memory -= rr.Memory + minResource 9516 }</span> 9517 9518 <span class="cov0" title="0">if r.ScalarResources == nil </span><span class="cov0" title="0">{ 9519 r.ScalarResources = make(map[v1.ResourceName]float64) 9520 }</span> 9521 9522 <span class="cov0" title="0">for rrName, rrQuant := range rr.ScalarResources </span><span class="cov0" title="0">{ 9523 if rrQuant > 0 </span><span class="cov0" title="0">{ 9524 _, ok := r.ScalarResources[rrName] 9525 if !ok </span><span class="cov0" title="0">{ 9526 r.ScalarResources[rrName] = 0 9527 }</span> 9528 <span class="cov0" title="0">r.ScalarResources[rrName] -= rrQuant + minResource</span> 9529 } 9530 } 9531 9532 <span class="cov0" title="0">return r</span> 9533 } 9534 9535 // Less returns true only on condition that all dimensions of resources in r are less than that of rr, 9536 // Otherwise returns false. 9537 // @param defaultValue "default value for resource dimension not defined in ScalarResources. 
Its value can only be one of 'Zero' and 'Infinity'" 9538 func (r *Resource) Less(rr *Resource, defaultValue DimensionDefaultValue) bool <span class="cov8" title="1">{ 9539 lessFunc := func(l, r float64) bool </span><span class="cov8" title="1">{ 9540 return l < r 9541 }</span> 9542 9543 <span class="cov8" title="1">leftResource := r.Clone() 9544 rightResource := rr.Clone() 9545 9546 if !lessFunc(leftResource.MilliCPU, rightResource.MilliCPU) </span><span class="cov8" title="1">{ 9547 return false 9548 }</span> 9549 <span class="cov8" title="1">if !lessFunc(leftResource.Memory, rightResource.Memory) </span><span class="cov0" title="0">{ 9550 return false 9551 }</span> 9552 9553 <span class="cov8" title="1">r.setDefaultValue(leftResource, rightResource, defaultValue) 9554 9555 for resourceName, leftValue := range leftResource.ScalarResources </span><span class="cov8" title="1">{ 9556 rightValue := rightResource.ScalarResources[resourceName] 9557 if rightValue == -1 </span><span class="cov8" title="1">{ 9558 continue</span> 9559 } 9560 <span class="cov8" title="1">if leftValue == -1 || !lessFunc(leftValue, rightValue) </span><span class="cov8" title="1">{ 9561 return false 9562 }</span> 9563 } 9564 <span class="cov8" title="1">return true</span> 9565 } 9566 9567 // LessEqual returns true only on condition that all dimensions of resources in r are less than or equal with that of rr, 9568 // Otherwise returns false. 9569 // @param defaultValue "default value for resource dimension not defined in ScalarResources. 
Its value can only be one of 'Zero' and 'Infinity'" 9570 func (r *Resource) LessEqual(rr *Resource, defaultValue DimensionDefaultValue) bool <span class="cov8" title="1">{ 9571 lessEqualFunc := func(l, r, diff float64) bool </span><span class="cov8" title="1">{ 9572 if l < r || math.Abs(l-r) < diff </span><span class="cov8" title="1">{ 9573 return true 9574 }</span> 9575 <span class="cov8" title="1">return false</span> 9576 } 9577 9578 <span class="cov8" title="1">leftResource := r.Clone() 9579 rightResource := rr.Clone() 9580 9581 if !lessEqualFunc(leftResource.MilliCPU, rightResource.MilliCPU, minResource) </span><span class="cov8" title="1">{ 9582 return false 9583 }</span> 9584 <span class="cov8" title="1">if !lessEqualFunc(leftResource.Memory, rightResource.Memory, minResource) </span><span class="cov8" title="1">{ 9585 return false 9586 }</span> 9587 9588 <span class="cov8" title="1">r.setDefaultValue(leftResource, rightResource, defaultValue) 9589 9590 for resourceName, leftValue := range leftResource.ScalarResources </span><span class="cov8" title="1">{ 9591 rightValue := rightResource.ScalarResources[resourceName] 9592 if rightValue == -1 </span><span class="cov0" title="0">{ 9593 continue</span> 9594 } 9595 <span class="cov8" title="1">if leftValue == -1 || !lessEqualFunc(leftValue, rightValue, minResource) </span><span class="cov8" title="1">{ 9596 return false 9597 }</span> 9598 } 9599 <span class="cov8" title="1">return true</span> 9600 } 9601 9602 // LessPartly returns true if there exists any dimension whose resource amount in r is less than that in rr. 9603 // Otherwise returns false. 9604 // @param defaultValue "default value for resource dimension not defined in ScalarResources. 
Its value can only be one of 'Zero' and 'Infinity'" 9605 func (r *Resource) LessPartly(rr *Resource, defaultValue DimensionDefaultValue) bool <span class="cov8" title="1">{ 9606 lessFunc := func(l, r float64) bool </span><span class="cov8" title="1">{ 9607 return l < r 9608 }</span> 9609 9610 <span class="cov8" title="1">leftResource := r.Clone() 9611 rightResource := rr.Clone() 9612 9613 if lessFunc(leftResource.MilliCPU, rightResource.MilliCPU) || lessFunc(leftResource.Memory, rightResource.Memory) </span><span class="cov8" title="1">{ 9614 return true 9615 }</span> 9616 9617 <span class="cov8" title="1">r.setDefaultValue(leftResource, rightResource, defaultValue) 9618 9619 for resourceName, leftValue := range leftResource.ScalarResources </span><span class="cov8" title="1">{ 9620 rightValue := rightResource.ScalarResources[resourceName] 9621 if leftValue == -1 </span><span class="cov8" title="1">{ 9622 continue</span> 9623 } 9624 <span class="cov8" title="1">if rightValue == -1 || lessFunc(leftValue, rightValue) </span><span class="cov8" title="1">{ 9625 return true 9626 }</span> 9627 } 9628 <span class="cov8" title="1">return false</span> 9629 } 9630 9631 // LessEqualPartly returns true if there exists any dimension whose resource amount in r is less than or equal with that in rr. 9632 // Otherwise returns false. 9633 // @param defaultValue "default value for resource dimension not defined in ScalarResources. 
Its value can only be one of 'Zero' and 'Infinity'" 9634 func (r *Resource) LessEqualPartly(rr *Resource, defaultValue DimensionDefaultValue) bool <span class="cov8" title="1">{ 9635 lessEqualFunc := func(l, r, diff float64) bool </span><span class="cov8" title="1">{ 9636 if l < r || math.Abs(l-r) < diff </span><span class="cov8" title="1">{ 9637 return true 9638 }</span> 9639 <span class="cov8" title="1">return false</span> 9640 } 9641 9642 <span class="cov8" title="1">leftResource := r.Clone() 9643 rightResource := rr.Clone() 9644 9645 if lessEqualFunc(leftResource.MilliCPU, rightResource.MilliCPU, minResource) || lessEqualFunc(leftResource.Memory, rightResource.Memory, minResource) </span><span class="cov8" title="1">{ 9646 return true 9647 }</span> 9648 9649 <span class="cov8" title="1">r.setDefaultValue(leftResource, rightResource, defaultValue) 9650 9651 for resourceName, leftValue := range leftResource.ScalarResources </span><span class="cov8" title="1">{ 9652 rightValue := rightResource.ScalarResources[resourceName] 9653 if leftValue == -1 </span><span class="cov0" title="0">{ 9654 continue</span> 9655 } 9656 <span class="cov8" title="1">if rightValue == -1 || lessEqualFunc(leftValue, rightValue, minResource) </span><span class="cov8" title="1">{ 9657 return true 9658 }</span> 9659 } 9660 <span class="cov8" title="1">return false</span> 9661 } 9662 9663 // Equal returns true only on condition that values in all dimension are equal with each other for r and rr 9664 // Otherwise returns false. 9665 // @param defaultValue "default value for resource dimension not defined in ScalarResources. 
Its value can only be one of 'Zero' and 'Infinity'" 9666 func (r *Resource) Equal(rr *Resource, defaultValue DimensionDefaultValue) bool <span class="cov8" title="1">{ 9667 equalFunc := func(l, r, diff float64) bool </span><span class="cov8" title="1">{ 9668 return l == r || math.Abs(l-r) < diff 9669 }</span> 9670 9671 <span class="cov8" title="1">leftResource := r.Clone() 9672 rightResource := rr.Clone() 9673 9674 if !equalFunc(leftResource.MilliCPU, rightResource.MilliCPU, minResource) || !equalFunc(leftResource.Memory, rightResource.Memory, minResource) </span><span class="cov8" title="1">{ 9675 return false 9676 }</span> 9677 9678 <span class="cov8" title="1">r.setDefaultValue(leftResource, rightResource, defaultValue) 9679 9680 for resourceName, leftValue := range leftResource.ScalarResources </span><span class="cov8" title="1">{ 9681 rightValue := rightResource.ScalarResources[resourceName] 9682 if !equalFunc(leftValue, rightValue, minResource) </span><span class="cov0" title="0">{ 9683 return false 9684 }</span> 9685 } 9686 <span class="cov8" title="1">return true</span> 9687 } 9688 9689 // Diff calculate the difference between two resource object 9690 // Note: if `defaultValue` equals `Infinity`, the difference between two values will be `Infinity`, marked as -1 9691 func (r *Resource) Diff(rr *Resource, defaultValue DimensionDefaultValue) (*Resource, *Resource) <span class="cov8" title="1">{ 9692 leftRes := r.Clone() 9693 rightRes := rr.Clone() 9694 increasedVal := EmptyResource() 9695 decreasedVal := EmptyResource() 9696 r.setDefaultValue(leftRes, rightRes, defaultValue) 9697 9698 if leftRes.MilliCPU > rightRes.MilliCPU </span><span class="cov8" title="1">{ 9699 increasedVal.MilliCPU = leftRes.MilliCPU - rightRes.MilliCPU 9700 }</span> else<span class="cov8" title="1"> { 9701 decreasedVal.MilliCPU = rightRes.MilliCPU - leftRes.MilliCPU 9702 }</span> 9703 9704 <span class="cov8" title="1">if leftRes.Memory > rightRes.Memory </span><span class="cov8" 
title="1">{ 9705 increasedVal.Memory = leftRes.Memory - rightRes.Memory 9706 }</span> else<span class="cov8" title="1"> { 9707 decreasedVal.Memory = rightRes.Memory - leftRes.Memory 9708 }</span> 9709 9710 <span class="cov8" title="1">increasedVal.ScalarResources = make(map[v1.ResourceName]float64, 0) 9711 decreasedVal.ScalarResources = make(map[v1.ResourceName]float64, 0) 9712 for lName, lQuant := range leftRes.ScalarResources </span><span class="cov8" title="1">{ 9713 rQuant, _ := rightRes.ScalarResources[lName] 9714 if lQuant == -1 </span><span class="cov8" title="1">{ 9715 increasedVal.ScalarResources[lName] = -1 9716 continue</span> 9717 } 9718 <span class="cov8" title="1">if rQuant == -1 </span><span class="cov8" title="1">{ 9719 decreasedVal.ScalarResources[lName] = -1 9720 continue</span> 9721 } 9722 <span class="cov8" title="1">if lQuant > rQuant </span><span class="cov8" title="1">{ 9723 increasedVal.ScalarResources[lName] = lQuant - rQuant 9724 }</span> else<span class="cov8" title="1"> { 9725 decreasedVal.ScalarResources[lName] = rQuant - lQuant 9726 }</span> 9727 } 9728 9729 <span class="cov8" title="1">return increasedVal, decreasedVal</span> 9730 } 9731 9732 // AddScalar adds a resource by a scalar value of this resource. 9733 func (r *Resource) AddScalar(name v1.ResourceName, quantity float64) <span class="cov8" title="1">{ 9734 r.SetScalar(name, r.ScalarResources[name]+quantity) 9735 }</span> 9736 9737 // SetScalar sets a resource by a scalar value of this resource. 9738 func (r *Resource) SetScalar(name v1.ResourceName, quantity float64) <span class="cov8" title="1">{ 9739 // Lazily allocate scalar resource map. 
9740 if r.ScalarResources == nil </span><span class="cov8" title="1">{ 9741 r.ScalarResources = map[v1.ResourceName]float64{} 9742 }</span> 9743 <span class="cov8" title="1">r.ScalarResources[name] = quantity</span> 9744 } 9745 9746 // MinDimensionResource is used to reset the r resource dimension which is less than rr 9747 // e.g r resource is <cpu 2000.00, memory 4047845376.00, hugepages-2Mi 0.00, hugepages-1Gi 0.00> 9748 // rr resource is <cpu 3000.00, memory 1000.00> 9749 // return r resource is <cpu 2000.00, memory 1000.00, hugepages-2Mi 0.00, hugepages-1Gi 0.00> 9750 func (r *Resource) MinDimensionResource(rr *Resource) *Resource <span class="cov8" title="1">{ 9751 if rr.MilliCPU < r.MilliCPU </span><span class="cov8" title="1">{ 9752 r.MilliCPU = rr.MilliCPU 9753 }</span> 9754 <span class="cov8" title="1">if rr.Memory < r.Memory </span><span class="cov8" title="1">{ 9755 r.Memory = rr.Memory 9756 }</span> 9757 9758 <span class="cov8" title="1">if rr.ScalarResources == nil </span><span class="cov0" title="0">{ 9759 if r.ScalarResources != nil </span><span class="cov0" title="0">{ 9760 for name := range r.ScalarResources </span><span class="cov0" title="0">{ 9761 r.ScalarResources[name] = 0 9762 }</span> 9763 } 9764 } else<span class="cov8" title="1"> { 9765 if r.ScalarResources != nil </span><span class="cov8" title="1">{ 9766 for name, quant := range rr.ScalarResources </span><span class="cov8" title="1">{ 9767 if quant < r.ScalarResources[name] </span><span class="cov8" title="1">{ 9768 r.ScalarResources[name] = quant 9769 }</span> 9770 } 9771 } 9772 } 9773 <span class="cov8" title="1">return r</span> 9774 } 9775 9776 // setDefaultValue sets default value for resource dimension not defined of ScalarResource in leftResource and rightResource 9777 // @param defaultValue "default value for resource dimension not defined in ScalarResources. 
It can only be one of 'Zero' or 'Infinity'" 9778 func (r *Resource) setDefaultValue(leftResource, rightResource *Resource, defaultValue DimensionDefaultValue) <span class="cov8" title="1">{ 9779 if leftResource.ScalarResources == nil </span><span class="cov8" title="1">{ 9780 leftResource.ScalarResources = map[v1.ResourceName]float64{} 9781 }</span> 9782 <span class="cov8" title="1">if rightResource.ScalarResources == nil </span><span class="cov8" title="1">{ 9783 rightResource.ScalarResources = map[v1.ResourceName]float64{} 9784 }</span> 9785 <span class="cov8" title="1">for resourceName := range leftResource.ScalarResources </span><span class="cov8" title="1">{ 9786 _, ok := rightResource.ScalarResources[resourceName] 9787 if !ok </span><span class="cov8" title="1">{ 9788 if defaultValue == Zero </span><span class="cov8" title="1">{ 9789 rightResource.ScalarResources[resourceName] = 0 9790 }</span> else<span class="cov8" title="1"> if defaultValue == Infinity </span><span class="cov8" title="1">{ 9791 rightResource.ScalarResources[resourceName] = -1 9792 }</span> 9793 } 9794 } 9795 9796 <span class="cov8" title="1">for resourceName := range rightResource.ScalarResources </span><span class="cov8" title="1">{ 9797 _, ok := leftResource.ScalarResources[resourceName] 9798 if !ok </span><span class="cov8" title="1">{ 9799 if defaultValue == Zero </span><span class="cov8" title="1">{ 9800 leftResource.ScalarResources[resourceName] = 0 9801 }</span> else<span class="cov8" title="1"> if defaultValue == Infinity </span><span class="cov8" title="1">{ 9802 leftResource.ScalarResources[resourceName] = -1 9803 }</span> 9804 } 9805 } 9806 } 9807 9808 // ParseResourceList parses the given configuration map into an API 9809 // ResourceList or returns an error. 
9810 func ParseResourceList(m map[string]string) (v1.ResourceList, error) <span class="cov0" title="0">{ 9811 if len(m) == 0 </span><span class="cov0" title="0">{ 9812 return nil, nil 9813 }</span> 9814 <span class="cov0" title="0">rl := make(v1.ResourceList) 9815 for k, v := range m </span><span class="cov0" title="0">{ 9816 switch v1.ResourceName(k) </span>{ 9817 // CPU, memory, local storage, and PID resources are supported. 9818 case v1.ResourceCPU, v1.ResourceMemory, v1.ResourceEphemeralStorage:<span class="cov0" title="0"> 9819 q, err := resource.ParseQuantity(v) 9820 if err != nil </span><span class="cov0" title="0">{ 9821 return nil, err 9822 }</span> 9823 <span class="cov0" title="0">if q.Sign() == -1 </span><span class="cov0" title="0">{ 9824 return nil, fmt.Errorf("resource quantity for %q cannot be negative: %v", k, v) 9825 }</span> 9826 <span class="cov0" title="0">rl[v1.ResourceName(k)] = q</span> 9827 default:<span class="cov0" title="0"> 9828 return nil, fmt.Errorf("cannot reserve %q resource", k)</span> 9829 } 9830 } 9831 <span class="cov0" title="0">return rl, nil</span> 9832 } 9833 9834 func GetMinResource() float64 <span class="cov0" title="0">{ 9835 return minResource 9836 }</span> 9837 9838 // ResourceNameList struct defines resource name collection 9839 type ResourceNameList []v1.ResourceName 9840 9841 // Contains judges whether rr is subset of r 9842 func (r ResourceNameList) Contains(rr ResourceNameList) bool <span class="cov0" title="0">{ 9843 for _, rrName := range ([]v1.ResourceName)(rr) </span><span class="cov0" title="0">{ 9844 isResourceExist := false 9845 for _, rName := range ([]v1.ResourceName)(r) </span><span class="cov0" title="0">{ 9846 if rName == rrName </span><span class="cov0" title="0">{ 9847 isResourceExist = true 9848 break</span> 9849 } 9850 } 9851 <span class="cov0" title="0">if !isResourceExist </span><span class="cov0" title="0">{ 9852 return false 9853 }</span> 9854 } 9855 <span class="cov0" title="0">return 
true</span> 9856 } 9857 </pre> 9858 9859 <pre class="file" id="file49" style="display: none">/* 9860 Copyright 2021 The Volcano Authors. 9861 9862 Licensed under the Apache License, Version 2.0 (the "License"); 9863 you may not use this file except in compliance with the License. 9864 You may obtain a copy of the License at 9865 9866 http://www.apache.org/licenses/LICENSE-2.0 9867 9868 Unless required by applicable law or agreed to in writing, software 9869 distributed under the License is distributed on an "AS IS" BASIS, 9870 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9871 See the License for the specific language governing permissions and 9872 limitations under the License. 9873 */ 9874 9875 package api 9876 9877 import ( 9878 "k8s.io/apimachinery/pkg/types" 9879 9880 "volcano.sh/apis/pkg/apis/scheduling" 9881 ) 9882 9883 // ClusterID is UID type, serves as unique ID for each queue 9884 type ClusterID types.UID 9885 9886 // SiloClusterInfo will have all details about queue 9887 type SiloClusterInfo struct { 9888 UID ClusterID 9889 Cluster *scheduling.Cluster 9890 } 9891 9892 // NewSiloClusterInfo creates new queueInfo object 9893 func NewSiloClusterInfo(cluster *scheduling.Cluster) *SiloClusterInfo <span class="cov0" title="0">{ 9894 return &SiloClusterInfo{ 9895 UID: ClusterID(cluster.Name), 9896 Cluster: cluster, 9897 } 9898 }</span> 9899 </pre> 9900 9901 <pre class="file" id="file50" style="display: none">/* 9902 Copyright 2018 The Kubernetes Authors. 9903 9904 Licensed under the Apache License, Version 2.0 (the "License"); 9905 you may not use this file except in compliance with the License. 9906 You may obtain a copy of the License at 9907 9908 http://www.apache.org/licenses/LICENSE-2.0 9909 9910 Unless required by applicable law or agreed to in writing, software 9911 distributed under the License is distributed on an "AS IS" BASIS, 9912 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
9913 See the License for the specific language governing permissions and 9914 limitations under the License. 9915 */ 9916 9917 package api 9918 9919 import ( 9920 "fmt" 9921 9922 v1 "k8s.io/api/core/v1" 9923 "k8s.io/apimachinery/pkg/api/resource" 9924 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 9925 "k8s.io/apimachinery/pkg/types" 9926 ) 9927 9928 func buildNode(name string, alloc v1.ResourceList) *v1.Node <span class="cov8" title="1">{ 9929 return &v1.Node{ 9930 ObjectMeta: metav1.ObjectMeta{ 9931 Name: name, 9932 }, 9933 Status: v1.NodeStatus{ 9934 Capacity: alloc, 9935 Allocatable: alloc, 9936 }, 9937 } 9938 }</span> 9939 9940 func buildPod(ns, n, nn string, p v1.PodPhase, req v1.ResourceList, owner []metav1.OwnerReference, labels map[string]string) *v1.Pod <span class="cov8" title="1">{ 9941 return &v1.Pod{ 9942 ObjectMeta: metav1.ObjectMeta{ 9943 UID: types.UID(fmt.Sprintf("%v-%v", ns, n)), 9944 Name: n, 9945 Namespace: ns, 9946 OwnerReferences: owner, 9947 Labels: labels, 9948 }, 9949 Status: v1.PodStatus{ 9950 Phase: p, 9951 }, 9952 Spec: v1.PodSpec{ 9953 NodeName: nn, 9954 Containers: []v1.Container{ 9955 { 9956 Resources: v1.ResourceRequirements{ 9957 Requests: req, 9958 }, 9959 }, 9960 }, 9961 }, 9962 } 9963 }</span> 9964 9965 func buildResourceList(cpu string, memory string) v1.ResourceList <span class="cov8" title="1">{ 9966 return v1.ResourceList{ 9967 v1.ResourceCPU: resource.MustParse(cpu), 9968 v1.ResourceMemory: resource.MustParse(memory), 9969 } 9970 }</span> 9971 9972 func buildResource(cpu string, memory string) *Resource <span class="cov8" title="1">{ 9973 return NewResource(v1.ResourceList{ 9974 v1.ResourceCPU: resource.MustParse(cpu), 9975 v1.ResourceMemory: resource.MustParse(memory), 9976 }) 9977 }</span> 9978 9979 func buildOwnerReference(owner string) metav1.OwnerReference <span class="cov8" title="1">{ 9980 controller := true 9981 return metav1.OwnerReference{ 9982 Controller: &controller, 9983 UID: types.UID(owner), 9984 } 9985 
}</span> 9986 </pre> 9987 9988 <pre class="file" id="file51" style="display: none">/* 9989 Copyright 2018 The Kubernetes Authors. 9990 9991 Licensed under the Apache License, Version 2.0 (the "License"); 9992 you may not use this file except in compliance with the License. 9993 You may obtain a copy of the License at 9994 9995 http://www.apache.org/licenses/LICENSE-2.0 9996 9997 Unless required by applicable law or agreed to in writing, software 9998 distributed under the License is distributed on an "AS IS" BASIS, 9999 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10000 See the License for the specific language governing permissions and 10001 limitations under the License. 10002 */ 10003 10004 package api 10005 10006 import ( 10007 k8sframework "k8s.io/kubernetes/pkg/scheduler/framework/v1alpha1" 10008 ) 10009 10010 // TaskStatus defines the status of a task/pod. 10011 type TaskStatus int 10012 10013 const ( 10014 // Pending means the task is pending in the apiserver. 10015 Pending TaskStatus = 1 << iota 10016 10017 // Allocated means the scheduler assigns a host to it. 10018 Allocated 10019 10020 // Pipelined means the scheduler assigns a host to wait for releasing resource. 10021 Pipelined 10022 10023 // Binding means the scheduler send Bind request to apiserver. 10024 Binding 10025 10026 // Bound means the task/Pod bounds to a host. 10027 Bound 10028 10029 // Running means a task is running on the host. 10030 Running 10031 10032 // Releasing means a task/pod is deleted. 10033 Releasing 10034 10035 // Succeeded means that all containers in the pod have voluntarily terminated 10036 // with a container exit code of 0, and the system is not going to restart any of these containers. 10037 Succeeded 10038 10039 // Failed means that all containers in the pod have terminated, and at least one container has 10040 // terminated in a failure (exited with a non-zero exit code or was stopped by the system). 
10041 Failed 10042 10043 // Unknown means the status of task/pod is unknown to the scheduler. 10044 Unknown 10045 ) 10046 10047 func (ts TaskStatus) String() string <span class="cov8" title="1">{ 10048 switch ts </span>{ 10049 case Pending:<span class="cov8" title="1"> 10050 return "Pending"</span> 10051 case Allocated:<span class="cov0" title="0"> 10052 return "Allocated"</span> 10053 case Pipelined:<span class="cov0" title="0"> 10054 return "Pipelined"</span> 10055 case Binding:<span class="cov0" title="0"> 10056 return "Binding"</span> 10057 case Bound:<span class="cov0" title="0"> 10058 return "Bound"</span> 10059 case Running:<span class="cov0" title="0"> 10060 return "Running"</span> 10061 case Releasing:<span class="cov0" title="0"> 10062 return "Releasing"</span> 10063 case Succeeded:<span class="cov0" title="0"> 10064 return "Succeeded"</span> 10065 case Failed:<span class="cov0" title="0"> 10066 return "Failed"</span> 10067 default:<span class="cov0" title="0"> 10068 return "Unknown"</span> 10069 } 10070 } 10071 10072 // NodePhase defines the phase of node 10073 type NodePhase int 10074 10075 const ( 10076 // Ready means the node is ready for scheduling 10077 Ready NodePhase = 1 << iota 10078 // NotReady means the node is not ready for scheduling 10079 NotReady 10080 ) 10081 10082 func (np NodePhase) String() string <span class="cov8" title="1">{ 10083 switch np </span>{ 10084 case Ready:<span class="cov8" title="1"> 10085 return "Ready"</span> 10086 case NotReady:<span class="cov0" title="0"> 10087 return "NotReady"</span> 10088 } 10089 10090 <span class="cov0" title="0">return "Unknown"</span> 10091 } 10092 10093 // validateStatusUpdate validates whether the status transfer is valid. 10094 func validateStatusUpdate(oldStatus, newStatus TaskStatus) error <span class="cov0" title="0">{ 10095 return nil 10096 }</span> 10097 10098 // LessFn is the func declaration used by sort or priority queue. 
10099 type LessFn func(interface{}, interface{}) bool 10100 10101 // CompareFn is the func declaration used by sort or priority queue. 10102 type CompareFn func(interface{}, interface{}) int 10103 10104 // ValidateFn is the func declaration used to check object's status. 10105 type ValidateFn func(interface{}) bool 10106 10107 // ValidateResult is struct to which can used to determine the result 10108 type ValidateResult struct { 10109 Pass bool 10110 Reason string 10111 Message string 10112 } 10113 10114 // ValidateExFn is the func declaration used to validate the result. 10115 type ValidateExFn func(interface{}) *ValidateResult 10116 10117 // VoteFn is the func declaration used to check object's complicated status. 10118 type VoteFn func(interface{}) int 10119 10120 // JobEnqueuedFn is the func declaration used to call after job enqueued. 10121 type JobEnqueuedFn func(interface{}) 10122 10123 // PredicateFn is the func declaration used to predicate node for task. 10124 type PredicateFn func(*TaskInfo, *NodeInfo) error 10125 10126 // BestNodeFn is the func declaration used to return the nodeScores to plugins. 10127 type BestNodeFn func(*TaskInfo, map[float64][]*NodeInfo) *NodeInfo 10128 10129 // EvictableFn is the func declaration used to evict tasks. 10130 type EvictableFn func(*TaskInfo, []*TaskInfo) ([]*TaskInfo, int) 10131 10132 // NodeOrderFn is the func declaration used to get priority score for a node for a particular task. 10133 type NodeOrderFn func(*TaskInfo, *NodeInfo) (float64, error) 10134 10135 // BatchNodeOrderFn is the func declaration used to get priority score for ALL nodes for a particular task. 10136 type BatchNodeOrderFn func(*TaskInfo, []*NodeInfo) (map[string]float64, error) 10137 10138 // NodeMapFn is the func declaration used to get priority score for a node for a particular task. 
10139 type NodeMapFn func(*TaskInfo, *NodeInfo) (float64, error) 10140 10141 // NodeReduceFn is the func declaration used to reduce priority score for a node for a particular task. 10142 type NodeReduceFn func(*TaskInfo, k8sframework.NodeScoreList) error 10143 10144 // NodeOrderMapFn is the func declaration used to get priority score of all plugins for a node for a particular task. 10145 type NodeOrderMapFn func(*TaskInfo, *NodeInfo) (map[string]float64, float64, error) 10146 10147 // NodeOrderReduceFn is the func declaration used to reduce priority score of all nodes for a plugin for a particular task. 10148 type NodeOrderReduceFn func(*TaskInfo, map[string]k8sframework.NodeScoreList) (map[string]float64, error) 10149 10150 // TargetJobFn is the func declaration used to select the target job satisfies some conditions 10151 type TargetJobFn func([]*JobInfo) *JobInfo 10152 10153 // ReservedNodesFn is the func declaration used to select the reserved nodes 10154 type ReservedNodesFn func() 10155 10156 // VictimTasksFn is the func declaration used to select victim tasks 10157 type VictimTasksFn func() []*TaskInfo 10158 10159 // UnderUsedResourceFn is the func declaration used to get under used resource list for queue 10160 type UnderUsedResourceFn func(*QueueInfo) ResourceNameList 10161 </pre> 10162 10163 <pre class="file" id="file52" style="display: none">package api 10164 10165 import ( 10166 "fmt" 10167 "sort" 10168 "strings" 10169 ) 10170 10171 const ( 10172 // NodePodNumberExceeded means pods in node exceed the allocatable pod number 10173 NodePodNumberExceeded = "node(s) pod number exceeded" 10174 // NodeResourceFitFailed means node could not fit the request of pod 10175 NodeResourceFitFailed = "node(s) resource fit failed" 10176 10177 // AllNodeUnavailableMsg is the default error message 10178 AllNodeUnavailableMsg = "all nodes are unavailable" 10179 ) 10180 10181 // These are reasons for a pod's transition to a condition. 
10182 const ( 10183 // PodReasonUnschedulable reason in PodScheduled PodCondition means that the scheduler 10184 // can't schedule the pod right now, for example due to insufficient resources in the cluster. 10185 PodReasonUnschedulable = "Unschedulable" 10186 // PodReasonSchedulable reason in PodScheduled PodCondition means that the scheduler 10187 // can schedule the pod right now, but not bind yet 10188 PodReasonSchedulable = "Schedulable" 10189 // PodReasonUndetermined reason in PodScheduled PodCondition means that the scheduler 10190 // skips scheduling the pod which left the pod `Undetermined`, for example due to unschedulable pod already occurred. 10191 PodReasonUndetermined = "Undetermined" 10192 ) 10193 10194 // FitErrors is set of FitError on many nodes 10195 type FitErrors struct { 10196 nodes map[string]*FitError 10197 err string 10198 } 10199 10200 // NewFitErrors returns an FitErrors 10201 func NewFitErrors() *FitErrors <span class="cov0" title="0">{ 10202 f := new(FitErrors) 10203 f.nodes = make(map[string]*FitError) 10204 return f 10205 }</span> 10206 10207 // SetError set the common error message in FitErrors 10208 func (f *FitErrors) SetError(err string) <span class="cov0" title="0">{ 10209 f.err = err 10210 }</span> 10211 10212 // SetNodeError set the node error in FitErrors 10213 func (f *FitErrors) SetNodeError(nodeName string, err error) <span class="cov0" title="0">{ 10214 var fe *FitError 10215 switch obj := err.(type) </span>{ 10216 case *FitError:<span class="cov0" title="0"> 10217 obj.NodeName = nodeName 10218 fe = obj</span> 10219 default:<span class="cov0" title="0"> 10220 fe = &FitError{ 10221 NodeName: nodeName, 10222 Reasons: []string{obj.Error()}, 10223 }</span> 10224 } 10225 10226 <span class="cov0" title="0">f.nodes[nodeName] = fe</span> 10227 } 10228 10229 // Error returns the final error message 10230 func (f *FitErrors) Error() string <span class="cov8" title="1">{ 10231 reasons := make(map[string]int) 10232 10233 for _, node 
:= range f.nodes </span><span class="cov8" title="1">{ 10234 for _, reason := range node.Reasons </span><span class="cov8" title="1">{ 10235 reasons[reason]++ 10236 }</span> 10237 } 10238 10239 <span class="cov8" title="1">sortReasonsHistogram := func() []string </span><span class="cov8" title="1">{ 10240 reasonStrings := []string{} 10241 for k, v := range reasons </span><span class="cov8" title="1">{ 10242 reasonStrings = append(reasonStrings, fmt.Sprintf("%v %v", v, k)) 10243 }</span> 10244 <span class="cov8" title="1">sort.Strings(reasonStrings) 10245 return reasonStrings</span> 10246 } 10247 <span class="cov8" title="1">if f.err == "" </span><span class="cov8" title="1">{ 10248 f.err = AllNodeUnavailableMsg 10249 }</span> 10250 <span class="cov8" title="1">reasonMsg := fmt.Sprintf(f.err+": %v.", strings.Join(sortReasonsHistogram(), ", ")) 10251 return reasonMsg</span> 10252 } 10253 10254 // FitError describe the reason why task could not fit that node 10255 type FitError struct { 10256 taskNamespace string 10257 taskName string 10258 NodeName string 10259 Reasons []string 10260 } 10261 10262 // NewFitError return FitError by message 10263 func NewFitError(task *TaskInfo, node *NodeInfo, message ...string) *FitError <span class="cov0" title="0">{ 10264 fe := &FitError{ 10265 taskName: task.Name, 10266 taskNamespace: task.Namespace, 10267 NodeName: node.Name, 10268 Reasons: message, 10269 } 10270 return fe 10271 }</span> 10272 10273 // Error returns the final error message 10274 func (f *FitError) Error() string <span class="cov0" title="0">{ 10275 return fmt.Sprintf("task %s/%s on node %s fit failed: %s", f.taskNamespace, f.taskName, f.NodeName, strings.Join(f.Reasons, ", ")) 10276 }</span> 10277 </pre> 10278 10279 <pre class="file" id="file53" style="display: none">/* 10280 Copyright 2021 The Volcano Authors. 10281 10282 Licensed under the Apache License, Version 2.0 (the "License"); 10283 you may not use this file except in compliance with the License. 
10284 You may obtain a copy of the License at 10285 10286 http://www.apache.org/licenses/LICENSE-2.0 10287 10288 Unless required by applicable law or agreed to in writing, software 10289 distributed under the License is distributed on an "AS IS" BASIS, 10290 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10291 See the License for the specific language governing permissions and 10292 limitations under the License. 10293 */ 10294 10295 package cache 10296 10297 import ( 10298 "context" 10299 "fmt" 10300 "sync" 10301 "time" 10302 10303 v1 "k8s.io/api/core/v1" 10304 "k8s.io/api/scheduling/v1beta1" 10305 apierrors "k8s.io/apimachinery/pkg/api/errors" 10306 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 10307 "k8s.io/apimachinery/pkg/runtime" 10308 utilruntime "k8s.io/apimachinery/pkg/util/runtime" 10309 "k8s.io/apimachinery/pkg/util/wait" 10310 "k8s.io/client-go/informers" 10311 infov1 "k8s.io/client-go/informers/core/v1" 10312 schedv1 "k8s.io/client-go/informers/scheduling/v1beta1" 10313 storagev1 "k8s.io/client-go/informers/storage/v1" 10314 storagev1alpha1 "k8s.io/client-go/informers/storage/v1alpha1" 10315 "k8s.io/client-go/kubernetes" 10316 corev1 "k8s.io/client-go/kubernetes/typed/core/v1" 10317 "k8s.io/client-go/rest" 10318 "k8s.io/client-go/tools/cache" 10319 "k8s.io/client-go/tools/record" 10320 "k8s.io/client-go/util/workqueue" 10321 "k8s.io/klog" 10322 podutil "k8s.io/kubernetes/pkg/api/v1/pod" 10323 volumescheduling "k8s.io/kubernetes/pkg/controller/volume/scheduling" 10324 10325 batch "volcano.sh/apis/pkg/apis/batch/v1alpha1" 10326 "volcano.sh/apis/pkg/apis/scheduling" 10327 schedulingscheme "volcano.sh/apis/pkg/apis/scheduling/scheme" 10328 vcv1beta1 "volcano.sh/apis/pkg/apis/scheduling/v1beta1" 10329 vcclient "volcano.sh/apis/pkg/client/clientset/versioned" 10330 "volcano.sh/apis/pkg/client/clientset/versioned/scheme" 10331 vcinformer "volcano.sh/apis/pkg/client/informers/externalversions" 10332 cpuinformerv1 
"volcano.sh/apis/pkg/client/informers/externalversions/nodeinfo/v1alpha1" 10333 vcinformerv1 "volcano.sh/apis/pkg/client/informers/externalversions/scheduling/v1beta1" 10334 10335 "volcano.sh/volcano/cmd/scheduler/app/options" 10336 schedulingapi "volcano.sh/volcano/pkg/scheduler/api" 10337 ) 10338 10339 func init() <span class="cov8" title="1">{ 10340 schemeBuilder := runtime.SchemeBuilder{ 10341 v1.AddToScheme, 10342 } 10343 10344 utilruntime.Must(schemeBuilder.AddToScheme(scheme.Scheme)) 10345 }</span> 10346 10347 // New returns a Cache implementation. 10348 func New(config *rest.Config, schedulerName string, defaultQueue string) Cache <span class="cov0" title="0">{ 10349 return newSchedulerCache(config, schedulerName, defaultQueue) 10350 }</span> 10351 10352 // SchedulerCache cache for the kube batch 10353 type SchedulerCache struct { 10354 sync.Mutex 10355 10356 kubeClient *kubernetes.Clientset 10357 vcClient *vcclient.Clientset 10358 defaultQueue string 10359 // schedulerName is the name for volcano scheduler 10360 schedulerName string 10361 10362 podInformer infov1.PodInformer 10363 nodeInformer infov1.NodeInformer 10364 podGroupInformerV1beta1 vcinformerv1.PodGroupInformer 10365 queueInformerV1beta1 vcinformerv1.QueueInformer 10366 pvInformer infov1.PersistentVolumeInformer 10367 pvcInformer infov1.PersistentVolumeClaimInformer 10368 scInformer storagev1.StorageClassInformer 10369 pcInformer schedv1.PriorityClassInformer 10370 quotaInformer infov1.ResourceQuotaInformer 10371 csiNodeInformer storagev1.CSINodeInformer 10372 csiDriverInformer storagev1.CSIDriverInformer 10373 csiStorageCapacityInformer storagev1alpha1.CSIStorageCapacityInformer 10374 cpuInformer cpuinformerv1.NumatopologyInformer 10375 10376 Binder Binder 10377 Evictor Evictor 10378 StatusUpdater StatusUpdater 10379 PodGroupBinder BatchBinder 10380 VolumeBinder VolumeBinder 10381 10382 Recorder record.EventRecorder 10383 10384 Jobs map[schedulingapi.JobID]*schedulingapi.JobInfo 10385 Nodes 
map[string]*schedulingapi.NodeInfo 10386 Queues map[schedulingapi.QueueID]*schedulingapi.QueueInfo 10387 PriorityClasses map[string]*v1beta1.PriorityClass 10388 NodeList []string 10389 defaultPriorityClass *v1beta1.PriorityClass 10390 defaultPriority int32 10391 10392 NamespaceCollection map[string]*schedulingapi.NamespaceCollection 10393 10394 errTasks workqueue.RateLimitingInterface 10395 deletedJobs workqueue.RateLimitingInterface 10396 10397 informerFactory informers.SharedInformerFactory 10398 } 10399 10400 type defaultBinder struct { 10401 kubeclient *kubernetes.Clientset 10402 } 10403 10404 //Bind will send bind request to api server 10405 func (db *defaultBinder) Bind(p *v1.Pod, hostname string) error <span class="cov0" title="0">{ 10406 if err := db.kubeclient.CoreV1().Pods(p.Namespace).Bind(context.TODO(), 10407 &v1.Binding{ 10408 ObjectMeta: metav1.ObjectMeta{Namespace: p.Namespace, Name: p.Name, UID: p.UID, Annotations: p.Annotations}, 10409 Target: v1.ObjectReference{ 10410 Kind: "Node", 10411 Name: hostname, 10412 }, 10413 }, 10414 metav1.CreateOptions{}); err != nil </span><span class="cov0" title="0">{ 10415 klog.Errorf("Failed to bind pod <%v/%v>: %#v", p.Namespace, p.Name, err) 10416 return err 10417 }</span> 10418 <span class="cov0" title="0">return nil</span> 10419 } 10420 10421 type defaultEvictor struct { 10422 kubeclient *kubernetes.Clientset 10423 recorder record.EventRecorder 10424 } 10425 10426 // Evict will send delete pod request to api server 10427 func (de *defaultEvictor) Evict(p *v1.Pod, reason string) error <span class="cov0" title="0">{ 10428 klog.V(3).Infof("Evicting pod %v/%v, because of %v", p.Namespace, p.Name, reason) 10429 10430 evictMsg := fmt.Sprintf("Pod is evicted, because of %v", reason) 10431 annotations := map[string]string{} 10432 // record that we are evicting the pod 10433 de.recorder.AnnotatedEventf(p, annotations, v1.EventTypeWarning, "Evict", evictMsg) 10434 10435 pod := p.DeepCopy() 10436 condition := 
&v1.PodCondition{ 10437 Type: v1.PodReady, 10438 Status: v1.ConditionFalse, 10439 Reason: "Evict", 10440 Message: evictMsg, 10441 } 10442 if !podutil.UpdatePodCondition(&pod.Status, condition) </span><span class="cov0" title="0">{ 10443 klog.V(1).Infof("UpdatePodCondition: existed condition, not update") 10444 klog.V(1).Infof("%+v", pod.Status.Conditions) 10445 return nil 10446 }</span> 10447 <span class="cov0" title="0">if _, err := de.kubeclient.CoreV1().Pods(p.Namespace).UpdateStatus(context.TODO(), pod, metav1.UpdateOptions{}); err != nil </span><span class="cov0" title="0">{ 10448 klog.Errorf("Failed to update pod <%v/%v> status: %v", pod.Namespace, pod.Name, err) 10449 return err 10450 }</span> 10451 <span class="cov0" title="0">if err := de.kubeclient.CoreV1().Pods(p.Namespace).Delete(context.TODO(), p.Name, metav1.DeleteOptions{}); err != nil </span><span class="cov0" title="0">{ 10452 klog.Errorf("Failed to evict pod <%v/%v>: %#v", p.Namespace, p.Name, err) 10453 return err 10454 }</span> 10455 10456 <span class="cov0" title="0">return nil</span> 10457 } 10458 10459 // defaultStatusUpdater is the default implementation of the StatusUpdater interface 10460 type defaultStatusUpdater struct { 10461 kubeclient *kubernetes.Clientset 10462 vcclient *vcclient.Clientset 10463 } 10464 10465 // following the same logic as podutil.UpdatePodCondition 10466 func podConditionHaveUpdate(status *v1.PodStatus, condition *v1.PodCondition) bool <span class="cov0" title="0">{ 10467 lastTransitionTime := metav1.Now() 10468 // Try to find this pod condition. 10469 _, oldCondition := podutil.GetPodCondition(status, condition.Type) 10470 10471 if oldCondition == nil </span><span class="cov0" title="0">{ 10472 // We are adding new pod condition. 10473 return true 10474 }</span> 10475 // We are updating an existing condition, so we need to check if it has changed. 
10476 <span class="cov0" title="0">if condition.Status == oldCondition.Status </span><span class="cov0" title="0">{ 10477 lastTransitionTime = oldCondition.LastTransitionTime 10478 }</span> 10479 10480 <span class="cov0" title="0">isEqual := condition.Status == oldCondition.Status && 10481 condition.Reason == oldCondition.Reason && 10482 condition.Message == oldCondition.Message && 10483 condition.LastProbeTime.Equal(&oldCondition.LastProbeTime) && 10484 lastTransitionTime.Equal(&oldCondition.LastTransitionTime) 10485 10486 // Return true if one of the fields have changed. 10487 return !isEqual</span> 10488 } 10489 10490 // UpdatePodCondition will Update pod with podCondition 10491 func (su *defaultStatusUpdater) UpdatePodCondition(pod *v1.Pod, condition *v1.PodCondition) (*v1.Pod, error) <span class="cov0" title="0">{ 10492 klog.V(3).Infof("Updating pod condition for %s/%s to (%s==%s)", pod.Namespace, pod.Name, condition.Type, condition.Status) 10493 if podutil.UpdatePodCondition(&pod.Status, condition) </span><span class="cov0" title="0">{ 10494 return su.kubeclient.CoreV1().Pods(pod.Namespace).UpdateStatus(context.TODO(), pod, metav1.UpdateOptions{}) 10495 }</span> 10496 <span class="cov0" title="0">return pod, nil</span> 10497 } 10498 10499 // UpdatePodGroup will Update pod with podCondition 10500 func (su *defaultStatusUpdater) UpdatePodGroup(pg *schedulingapi.PodGroup) (*schedulingapi.PodGroup, error) <span class="cov0" title="0">{ 10501 podgroup := &vcv1beta1.PodGroup{} 10502 if err := schedulingscheme.Scheme.Convert(&pg.PodGroup, podgroup, nil); err != nil </span><span class="cov0" title="0">{ 10503 klog.Errorf("Error while converting PodGroup to v1alpha1.PodGroup with error: %v", err) 10504 return nil, err 10505 }</span> 10506 10507 <span class="cov0" title="0">updated, err := su.vcclient.SchedulingV1beta1().PodGroups(podgroup.Namespace).Update(context.TODO(), podgroup, metav1.UpdateOptions{}) 10508 if err != nil </span><span class="cov0" title="0">{ 10509 
klog.Errorf("Error while updating PodGroup with error: %v", err) 10510 return nil, err 10511 }</span> 10512 10513 <span class="cov0" title="0">podGroupInfo := &schedulingapi.PodGroup{Version: schedulingapi.PodGroupVersionV1Beta1} 10514 if err := schedulingscheme.Scheme.Convert(updated, &podGroupInfo.PodGroup, nil); err != nil </span><span class="cov0" title="0">{ 10515 klog.Errorf("Error while converting v1alpha.PodGroup to api.PodGroup with error: %v", err) 10516 return nil, err 10517 }</span> 10518 10519 <span class="cov0" title="0">return podGroupInfo, nil</span> 10520 } 10521 10522 type defaultVolumeBinder struct { 10523 volumeBinder volumescheduling.SchedulerVolumeBinder 10524 } 10525 10526 // AllocateVolumes allocates volume on the host to the task 10527 func (dvb *defaultVolumeBinder) AllocateVolumes(task *schedulingapi.TaskInfo, hostname string, podVolumes *volumescheduling.PodVolumes) error <span class="cov0" title="0">{ 10528 allBound, err := dvb.volumeBinder.AssumePodVolumes(task.Pod, hostname, podVolumes) 10529 task.VolumeReady = allBound 10530 10531 return err 10532 }</span> 10533 10534 // GetPodVolumes get pod volume on the host 10535 func (dvb *defaultVolumeBinder) GetPodVolumes(task *schedulingapi.TaskInfo, 10536 node *v1.Node) (podVolumes *volumescheduling.PodVolumes, err error) <span class="cov0" title="0">{ 10537 boundClaims, claimsToBind, _, err := dvb.volumeBinder.GetPodVolumes(task.Pod) 10538 if err != nil </span><span class="cov0" title="0">{ 10539 return nil, err 10540 }</span> 10541 10542 <span class="cov0" title="0">podVolumes, _, err = dvb.volumeBinder.FindPodVolumes(task.Pod, boundClaims, claimsToBind, node) 10543 return podVolumes, err</span> 10544 } 10545 10546 // BindVolumes binds volumes to the task 10547 func (dvb *defaultVolumeBinder) BindVolumes(task *schedulingapi.TaskInfo, podVolumes *volumescheduling.PodVolumes) error <span class="cov0" title="0">{ 10548 // If task's volumes are ready, did not bind them again. 
10549 if task.VolumeReady </span><span class="cov0" title="0">{ 10550 return nil 10551 }</span> 10552 10553 <span class="cov0" title="0">return dvb.volumeBinder.BindPodVolumes(task.Pod, podVolumes)</span> 10554 } 10555 10556 type podgroupBinder struct { 10557 kubeclient *kubernetes.Clientset 10558 vcclient *vcclient.Clientset 10559 } 10560 10561 // Bind will add silo cluster annotaion on pod and podgroup 10562 func (pgb *podgroupBinder) Bind(job *schedulingapi.JobInfo, cluster string) (*schedulingapi.JobInfo, error) <span class="cov0" title="0">{ 10563 if len(job.Tasks) == 0 </span><span class="cov0" title="0">{ 10564 klog.V(4).Infof("Job pods have not been created yet") 10565 return job, nil 10566 }</span> 10567 <span class="cov0" title="0">for _, task := range job.Tasks </span><span class="cov0" title="0">{ 10568 pod := task.Pod 10569 pod.Annotations[batch.ForwardClusterKey] = cluster 10570 pod.ResourceVersion = "" 10571 _, err := pgb.kubeclient.CoreV1().Pods(pod.Namespace).UpdateStatus(context.TODO(), pod, metav1.UpdateOptions{}) 10572 if err != nil </span><span class="cov0" title="0">{ 10573 klog.Errorf("Error while update pod annotation with error: %v", err) 10574 return nil, err 10575 }</span> 10576 } 10577 10578 <span class="cov0" title="0">pg := job.PodGroup 10579 pg.Annotations[batch.ForwardClusterKey] = cluster 10580 podgroup := &vcv1beta1.PodGroup{} 10581 if err := schedulingscheme.Scheme.Convert(&pg.PodGroup, podgroup, nil); err != nil </span><span class="cov0" title="0">{ 10582 klog.Errorf("Error while converting PodGroup to v1alpha1.PodGroup with error: %v", err) 10583 return nil, err 10584 }</span> 10585 <span class="cov0" title="0">newPg, err := pgb.vcclient.SchedulingV1beta1().PodGroups(pg.Namespace).Update(context.TODO(), podgroup, metav1.UpdateOptions{}) 10586 if err != nil </span><span class="cov0" title="0">{ 10587 klog.Errorf("Error while update PodGroup annotation with error: %v", err) 10588 return nil, err 10589 }</span> 10590 <span 
class="cov0" title="0">job.PodGroup.ResourceVersion = newPg.ResourceVersion 10591 klog.V(4).Infof("Bind PodGroup <%s> successfully", job.PodGroup.Name) 10592 return job, nil</span> 10593 } 10594 10595 func newSchedulerCache(config *rest.Config, schedulerName string, defaultQueue string) *SchedulerCache <span class="cov0" title="0">{ 10596 kubeClient, err := kubernetes.NewForConfig(config) 10597 if err != nil </span><span class="cov0" title="0">{ 10598 panic(fmt.Sprintf("failed init kubeClient, with err: %v", err))</span> 10599 } 10600 <span class="cov0" title="0">vcClient, err := vcclient.NewForConfig(config) 10601 if err != nil </span><span class="cov0" title="0">{ 10602 panic(fmt.Sprintf("failed init vcClient, with err: %v", err))</span> 10603 } 10604 <span class="cov0" title="0">eventClient, err := kubernetes.NewForConfig(config) 10605 if err != nil </span><span class="cov0" title="0">{ 10606 panic(fmt.Sprintf("failed init eventClient, with err: %v", err))</span> 10607 } 10608 10609 // create default queue 10610 <span class="cov0" title="0">reclaimable := true 10611 defaultQue := vcv1beta1.Queue{ 10612 ObjectMeta: metav1.ObjectMeta{ 10613 Name: defaultQueue, 10614 }, 10615 Spec: vcv1beta1.QueueSpec{ 10616 Reclaimable: &reclaimable, 10617 Weight: 1, 10618 }, 10619 } 10620 if _, err := vcClient.SchedulingV1beta1().Queues().Create(context.TODO(), &defaultQue, metav1.CreateOptions{}); err != nil && !apierrors.IsAlreadyExists(err) </span><span class="cov0" title="0">{ 10621 panic(fmt.Sprintf("failed init default queue, with err: %v", err))</span> 10622 } 10623 10624 <span class="cov0" title="0">sc := &SchedulerCache{ 10625 Jobs: make(map[schedulingapi.JobID]*schedulingapi.JobInfo), 10626 Nodes: make(map[string]*schedulingapi.NodeInfo), 10627 Queues: make(map[schedulingapi.QueueID]*schedulingapi.QueueInfo), 10628 PriorityClasses: make(map[string]*v1beta1.PriorityClass), 10629 errTasks: workqueue.NewRateLimitingQueue(workqueue.DefaultControllerRateLimiter()), 10630 
deletedJobs: workqueue.NewRateLimitingQueue(workqueue.DefaultControllerRateLimiter()), 10631 kubeClient: kubeClient, 10632 vcClient: vcClient, 10633 defaultQueue: defaultQueue, 10634 schedulerName: schedulerName, 10635 10636 NamespaceCollection: make(map[string]*schedulingapi.NamespaceCollection), 10637 10638 NodeList: []string{}, 10639 } 10640 10641 // Prepare event clients. 10642 broadcaster := record.NewBroadcaster() 10643 broadcaster.StartRecordingToSink(&corev1.EventSinkImpl{Interface: eventClient.CoreV1().Events("")}) 10644 sc.Recorder = broadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: schedulerName}) 10645 10646 sc.Binder = &defaultBinder{ 10647 kubeclient: sc.kubeClient, 10648 } 10649 10650 sc.Evictor = &defaultEvictor{ 10651 kubeclient: sc.kubeClient, 10652 recorder: sc.Recorder, 10653 } 10654 10655 sc.StatusUpdater = &defaultStatusUpdater{ 10656 kubeclient: sc.kubeClient, 10657 vcclient: sc.vcClient, 10658 } 10659 10660 sc.PodGroupBinder = &podgroupBinder{ 10661 kubeclient: sc.kubeClient, 10662 vcclient: sc.vcClient, 10663 } 10664 10665 informerFactory := informers.NewSharedInformerFactory(sc.kubeClient, 0) 10666 sc.informerFactory = informerFactory 10667 10668 // create informer for node information 10669 sc.nodeInformer = informerFactory.Core().V1().Nodes() 10670 sc.nodeInformer.Informer().AddEventHandlerWithResyncPeriod( 10671 cache.ResourceEventHandlerFuncs{ 10672 AddFunc: sc.AddNode, 10673 UpdateFunc: sc.UpdateNode, 10674 DeleteFunc: sc.DeleteNode, 10675 }, 10676 0, 10677 ) 10678 10679 sc.podInformer = informerFactory.Core().V1().Pods() 10680 sc.pvcInformer = informerFactory.Core().V1().PersistentVolumeClaims() 10681 sc.pvInformer = informerFactory.Core().V1().PersistentVolumes() 10682 sc.scInformer = informerFactory.Storage().V1().StorageClasses() 10683 sc.csiNodeInformer = informerFactory.Storage().V1().CSINodes() 10684 sc.csiDriverInformer = informerFactory.Storage().V1().CSIDrivers() 10685 sc.csiStorageCapacityInformer = 
informerFactory.Storage().V1alpha1().CSIStorageCapacities() 10686 sc.VolumeBinder = &defaultVolumeBinder{ 10687 volumeBinder: volumescheduling.NewVolumeBinder( 10688 sc.kubeClient, 10689 sc.podInformer, 10690 sc.nodeInformer, 10691 sc.csiNodeInformer, 10692 sc.pvcInformer, 10693 sc.pvInformer, 10694 sc.scInformer, 10695 &volumescheduling.CapacityCheck{ 10696 CSIDriverInformer: sc.csiDriverInformer, 10697 CSIStorageCapacityInformer: sc.csiStorageCapacityInformer, 10698 }, 10699 30*time.Second, 10700 ), 10701 } 10702 10703 // create informer for pod information 10704 sc.podInformer.Informer().AddEventHandler( 10705 cache.FilteringResourceEventHandler{ 10706 FilterFunc: func(obj interface{}) bool </span><span class="cov0" title="0">{ 10707 switch v := obj.(type) </span>{ 10708 case *v1.Pod:<span class="cov0" title="0"> 10709 if !responsibleForPod(v, schedulerName) </span><span class="cov0" title="0">{ 10710 if len(v.Spec.NodeName) == 0 </span><span class="cov0" title="0">{ 10711 return false 10712 }</span> 10713 } 10714 <span class="cov0" title="0">return true</span> 10715 default:<span class="cov0" title="0"> 10716 return false</span> 10717 } 10718 }, 10719 Handler: cache.ResourceEventHandlerFuncs{ 10720 AddFunc: sc.AddPod, 10721 UpdateFunc: sc.UpdatePod, 10722 DeleteFunc: sc.DeletePod, 10723 }, 10724 }) 10725 10726 <span class="cov0" title="0">sc.pcInformer = informerFactory.Scheduling().V1beta1().PriorityClasses() 10727 sc.pcInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ 10728 AddFunc: sc.AddPriorityClass, 10729 UpdateFunc: sc.UpdatePriorityClass, 10730 DeleteFunc: sc.DeletePriorityClass, 10731 }) 10732 10733 sc.quotaInformer = informerFactory.Core().V1().ResourceQuotas() 10734 sc.quotaInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ 10735 AddFunc: sc.AddResourceQuota, 10736 UpdateFunc: sc.UpdateResourceQuota, 10737 DeleteFunc: sc.DeleteResourceQuota, 10738 }) 10739 10740 vcinformers := 
vcinformer.NewSharedInformerFactory(sc.vcClient, 0) 10741 10742 // create informer for PodGroup(v1beta1) information 10743 sc.podGroupInformerV1beta1 = vcinformers.Scheduling().V1beta1().PodGroups() 10744 sc.podGroupInformerV1beta1.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ 10745 AddFunc: sc.AddPodGroupV1beta1, 10746 UpdateFunc: sc.UpdatePodGroupV1beta1, 10747 DeleteFunc: sc.DeletePodGroupV1beta1, 10748 }) 10749 10750 // create informer(v1beta1) for Queue information 10751 sc.queueInformerV1beta1 = vcinformers.Scheduling().V1beta1().Queues() 10752 sc.queueInformerV1beta1.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ 10753 AddFunc: sc.AddQueueV1beta1, 10754 UpdateFunc: sc.UpdateQueueV1beta1, 10755 DeleteFunc: sc.DeleteQueueV1beta1, 10756 }) 10757 10758 sc.cpuInformer = vcinformers.Nodeinfo().V1alpha1().Numatopologies() 10759 sc.cpuInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ 10760 AddFunc: sc.AddNumaInfoV1alpha1, 10761 UpdateFunc: sc.UpdateNumaInfoV1alpha1, 10762 DeleteFunc: sc.DeleteNumaInfoV1alpha1, 10763 }) 10764 return sc</span> 10765 } 10766 10767 // Run starts the schedulerCache 10768 func (sc *SchedulerCache) Run(stopCh <-chan struct{}) <span class="cov0" title="0">{ 10769 go sc.podInformer.Informer().Run(stopCh) 10770 go sc.nodeInformer.Informer().Run(stopCh) 10771 go sc.podGroupInformerV1beta1.Informer().Run(stopCh) 10772 go sc.pvInformer.Informer().Run(stopCh) 10773 go sc.pvcInformer.Informer().Run(stopCh) 10774 go sc.scInformer.Informer().Run(stopCh) 10775 go sc.queueInformerV1beta1.Informer().Run(stopCh) 10776 go sc.quotaInformer.Informer().Run(stopCh) 10777 go sc.cpuInformer.Informer().Run(stopCh) 10778 10779 if options.ServerOpts.EnablePriorityClass </span><span class="cov0" title="0">{ 10780 go sc.pcInformer.Informer().Run(stopCh) 10781 }</span> 10782 10783 // Re-sync error tasks. 10784 <span class="cov0" title="0">go wait.Until(sc.processResyncTask, 0, stopCh) 10785 10786 // Cleanup jobs. 
10787 go wait.Until(sc.processCleanupJob, 0, stopCh)</span> 10788 } 10789 10790 // WaitForCacheSync sync the cache with the api server 10791 func (sc *SchedulerCache) WaitForCacheSync(stopCh <-chan struct{}) bool <span class="cov0" title="0">{ 10792 return cache.WaitForCacheSync(stopCh, 10793 func() []cache.InformerSynced </span><span class="cov0" title="0">{ 10794 informerSynced := []cache.InformerSynced{ 10795 sc.podInformer.Informer().HasSynced, 10796 sc.podGroupInformerV1beta1.Informer().HasSynced, 10797 sc.nodeInformer.Informer().HasSynced, 10798 sc.pvInformer.Informer().HasSynced, 10799 sc.pvcInformer.Informer().HasSynced, 10800 sc.scInformer.Informer().HasSynced, 10801 sc.queueInformerV1beta1.Informer().HasSynced, 10802 sc.quotaInformer.Informer().HasSynced, 10803 sc.cpuInformer.Informer().HasSynced, 10804 } 10805 if options.ServerOpts.EnablePriorityClass </span><span class="cov0" title="0">{ 10806 informerSynced = append(informerSynced, sc.pcInformer.Informer().HasSynced) 10807 }</span> 10808 <span class="cov0" title="0">return informerSynced</span> 10809 }()..., 10810 ) 10811 } 10812 10813 // findJobAndTask returns job and the task info 10814 func (sc *SchedulerCache) findJobAndTask(taskInfo *schedulingapi.TaskInfo) (*schedulingapi.JobInfo, *schedulingapi.TaskInfo, error) <span class="cov8" title="1">{ 10815 job, found := sc.Jobs[taskInfo.Job] 10816 if !found </span><span class="cov0" title="0">{ 10817 return nil, nil, fmt.Errorf("failed to find Job %v for Task %v", 10818 taskInfo.Job, taskInfo.UID) 10819 }</span> 10820 10821 <span class="cov8" title="1">task, found := job.Tasks[taskInfo.UID] 10822 if !found </span><span class="cov0" title="0">{ 10823 return nil, nil, fmt.Errorf("failed to find task in status %v by id %v", 10824 taskInfo.Status, taskInfo.UID) 10825 }</span> 10826 10827 <span class="cov8" title="1">return job, task, nil</span> 10828 } 10829 10830 // Evict will evict the pod. 
10831 // 10832 // If error occurs both task and job are guaranteed to be in the original state. 10833 func (sc *SchedulerCache) Evict(taskInfo *schedulingapi.TaskInfo, reason string) error <span class="cov0" title="0">{ 10834 sc.Mutex.Lock() 10835 defer sc.Mutex.Unlock() 10836 10837 job, task, err := sc.findJobAndTask(taskInfo) 10838 10839 if err != nil </span><span class="cov0" title="0">{ 10840 return err 10841 }</span> 10842 10843 <span class="cov0" title="0">node, found := sc.Nodes[task.NodeName] 10844 if !found </span><span class="cov0" title="0">{ 10845 return fmt.Errorf("failed to bind Task %v to host %v, host does not exist", 10846 task.UID, task.NodeName) 10847 }</span> 10848 10849 <span class="cov0" title="0">originalStatus := task.Status 10850 if err := job.UpdateTaskStatus(task, schedulingapi.Releasing); err != nil </span><span class="cov0" title="0">{ 10851 return err 10852 }</span> 10853 10854 // Add new task to node. 10855 <span class="cov0" title="0">if err := node.UpdateTask(task); err != nil </span><span class="cov0" title="0">{ 10856 // After failing to update task to a node we need to revert task status from Releasing, 10857 // otherwise task might be stuck in the Releasing state indefinitely. 
10858 if err := job.UpdateTaskStatus(task, originalStatus); err != nil </span><span class="cov0" title="0">{ 10859 klog.Errorf("Task <%s/%s> will be resynchronized after failing to revert status "+ 10860 "from %s to %s after failing to update Task on Node <%s>: %v", 10861 task.Namespace, task.Name, task.Status, originalStatus, node.Name, err) 10862 sc.resyncTask(task) 10863 }</span> 10864 <span class="cov0" title="0">return err</span> 10865 } 10866 10867 <span class="cov0" title="0">p := task.Pod 10868 10869 go func() </span><span class="cov0" title="0">{ 10870 err := sc.Evictor.Evict(p, reason) 10871 if err != nil </span><span class="cov0" title="0">{ 10872 sc.resyncTask(task) 10873 }</span> 10874 }() 10875 10876 <span class="cov0" title="0">podgroup := &vcv1beta1.PodGroup{} 10877 if err := schedulingscheme.Scheme.Convert(&job.PodGroup.PodGroup, podgroup, nil); err != nil </span><span class="cov0" title="0">{ 10878 klog.Errorf("Error while converting PodGroup to v1alpha1.PodGroup with error: %v", err) 10879 return err 10880 }</span> 10881 <span class="cov0" title="0">sc.Recorder.Eventf(podgroup, v1.EventTypeNormal, "Evict", reason) 10882 return nil</span> 10883 } 10884 10885 // Bind binds task to the target host. 
10886 func (sc *SchedulerCache) Bind(taskInfo *schedulingapi.TaskInfo, hostname string) error <span class="cov8" title="1">{ 10887 sc.Mutex.Lock() 10888 defer sc.Mutex.Unlock() 10889 10890 job, task, err := sc.findJobAndTask(taskInfo) 10891 10892 if err != nil </span><span class="cov0" title="0">{ 10893 return err 10894 }</span> 10895 10896 <span class="cov8" title="1">node, found := sc.Nodes[hostname] 10897 if !found </span><span class="cov0" title="0">{ 10898 return fmt.Errorf("failed to bind Task %v to host %v, host does not exist", 10899 task.UID, hostname) 10900 }</span> 10901 10902 <span class="cov8" title="1">originalStatus := task.Status 10903 if err := job.UpdateTaskStatus(task, schedulingapi.Binding); err != nil </span><span class="cov0" title="0">{ 10904 return err 10905 }</span> 10906 10907 // Add task to the node. 10908 <span class="cov8" title="1">if err := node.AddTask(task); err != nil </span><span class="cov8" title="1">{ 10909 // After failing to update task to a node we need to revert task status from Releasing, 10910 // otherwise task might be stuck in the Releasing state indefinitely. 
10911 if err := job.UpdateTaskStatus(task, originalStatus); err != nil </span><span class="cov0" title="0">{ 10912 klog.Errorf("Task <%s/%s> will be resynchronized after failing to revert status "+ 10913 "from %s to %s after failing to update Task on Node <%s>: %v", 10914 task.Namespace, task.Name, task.Status, originalStatus, node.Name, err) 10915 sc.resyncTask(task) 10916 }</span> 10917 <span class="cov8" title="1">return err</span> 10918 } 10919 10920 <span class="cov8" title="1">p := task.Pod 10921 if !(task.TopologyPolicy == "" || task.TopologyPolicy == "none") </span><span class="cov0" title="0">{ 10922 if err := sc.Binder.Bind(p, hostname); err != nil </span><span class="cov0" title="0">{ 10923 sc.resyncTask(task) 10924 }</span> else<span class="cov0" title="0"> { 10925 sc.Recorder.Eventf(p, v1.EventTypeNormal, "Scheduled", "Successfully assigned %v/%v to %v", p.Namespace, p.Name, hostname) 10926 }</span> 10927 } else<span class="cov8" title="1"> { 10928 go func() </span><span class="cov8" title="1">{ 10929 if err := sc.Binder.Bind(p, hostname); err != nil </span><span class="cov0" title="0">{ 10930 sc.resyncTask(task) 10931 }</span> else<span class="cov0" title="0"> { 10932 sc.Recorder.Eventf(p, v1.EventTypeNormal, "Scheduled", "Successfully assigned %v/%v to %v", p.Namespace, p.Name, hostname) 10933 }</span> 10934 }() 10935 } 10936 10937 <span class="cov8" title="1">return nil</span> 10938 } 10939 10940 // BindPodGroup binds job to silo cluster 10941 func (sc *SchedulerCache) BindPodGroup(job *schedulingapi.JobInfo, cluster string) error <span class="cov0" title="0">{ 10942 if _, err := sc.PodGroupBinder.Bind(job, cluster); err != nil </span><span class="cov0" title="0">{ 10943 klog.Errorf("Bind job <%s> to cluster <%s> failed: %v", job.Name, cluster, err) 10944 return err 10945 }</span> 10946 <span class="cov0" title="0">return nil</span> 10947 } 10948 10949 // GetPodVolumes get pod volume on the host 10950 func (sc *SchedulerCache) GetPodVolumes(task 
*schedulingapi.TaskInfo, node *v1.Node) (*volumescheduling.PodVolumes, error) <span class="cov0" title="0">{ 10951 return sc.VolumeBinder.GetPodVolumes(task, node) 10952 }</span> 10953 10954 // AllocateVolumes allocates volume on the host to the task 10955 func (sc *SchedulerCache) AllocateVolumes(task *schedulingapi.TaskInfo, hostname string, podVolumes *volumescheduling.PodVolumes) error <span class="cov0" title="0">{ 10956 return sc.VolumeBinder.AllocateVolumes(task, hostname, podVolumes) 10957 }</span> 10958 10959 // BindVolumes binds volumes to the task 10960 func (sc *SchedulerCache) BindVolumes(task *schedulingapi.TaskInfo, podVolumes *volumescheduling.PodVolumes) error <span class="cov0" title="0">{ 10961 return sc.VolumeBinder.BindVolumes(task, podVolumes) 10962 }</span> 10963 10964 // Client returns the kubernetes clientSet 10965 func (sc *SchedulerCache) Client() kubernetes.Interface <span class="cov0" title="0">{ 10966 return sc.kubeClient 10967 }</span> 10968 10969 // SharedInformerFactory returns the scheduler SharedInformerFactory 10970 func (sc *SchedulerCache) SharedInformerFactory() informers.SharedInformerFactory <span class="cov0" title="0">{ 10971 return sc.informerFactory 10972 }</span> 10973 10974 // UpdateSchedulerNumaInfo used to update scheduler node cache NumaSchedulerInfo 10975 func (sc *SchedulerCache) UpdateSchedulerNumaInfo(AllocatedSets map[string]schedulingapi.ResNumaSets) error <span class="cov0" title="0">{ 10976 sc.Mutex.Lock() 10977 defer sc.Mutex.Unlock() 10978 10979 for nodeName, sets := range AllocatedSets </span><span class="cov0" title="0">{ 10980 if _, found := sc.Nodes[nodeName]; !found </span><span class="cov0" title="0">{ 10981 continue</span> 10982 } 10983 10984 <span class="cov0" title="0">numaInfo := sc.Nodes[nodeName].NumaSchedulerInfo 10985 if numaInfo == nil </span><span class="cov0" title="0">{ 10986 continue</span> 10987 } 10988 10989 <span class="cov0" title="0">numaInfo.Allocate(sets)</span> 10990 } 10991 
<span class="cov0" title="0">return nil</span> 10992 } 10993 10994 // taskUnschedulable updates pod status of pending task 10995 func (sc *SchedulerCache) taskUnschedulable(task *schedulingapi.TaskInfo, reason, message string) error <span class="cov0" title="0">{ 10996 pod := task.Pod 10997 10998 condition := &v1.PodCondition{ 10999 Type: v1.PodScheduled, 11000 Status: v1.ConditionFalse, 11001 Reason: reason, // Add more reasons in order to distinguish more specific scenario of pending tasks 11002 Message: message, 11003 } 11004 11005 if podConditionHaveUpdate(&pod.Status, condition) </span><span class="cov0" title="0">{ 11006 pod = pod.DeepCopy() 11007 11008 // The reason field in 'Events' should be "FailedScheduling", there is not constants defined for this in 11009 // k8s core, so using the same string here. 11010 // The reason field in PodCondition can be "Unschedulable" 11011 sc.Recorder.Eventf(pod, v1.EventTypeWarning, "FailedScheduling", message) 11012 if _, err := sc.StatusUpdater.UpdatePodCondition(pod, condition); err != nil </span><span class="cov0" title="0">{ 11013 return err 11014 }</span> 11015 } else<span class="cov0" title="0"> { 11016 klog.V(4).Infof("task unscheduleable %s/%s, message: %s, skip by no condition update", pod.Namespace, pod.Name, message) 11017 }</span> 11018 11019 <span class="cov0" title="0">return nil</span> 11020 } 11021 11022 func (sc *SchedulerCache) deleteJob(job *schedulingapi.JobInfo) <span class="cov8" title="1">{ 11023 klog.V(3).Infof("Try to delete Job <%v:%v/%v>", job.UID, job.Namespace, job.Name) 11024 11025 sc.deletedJobs.AddRateLimited(job) 11026 }</span> 11027 11028 func (sc *SchedulerCache) processCleanupJob() <span class="cov0" title="0">{ 11029 obj, shutdown := sc.deletedJobs.Get() 11030 if shutdown </span><span class="cov0" title="0">{ 11031 return 11032 }</span> 11033 11034 <span class="cov0" title="0">defer sc.deletedJobs.Done(obj) 11035 11036 job, found := obj.(*schedulingapi.JobInfo) 11037 if !found 
</span><span class="cov0" title="0">{ 11038 klog.Errorf("Failed to convert <%v> to *JobInfo", obj) 11039 return 11040 }</span> 11041 11042 <span class="cov0" title="0">sc.Mutex.Lock() 11043 defer sc.Mutex.Unlock() 11044 11045 if schedulingapi.JobTerminated(job) </span><span class="cov0" title="0">{ 11046 delete(sc.Jobs, job.UID) 11047 klog.V(3).Infof("Job <%v:%v/%v> was deleted.", job.UID, job.Namespace, job.Name) 11048 }</span> else<span class="cov0" title="0"> { 11049 // Retry 11050 sc.deleteJob(job) 11051 }</span> 11052 } 11053 11054 func (sc *SchedulerCache) resyncTask(task *schedulingapi.TaskInfo) <span class="cov0" title="0">{ 11055 sc.errTasks.AddRateLimited(task) 11056 }</span> 11057 11058 func (sc *SchedulerCache) processResyncTask() <span class="cov0" title="0">{ 11059 obj, shutdown := sc.errTasks.Get() 11060 if shutdown </span><span class="cov0" title="0">{ 11061 return 11062 }</span> 11063 11064 <span class="cov0" title="0">defer sc.errTasks.Done(obj) 11065 11066 task, ok := obj.(*schedulingapi.TaskInfo) 11067 if !ok </span><span class="cov0" title="0">{ 11068 klog.Errorf("failed to convert %v to *schedulingapi.TaskInfo", obj) 11069 return 11070 }</span> 11071 11072 <span class="cov0" title="0">if err := sc.syncTask(task); err != nil </span><span class="cov0" title="0">{ 11073 klog.Errorf("Failed to sync pod <%v/%v>, retry it.", task.Namespace, task.Name) 11074 sc.resyncTask(task) 11075 }</span> 11076 } 11077 11078 // Snapshot returns the complete snapshot of the cluster from cache 11079 func (sc *SchedulerCache) Snapshot() *schedulingapi.ClusterInfo <span class="cov0" title="0">{ 11080 sc.Mutex.Lock() 11081 defer sc.Mutex.Unlock() 11082 11083 snapshot := &schedulingapi.ClusterInfo{ 11084 Nodes: make(map[string]*schedulingapi.NodeInfo), 11085 Jobs: make(map[schedulingapi.JobID]*schedulingapi.JobInfo), 11086 Queues: make(map[schedulingapi.QueueID]*schedulingapi.QueueInfo), 11087 NamespaceInfo: 
make(map[schedulingapi.NamespaceName]*schedulingapi.NamespaceInfo), 11088 RevocableNodes: make(map[string]*schedulingapi.NodeInfo), 11089 NodeList: make([]string, len(sc.NodeList)), 11090 } 11091 11092 copy(snapshot.NodeList, sc.NodeList) 11093 for _, value := range sc.Nodes </span><span class="cov0" title="0">{ 11094 value.RefreshNumaSchedulerInfoByCrd() 11095 }</span> 11096 11097 <span class="cov0" title="0">for _, value := range sc.Nodes </span><span class="cov0" title="0">{ 11098 if !value.Ready() </span><span class="cov0" title="0">{ 11099 continue</span> 11100 } 11101 11102 <span class="cov0" title="0">snapshot.Nodes[value.Name] = value.Clone() 11103 11104 if value.RevocableZone != "" </span><span class="cov0" title="0">{ 11105 snapshot.RevocableNodes[value.Name] = snapshot.Nodes[value.Name] 11106 }</span> 11107 } 11108 11109 <span class="cov0" title="0">for _, value := range sc.Queues </span><span class="cov0" title="0">{ 11110 snapshot.Queues[value.UID] = value.Clone() 11111 }</span> 11112 11113 <span class="cov0" title="0">var cloneJobLock sync.Mutex 11114 var wg sync.WaitGroup 11115 11116 cloneJob := func(value *schedulingapi.JobInfo) </span><span class="cov0" title="0">{ 11117 defer wg.Done() 11118 if value.PodGroup != nil </span><span class="cov0" title="0">{ 11119 value.Priority = sc.defaultPriority 11120 11121 priName := value.PodGroup.Spec.PriorityClassName 11122 if priorityClass, found := sc.PriorityClasses[priName]; found </span><span class="cov0" title="0">{ 11123 value.Priority = priorityClass.Value 11124 }</span> 11125 11126 <span class="cov0" title="0">klog.V(4).Infof("The priority of job <%s/%s> is <%s/%d>", 11127 value.Namespace, value.Name, priName, value.Priority)</span> 11128 } 11129 11130 <span class="cov0" title="0">clonedJob := value.Clone() 11131 11132 cloneJobLock.Lock() 11133 snapshot.Jobs[value.UID] = clonedJob 11134 cloneJobLock.Unlock()</span> 11135 } 11136 11137 <span class="cov0" title="0">for _, value := range 
sc.NamespaceCollection </span><span class="cov0" title="0">{ 11138 info := value.Snapshot() 11139 snapshot.NamespaceInfo[info.Name] = info 11140 klog.V(4).Infof("Namespace %s has weight %v", 11141 value.Name, info.GetWeight()) 11142 }</span> 11143 11144 <span class="cov0" title="0">for _, value := range sc.Jobs </span><span class="cov0" title="0">{ 11145 // If no scheduling spec, does not handle it. 11146 if value.PodGroup == nil </span><span class="cov0" title="0">{ 11147 klog.V(4).Infof("The scheduling spec of Job <%v:%s/%s> is nil, ignore it.", 11148 value.UID, value.Namespace, value.Name) 11149 11150 continue</span> 11151 } 11152 11153 <span class="cov0" title="0">if _, found := snapshot.Queues[value.Queue]; !found </span><span class="cov0" title="0">{ 11154 klog.V(3).Infof("The Queue <%v> of Job <%v/%v> does not exist, ignore it.", 11155 value.Queue, value.Namespace, value.Name) 11156 continue</span> 11157 } 11158 11159 <span class="cov0" title="0">wg.Add(1) 11160 go cloneJob(value)</span> 11161 } 11162 <span class="cov0" title="0">wg.Wait() 11163 11164 klog.V(3).Infof("There are <%d> Jobs, <%d> Queues and <%d> Nodes in total for scheduling.", 11165 len(snapshot.Jobs), len(snapshot.Queues), len(snapshot.Nodes)) 11166 11167 return snapshot</span> 11168 } 11169 11170 // String returns information about the cache in a string format 11171 func (sc *SchedulerCache) String() string <span class="cov0" title="0">{ 11172 sc.Mutex.Lock() 11173 defer sc.Mutex.Unlock() 11174 11175 str := "Cache:\n" 11176 11177 if len(sc.Nodes) != 0 </span><span class="cov0" title="0">{ 11178 str += "Nodes:\n" 11179 for _, n := range sc.Nodes </span><span class="cov0" title="0">{ 11180 str += fmt.Sprintf("\t %s: idle(%v) used(%v) allocatable(%v) pods(%d)\n", 11181 n.Name, n.Idle, n.Used, n.Allocatable, len(n.Tasks)) 11182 11183 i := 0 11184 for _, p := range n.Tasks </span><span class="cov0" title="0">{ 11185 str += fmt.Sprintf("\t\t %d: %v\n", i, p) 11186 i++ 11187 }</span> 11188 } 11189 
} 11190 11191 <span class="cov0" title="0">if len(sc.Jobs) != 0 </span><span class="cov0" title="0">{ 11192 str += "Jobs:\n" 11193 for _, job := range sc.Jobs </span><span class="cov0" title="0">{ 11194 str += fmt.Sprintf("\t %s\n", job) 11195 }</span> 11196 } 11197 11198 <span class="cov0" title="0">if len(sc.NamespaceCollection) != 0 </span><span class="cov0" title="0">{ 11199 str += "Namespaces:\n" 11200 for _, ns := range sc.NamespaceCollection </span><span class="cov0" title="0">{ 11201 info := ns.Snapshot() 11202 str += fmt.Sprintf("\t Namespace(%s) Weight(%v)\n", 11203 info.Name, info.Weight) 11204 }</span> 11205 } 11206 11207 <span class="cov0" title="0">if len(sc.NodeList) != 0 </span><span class="cov0" title="0">{ 11208 str += fmt.Sprintf("NodeList: %v\n", sc.NodeList) 11209 }</span> 11210 11211 <span class="cov0" title="0">return str</span> 11212 } 11213 11214 // RecordJobStatusEvent records related events according to job status. 11215 func (sc *SchedulerCache) RecordJobStatusEvent(job *schedulingapi.JobInfo) <span class="cov0" title="0">{ 11216 pgUnschedulable := job.PodGroup != nil && 11217 (job.PodGroup.Status.Phase == scheduling.PodGroupUnknown || 11218 job.PodGroup.Status.Phase == scheduling.PodGroupPending || 11219 job.PodGroup.Status.Phase == scheduling.PodGroupInqueue) 11220 11221 // If pending or unschedulable, record unschedulable event. 
11222 if pgUnschedulable </span><span class="cov0" title="0">{ 11223 msg := fmt.Sprintf("%v/%v tasks in gang unschedulable: %v", 11224 len(job.TaskStatusIndex[schedulingapi.Pending]), 11225 len(job.Tasks), 11226 job.FitError()) 11227 sc.recordPodGroupEvent(job.PodGroup, v1.EventTypeWarning, string(scheduling.PodGroupUnschedulableType), msg) 11228 }</span> else<span class="cov0" title="0"> { 11229 sc.recordPodGroupEvent(job.PodGroup, v1.EventTypeNormal, string(scheduling.PodGroupScheduled), string(scheduling.PodGroupReady)) 11230 }</span> 11231 11232 <span class="cov0" title="0">baseErrorMessage := job.JobFitErrors 11233 if baseErrorMessage == "" </span><span class="cov0" title="0">{ 11234 baseErrorMessage = schedulingapi.AllNodeUnavailableMsg 11235 }</span> 11236 // Update podCondition for tasks Allocated and Pending before job discarded 11237 <span class="cov0" title="0">for _, status := range []schedulingapi.TaskStatus{schedulingapi.Allocated, schedulingapi.Pending, schedulingapi.Pipelined} </span><span class="cov0" title="0">{ 11238 for _, taskInfo := range job.TaskStatusIndex[status] </span><span class="cov0" title="0">{ 11239 reason, msg := job.TaskSchedulingReason(taskInfo.UID) 11240 if len(msg) == 0 </span><span class="cov0" title="0">{ 11241 msg = baseErrorMessage 11242 }</span> 11243 <span class="cov0" title="0">if err := sc.taskUnschedulable(taskInfo, reason, msg); err != nil </span><span class="cov0" title="0">{ 11244 klog.Errorf("Failed to update unschedulable task status <%s/%s>: %v", 11245 taskInfo.Namespace, taskInfo.Name, err) 11246 }</span> 11247 } 11248 } 11249 } 11250 11251 // UpdateJobStatus update the status of job and its tasks. 
11252 func (sc *SchedulerCache) UpdateJobStatus(job *schedulingapi.JobInfo, updatePG bool) (*schedulingapi.JobInfo, error) <span class="cov0" title="0">{ 11253 if updatePG </span><span class="cov0" title="0">{ 11254 pg, err := sc.StatusUpdater.UpdatePodGroup(job.PodGroup) 11255 if err != nil </span><span class="cov0" title="0">{ 11256 return nil, err 11257 }</span> 11258 <span class="cov0" title="0">job.PodGroup = pg</span> 11259 } 11260 11261 <span class="cov0" title="0">sc.RecordJobStatusEvent(job) 11262 11263 return job, nil</span> 11264 } 11265 11266 func (sc *SchedulerCache) recordPodGroupEvent(podGroup *schedulingapi.PodGroup, eventType, reason, msg string) <span class="cov0" title="0">{ 11267 if podGroup == nil </span><span class="cov0" title="0">{ 11268 return 11269 }</span> 11270 11271 <span class="cov0" title="0">pg := &vcv1beta1.PodGroup{} 11272 if err := schedulingscheme.Scheme.Convert(&podGroup.PodGroup, pg, nil); err != nil </span><span class="cov0" title="0">{ 11273 klog.Errorf("Error while converting PodGroup to v1alpha1.PodGroup with error: %v", err) 11274 return 11275 }</span> 11276 <span class="cov0" title="0">sc.Recorder.Eventf(pg, eventType, reason, msg)</span> 11277 } 11278 </pre> 11279 11280 <pre class="file" id="file54" style="display: none">/* 11281 Copyright 2017 The Kubernetes Authors. 11282 11283 Licensed under the Apache License, Version 2.0 (the "License"); 11284 you may not use this file except in compliance with the License. 11285 You may obtain a copy of the License at 11286 11287 http://www.apache.org/licenses/LICENSE-2.0 11288 11289 Unless required by applicable law or agreed to in writing, software 11290 distributed under the License is distributed on an "AS IS" BASIS, 11291 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11292 See the License for the specific language governing permissions and 11293 limitations under the License. 
11294 */ 11295 11296 package cache 11297 11298 import ( 11299 "context" 11300 "fmt" 11301 "strconv" 11302 11303 v1 "k8s.io/api/core/v1" 11304 "k8s.io/api/scheduling/v1beta1" 11305 "k8s.io/apimachinery/pkg/api/errors" 11306 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 11307 "k8s.io/client-go/tools/cache" 11308 "k8s.io/klog" 11309 "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/topology" 11310 "k8s.io/kubernetes/pkg/kubelet/cm/cpuset" 11311 11312 nodeinfov1alpha1 "volcano.sh/apis/pkg/apis/nodeinfo/v1alpha1" 11313 "volcano.sh/apis/pkg/apis/scheduling" 11314 "volcano.sh/apis/pkg/apis/scheduling/scheme" 11315 schedulingv1 "volcano.sh/apis/pkg/apis/scheduling/v1beta1" 11316 "volcano.sh/apis/pkg/apis/utils" 11317 schedulingapi "volcano.sh/volcano/pkg/scheduler/api" 11318 ) 11319 11320 func isTerminated(status schedulingapi.TaskStatus) bool <span class="cov8" title="1">{ 11321 return status == schedulingapi.Succeeded || status == schedulingapi.Failed 11322 }</span> 11323 11324 // getOrCreateJob will return corresponding Job for pi if it exists, or it will create a Job and return it if 11325 // pi.Pod.Spec.SchedulerName is same as volcano scheduler's name, otherwise it will return nil. 
11326 func (sc *SchedulerCache) getOrCreateJob(pi *schedulingapi.TaskInfo) *schedulingapi.JobInfo <span class="cov8" title="1">{ 11327 if len(pi.Job) == 0 </span><span class="cov8" title="1">{ 11328 if pi.Pod.Spec.SchedulerName != sc.schedulerName </span><span class="cov8" title="1">{ 11329 klog.V(4).Infof("Pod %s/%s will not not scheduled by %s, skip creating PodGroup and Job for it", 11330 pi.Pod.Namespace, pi.Pod.Name, sc.schedulerName) 11331 }</span> 11332 <span class="cov8" title="1">return nil</span> 11333 } 11334 11335 <span class="cov8" title="1">if _, found := sc.Jobs[pi.Job]; !found </span><span class="cov8" title="1">{ 11336 sc.Jobs[pi.Job] = schedulingapi.NewJobInfo(pi.Job) 11337 }</span> 11338 11339 <span class="cov8" title="1">return sc.Jobs[pi.Job]</span> 11340 } 11341 11342 func (sc *SchedulerCache) addTask(pi *schedulingapi.TaskInfo) error <span class="cov8" title="1">{ 11343 if len(pi.NodeName) != 0 </span><span class="cov8" title="1">{ 11344 if _, found := sc.Nodes[pi.NodeName]; !found </span><span class="cov0" title="0">{ 11345 sc.Nodes[pi.NodeName] = schedulingapi.NewNodeInfo(nil) 11346 sc.Nodes[pi.NodeName].Name = pi.NodeName 11347 }</span> 11348 11349 <span class="cov8" title="1">node := sc.Nodes[pi.NodeName] 11350 if !isTerminated(pi.Status) </span><span class="cov8" title="1">{ 11351 if err := node.AddTask(pi); err != nil </span><span class="cov0" title="0">{ 11352 return err 11353 }</span> 11354 } else<span class="cov8" title="1"> { 11355 klog.V(4).Infof("Pod <%v/%v> is in status %s.", pi.Namespace, pi.Name, pi.Status.String()) 11356 }</span> 11357 } 11358 11359 <span class="cov8" title="1">job := sc.getOrCreateJob(pi) 11360 if job != nil </span><span class="cov8" title="1">{ 11361 job.AddTaskInfo(pi) 11362 }</span> 11363 11364 <span class="cov8" title="1">return nil</span> 11365 } 11366 11367 // Assumes that lock is already acquired. 
11368 func (sc *SchedulerCache) addPod(pod *v1.Pod) error <span class="cov8" title="1">{ 11369 pi := schedulingapi.NewTaskInfo(pod) 11370 11371 return sc.addTask(pi) 11372 }</span> 11373 11374 func (sc *SchedulerCache) syncTask(oldTask *schedulingapi.TaskInfo) error <span class="cov0" title="0">{ 11375 newPod, err := sc.kubeClient.CoreV1().Pods(oldTask.Namespace).Get(context.TODO(), oldTask.Name, metav1.GetOptions{}) 11376 if err != nil </span><span class="cov0" title="0">{ 11377 if errors.IsNotFound(err) </span><span class="cov0" title="0">{ 11378 err := sc.deleteTask(oldTask) 11379 if err != nil </span><span class="cov0" title="0">{ 11380 klog.Errorf("Failed to delete Pod <%v/%v> and remove from cache: %s", oldTask.Namespace, oldTask.Name, err.Error()) 11381 return err 11382 }</span> 11383 <span class="cov0" title="0">klog.V(3).Infof("Pod <%v/%v> was deleted, removed from cache.", oldTask.Namespace, oldTask.Name) 11384 11385 return nil</span> 11386 } 11387 <span class="cov0" title="0">return fmt.Errorf("failed to get Pod <%v/%v>: err %v", oldTask.Namespace, oldTask.Name, err)</span> 11388 } 11389 11390 <span class="cov0" title="0">newTask := schedulingapi.NewTaskInfo(newPod) 11391 11392 sc.Mutex.Lock() 11393 defer sc.Mutex.Unlock() 11394 return sc.updateTask(oldTask, newTask)</span> 11395 } 11396 11397 func (sc *SchedulerCache) updateTask(oldTask, newTask *schedulingapi.TaskInfo) error <span class="cov8" title="1">{ 11398 if err := sc.deleteTask(oldTask); err != nil </span><span class="cov0" title="0">{ 11399 klog.Warningf("Failed to delete task: %v", err) 11400 }</span> 11401 11402 <span class="cov8" title="1">return sc.addTask(newTask)</span> 11403 } 11404 11405 // Assumes that lock is already acquired. 
11406 func (sc *SchedulerCache) updatePod(oldPod, newPod *v1.Pod) error <span class="cov8" title="1">{ 11407 if err := sc.deletePod(oldPod); err != nil </span><span class="cov0" title="0">{ 11408 return err 11409 }</span> 11410 //when delete pod, the ownerreference of pod will be set nil,just as orphan pod 11411 <span class="cov8" title="1">if len(utils.GetController(newPod)) == 0 </span><span class="cov0" title="0">{ 11412 newPod.OwnerReferences = oldPod.OwnerReferences 11413 }</span> 11414 <span class="cov8" title="1">return sc.addPod(newPod)</span> 11415 } 11416 11417 func (sc *SchedulerCache) deleteTask(pi *schedulingapi.TaskInfo) error <span class="cov8" title="1">{ 11418 var jobErr, nodeErr, numaErr error 11419 11420 if len(pi.Job) != 0 </span><span class="cov0" title="0">{ 11421 if job, found := sc.Jobs[pi.Job]; found </span><span class="cov0" title="0">{ 11422 jobErr = job.DeleteTaskInfo(pi) 11423 }</span> else<span class="cov0" title="0"> { 11424 jobErr = fmt.Errorf("failed to find Job <%v> for Task %v/%v", 11425 pi.Job, pi.Namespace, pi.Name) 11426 }</span> 11427 } 11428 11429 <span class="cov8" title="1">if len(pi.NodeName) != 0 </span><span class="cov8" title="1">{ 11430 node := sc.Nodes[pi.NodeName] 11431 if node != nil </span><span class="cov8" title="1">{ 11432 nodeErr = node.RemoveTask(pi) 11433 }</span> 11434 } 11435 11436 <span class="cov8" title="1">if jobErr != nil || nodeErr != nil </span><span class="cov0" title="0">{ 11437 return schedulingapi.MergeErrors(jobErr, nodeErr, numaErr) 11438 }</span> 11439 11440 <span class="cov8" title="1">return nil</span> 11441 } 11442 11443 // Assumes that lock is already acquired. 11444 func (sc *SchedulerCache) deletePod(pod *v1.Pod) error <span class="cov8" title="1">{ 11445 pi := schedulingapi.NewTaskInfo(pod) 11446 11447 // Delete the Task in cache to handle Binding status. 
11448 task := pi 11449 if job, found := sc.Jobs[pi.Job]; found </span><span class="cov0" title="0">{ 11450 if t, found := job.Tasks[pi.UID]; found </span><span class="cov0" title="0">{ 11451 task = t 11452 }</span> 11453 } 11454 <span class="cov8" title="1">if err := sc.deleteTask(task); err != nil </span><span class="cov0" title="0">{ 11455 klog.Warningf("Failed to delete task: %v", err) 11456 }</span> 11457 11458 // If job was terminated, delete it. 11459 <span class="cov8" title="1">if job, found := sc.Jobs[pi.Job]; found && schedulingapi.JobTerminated(job) </span><span class="cov0" title="0">{ 11460 sc.deleteJob(job) 11461 }</span> 11462 11463 <span class="cov8" title="1">return nil</span> 11464 } 11465 11466 // AddPod add pod to scheduler cache 11467 func (sc *SchedulerCache) AddPod(obj interface{}) <span class="cov8" title="1">{ 11468 pod, ok := obj.(*v1.Pod) 11469 if !ok </span><span class="cov0" title="0">{ 11470 klog.Errorf("Cannot convert to *v1.Pod: %v", obj) 11471 return 11472 }</span> 11473 11474 <span class="cov8" title="1">sc.Mutex.Lock() 11475 defer sc.Mutex.Unlock() 11476 11477 err := sc.addPod(pod) 11478 if err != nil </span><span class="cov0" title="0">{ 11479 klog.Errorf("Failed to add pod <%s/%s> into cache: %v", 11480 pod.Namespace, pod.Name, err) 11481 return 11482 }</span> 11483 <span class="cov8" title="1">klog.V(3).Infof("Added pod <%s/%v> into cache.", pod.Namespace, pod.Name)</span> 11484 } 11485 11486 // UpdatePod update pod to scheduler cache 11487 func (sc *SchedulerCache) UpdatePod(oldObj, newObj interface{}) <span class="cov0" title="0">{ 11488 oldPod, ok := oldObj.(*v1.Pod) 11489 if !ok </span><span class="cov0" title="0">{ 11490 klog.Errorf("Cannot convert oldObj to *v1.Pod: %v", oldObj) 11491 return 11492 }</span> 11493 <span class="cov0" title="0">newPod, ok := newObj.(*v1.Pod) 11494 if !ok </span><span class="cov0" title="0">{ 11495 klog.Errorf("Cannot convert newObj to *v1.Pod: %v", newObj) 11496 return 11497 }</span> 11498 
11499 <span class="cov0" title="0">sc.Mutex.Lock() 11500 defer sc.Mutex.Unlock() 11501 11502 err := sc.updatePod(oldPod, newPod) 11503 if err != nil </span><span class="cov0" title="0">{ 11504 klog.Errorf("Failed to update pod %v in cache: %v", oldPod.Name, err) 11505 return 11506 }</span> 11507 11508 <span class="cov0" title="0">klog.V(4).Infof("Updated pod <%s/%v> in cache.", oldPod.Namespace, oldPod.Name)</span> 11509 } 11510 11511 // DeletePod delete pod from scheduler cache 11512 func (sc *SchedulerCache) DeletePod(obj interface{}) <span class="cov0" title="0">{ 11513 var pod *v1.Pod 11514 switch t := obj.(type) </span>{ 11515 case *v1.Pod:<span class="cov0" title="0"> 11516 pod = t</span> 11517 case cache.DeletedFinalStateUnknown:<span class="cov0" title="0"> 11518 var ok bool 11519 pod, ok = t.Obj.(*v1.Pod) 11520 if !ok </span><span class="cov0" title="0">{ 11521 klog.Errorf("Cannot convert to *v1.Pod: %v", t.Obj) 11522 return 11523 }</span> 11524 default:<span class="cov0" title="0"> 11525 klog.Errorf("Cannot convert to *v1.Pod: %v", t) 11526 return</span> 11527 } 11528 11529 <span class="cov0" title="0">sc.Mutex.Lock() 11530 defer sc.Mutex.Unlock() 11531 11532 err := sc.deletePod(pod) 11533 if err != nil </span><span class="cov0" title="0">{ 11534 klog.Errorf("Failed to delete pod %v from cache: %v", pod.Name, err) 11535 return 11536 }</span> 11537 11538 <span class="cov0" title="0">klog.V(3).Infof("Deleted pod <%s/%v> from cache.", pod.Namespace, pod.Name)</span> 11539 } 11540 11541 // Assumes that lock is already acquired. 
11542 func (sc *SchedulerCache) addNode(node *v1.Node) error <span class="cov8" title="1">{ 11543 if sc.Nodes[node.Name] != nil </span><span class="cov0" title="0">{ 11544 sc.Nodes[node.Name].SetNode(node) 11545 }</span> else<span class="cov8" title="1"> { 11546 sc.Nodes[node.Name] = schedulingapi.NewNodeInfo(node) 11547 }</span> 11548 <span class="cov8" title="1">return nil</span> 11549 } 11550 11551 // Assumes that lock is already acquired. 11552 func (sc *SchedulerCache) updateNode(oldNode, newNode *v1.Node) error <span class="cov0" title="0">{ 11553 if sc.Nodes[newNode.Name] != nil </span><span class="cov0" title="0">{ 11554 sc.Nodes[newNode.Name].SetNode(newNode) 11555 return nil 11556 }</span> 11557 11558 <span class="cov0" title="0">return fmt.Errorf("node <%s> does not exist", newNode.Name)</span> 11559 } 11560 11561 // Assumes that lock is already acquired. 11562 func (sc *SchedulerCache) deleteNode(node *v1.Node) error <span class="cov8" title="1">{ 11563 if _, ok := sc.Nodes[node.Name]; !ok </span><span class="cov0" title="0">{ 11564 return fmt.Errorf("node <%s> does not exist", node.Name) 11565 }</span> 11566 11567 <span class="cov8" title="1">numaInfo := sc.Nodes[node.Name].NumaInfo 11568 if numaInfo != nil </span><span class="cov0" title="0">{ 11569 klog.V(3).Infof("delete numatopo <%s/%s>", numaInfo.Namespace, numaInfo.Name) 11570 err := sc.vcClient.NodeinfoV1alpha1().Numatopologies().Delete(context.TODO(), numaInfo.Name, metav1.DeleteOptions{}) 11571 if err != nil </span><span class="cov0" title="0">{ 11572 klog.Errorf("delete numatopo <%s/%s> failed.", numaInfo.Namespace, numaInfo.Name) 11573 }</span> 11574 } 11575 11576 <span class="cov8" title="1">delete(sc.Nodes, node.Name) 11577 11578 return nil</span> 11579 } 11580 11581 // AddNode add node to scheduler cache 11582 func (sc *SchedulerCache) AddNode(obj interface{}) <span class="cov8" title="1">{ 11583 node, ok := obj.(*v1.Node) 11584 if !ok </span><span class="cov0" title="0">{ 11585 
klog.Errorf("Cannot convert to *v1.Node: %v", obj) 11586 return 11587 }</span> 11588 11589 <span class="cov8" title="1">sc.Mutex.Lock() 11590 defer sc.Mutex.Unlock() 11591 11592 err := sc.addNode(node) 11593 if err != nil </span><span class="cov0" title="0">{ 11594 klog.Errorf("Failed to add node %s into cache: %v", node.Name, err) 11595 return 11596 }</span> 11597 <span class="cov8" title="1">sc.NodeList = append(sc.NodeList, node.Name)</span> 11598 } 11599 11600 // UpdateNode update node to scheduler cache 11601 func (sc *SchedulerCache) UpdateNode(oldObj, newObj interface{}) <span class="cov0" title="0">{ 11602 oldNode, ok := oldObj.(*v1.Node) 11603 if !ok </span><span class="cov0" title="0">{ 11604 klog.Errorf("Cannot convert oldObj to *v1.Node: %v", oldObj) 11605 return 11606 }</span> 11607 <span class="cov0" title="0">newNode, ok := newObj.(*v1.Node) 11608 if !ok </span><span class="cov0" title="0">{ 11609 klog.Errorf("Cannot convert newObj to *v1.Node: %v", newObj) 11610 return 11611 }</span> 11612 11613 <span class="cov0" title="0">sc.Mutex.Lock() 11614 defer sc.Mutex.Unlock() 11615 11616 err := sc.updateNode(oldNode, newNode) 11617 if err != nil </span><span class="cov0" title="0">{ 11618 klog.Errorf("Failed to update node %v in cache: %v", oldNode.Name, err) 11619 return 11620 }</span> 11621 } 11622 11623 // DeleteNode delete node from scheduler cache 11624 func (sc *SchedulerCache) DeleteNode(obj interface{}) <span class="cov8" title="1">{ 11625 var node *v1.Node 11626 switch t := obj.(type) </span>{ 11627 case *v1.Node:<span class="cov8" title="1"> 11628 node = t</span> 11629 case cache.DeletedFinalStateUnknown:<span class="cov0" title="0"> 11630 var ok bool 11631 node, ok = t.Obj.(*v1.Node) 11632 if !ok </span><span class="cov0" title="0">{ 11633 klog.Errorf("Cannot convert to *v1.Node: %v", t.Obj) 11634 return 11635 }</span> 11636 default:<span class="cov0" title="0"> 11637 klog.Errorf("Cannot convert to *v1.Node: %v", t) 11638 return</span> 11639 } 
11640 11641 <span class="cov8" title="1">sc.Mutex.Lock() 11642 defer sc.Mutex.Unlock() 11643 11644 err := sc.deleteNode(node) 11645 if err != nil </span><span class="cov0" title="0">{ 11646 klog.Errorf("Failed to delete node %s from cache: %v", node.Name, err) 11647 return 11648 }</span> 11649 11650 <span class="cov8" title="1">for i, name := range sc.NodeList </span><span class="cov8" title="1">{ 11651 if name == node.Name </span><span class="cov8" title="1">{ 11652 sc.NodeList = append(sc.NodeList[:i], sc.NodeList[i+1:]...) 11653 break</span> 11654 } 11655 } 11656 } 11657 11658 func getJobID(pg *schedulingapi.PodGroup) schedulingapi.JobID <span class="cov8" title="1">{ 11659 return schedulingapi.JobID(fmt.Sprintf("%s/%s", pg.Namespace, pg.Name)) 11660 }</span> 11661 11662 // Assumes that lock is already acquired. 11663 func (sc *SchedulerCache) setPodGroup(ss *schedulingapi.PodGroup) error <span class="cov8" title="1">{ 11664 job := getJobID(ss) 11665 if _, found := sc.Jobs[job]; !found </span><span class="cov8" title="1">{ 11666 sc.Jobs[job] = schedulingapi.NewJobInfo(job) 11667 }</span> 11668 11669 <span class="cov8" title="1">sc.Jobs[job].SetPodGroup(ss) 11670 11671 // TODO(k82cn): set default queue in admission. 11672 if len(ss.Spec.Queue) == 0 </span><span class="cov8" title="1">{ 11673 sc.Jobs[job].Queue = schedulingapi.QueueID(sc.defaultQueue) 11674 }</span> 11675 11676 <span class="cov8" title="1">return nil</span> 11677 } 11678 11679 // Assumes that lock is already acquired. 11680 func (sc *SchedulerCache) updatePodGroup(newPodGroup *schedulingapi.PodGroup) error <span class="cov0" title="0">{ 11681 return sc.setPodGroup(newPodGroup) 11682 }</span> 11683 11684 // Assumes that lock is already acquired. 
11685 func (sc *SchedulerCache) deletePodGroup(id schedulingapi.JobID) error <span class="cov8" title="1">{ 11686 job, found := sc.Jobs[id] 11687 if !found </span><span class="cov0" title="0">{ 11688 return fmt.Errorf("can not found job %v", id) 11689 }</span> 11690 11691 // Unset SchedulingSpec 11692 <span class="cov8" title="1">job.UnsetPodGroup() 11693 11694 sc.deleteJob(job) 11695 11696 return nil</span> 11697 } 11698 11699 // AddPodGroupV1beta1 add podgroup to scheduler cache 11700 func (sc *SchedulerCache) AddPodGroupV1beta1(obj interface{}) <span class="cov8" title="1">{ 11701 ss, ok := obj.(*schedulingv1.PodGroup) 11702 if !ok </span><span class="cov0" title="0">{ 11703 klog.Errorf("Cannot convert to *schedulingv1.PodGroup: %v", obj) 11704 return 11705 }</span> 11706 11707 <span class="cov8" title="1">podgroup := scheduling.PodGroup{} 11708 if err := scheme.Scheme.Convert(ss, &podgroup, nil); err != nil </span><span class="cov0" title="0">{ 11709 klog.Errorf("Failed to convert podgroup from %T to %T", ss, podgroup) 11710 return 11711 }</span> 11712 11713 <span class="cov8" title="1">pg := &schedulingapi.PodGroup{PodGroup: podgroup, Version: schedulingapi.PodGroupVersionV1Beta1} 11714 klog.V(4).Infof("Add PodGroup(%s) into cache, spec(%#v)", ss.Name, ss.Spec) 11715 11716 sc.Mutex.Lock() 11717 defer sc.Mutex.Unlock() 11718 11719 if err := sc.setPodGroup(pg); err != nil </span><span class="cov0" title="0">{ 11720 klog.Errorf("Failed to add PodGroup %s into cache: %v", ss.Name, err) 11721 return 11722 }</span> 11723 } 11724 11725 // UpdatePodGroupV1beta1 add podgroup to scheduler cache 11726 func (sc *SchedulerCache) UpdatePodGroupV1beta1(oldObj, newObj interface{}) <span class="cov8" title="1">{ 11727 oldSS, ok := oldObj.(*schedulingv1.PodGroup) 11728 if !ok </span><span class="cov0" title="0">{ 11729 klog.Errorf("Cannot convert oldObj to *schedulingv1.SchedulingSpec: %v", oldObj) 11730 return 11731 }</span> 11732 <span class="cov8" title="1">newSS, ok := 
newObj.(*schedulingv1.PodGroup) 11733 if !ok </span><span class="cov0" title="0">{ 11734 klog.Errorf("Cannot convert newObj to *schedulingv1.SchedulingSpec: %v", newObj) 11735 return 11736 }</span> 11737 11738 <span class="cov8" title="1">if oldSS.ResourceVersion == newSS.ResourceVersion </span><span class="cov8" title="1">{ 11739 return 11740 }</span> 11741 11742 <span class="cov0" title="0">podgroup := scheduling.PodGroup{} 11743 if err := scheme.Scheme.Convert(newSS, &podgroup, nil); err != nil </span><span class="cov0" title="0">{ 11744 klog.Errorf("Failed to convert podgroup from %T to %T", newSS, podgroup) 11745 return 11746 }</span> 11747 11748 <span class="cov0" title="0">pg := &schedulingapi.PodGroup{PodGroup: podgroup, Version: schedulingapi.PodGroupVersionV1Beta1} 11749 11750 sc.Mutex.Lock() 11751 defer sc.Mutex.Unlock() 11752 11753 if err := sc.updatePodGroup(pg); err != nil </span><span class="cov0" title="0">{ 11754 klog.Errorf("Failed to update SchedulingSpec %s into cache: %v", pg.Name, err) 11755 return 11756 }</span> 11757 } 11758 11759 // DeletePodGroupV1beta1 delete podgroup from scheduler cache 11760 func (sc *SchedulerCache) DeletePodGroupV1beta1(obj interface{}) <span class="cov8" title="1">{ 11761 var ss *schedulingv1.PodGroup 11762 switch t := obj.(type) </span>{ 11763 case *schedulingv1.PodGroup:<span class="cov8" title="1"> 11764 ss = t</span> 11765 case cache.DeletedFinalStateUnknown:<span class="cov0" title="0"> 11766 var ok bool 11767 ss, ok = t.Obj.(*schedulingv1.PodGroup) 11768 if !ok </span><span class="cov0" title="0">{ 11769 klog.Errorf("Cannot convert to podgroup: %v", t.Obj) 11770 return 11771 }</span> 11772 default:<span class="cov0" title="0"> 11773 klog.Errorf("Cannot convert to podgroup: %v", t) 11774 return</span> 11775 } 11776 11777 <span class="cov8" title="1">jobID := schedulingapi.JobID(fmt.Sprintf("%s/%s", ss.Namespace, ss.Name)) 11778 11779 sc.Mutex.Lock() 11780 defer sc.Mutex.Unlock() 11781 11782 if err := 
sc.deletePodGroup(jobID); err != nil </span><span class="cov0" title="0">{ 11783 klog.Errorf("Failed to delete podgroup %s from cache: %v", ss.Name, err) 11784 return 11785 }</span> 11786 } 11787 11788 // AddQueueV1beta1 add queue to scheduler cache 11789 func (sc *SchedulerCache) AddQueueV1beta1(obj interface{}) <span class="cov8" title="1">{ 11790 ss, ok := obj.(*schedulingv1.Queue) 11791 if !ok </span><span class="cov0" title="0">{ 11792 klog.Errorf("Cannot convert to *schedulingv1.Queue: %v", obj) 11793 return 11794 }</span> 11795 11796 <span class="cov8" title="1">queue := &scheduling.Queue{} 11797 if err := scheme.Scheme.Convert(ss, queue, nil); err != nil </span><span class="cov0" title="0">{ 11798 klog.Errorf("Failed to convert queue from %T to %T", ss, queue) 11799 return 11800 }</span> 11801 11802 <span class="cov8" title="1">sc.Mutex.Lock() 11803 defer sc.Mutex.Unlock() 11804 11805 klog.V(4).Infof("Add Queue(%s) into cache, spec(%#v)", ss.Name, ss.Spec) 11806 sc.addQueue(queue)</span> 11807 } 11808 11809 // UpdateQueueV1beta1 update queue to scheduler cache 11810 func (sc *SchedulerCache) UpdateQueueV1beta1(oldObj, newObj interface{}) <span class="cov8" title="1">{ 11811 oldSS, ok := oldObj.(*schedulingv1.Queue) 11812 if !ok </span><span class="cov0" title="0">{ 11813 klog.Errorf("Cannot convert oldObj to *schedulingv1.Queue: %v", oldObj) 11814 return 11815 }</span> 11816 <span class="cov8" title="1">newSS, ok := newObj.(*schedulingv1.Queue) 11817 if !ok </span><span class="cov0" title="0">{ 11818 klog.Errorf("Cannot convert newObj to *schedulingv1.Queue: %v", newObj) 11819 return 11820 }</span> 11821 11822 <span class="cov8" title="1">if oldSS.ResourceVersion == newSS.ResourceVersion </span><span class="cov8" title="1">{ 11823 return 11824 }</span> 11825 11826 <span class="cov0" title="0">newQueue := &scheduling.Queue{} 11827 if err := scheme.Scheme.Convert(newSS, newQueue, nil); err != nil </span><span class="cov0" title="0">{ 11828 klog.Errorf("Failed 
to convert queue from %T to %T", newSS, newQueue) 11829 return 11830 }</span> 11831 11832 <span class="cov0" title="0">sc.Mutex.Lock() 11833 defer sc.Mutex.Unlock() 11834 sc.updateQueue(newQueue)</span> 11835 } 11836 11837 // DeleteQueueV1beta1 delete queue from the scheduler cache 11838 func (sc *SchedulerCache) DeleteQueueV1beta1(obj interface{}) <span class="cov8" title="1">{ 11839 var ss *schedulingv1.Queue 11840 switch t := obj.(type) </span>{ 11841 case *schedulingv1.Queue:<span class="cov8" title="1"> 11842 ss = t</span> 11843 case cache.DeletedFinalStateUnknown:<span class="cov0" title="0"> 11844 var ok bool 11845 ss, ok = t.Obj.(*schedulingv1.Queue) 11846 if !ok </span><span class="cov0" title="0">{ 11847 klog.Errorf("Cannot convert to *schedulingv1.Queue: %v", t.Obj) 11848 return 11849 }</span> 11850 default:<span class="cov0" title="0"> 11851 klog.Errorf("Cannot convert to *schedulingv1.Queue: %v", t) 11852 return</span> 11853 } 11854 11855 <span class="cov8" title="1">sc.Mutex.Lock() 11856 defer sc.Mutex.Unlock() 11857 sc.deleteQueue(schedulingapi.QueueID(ss.Name))</span> 11858 } 11859 11860 func (sc *SchedulerCache) addQueue(queue *scheduling.Queue) <span class="cov8" title="1">{ 11861 qi := schedulingapi.NewQueueInfo(queue) 11862 sc.Queues[qi.UID] = qi 11863 }</span> 11864 11865 func (sc *SchedulerCache) updateQueue(queue *scheduling.Queue) <span class="cov0" title="0">{ 11866 sc.addQueue(queue) 11867 }</span> 11868 11869 func (sc *SchedulerCache) deleteQueue(id schedulingapi.QueueID) <span class="cov8" title="1">{ 11870 delete(sc.Queues, id) 11871 }</span> 11872 11873 //DeletePriorityClass delete priorityclass from the scheduler cache 11874 func (sc *SchedulerCache) DeletePriorityClass(obj interface{}) <span class="cov0" title="0">{ 11875 var ss *v1beta1.PriorityClass 11876 switch t := obj.(type) </span>{ 11877 case *v1beta1.PriorityClass:<span class="cov0" title="0"> 11878 ss = t</span> 11879 case cache.DeletedFinalStateUnknown:<span class="cov0" 
title="0"> 11880 var ok bool 11881 ss, ok = t.Obj.(*v1beta1.PriorityClass) 11882 if !ok </span><span class="cov0" title="0">{ 11883 klog.Errorf("Cannot convert to *v1beta1.PriorityClass: %v", t.Obj) 11884 return 11885 }</span> 11886 default:<span class="cov0" title="0"> 11887 klog.Errorf("Cannot convert to *v1beta1.PriorityClass: %v", t) 11888 return</span> 11889 } 11890 11891 <span class="cov0" title="0">sc.Mutex.Lock() 11892 defer sc.Mutex.Unlock() 11893 11894 sc.deletePriorityClass(ss)</span> 11895 } 11896 11897 //UpdatePriorityClass update priorityclass to scheduler cache 11898 func (sc *SchedulerCache) UpdatePriorityClass(oldObj, newObj interface{}) <span class="cov0" title="0">{ 11899 oldSS, ok := oldObj.(*v1beta1.PriorityClass) 11900 if !ok </span><span class="cov0" title="0">{ 11901 klog.Errorf("Cannot convert oldObj to *v1beta1.PriorityClass: %v", oldObj) 11902 11903 return 11904 }</span> 11905 11906 <span class="cov0" title="0">newSS, ok := newObj.(*v1beta1.PriorityClass) 11907 if !ok </span><span class="cov0" title="0">{ 11908 klog.Errorf("Cannot convert newObj to *v1beta1.PriorityClass: %v", newObj) 11909 return 11910 }</span> 11911 11912 <span class="cov0" title="0">sc.Mutex.Lock() 11913 defer sc.Mutex.Unlock() 11914 11915 sc.deletePriorityClass(oldSS) 11916 sc.addPriorityClass(newSS)</span> 11917 } 11918 11919 //AddPriorityClass add priorityclass to scheduler cache 11920 func (sc *SchedulerCache) AddPriorityClass(obj interface{}) <span class="cov0" title="0">{ 11921 var ss *v1beta1.PriorityClass 11922 switch t := obj.(type) </span>{ 11923 case *v1beta1.PriorityClass:<span class="cov0" title="0"> 11924 ss = t</span> 11925 case cache.DeletedFinalStateUnknown:<span class="cov0" title="0"> 11926 var ok bool 11927 ss, ok = t.Obj.(*v1beta1.PriorityClass) 11928 if !ok </span><span class="cov0" title="0">{ 11929 klog.Errorf("Cannot convert to *v1beta1.PriorityClass: %v", t.Obj) 11930 return 11931 }</span> 11932 default:<span class="cov0" title="0"> 11933 
klog.Errorf("Cannot convert to *v1beta1.PriorityClass: %v", t) 11934 return</span> 11935 } 11936 11937 <span class="cov0" title="0">sc.Mutex.Lock() 11938 defer sc.Mutex.Unlock() 11939 11940 sc.addPriorityClass(ss)</span> 11941 } 11942 11943 func (sc *SchedulerCache) deletePriorityClass(pc *v1beta1.PriorityClass) <span class="cov0" title="0">{ 11944 if pc.GlobalDefault </span><span class="cov0" title="0">{ 11945 sc.defaultPriorityClass = nil 11946 sc.defaultPriority = 0 11947 }</span> 11948 11949 <span class="cov0" title="0">delete(sc.PriorityClasses, pc.Name)</span> 11950 } 11951 11952 func (sc *SchedulerCache) addPriorityClass(pc *v1beta1.PriorityClass) <span class="cov0" title="0">{ 11953 if pc.GlobalDefault </span><span class="cov0" title="0">{ 11954 if sc.defaultPriorityClass != nil </span><span class="cov0" title="0">{ 11955 klog.Errorf("Updated default priority class from <%s> to <%s> forcefully.", 11956 sc.defaultPriorityClass.Name, pc.Name) 11957 }</span> 11958 <span class="cov0" title="0">sc.defaultPriorityClass = pc 11959 sc.defaultPriority = pc.Value</span> 11960 } 11961 11962 <span class="cov0" title="0">sc.PriorityClasses[pc.Name] = pc</span> 11963 } 11964 11965 func (sc *SchedulerCache) updateResourceQuota(quota *v1.ResourceQuota) <span class="cov0" title="0">{ 11966 collection, ok := sc.NamespaceCollection[quota.Namespace] 11967 if !ok </span><span class="cov0" title="0">{ 11968 collection = schedulingapi.NewNamespaceCollection(quota.Namespace) 11969 sc.NamespaceCollection[quota.Namespace] = collection 11970 }</span> 11971 11972 <span class="cov0" title="0">collection.Update(quota)</span> 11973 } 11974 11975 func (sc *SchedulerCache) deleteResourceQuota(quota *v1.ResourceQuota) <span class="cov0" title="0">{ 11976 collection, ok := sc.NamespaceCollection[quota.Namespace] 11977 if !ok </span><span class="cov0" title="0">{ 11978 return 11979 }</span> 11980 11981 <span class="cov0" title="0">collection.Delete(quota)</span> 11982 } 11983 11984 // 
DeleteResourceQuota delete ResourceQuota from the scheduler cache 11985 func (sc *SchedulerCache) DeleteResourceQuota(obj interface{}) <span class="cov0" title="0">{ 11986 var r *v1.ResourceQuota 11987 switch t := obj.(type) </span>{ 11988 case *v1.ResourceQuota:<span class="cov0" title="0"> 11989 r = t</span> 11990 case cache.DeletedFinalStateUnknown:<span class="cov0" title="0"> 11991 var ok bool 11992 r, ok = t.Obj.(*v1.ResourceQuota) 11993 if !ok </span><span class="cov0" title="0">{ 11994 klog.Errorf("Cannot convert to *v1.ResourceQuota: %v", t.Obj) 11995 return 11996 }</span> 11997 default:<span class="cov0" title="0"> 11998 klog.Errorf("Cannot convert to *v1.ResourceQuota: %v", t) 11999 return</span> 12000 } 12001 12002 <span class="cov0" title="0">sc.Mutex.Lock() 12003 defer sc.Mutex.Unlock() 12004 12005 klog.V(3).Infof("Delete ResourceQuota <%s/%v> in cache", r.Namespace, r.Name) 12006 sc.deleteResourceQuota(r)</span> 12007 } 12008 12009 // UpdateResourceQuota update ResourceQuota to scheduler cache 12010 func (sc *SchedulerCache) UpdateResourceQuota(oldObj, newObj interface{}) <span class="cov0" title="0">{ 12011 newR, ok := newObj.(*v1.ResourceQuota) 12012 if !ok </span><span class="cov0" title="0">{ 12013 klog.Errorf("Cannot convert newObj to *v1.ResourceQuota: %v", newObj) 12014 return 12015 }</span> 12016 12017 <span class="cov0" title="0">sc.Mutex.Lock() 12018 defer sc.Mutex.Unlock() 12019 12020 klog.V(3).Infof("Update ResourceQuota <%s/%v> in cache, with spec: %v.", newR.Namespace, newR.Name, newR.Spec.Hard) 12021 sc.updateResourceQuota(newR)</span> 12022 } 12023 12024 // AddResourceQuota add ResourceQuota to scheduler cache 12025 func (sc *SchedulerCache) AddResourceQuota(obj interface{}) <span class="cov0" title="0">{ 12026 var r *v1.ResourceQuota 12027 switch t := obj.(type) </span>{ 12028 case *v1.ResourceQuota:<span class="cov0" title="0"> 12029 r = t</span> 12030 default:<span class="cov0" title="0"> 12031 klog.Errorf("Cannot convert to 
*v1.ResourceQuota: %v", t) 12032 return</span> 12033 } 12034 12035 <span class="cov0" title="0">sc.Mutex.Lock() 12036 defer sc.Mutex.Unlock() 12037 12038 klog.V(3).Infof("Add ResourceQuota <%s/%v> in cache, with spec: %v.", r.Namespace, r.Name, r.Spec.Hard) 12039 sc.updateResourceQuota(r)</span> 12040 } 12041 12042 func getNumaInfo(srcInfo *nodeinfov1alpha1.Numatopology) *schedulingapi.NumatopoInfo <span class="cov0" title="0">{ 12043 numaInfo := &schedulingapi.NumatopoInfo{ 12044 Namespace: srcInfo.Namespace, 12045 Name: srcInfo.Name, 12046 Policies: make(map[nodeinfov1alpha1.PolicyName]string), 12047 NumaResMap: make(map[string]*schedulingapi.ResourceInfo), 12048 CPUDetail: topology.CPUDetails{}, 12049 ResReserved: make(v1.ResourceList), 12050 } 12051 12052 policies := srcInfo.Spec.Policies 12053 for name, policy := range policies </span><span class="cov0" title="0">{ 12054 numaInfo.Policies[name] = policy 12055 }</span> 12056 12057 <span class="cov0" title="0">numaResMap := srcInfo.Spec.NumaResMap 12058 for name, resInfo := range numaResMap </span><span class="cov0" title="0">{ 12059 tmp := schedulingapi.ResourceInfo{} 12060 tmp.Capacity = resInfo.Capacity 12061 tmp.Allocatable = cpuset.MustParse(resInfo.Allocatable) 12062 numaInfo.NumaResMap[name] = &tmp 12063 }</span> 12064 12065 <span class="cov0" title="0">cpuDetail := srcInfo.Spec.CPUDetail 12066 for key, detail := range cpuDetail </span><span class="cov0" title="0">{ 12067 cpuID, _ := strconv.Atoi(key) 12068 numaInfo.CPUDetail[cpuID] = topology.CPUInfo{ 12069 NUMANodeID: detail.NUMANodeID, 12070 SocketID: detail.SocketID, 12071 CoreID: detail.CoreID, 12072 } 12073 }</span> 12074 12075 <span class="cov0" title="0">resReserved, err := schedulingapi.ParseResourceList(srcInfo.Spec.ResReserved) 12076 if err != nil </span><span class="cov0" title="0">{ 12077 klog.Errorf("ParseResourceList failed, err=%v", err) 12078 }</span> else<span class="cov0" title="0"> { 12079 numaInfo.ResReserved = resReserved 12080 
}</span> 12081 12082 <span class="cov0" title="0">return numaInfo</span> 12083 } 12084 12085 // Assumes that lock is already acquired. 12086 func (sc *SchedulerCache) addNumaInfo(info *nodeinfov1alpha1.Numatopology) error <span class="cov0" title="0">{ 12087 if sc.Nodes[info.Name] == nil </span><span class="cov0" title="0">{ 12088 sc.Nodes[info.Name] = schedulingapi.NewNodeInfo(nil) 12089 sc.Nodes[info.Name].Name = info.Name 12090 }</span> 12091 12092 <span class="cov0" title="0">if sc.Nodes[info.Name].NumaInfo == nil </span><span class="cov0" title="0">{ 12093 sc.Nodes[info.Name].NumaInfo = getNumaInfo(info) 12094 }</span> 12095 12096 <span class="cov0" title="0">newLocalInfo := getNumaInfo(info) 12097 if sc.Nodes[info.Name].NumaInfo.Compare(newLocalInfo) </span><span class="cov0" title="0">{ 12098 sc.Nodes[info.Name].NumaChgFlag = schedulingapi.NumaInfoMoreFlag 12099 }</span> else<span class="cov0" title="0"> { 12100 sc.Nodes[info.Name].NumaChgFlag = schedulingapi.NumaInfoLessFlag 12101 }</span> 12102 12103 <span class="cov0" title="0">sc.Nodes[info.Name].NumaInfo = newLocalInfo 12104 12105 for resName, NumaResInfo := range sc.Nodes[info.Name].NumaInfo.NumaResMap </span><span class="cov0" title="0">{ 12106 klog.V(3).Infof("resource %s Allocatable %v on node[%s] into cache", resName, NumaResInfo, info.Name) 12107 }</span> 12108 12109 <span class="cov0" title="0">klog.V(3).Infof("Policies %v on node[%s] into cache, change= %v", 12110 sc.Nodes[info.Name].NumaInfo.Policies, info.Name, sc.Nodes[info.Name].NumaChgFlag) 12111 return nil</span> 12112 } 12113 12114 // Assumes that lock is already acquired. 
12115 func (sc *SchedulerCache) deleteNumaInfo(info *nodeinfov1alpha1.Numatopology) <span class="cov0" title="0">{ 12116 if sc.Nodes[info.Name] != nil </span><span class="cov0" title="0">{ 12117 sc.Nodes[info.Name].NumaInfo = nil 12118 sc.Nodes[info.Name].NumaChgFlag = schedulingapi.NumaInfoResetFlag 12119 klog.V(3).Infof("delete numainfo in cahce for node<%s>", info.Name) 12120 }</span> 12121 } 12122 12123 // AddNumaInfoV1alpha1 add numa information to scheduler cache 12124 func (sc *SchedulerCache) AddNumaInfoV1alpha1(obj interface{}) <span class="cov0" title="0">{ 12125 ss, ok := obj.(*nodeinfov1alpha1.Numatopology) 12126 if !ok </span><span class="cov0" title="0">{ 12127 klog.Errorf("Cannot convert oldObj to *nodeinfov1alpha1.Numatopology: %v", obj) 12128 return 12129 }</span> 12130 12131 <span class="cov0" title="0">sc.Mutex.Lock() 12132 defer sc.Mutex.Unlock() 12133 12134 sc.addNumaInfo(ss)</span> 12135 } 12136 12137 // UpdateNumaInfoV1alpha1 update numa information to scheduler cache 12138 func (sc *SchedulerCache) UpdateNumaInfoV1alpha1(oldObj, newObj interface{}) <span class="cov0" title="0">{ 12139 ss, ok := newObj.(*nodeinfov1alpha1.Numatopology) 12140 if !ok </span><span class="cov0" title="0">{ 12141 klog.Errorf("Cannot convert oldObj to *nodeinfov1alpha1.Numatopology: %v", newObj) 12142 return 12143 }</span> 12144 12145 <span class="cov0" title="0">sc.Mutex.Lock() 12146 defer sc.Mutex.Unlock() 12147 sc.addNumaInfo(ss) 12148 klog.V(3).Infof("update numaInfo<%s> in cahce, with spec: Policy: %v, resMap: %v", ss.Name, ss.Spec.Policies, ss.Spec.NumaResMap)</span> 12149 } 12150 12151 // DeleteNumaInfoV1alpha1 delete numa information from scheduler cache 12152 func (sc *SchedulerCache) DeleteNumaInfoV1alpha1(obj interface{}) <span class="cov0" title="0">{ 12153 var ss *nodeinfov1alpha1.Numatopology 12154 switch t := obj.(type) </span>{ 12155 case *nodeinfov1alpha1.Numatopology:<span class="cov0" title="0"> 12156 ss = t</span> 12157 case 
cache.DeletedFinalStateUnknown:<span class="cov0" title="0"> 12158 var ok bool 12159 ss, ok = t.Obj.(*nodeinfov1alpha1.Numatopology) 12160 if !ok </span><span class="cov0" title="0">{ 12161 klog.Errorf("Cannot convert to Numatopo: %v", t.Obj) 12162 return 12163 }</span> 12164 default:<span class="cov0" title="0"> 12165 klog.Errorf("Cannot convert to Numatopo: %v", t) 12166 return</span> 12167 } 12168 12169 <span class="cov0" title="0">sc.Mutex.Lock() 12170 defer sc.Mutex.Unlock() 12171 12172 sc.deleteNumaInfo(ss) 12173 klog.V(3).Infof("Delete numaInfo<%s> from cahce, with spec: Policy: %v, resMap: %v", ss.Name, ss.Spec.Policies, ss.Spec.NumaResMap)</span> 12174 } 12175 </pre> 12176 12177 <pre class="file" id="file55" style="display: none">/* 12178 Copyright 2018 The Kubernetes Authors. 12179 12180 Licensed under the Apache License, Version 2.0 (the "License"); 12181 you may not use this file except in compliance with the License. 12182 You may obtain a copy of the License at 12183 12184 http://www.apache.org/licenses/LICENSE-2.0 12185 12186 Unless required by applicable law or agreed to in writing, software 12187 distributed under the License is distributed on an "AS IS" BASIS, 12188 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12189 See the License for the specific language governing permissions and 12190 limitations under the License. 12191 */ 12192 12193 package cache 12194 12195 import v1 "k8s.io/api/core/v1" 12196 12197 // responsibleForPod returns true if the pod has asked to be scheduled by the given scheduler. 12198 func responsibleForPod(pod *v1.Pod, schedulerName string) bool <span class="cov0" title="0">{ 12199 return schedulerName == pod.Spec.SchedulerName 12200 }</span> 12201 </pre> 12202 12203 <pre class="file" id="file56" style="display: none">/* 12204 Copyright 2019 The Kubernetes Authors. 
12205 12206 Licensed under the Apache License, Version 2.0 (the "License"); 12207 you may not use this file except in compliance with the License. 12208 You may obtain a copy of the License at 12209 12210 http://www.apache.org/licenses/LICENSE-2.0 12211 12212 Unless required by applicable law or agreed to in writing, software 12213 distributed under the License is distributed on an "AS IS" BASIS, 12214 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12215 See the License for the specific language governing permissions and 12216 limitations under the License. 12217 */ 12218 12219 package framework 12220 12221 import ( 12222 "strconv" 12223 12224 "volcano.sh/volcano/pkg/scheduler/conf" 12225 12226 "k8s.io/klog" 12227 ) 12228 12229 // Arguments map 12230 type Arguments map[string]string 12231 12232 // GetInt get the integer value from string 12233 func (a Arguments) GetInt(ptr *int, key string) <span class="cov8" title="1">{ 12234 if ptr == nil </span><span class="cov8" title="1">{ 12235 return 12236 }</span> 12237 12238 <span class="cov8" title="1">argv, ok := a[key] 12239 if !ok || argv == "" </span><span class="cov8" title="1">{ 12240 return 12241 }</span> 12242 12243 <span class="cov8" title="1">value, err := strconv.Atoi(argv) 12244 if err != nil </span><span class="cov8" title="1">{ 12245 klog.Warningf("Could not parse argument: %s for key %s, with err %v", argv, key, err) 12246 return 12247 }</span> 12248 12249 <span class="cov8" title="1">*ptr = value</span> 12250 } 12251 12252 // GetFloat64 get the float64 value from string 12253 func (a Arguments) GetFloat64(ptr *float64, key string) <span class="cov8" title="1">{ 12254 if ptr == nil </span><span class="cov0" title="0">{ 12255 return 12256 }</span> 12257 12258 <span class="cov8" title="1">argv, ok := a[key] 12259 if !ok || len(argv) == 0 </span><span class="cov8" title="1">{ 12260 return 12261 }</span> 12262 12263 <span class="cov8" title="1">value, err := strconv.ParseFloat(argv, 
64) 12264 if err != nil </span><span class="cov8" title="1">{ 12265 klog.Warningf("Could not parse argument: %s for key %s, with err %v", argv, key, err) 12266 return 12267 }</span> 12268 12269 <span class="cov8" title="1">*ptr = value</span> 12270 } 12271 12272 // GetBool get the bool value from string 12273 func (a Arguments) GetBool(ptr *bool, key string) <span class="cov0" title="0">{ 12274 if ptr == nil </span><span class="cov0" title="0">{ 12275 return 12276 }</span> 12277 12278 <span class="cov0" title="0">argv, ok := a[key] 12279 if !ok || argv == "" </span><span class="cov0" title="0">{ 12280 return 12281 }</span> 12282 12283 <span class="cov0" title="0">value, err := strconv.ParseBool(argv) 12284 if err != nil </span><span class="cov0" title="0">{ 12285 klog.Warningf("Could not parse argument: %s for key %s, with err %v", argv, key, err) 12286 return 12287 }</span> 12288 12289 <span class="cov0" title="0">*ptr = value</span> 12290 } 12291 12292 // GetArgOfActionFromConf return argument of action reading from configuration of schedule 12293 func GetArgOfActionFromConf(configurations []conf.Configuration, actionName string) Arguments <span class="cov8" title="1">{ 12294 for _, c := range configurations </span><span class="cov8" title="1">{ 12295 if c.Name == actionName </span><span class="cov8" title="1">{ 12296 return c.Arguments 12297 }</span> 12298 } 12299 12300 <span class="cov8" title="1">return nil</span> 12301 } 12302 </pre> 12303 12304 <pre class="file" id="file57" style="display: none">/* 12305 Copyright 2018 The Kubernetes Authors. 12306 12307 Licensed under the Apache License, Version 2.0 (the "License"); 12308 you may not use this file except in compliance with the License. 
12309 You may obtain a copy of the License at 12310 12311 http://www.apache.org/licenses/LICENSE-2.0 12312 12313 Unless required by applicable law or agreed to in writing, software 12314 distributed under the License is distributed on an "AS IS" BASIS, 12315 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12316 See the License for the specific language governing permissions and 12317 limitations under the License. 12318 */ 12319 12320 package framework 12321 12322 import ( 12323 "time" 12324 12325 "k8s.io/klog" 12326 12327 "volcano.sh/volcano/pkg/scheduler/cache" 12328 "volcano.sh/volcano/pkg/scheduler/conf" 12329 "volcano.sh/volcano/pkg/scheduler/metrics" 12330 ) 12331 12332 // OpenSession start the session 12333 func OpenSession(cache cache.Cache, tiers []conf.Tier, configurations []conf.Configuration) *Session <span class="cov0" title="0">{ 12334 ssn := openSession(cache) 12335 ssn.Tiers = tiers 12336 ssn.Configurations = configurations 12337 12338 for _, tier := range tiers </span><span class="cov0" title="0">{ 12339 for _, plugin := range tier.Plugins </span><span class="cov0" title="0">{ 12340 if pb, found := GetPluginBuilder(plugin.Name); !found </span><span class="cov0" title="0">{ 12341 klog.Errorf("Failed to get plugin %s.", plugin.Name) 12342 }</span> else<span class="cov0" title="0"> { 12343 plugin := pb(plugin.Arguments) 12344 ssn.plugins[plugin.Name()] = plugin 12345 onSessionOpenStart := time.Now() 12346 plugin.OnSessionOpen(ssn) 12347 metrics.UpdatePluginDuration(plugin.Name(), metrics.OnSessionOpen, metrics.Duration(onSessionOpenStart)) 12348 }</span> 12349 } 12350 } 12351 <span class="cov0" title="0">return ssn</span> 12352 } 12353 12354 // CloseSession close the session 12355 func CloseSession(ssn *Session) <span class="cov0" title="0">{ 12356 for _, plugin := range ssn.plugins </span><span class="cov0" title="0">{ 12357 onSessionCloseStart := time.Now() 12358 plugin.OnSessionClose(ssn) 12359 
metrics.UpdatePluginDuration(plugin.Name(), metrics.OnSessionClose, metrics.Duration(onSessionCloseStart)) 12360 }</span> 12361 12362 <span class="cov0" title="0">closeSession(ssn)</span> 12363 } 12364 </pre> 12365 12366 <pre class="file" id="file58" style="display: none">package framework 12367 12368 import ( 12369 "context" 12370 "math/rand" 12371 "reflect" 12372 "time" 12373 12374 "k8s.io/client-go/util/workqueue" 12375 "k8s.io/klog" 12376 12377 "volcano.sh/apis/pkg/apis/scheduling" 12378 "volcano.sh/volcano/pkg/scheduler/api" 12379 ) 12380 12381 const ( 12382 jobUpdaterWorker = 16 12383 12384 jobConditionUpdateTime = time.Minute 12385 jobConditionUpdateTimeJitter = 30 * time.Second 12386 ) 12387 12388 // TimeJitterAfter means: new after old + duration + jitter 12389 func TimeJitterAfter(new, old time.Time, duration, maxJitter time.Duration) bool <span class="cov0" title="0">{ 12390 var jitter int64 12391 if maxJitter > 0 </span><span class="cov0" title="0">{ 12392 jitter = rand.Int63n(int64(maxJitter)) 12393 }</span> 12394 <span class="cov0" title="0">return new.After(old.Add(duration + time.Duration(jitter)))</span> 12395 } 12396 12397 type jobUpdater struct { 12398 ssn *Session 12399 jobQueue []*api.JobInfo 12400 } 12401 12402 func newJobUpdater(ssn *Session) *jobUpdater <span class="cov0" title="0">{ 12403 queue := make([]*api.JobInfo, 0, len(ssn.Jobs)) 12404 for _, job := range ssn.Jobs </span><span class="cov0" title="0">{ 12405 queue = append(queue, job) 12406 }</span> 12407 12408 <span class="cov0" title="0">ju := &jobUpdater{ 12409 ssn: ssn, 12410 jobQueue: queue, 12411 } 12412 return ju</span> 12413 } 12414 12415 func (ju *jobUpdater) UpdateAll() <span class="cov0" title="0">{ 12416 workqueue.ParallelizeUntil(context.TODO(), jobUpdaterWorker, len(ju.jobQueue), ju.updateJob) 12417 }</span> 12418 12419 func isPodGroupConditionsUpdated(newCondition, oldCondition []scheduling.PodGroupCondition) bool <span class="cov0" title="0">{ 12420 if len(newCondition) 
!= len(oldCondition) </span><span class="cov0" title="0">{ 12421 return true 12422 }</span> 12423 12424 <span class="cov0" title="0">for index, newCond := range newCondition </span><span class="cov0" title="0">{ 12425 oldCond := oldCondition[index] 12426 12427 newTime := newCond.LastTransitionTime 12428 oldTime := oldCond.LastTransitionTime 12429 if TimeJitterAfter(newTime.Time, oldTime.Time, jobConditionUpdateTime, jobConditionUpdateTimeJitter) </span><span class="cov0" title="0">{ 12430 return true 12431 }</span> 12432 12433 // if newCond is not new enough, we treat it the same as the old one 12434 <span class="cov0" title="0">newCond.LastTransitionTime = oldTime 12435 12436 // comparing should ignore the TransitionID 12437 newTransitionID := newCond.TransitionID 12438 newCond.TransitionID = oldCond.TransitionID 12439 12440 shouldUpdate := !reflect.DeepEqual(&newCond, &oldCond) 12441 12442 newCond.LastTransitionTime = newTime 12443 newCond.TransitionID = newTransitionID 12444 if shouldUpdate </span><span class="cov0" title="0">{ 12445 return true 12446 }</span> 12447 } 12448 12449 <span class="cov0" title="0">return false</span> 12450 } 12451 12452 func isPodGroupStatusUpdated(newStatus, oldStatus scheduling.PodGroupStatus) bool <span class="cov0" title="0">{ 12453 newCondition := newStatus.Conditions 12454 newStatus.Conditions = nil 12455 oldCondition := oldStatus.Conditions 12456 oldStatus.Conditions = nil 12457 12458 return !reflect.DeepEqual(newStatus, oldStatus) || isPodGroupConditionsUpdated(newCondition, oldCondition) 12459 }</span> 12460 12461 // updateJob update specified job 12462 func (ju *jobUpdater) updateJob(index int) <span class="cov0" title="0">{ 12463 job := ju.jobQueue[index] 12464 ssn := ju.ssn 12465 12466 job.PodGroup.Status = jobStatus(ssn, job) 12467 oldStatus, found := ssn.podGroupStatus[job.UID] 12468 updatePG := !found || isPodGroupStatusUpdated(job.PodGroup.Status, oldStatus) 12469 if _, err := ssn.cache.UpdateJobStatus(job, updatePG); 
err != nil </span><span class="cov0" title="0">{ 12470 klog.Errorf("Failed to update job <%s/%s>: %v", 12471 job.Namespace, job.Name, err) 12472 }</span> 12473 } 12474 </pre> 12475 12476 <pre class="file" id="file59" style="display: none">/* 12477 Copyright 2018 The Kubernetes Authors. 12478 12479 Licensed under the Apache License, Version 2.0 (the "License"); 12480 you may not use this file except in compliance with the License. 12481 You may obtain a copy of the License at 12482 12483 http://www.apache.org/licenses/LICENSE-2.0 12484 12485 Unless required by applicable law or agreed to in writing, software 12486 distributed under the License is distributed on an "AS IS" BASIS, 12487 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12488 See the License for the specific language governing permissions and 12489 limitations under the License. 12490 */ 12491 12492 package framework 12493 12494 import ( 12495 "fmt" 12496 "path/filepath" 12497 "plugin" 12498 "strings" 12499 "sync" 12500 12501 "k8s.io/klog" 12502 ) 12503 12504 var pluginMutex sync.Mutex 12505 12506 // PluginBuilder plugin management 12507 type PluginBuilder = func(Arguments) Plugin 12508 12509 // Plugin management 12510 var pluginBuilders = map[string]PluginBuilder{} 12511 12512 // RegisterPluginBuilder register the plugin 12513 func RegisterPluginBuilder(name string, pc PluginBuilder) <span class="cov0" title="0">{ 12514 pluginMutex.Lock() 12515 defer pluginMutex.Unlock() 12516 12517 pluginBuilders[name] = pc 12518 }</span> 12519 12520 // CleanupPluginBuilders cleans up all the plugin 12521 func CleanupPluginBuilders() <span class="cov0" title="0">{ 12522 pluginMutex.Lock() 12523 defer pluginMutex.Unlock() 12524 12525 pluginBuilders = map[string]PluginBuilder{} 12526 }</span> 12527 12528 // GetPluginBuilder get the pluginbuilder by name 12529 func GetPluginBuilder(name string) (PluginBuilder, bool) <span class="cov0" title="0">{ 12530 pluginMutex.Lock() 12531 defer 
pluginMutex.Unlock() 12532 12533 pb, found := pluginBuilders[name] 12534 return pb, found 12535 }</span> 12536 12537 // LoadCustomPlugins loads custom implement plugins 12538 func LoadCustomPlugins(pluginsDir string) error <span class="cov0" title="0">{ 12539 pluginPaths, _ := filepath.Glob(fmt.Sprintf("%s/*.so", pluginsDir)) 12540 for _, pluginPath := range pluginPaths </span><span class="cov0" title="0">{ 12541 pluginBuilder, err := loadPluginBuilder(pluginPath) 12542 if err != nil </span><span class="cov0" title="0">{ 12543 return err 12544 }</span> 12545 <span class="cov0" title="0">pluginName := getPluginName(pluginPath) 12546 RegisterPluginBuilder(pluginName, pluginBuilder) 12547 klog.V(4).Infof("Custom plugin %s loaded", pluginName)</span> 12548 } 12549 12550 <span class="cov0" title="0">return nil</span> 12551 } 12552 12553 func getPluginName(pluginPath string) string <span class="cov8" title="1">{ 12554 return strings.TrimSuffix(filepath.Base(pluginPath), filepath.Ext(pluginPath)) 12555 }</span> 12556 12557 func loadPluginBuilder(pluginPath string) (PluginBuilder, error) <span class="cov0" title="0">{ 12558 plug, err := plugin.Open(pluginPath) 12559 if err != nil </span><span class="cov0" title="0">{ 12560 return nil, err 12561 }</span> 12562 12563 <span class="cov0" title="0">symBuilder, err := plug.Lookup("New") 12564 if err != nil </span><span class="cov0" title="0">{ 12565 return nil, err 12566 }</span> 12567 12568 <span class="cov0" title="0">builder, ok := symBuilder.(PluginBuilder) 12569 if !ok </span><span class="cov0" title="0">{ 12570 return nil, fmt.Errorf("unexpected plugin: %s, failed to convert PluginBuilder `New`", pluginPath) 12571 }</span> 12572 12573 <span class="cov0" title="0">return builder, nil</span> 12574 } 12575 12576 // Action management 12577 var actionMap = map[string]Action{} 12578 12579 // RegisterAction register action 12580 func RegisterAction(act Action) <span class="cov0" title="0">{ 12581 pluginMutex.Lock() 12582 defer 
pluginMutex.Unlock() 12583 12584 actionMap[act.Name()] = act 12585 }</span> 12586 12587 // GetAction get the action by name 12588 func GetAction(name string) (Action, bool) <span class="cov0" title="0">{ 12589 pluginMutex.Lock() 12590 defer pluginMutex.Unlock() 12591 12592 act, found := actionMap[name] 12593 return act, found 12594 }</span> 12595 </pre> 12596 12597 <pre class="file" id="file60" style="display: none">/* 12598 Copyright 2018 The Kubernetes Authors. 12599 12600 Licensed under the Apache License, Version 2.0 (the "License"); 12601 you may not use this file except in compliance with the License. 12602 You may obtain a copy of the License at 12603 12604 http://www.apache.org/licenses/LICENSE-2.0 12605 12606 Unless required by applicable law or agreed to in writing, software 12607 distributed under the License is distributed on an "AS IS" BASIS, 12608 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12609 See the License for the specific language governing permissions and 12610 limitations under the License. 
12611 */ 12612 12613 package framework 12614 12615 import ( 12616 "fmt" 12617 12618 v1 "k8s.io/api/core/v1" 12619 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 12620 "k8s.io/apimachinery/pkg/types" 12621 "k8s.io/apimachinery/pkg/util/uuid" 12622 "k8s.io/client-go/informers" 12623 "k8s.io/client-go/kubernetes" 12624 "k8s.io/klog" 12625 volumescheduling "k8s.io/kubernetes/pkg/controller/volume/scheduling" 12626 12627 "volcano.sh/apis/pkg/apis/scheduling" 12628 "volcano.sh/volcano/pkg/scheduler/api" 12629 "volcano.sh/volcano/pkg/scheduler/cache" 12630 "volcano.sh/volcano/pkg/scheduler/conf" 12631 "volcano.sh/volcano/pkg/scheduler/metrics" 12632 "volcano.sh/volcano/pkg/scheduler/util" 12633 ) 12634 12635 // Session information for the current session 12636 type Session struct { 12637 UID types.UID 12638 12639 kubeClient kubernetes.Interface 12640 cache cache.Cache 12641 informerFactory informers.SharedInformerFactory 12642 12643 TotalResource *api.Resource 12644 // podGroupStatus cache podgroup status during schedule 12645 // This should not be mutated after initiated 12646 podGroupStatus map[api.JobID]scheduling.PodGroupStatus 12647 12648 Jobs map[api.JobID]*api.JobInfo 12649 Nodes map[string]*api.NodeInfo 12650 RevocableNodes map[string]*api.NodeInfo 12651 Queues map[api.QueueID]*api.QueueInfo 12652 NamespaceInfo map[api.NamespaceName]*api.NamespaceInfo 12653 12654 Tiers []conf.Tier 12655 Configurations []conf.Configuration 12656 NodeList []*api.NodeInfo 12657 12658 plugins map[string]Plugin 12659 eventHandlers []*EventHandler 12660 jobOrderFns map[string]api.CompareFn 12661 queueOrderFns map[string]api.CompareFn 12662 taskOrderFns map[string]api.CompareFn 12663 namespaceOrderFns map[string]api.CompareFn 12664 clusterOrderFns map[string]api.CompareFn 12665 predicateFns map[string]api.PredicateFn 12666 bestNodeFns map[string]api.BestNodeFn 12667 nodeOrderFns map[string]api.NodeOrderFn 12668 batchNodeOrderFns map[string]api.BatchNodeOrderFn 12669 nodeMapFns 
map[string]api.NodeMapFn 12670 nodeReduceFns map[string]api.NodeReduceFn 12671 preemptableFns map[string]api.EvictableFn 12672 reclaimableFns map[string]api.EvictableFn 12673 overusedFns map[string]api.ValidateFn 12674 underUsedFns map[string]api.UnderUsedResourceFn 12675 jobReadyFns map[string]api.ValidateFn 12676 jobPipelinedFns map[string]api.VoteFn 12677 jobValidFns map[string]api.ValidateExFn 12678 jobEnqueueableFns map[string]api.VoteFn 12679 jobEnqueuedFns map[string]api.JobEnqueuedFn 12680 targetJobFns map[string]api.TargetJobFn 12681 reservedNodesFns map[string]api.ReservedNodesFn 12682 victimTasksFns map[string]api.VictimTasksFn 12683 jobStarvingFns map[string]api.ValidateFn 12684 } 12685 12686 func openSession(cache cache.Cache) *Session <span class="cov0" title="0">{ 12687 ssn := &Session{ 12688 UID: uuid.NewUUID(), 12689 kubeClient: cache.Client(), 12690 cache: cache, 12691 informerFactory: cache.SharedInformerFactory(), 12692 12693 TotalResource: api.EmptyResource(), 12694 podGroupStatus: map[api.JobID]scheduling.PodGroupStatus{}, 12695 12696 Jobs: map[api.JobID]*api.JobInfo{}, 12697 Nodes: map[string]*api.NodeInfo{}, 12698 RevocableNodes: map[string]*api.NodeInfo{}, 12699 Queues: map[api.QueueID]*api.QueueInfo{}, 12700 12701 plugins: map[string]Plugin{}, 12702 jobOrderFns: map[string]api.CompareFn{}, 12703 queueOrderFns: map[string]api.CompareFn{}, 12704 taskOrderFns: map[string]api.CompareFn{}, 12705 namespaceOrderFns: map[string]api.CompareFn{}, 12706 clusterOrderFns: map[string]api.CompareFn{}, 12707 predicateFns: map[string]api.PredicateFn{}, 12708 bestNodeFns: map[string]api.BestNodeFn{}, 12709 nodeOrderFns: map[string]api.NodeOrderFn{}, 12710 batchNodeOrderFns: map[string]api.BatchNodeOrderFn{}, 12711 nodeMapFns: map[string]api.NodeMapFn{}, 12712 nodeReduceFns: map[string]api.NodeReduceFn{}, 12713 preemptableFns: map[string]api.EvictableFn{}, 12714 reclaimableFns: map[string]api.EvictableFn{}, 12715 overusedFns: map[string]api.ValidateFn{}, 
12716 underUsedFns: map[string]api.UnderUsedResourceFn{}, 12717 jobReadyFns: map[string]api.ValidateFn{}, 12718 jobPipelinedFns: map[string]api.VoteFn{}, 12719 jobValidFns: map[string]api.ValidateExFn{}, 12720 jobEnqueueableFns: map[string]api.VoteFn{}, 12721 jobEnqueuedFns: map[string]api.JobEnqueuedFn{}, 12722 targetJobFns: map[string]api.TargetJobFn{}, 12723 reservedNodesFns: map[string]api.ReservedNodesFn{}, 12724 victimTasksFns: map[string]api.VictimTasksFn{}, 12725 jobStarvingFns: map[string]api.ValidateFn{}, 12726 } 12727 12728 snapshot := cache.Snapshot() 12729 12730 ssn.Jobs = snapshot.Jobs 12731 for _, job := range ssn.Jobs </span><span class="cov0" title="0">{ 12732 // only conditions will be updated periodically 12733 if job.PodGroup != nil && job.PodGroup.Status.Conditions != nil </span><span class="cov0" title="0">{ 12734 ssn.podGroupStatus[job.UID] = job.PodGroup.Status 12735 }</span> 12736 12737 <span class="cov0" title="0">if vjr := ssn.JobValid(job); vjr != nil </span><span class="cov0" title="0">{ 12738 if !vjr.Pass </span><span class="cov0" title="0">{ 12739 jc := &scheduling.PodGroupCondition{ 12740 Type: scheduling.PodGroupUnschedulableType, 12741 Status: v1.ConditionTrue, 12742 LastTransitionTime: metav1.Now(), 12743 TransitionID: string(ssn.UID), 12744 Reason: vjr.Reason, 12745 Message: vjr.Message, 12746 } 12747 12748 if err := ssn.UpdatePodGroupCondition(job, jc); err != nil </span><span class="cov0" title="0">{ 12749 klog.Errorf("Failed to update job condition: %v", err) 12750 }</span> 12751 } 12752 12753 <span class="cov0" title="0">delete(ssn.Jobs, job.UID)</span> 12754 } 12755 } 12756 <span class="cov0" title="0">ssn.NodeList = util.GetNodeList(snapshot.Nodes, snapshot.NodeList) 12757 ssn.Nodes = snapshot.Nodes 12758 ssn.RevocableNodes = snapshot.RevocableNodes 12759 ssn.Queues = snapshot.Queues 12760 ssn.NamespaceInfo = snapshot.NamespaceInfo 12761 // calculate all nodes' resource only once in each schedule cycle, other plugins can 
clone it when need 12762 for _, n := range ssn.Nodes </span><span class="cov0" title="0">{ 12763 ssn.TotalResource.Add(n.Allocatable) 12764 }</span> 12765 12766 <span class="cov0" title="0">klog.V(3).Infof("Open Session %v with <%d> Job and <%d> Queues", 12767 ssn.UID, len(ssn.Jobs), len(ssn.Queues)) 12768 12769 return ssn</span> 12770 } 12771 12772 func closeSession(ssn *Session) <span class="cov0" title="0">{ 12773 ju := newJobUpdater(ssn) 12774 ju.UpdateAll() 12775 12776 ssn.Jobs = nil 12777 ssn.Nodes = nil 12778 ssn.RevocableNodes = nil 12779 ssn.plugins = nil 12780 ssn.eventHandlers = nil 12781 ssn.jobOrderFns = nil 12782 ssn.namespaceOrderFns = nil 12783 ssn.queueOrderFns = nil 12784 ssn.clusterOrderFns = nil 12785 ssn.NodeList = nil 12786 ssn.TotalResource = nil 12787 12788 klog.V(3).Infof("Close Session %v", ssn.UID) 12789 }</span> 12790 12791 func jobStatus(ssn *Session, jobInfo *api.JobInfo) scheduling.PodGroupStatus <span class="cov0" title="0">{ 12792 status := jobInfo.PodGroup.Status 12793 12794 unschedulable := false 12795 for _, c := range status.Conditions </span><span class="cov0" title="0">{ 12796 if c.Type == scheduling.PodGroupUnschedulableType && 12797 c.Status == v1.ConditionTrue && 12798 c.TransitionID == string(ssn.UID) </span><span class="cov0" title="0">{ 12799 unschedulable = true 12800 break</span> 12801 } 12802 } 12803 12804 // If running tasks && unschedulable, unknown phase 12805 <span class="cov0" title="0">if len(jobInfo.TaskStatusIndex[api.Running]) != 0 && unschedulable </span><span class="cov0" title="0">{ 12806 status.Phase = scheduling.PodGroupUnknown 12807 }</span> else<span class="cov0" title="0"> { 12808 allocated := 0 12809 for status, tasks := range jobInfo.TaskStatusIndex </span><span class="cov0" title="0">{ 12810 if api.AllocatedStatus(status) || status == api.Succeeded </span><span class="cov0" title="0">{ 12811 allocated += len(tasks) 12812 }</span> 12813 } 12814 12815 // If there're enough allocated resource, it's 
running 12816 <span class="cov0" title="0">if int32(allocated) >= jobInfo.PodGroup.Spec.MinMember </span><span class="cov0" title="0">{ 12817 status.Phase = scheduling.PodGroupRunning 12818 }</span> else<span class="cov0" title="0"> if jobInfo.PodGroup.Status.Phase != scheduling.PodGroupInqueue </span><span class="cov0" title="0">{ 12819 status.Phase = scheduling.PodGroupPending 12820 }</span> 12821 } 12822 12823 <span class="cov0" title="0">status.Running = int32(len(jobInfo.TaskStatusIndex[api.Running])) 12824 status.Failed = int32(len(jobInfo.TaskStatusIndex[api.Failed])) 12825 status.Succeeded = int32(len(jobInfo.TaskStatusIndex[api.Succeeded])) 12826 12827 return status</span> 12828 } 12829 12830 // Statement returns new statement object 12831 func (ssn *Session) Statement() *Statement <span class="cov0" title="0">{ 12832 return &Statement{ 12833 ssn: ssn, 12834 } 12835 }</span> 12836 12837 // Pipeline the task to the node in the session 12838 func (ssn *Session) Pipeline(task *api.TaskInfo, hostname string) error <span class="cov0" title="0">{ 12839 // Only update status in session 12840 job, found := ssn.Jobs[task.Job] 12841 if found </span><span class="cov0" title="0">{ 12842 if err := job.UpdateTaskStatus(task, api.Pipelined); err != nil </span><span class="cov0" title="0">{ 12843 klog.Errorf("Failed to update task <%v/%v> status to %v in Session <%v>: %v", 12844 task.Namespace, task.Name, api.Pipelined, ssn.UID, err) 12845 return err 12846 }</span> 12847 } else<span class="cov0" title="0"> { 12848 klog.Errorf("Failed to found Job <%s> in Session <%s> index when binding.", 12849 task.Job, ssn.UID) 12850 return fmt.Errorf("failed to find job %s when binding", task.Job) 12851 }</span> 12852 12853 <span class="cov0" title="0">task.NodeName = hostname 12854 12855 if node, found := ssn.Nodes[hostname]; found </span><span class="cov0" title="0">{ 12856 if err := node.AddTask(task); err != nil </span><span class="cov0" title="0">{ 12857 klog.Errorf("Failed to add 
task <%v/%v> to node <%v> in Session <%v>: %v", 12858 task.Namespace, task.Name, hostname, ssn.UID, err) 12859 return err 12860 }</span> 12861 <span class="cov0" title="0">klog.V(3).Infof("After added Task <%v/%v> to Node <%v>: idle <%v>, used <%v>, releasing <%v>", 12862 task.Namespace, task.Name, node.Name, node.Idle, node.Used, node.Releasing)</span> 12863 } else<span class="cov0" title="0"> { 12864 klog.Errorf("Failed to found Node <%s> in Session <%s> index when binding.", 12865 hostname, ssn.UID) 12866 return fmt.Errorf("failed to find node %s", hostname) 12867 }</span> 12868 12869 <span class="cov0" title="0">for _, eh := range ssn.eventHandlers </span><span class="cov0" title="0">{ 12870 if eh.AllocateFunc != nil </span><span class="cov0" title="0">{ 12871 eh.AllocateFunc(&Event{ 12872 Task: task, 12873 }) 12874 }</span> 12875 } 12876 12877 <span class="cov0" title="0">return nil</span> 12878 } 12879 12880 //Allocate the task to the node in the session 12881 func (ssn *Session) Allocate(task *api.TaskInfo, nodeInfo *api.NodeInfo) error <span class="cov0" title="0">{ 12882 podVolumes, err := ssn.cache.GetPodVolumes(task, nodeInfo.Node) 12883 if err != nil </span><span class="cov0" title="0">{ 12884 return err 12885 }</span> 12886 12887 <span class="cov0" title="0">hostname := nodeInfo.Name 12888 if err := ssn.cache.AllocateVolumes(task, hostname, podVolumes); err != nil </span><span class="cov0" title="0">{ 12889 return err 12890 }</span> 12891 12892 <span class="cov0" title="0">task.Pod.Spec.NodeName = hostname 12893 task.PodVolumes = podVolumes 12894 12895 // Only update status in session 12896 job, found := ssn.Jobs[task.Job] 12897 if found </span><span class="cov0" title="0">{ 12898 if err := job.UpdateTaskStatus(task, api.Allocated); err != nil </span><span class="cov0" title="0">{ 12899 klog.Errorf("Failed to update task <%v/%v> status to %v in Session <%v>: %v", 12900 task.Namespace, task.Name, api.Allocated, ssn.UID, err) 12901 return err 12902 
}</span> 12903 } else<span class="cov0" title="0"> { 12904 klog.Errorf("Failed to found Job <%s> in Session <%s> index when binding.", 12905 task.Job, ssn.UID) 12906 return fmt.Errorf("failed to find job %s", task.Job) 12907 }</span> 12908 12909 <span class="cov0" title="0">task.NodeName = hostname 12910 12911 if node, found := ssn.Nodes[hostname]; found </span><span class="cov0" title="0">{ 12912 if err := node.AddTask(task); err != nil </span><span class="cov0" title="0">{ 12913 klog.Errorf("Failed to add task <%v/%v> to node <%v> in Session <%v>: %v", 12914 task.Namespace, task.Name, hostname, ssn.UID, err) 12915 return err 12916 }</span> 12917 <span class="cov0" title="0">klog.V(3).Infof("After allocated Task <%v/%v> to Node <%v>: idle <%v>, used <%v>, releasing <%v>", 12918 task.Namespace, task.Name, node.Name, node.Idle, node.Used, node.Releasing)</span> 12919 } else<span class="cov0" title="0"> { 12920 klog.Errorf("Failed to found Node <%s> in Session <%s> index when binding.", 12921 hostname, ssn.UID) 12922 return fmt.Errorf("failed to find node %s", hostname) 12923 }</span> 12924 12925 // Callbacks 12926 <span class="cov0" title="0">for _, eh := range ssn.eventHandlers </span><span class="cov0" title="0">{ 12927 if eh.AllocateFunc != nil </span><span class="cov0" title="0">{ 12928 eh.AllocateFunc(&Event{ 12929 Task: task, 12930 }) 12931 }</span> 12932 } 12933 12934 <span class="cov0" title="0">if ssn.JobReady(job) </span><span class="cov0" title="0">{ 12935 for _, task := range job.TaskStatusIndex[api.Allocated] </span><span class="cov0" title="0">{ 12936 if err := ssn.dispatch(task, podVolumes); err != nil </span><span class="cov0" title="0">{ 12937 klog.Errorf("Failed to dispatch task <%v/%v>: %v", 12938 task.Namespace, task.Name, err) 12939 return err 12940 }</span> 12941 } 12942 } 12943 12944 <span class="cov0" title="0">return nil</span> 12945 } 12946 12947 func (ssn *Session) dispatch(task *api.TaskInfo, volumes *volumescheduling.PodVolumes) error 
<span class="cov0" title="0">{ 12948 if err := ssn.cache.BindVolumes(task, volumes); err != nil </span><span class="cov0" title="0">{ 12949 return err 12950 }</span> 12951 12952 <span class="cov0" title="0">if err := ssn.cache.Bind(task, task.NodeName); err != nil </span><span class="cov0" title="0">{ 12953 return err 12954 }</span> 12955 12956 // Update status in session 12957 <span class="cov0" title="0">if job, found := ssn.Jobs[task.Job]; found </span><span class="cov0" title="0">{ 12958 if err := job.UpdateTaskStatus(task, api.Binding); err != nil </span><span class="cov0" title="0">{ 12959 klog.Errorf("Failed to update task <%v/%v> status to %v in Session <%v>: %v", 12960 task.Namespace, task.Name, api.Binding, ssn.UID, err) 12961 return err 12962 }</span> 12963 } else<span class="cov0" title="0"> { 12964 klog.Errorf("Failed to found Job <%s> in Session <%s> index when binding.", 12965 task.Job, ssn.UID) 12966 return fmt.Errorf("failed to find job %s", task.Job) 12967 }</span> 12968 12969 <span class="cov0" title="0">metrics.UpdateTaskScheduleDuration(metrics.Duration(task.Pod.CreationTimestamp.Time)) 12970 return nil</span> 12971 } 12972 12973 //Evict the task in the session 12974 func (ssn *Session) Evict(reclaimee *api.TaskInfo, reason string) error <span class="cov0" title="0">{ 12975 if err := ssn.cache.Evict(reclaimee, reason); err != nil </span><span class="cov0" title="0">{ 12976 return err 12977 }</span> 12978 12979 // Update status in session 12980 <span class="cov0" title="0">job, found := ssn.Jobs[reclaimee.Job] 12981 if found </span><span class="cov0" title="0">{ 12982 if err := job.UpdateTaskStatus(reclaimee, api.Releasing); err != nil </span><span class="cov0" title="0">{ 12983 klog.Errorf("Failed to update task <%v/%v> status to %v in Session <%v>: %v", 12984 reclaimee.Namespace, reclaimee.Name, api.Releasing, ssn.UID, err) 12985 return err 12986 }</span> 12987 } else<span class="cov0" title="0"> { 12988 klog.Errorf("Failed to found Job <%s> 
in Session <%s> index when binding.", 12989 reclaimee.Job, ssn.UID) 12990 return fmt.Errorf("failed to find job %s", reclaimee.Job) 12991 }</span> 12992 12993 // Update task in node. 12994 <span class="cov0" title="0">if node, found := ssn.Nodes[reclaimee.NodeName]; found </span><span class="cov0" title="0">{ 12995 if err := node.UpdateTask(reclaimee); err != nil </span><span class="cov0" title="0">{ 12996 klog.Errorf("Failed to update task <%v/%v> in Session <%v>: %v", 12997 reclaimee.Namespace, reclaimee.Name, ssn.UID, err) 12998 return err 12999 }</span> 13000 } 13001 13002 <span class="cov0" title="0">for _, eh := range ssn.eventHandlers </span><span class="cov0" title="0">{ 13003 if eh.DeallocateFunc != nil </span><span class="cov0" title="0">{ 13004 eh.DeallocateFunc(&Event{ 13005 Task: reclaimee, 13006 }) 13007 }</span> 13008 } 13009 13010 <span class="cov0" title="0">return nil</span> 13011 } 13012 13013 // BindPodGroup bind PodGroup to specified cluster 13014 func (ssn *Session) BindPodGroup(job *api.JobInfo, cluster string) error <span class="cov0" title="0">{ 13015 return ssn.cache.BindPodGroup(job, cluster) 13016 }</span> 13017 13018 // UpdatePodGroupCondition update job condition accordingly. 13019 func (ssn *Session) UpdatePodGroupCondition(jobInfo *api.JobInfo, cond *scheduling.PodGroupCondition) error <span class="cov0" title="0">{ 13020 job, ok := ssn.Jobs[jobInfo.UID] 13021 if !ok </span><span class="cov0" title="0">{ 13022 return fmt.Errorf("failed to find job <%s/%s>", jobInfo.Namespace, jobInfo.Name) 13023 }</span> 13024 13025 <span class="cov0" title="0">index := -1 13026 for i, c := range job.PodGroup.Status.Conditions </span><span class="cov0" title="0">{ 13027 if c.Type == cond.Type </span><span class="cov0" title="0">{ 13028 index = i 13029 break</span> 13030 } 13031 } 13032 13033 // Update condition to the new condition. 
13034 <span class="cov0" title="0">if index < 0 </span><span class="cov0" title="0">{ 13035 job.PodGroup.Status.Conditions = append(job.PodGroup.Status.Conditions, *cond) 13036 }</span> else<span class="cov0" title="0"> { 13037 job.PodGroup.Status.Conditions[index] = *cond 13038 }</span> 13039 13040 <span class="cov0" title="0">return nil</span> 13041 } 13042 13043 // AddEventHandler add event handlers 13044 func (ssn *Session) AddEventHandler(eh *EventHandler) <span class="cov0" title="0">{ 13045 ssn.eventHandlers = append(ssn.eventHandlers, eh) 13046 }</span> 13047 13048 // UpdateSchedulerNumaInfo update SchedulerNumaInfo 13049 func (ssn *Session) UpdateSchedulerNumaInfo(AllocatedSets map[string]api.ResNumaSets) <span class="cov0" title="0">{ 13050 ssn.cache.UpdateSchedulerNumaInfo(AllocatedSets) 13051 }</span> 13052 13053 // KubeClient returns the kubernetes client 13054 func (ssn Session) KubeClient() kubernetes.Interface <span class="cov0" title="0">{ 13055 return ssn.kubeClient 13056 }</span> 13057 13058 // InformerFactory returns the scheduler ShareInformerFactory 13059 func (ssn Session) InformerFactory() informers.SharedInformerFactory <span class="cov0" title="0">{ 13060 return ssn.informerFactory 13061 }</span> 13062 13063 //String return nodes and jobs information in the session 13064 func (ssn Session) String() string <span class="cov0" title="0">{ 13065 msg := fmt.Sprintf("Session %v: \n", ssn.UID) 13066 13067 for _, job := range ssn.Jobs </span><span class="cov0" title="0">{ 13068 msg = fmt.Sprintf("%s%v\n", msg, job) 13069 }</span> 13070 13071 <span class="cov0" title="0">for _, node := range ssn.Nodes </span><span class="cov0" title="0">{ 13072 msg = fmt.Sprintf("%s%v\n", msg, node) 13073 }</span> 13074 13075 <span class="cov0" title="0">return msg</span> 13076 } 13077 </pre> 13078 13079 <pre class="file" id="file61" style="display: none">/* 13080 Copyright 2018 The Kubernetes Authors. 
13081 13082 Licensed under the Apache License, Version 2.0 (the "License"); 13083 you may not use this file except in compliance with the License. 13084 You may obtain a copy of the License at 13085 13086 http://www.apache.org/licenses/LICENSE-2.0 13087 13088 Unless required by applicable law or agreed to in writing, software 13089 distributed under the License is distributed on an "AS IS" BASIS, 13090 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13091 See the License for the specific language governing permissions and 13092 limitations under the License. 13093 */ 13094 13095 package framework 13096 13097 import ( 13098 k8sframework "k8s.io/kubernetes/pkg/scheduler/framework/v1alpha1" 13099 13100 "volcano.sh/apis/pkg/apis/scheduling" 13101 "volcano.sh/volcano/pkg/scheduler/api" 13102 ) 13103 13104 // AddJobOrderFn add job order function 13105 func (ssn *Session) AddJobOrderFn(name string, cf api.CompareFn) <span class="cov0" title="0">{ 13106 ssn.jobOrderFns[name] = cf 13107 }</span> 13108 13109 // AddQueueOrderFn add queue order function 13110 func (ssn *Session) AddQueueOrderFn(name string, qf api.CompareFn) <span class="cov0" title="0">{ 13111 ssn.queueOrderFns[name] = qf 13112 }</span> 13113 13114 // AddClusterOrderFn add queue order function 13115 func (ssn *Session) AddClusterOrderFn(name string, qf api.CompareFn) <span class="cov0" title="0">{ 13116 ssn.clusterOrderFns[name] = qf 13117 }</span> 13118 13119 // AddTaskOrderFn add task order function 13120 func (ssn *Session) AddTaskOrderFn(name string, cf api.CompareFn) <span class="cov0" title="0">{ 13121 ssn.taskOrderFns[name] = cf 13122 }</span> 13123 13124 // AddNamespaceOrderFn add namespace order function 13125 func (ssn *Session) AddNamespaceOrderFn(name string, cf api.CompareFn) <span class="cov0" title="0">{ 13126 ssn.namespaceOrderFns[name] = cf 13127 }</span> 13128 13129 // AddPreemptableFn add preemptable function 13130 func (ssn *Session) AddPreemptableFn(name string, 
cf api.EvictableFn) <span class="cov0" title="0">{ 13131 ssn.preemptableFns[name] = cf 13132 }</span> 13133 13134 // AddReclaimableFn add Reclaimable function 13135 func (ssn *Session) AddReclaimableFn(name string, rf api.EvictableFn) <span class="cov0" title="0">{ 13136 ssn.reclaimableFns[name] = rf 13137 }</span> 13138 13139 // AddJobReadyFn add JobReady function 13140 func (ssn *Session) AddJobReadyFn(name string, vf api.ValidateFn) <span class="cov0" title="0">{ 13141 ssn.jobReadyFns[name] = vf 13142 }</span> 13143 13144 // AddJobPipelinedFn add pipelined function 13145 func (ssn *Session) AddJobPipelinedFn(name string, vf api.VoteFn) <span class="cov0" title="0">{ 13146 ssn.jobPipelinedFns[name] = vf 13147 }</span> 13148 13149 // AddPredicateFn add Predicate function 13150 func (ssn *Session) AddPredicateFn(name string, pf api.PredicateFn) <span class="cov0" title="0">{ 13151 ssn.predicateFns[name] = pf 13152 }</span> 13153 13154 // AddBestNodeFn add BestNode function 13155 func (ssn *Session) AddBestNodeFn(name string, pf api.BestNodeFn) <span class="cov0" title="0">{ 13156 ssn.bestNodeFns[name] = pf 13157 }</span> 13158 13159 // AddNodeOrderFn add Node order function 13160 func (ssn *Session) AddNodeOrderFn(name string, pf api.NodeOrderFn) <span class="cov0" title="0">{ 13161 ssn.nodeOrderFns[name] = pf 13162 }</span> 13163 13164 // AddBatchNodeOrderFn add Batch Node order function 13165 func (ssn *Session) AddBatchNodeOrderFn(name string, pf api.BatchNodeOrderFn) <span class="cov0" title="0">{ 13166 ssn.batchNodeOrderFns[name] = pf 13167 }</span> 13168 13169 // AddNodeMapFn add Node map function 13170 func (ssn *Session) AddNodeMapFn(name string, pf api.NodeMapFn) <span class="cov0" title="0">{ 13171 ssn.nodeMapFns[name] = pf 13172 }</span> 13173 13174 // AddNodeReduceFn add Node reduce function 13175 func (ssn *Session) AddNodeReduceFn(name string, pf api.NodeReduceFn) <span class="cov0" title="0">{ 13176 ssn.nodeReduceFns[name] = pf 13177 }</span> 13178 
13179 // AddOverusedFn add overused function 13180 func (ssn *Session) AddOverusedFn(name string, fn api.ValidateFn) <span class="cov0" title="0">{ 13181 ssn.overusedFns[name] = fn 13182 }</span> 13183 13184 // AddUnderusedResourceFn add underused function 13185 func (ssn *Session) AddUnderusedResourceFn(name string, fn api.UnderUsedResourceFn) <span class="cov0" title="0">{ 13186 ssn.underUsedFns[name] = fn 13187 }</span> 13188 13189 // AddJobValidFn add jobvalid function 13190 func (ssn *Session) AddJobValidFn(name string, fn api.ValidateExFn) <span class="cov0" title="0">{ 13191 ssn.jobValidFns[name] = fn 13192 }</span> 13193 13194 // AddJobEnqueueableFn add jobenqueueable function 13195 func (ssn *Session) AddJobEnqueueableFn(name string, fn api.VoteFn) <span class="cov0" title="0">{ 13196 ssn.jobEnqueueableFns[name] = fn 13197 }</span> 13198 13199 // AddJobEnqueuedFn add jobEnqueued function 13200 func (ssn *Session) AddJobEnqueuedFn(name string, fn api.JobEnqueuedFn) <span class="cov0" title="0">{ 13201 ssn.jobEnqueuedFns[name] = fn 13202 }</span> 13203 13204 // AddTargetJobFn add targetjob function 13205 func (ssn *Session) AddTargetJobFn(name string, fn api.TargetJobFn) <span class="cov0" title="0">{ 13206 ssn.targetJobFns[name] = fn 13207 }</span> 13208 13209 // AddReservedNodesFn add reservedNodesFn function 13210 func (ssn *Session) AddReservedNodesFn(name string, fn api.ReservedNodesFn) <span class="cov0" title="0">{ 13211 ssn.reservedNodesFns[name] = fn 13212 }</span> 13213 13214 // AddVictimTasksFns add victimTasksFns function 13215 func (ssn *Session) AddVictimTasksFns(name string, fn api.VictimTasksFn) <span class="cov0" title="0">{ 13216 ssn.victimTasksFns[name] = fn 13217 }</span> 13218 13219 // AddJobStarvingFns add jobStarvingFns function 13220 func (ssn *Session) AddJobStarvingFns(name string, fn api.ValidateFn) <span class="cov0" title="0">{ 13221 ssn.jobStarvingFns[name] = fn 13222 }</span> 13223 13224 // Reclaimable invoke reclaimable 
function of the plugins 13225 func (ssn *Session) Reclaimable(reclaimer *api.TaskInfo, reclaimees []*api.TaskInfo) []*api.TaskInfo <span class="cov0" title="0">{ 13226 var victims []*api.TaskInfo 13227 var init bool 13228 13229 for _, tier := range ssn.Tiers </span><span class="cov0" title="0">{ 13230 for _, plugin := range tier.Plugins </span><span class="cov0" title="0">{ 13231 if !isEnabled(plugin.EnabledReclaimable) </span><span class="cov0" title="0">{ 13232 continue</span> 13233 } 13234 <span class="cov0" title="0">rf, found := ssn.reclaimableFns[plugin.Name] 13235 if !found </span><span class="cov0" title="0">{ 13236 continue</span> 13237 } 13238 13239 <span class="cov0" title="0">candidates, abstain := rf(reclaimer, reclaimees) 13240 if abstain == 0 </span><span class="cov0" title="0">{ 13241 continue</span> 13242 } 13243 <span class="cov0" title="0">if len(candidates) == 0 </span><span class="cov0" title="0">{ 13244 victims = nil 13245 break</span> 13246 } 13247 <span class="cov0" title="0">if !init </span><span class="cov0" title="0">{ 13248 victims = candidates 13249 init = true 13250 }</span> else<span class="cov0" title="0"> { 13251 var intersection []*api.TaskInfo 13252 // Get intersection of victims and candidates. 
13253 for _, v := range victims </span><span class="cov0" title="0">{ 13254 for _, c := range candidates </span><span class="cov0" title="0">{ 13255 if v.UID == c.UID </span><span class="cov0" title="0">{ 13256 intersection = append(intersection, v) 13257 }</span> 13258 } 13259 } 13260 13261 // Update victims to intersection 13262 <span class="cov0" title="0">victims = intersection</span> 13263 } 13264 } 13265 // Plugins in this tier made decision if victims is not nil 13266 <span class="cov0" title="0">if victims != nil </span><span class="cov0" title="0">{ 13267 return victims 13268 }</span> 13269 } 13270 13271 <span class="cov0" title="0">return victims</span> 13272 } 13273 13274 // Preemptable invoke preemptable function of the plugins 13275 func (ssn *Session) Preemptable(preemptor *api.TaskInfo, preemptees []*api.TaskInfo) []*api.TaskInfo <span class="cov0" title="0">{ 13276 var victims []*api.TaskInfo 13277 var init bool 13278 13279 for _, tier := range ssn.Tiers </span><span class="cov0" title="0">{ 13280 for _, plugin := range tier.Plugins </span><span class="cov0" title="0">{ 13281 if !isEnabled(plugin.EnabledPreemptable) </span><span class="cov0" title="0">{ 13282 continue</span> 13283 } 13284 13285 <span class="cov0" title="0">pf, found := ssn.preemptableFns[plugin.Name] 13286 if !found </span><span class="cov0" title="0">{ 13287 continue</span> 13288 } 13289 <span class="cov0" title="0">candidates, abstain := pf(preemptor, preemptees) 13290 if abstain == 0 </span><span class="cov0" title="0">{ 13291 continue</span> 13292 } 13293 // intersection will be nil if length is 0, don't need to do any more check 13294 <span class="cov0" title="0">if len(candidates) == 0 </span><span class="cov0" title="0">{ 13295 victims = nil 13296 break</span> 13297 } 13298 13299 <span class="cov0" title="0">if !init </span><span class="cov0" title="0">{ 13300 victims = candidates 13301 init = true 13302 }</span> else<span class="cov0" title="0"> { 13303 var intersection 
[]*api.TaskInfo 13304 // Get intersection of victims and candidates. 13305 for _, v := range victims </span><span class="cov0" title="0">{ 13306 for _, c := range candidates </span><span class="cov0" title="0">{ 13307 if v.UID == c.UID </span><span class="cov0" title="0">{ 13308 intersection = append(intersection, v) 13309 }</span> 13310 } 13311 } 13312 13313 // Update victims to intersection 13314 <span class="cov0" title="0">victims = intersection</span> 13315 } 13316 } 13317 // Plugins in this tier made decision if victims is not nil 13318 <span class="cov0" title="0">if victims != nil </span><span class="cov0" title="0">{ 13319 return victims 13320 }</span> 13321 } 13322 13323 <span class="cov0" title="0">return victims</span> 13324 } 13325 13326 // Overused invoke overused function of the plugins 13327 func (ssn *Session) Overused(queue *api.QueueInfo) bool <span class="cov0" title="0">{ 13328 for _, tier := range ssn.Tiers </span><span class="cov0" title="0">{ 13329 for _, plugin := range tier.Plugins </span><span class="cov0" title="0">{ 13330 of, found := ssn.overusedFns[plugin.Name] 13331 if !found </span><span class="cov0" title="0">{ 13332 continue</span> 13333 } 13334 <span class="cov0" title="0">if of(queue) </span><span class="cov0" title="0">{ 13335 return true 13336 }</span> 13337 } 13338 } 13339 13340 <span class="cov0" title="0">return false</span> 13341 } 13342 13343 // UnderusedResources invoke underused function of the plugins 13344 // Returns: 13345 // * nil if no `UnderUsedResourceFn` is registered 13346 // * [] if no under-used resources 13347 func (ssn *Session) UnderusedResources(queue *api.QueueInfo) api.ResourceNameList <span class="cov0" title="0">{ 13348 if len(ssn.underUsedFns) == 0 </span><span class="cov0" title="0">{ 13349 return nil 13350 }</span> 13351 <span class="cov0" title="0">for _, tier := range ssn.Tiers </span><span class="cov0" title="0">{ 13352 for _, plugin := range tier.Plugins </span><span class="cov0" title="0">{ 
13353 of, found := ssn.underUsedFns[plugin.Name] 13354 if !found </span><span class="cov0" title="0">{ 13355 continue</span> 13356 } 13357 <span class="cov0" title="0">underUsedResourceList := of(queue) 13358 return underUsedResourceList</span> 13359 } 13360 } 13361 13362 <span class="cov0" title="0">return api.ResourceNameList{}</span> 13363 } 13364 13365 // JobReady invoke jobready function of the plugins 13366 func (ssn *Session) JobReady(obj interface{}) bool <span class="cov0" title="0">{ 13367 for _, tier := range ssn.Tiers </span><span class="cov0" title="0">{ 13368 for _, plugin := range tier.Plugins </span><span class="cov0" title="0">{ 13369 if !isEnabled(plugin.EnabledJobReady) </span><span class="cov0" title="0">{ 13370 continue</span> 13371 } 13372 <span class="cov0" title="0">jrf, found := ssn.jobReadyFns[plugin.Name] 13373 if !found </span><span class="cov0" title="0">{ 13374 continue</span> 13375 } 13376 13377 <span class="cov0" title="0">if !jrf(obj) </span><span class="cov0" title="0">{ 13378 return false 13379 }</span> 13380 } 13381 } 13382 13383 <span class="cov0" title="0">return true</span> 13384 } 13385 13386 // JobPipelined invoke pipelined function of the plugins 13387 // Check if job has get enough resource to run 13388 func (ssn *Session) JobPipelined(obj interface{}) bool <span class="cov0" title="0">{ 13389 var hasFound bool 13390 for _, tier := range ssn.Tiers </span><span class="cov0" title="0">{ 13391 for _, plugin := range tier.Plugins </span><span class="cov0" title="0">{ 13392 if !isEnabled(plugin.EnabledJobPipelined) </span><span class="cov0" title="0">{ 13393 continue</span> 13394 } 13395 <span class="cov0" title="0">jrf, found := ssn.jobPipelinedFns[plugin.Name] 13396 if !found </span><span class="cov0" title="0">{ 13397 continue</span> 13398 } 13399 13400 <span class="cov0" title="0">res := jrf(obj) 13401 if res < 0 </span><span class="cov0" title="0">{ 13402 return false 13403 }</span> 13404 <span class="cov0" title="0">if 
res > 0 </span><span class="cov0" title="0">{ 13405 hasFound = true 13406 }</span> 13407 } 13408 // if plugin exists that votes permit, meanwhile other plugin votes abstention, 13409 // permit job to be pipelined, do not check next tier 13410 <span class="cov0" title="0">if hasFound </span><span class="cov0" title="0">{ 13411 return true 13412 }</span> 13413 } 13414 13415 <span class="cov0" title="0">return true</span> 13416 } 13417 13418 // JobStarving invoke jobStarving function of the plugins 13419 // Check if job still need more resource 13420 func (ssn *Session) JobStarving(obj interface{}) bool <span class="cov0" title="0">{ 13421 var hasFound bool 13422 for _, tier := range ssn.Tiers </span><span class="cov0" title="0">{ 13423 for _, plugin := range tier.Plugins </span><span class="cov0" title="0">{ 13424 if !isEnabled(plugin.EnabledJobStarving) </span><span class="cov0" title="0">{ 13425 continue</span> 13426 } 13427 <span class="cov0" title="0">jrf, found := ssn.jobStarvingFns[plugin.Name] 13428 if !found </span><span class="cov0" title="0">{ 13429 continue</span> 13430 } 13431 <span class="cov0" title="0">hasFound = true 13432 13433 if !jrf(obj) </span><span class="cov0" title="0">{ 13434 return false 13435 }</span> 13436 } 13437 // this tier registered function 13438 <span class="cov0" title="0">if hasFound </span><span class="cov0" title="0">{ 13439 return true 13440 }</span> 13441 } 13442 13443 <span class="cov0" title="0">return false</span> 13444 } 13445 13446 // JobValid invoke jobvalid function of the plugins 13447 func (ssn *Session) JobValid(obj interface{}) *api.ValidateResult <span class="cov0" title="0">{ 13448 for _, tier := range ssn.Tiers </span><span class="cov0" title="0">{ 13449 for _, plugin := range tier.Plugins </span><span class="cov0" title="0">{ 13450 jrf, found := ssn.jobValidFns[plugin.Name] 13451 if !found </span><span class="cov0" title="0">{ 13452 continue</span> 13453 } 13454 13455 <span class="cov0" title="0">if vr := 
jrf(obj); vr != nil && !vr.Pass </span><span class="cov0" title="0">{ 13456 return vr 13457 }</span> 13458 } 13459 } 13460 13461 <span class="cov0" title="0">return nil</span> 13462 } 13463 13464 // JobEnqueueable invoke jobEnqueueableFns function of the plugins 13465 func (ssn *Session) JobEnqueueable(obj interface{}) bool <span class="cov0" title="0">{ 13466 var hasFound bool 13467 for _, tier := range ssn.Tiers </span><span class="cov0" title="0">{ 13468 for _, plugin := range tier.Plugins </span><span class="cov0" title="0">{ 13469 if !isEnabled(plugin.EnabledJobEnqueued) </span><span class="cov0" title="0">{ 13470 continue</span> 13471 } 13472 <span class="cov0" title="0">fn, found := ssn.jobEnqueueableFns[plugin.Name] 13473 if !found </span><span class="cov0" title="0">{ 13474 continue</span> 13475 } 13476 13477 <span class="cov0" title="0">res := fn(obj) 13478 if res < 0 </span><span class="cov0" title="0">{ 13479 return false 13480 }</span> 13481 <span class="cov0" title="0">if res > 0 </span><span class="cov0" title="0">{ 13482 hasFound = true 13483 }</span> 13484 } 13485 // if plugin exists that votes permit, meanwhile other plugin votes abstention, 13486 // permit job to be enqueueable, do not check next tier 13487 <span class="cov0" title="0">if hasFound </span><span class="cov0" title="0">{ 13488 return true 13489 }</span> 13490 } 13491 13492 <span class="cov0" title="0">return true</span> 13493 } 13494 13495 // JobEnqueued invoke jobEnqueuedFns function of the plugins 13496 func (ssn *Session) JobEnqueued(obj interface{}) <span class="cov0" title="0">{ 13497 for _, tier := range ssn.Tiers </span><span class="cov0" title="0">{ 13498 for _, plugin := range tier.Plugins </span><span class="cov0" title="0">{ 13499 if !isEnabled(plugin.EnabledJobEnqueued) </span><span class="cov0" title="0">{ 13500 continue</span> 13501 } 13502 <span class="cov0" title="0">fn, found := ssn.jobEnqueuedFns[plugin.Name] 13503 if !found </span><span class="cov0" title="0">{ 
13504 continue</span> 13505 } 13506 13507 <span class="cov0" title="0">fn(obj)</span> 13508 } 13509 } 13510 } 13511 13512 // TargetJob invoke targetJobFns function of the plugins 13513 func (ssn *Session) TargetJob(jobs []*api.JobInfo) *api.JobInfo <span class="cov0" title="0">{ 13514 for _, tier := range ssn.Tiers </span><span class="cov0" title="0">{ 13515 for _, plugin := range tier.Plugins </span><span class="cov0" title="0">{ 13516 if !isEnabled(plugin.EnabledTargetJob) </span><span class="cov0" title="0">{ 13517 continue</span> 13518 } 13519 <span class="cov0" title="0">fn, found := ssn.targetJobFns[plugin.Name] 13520 if !found </span><span class="cov0" title="0">{ 13521 continue</span> 13522 } 13523 <span class="cov0" title="0">return fn(jobs)</span> 13524 } 13525 } 13526 <span class="cov0" title="0">return nil</span> 13527 } 13528 13529 // VictimTasks invoke ReservedNodes function of the plugins 13530 func (ssn *Session) VictimTasks() []*api.TaskInfo <span class="cov0" title="0">{ 13531 var victims []*api.TaskInfo 13532 var init bool 13533 13534 for _, tier := range ssn.Tiers </span><span class="cov0" title="0">{ 13535 for _, plugin := range tier.Plugins </span><span class="cov0" title="0">{ 13536 if !isEnabled(plugin.EnabledVictim) </span><span class="cov0" title="0">{ 13537 continue</span> 13538 } 13539 13540 <span class="cov0" title="0">pf, found := ssn.victimTasksFns[plugin.Name] 13541 if !found </span><span class="cov0" title="0">{ 13542 continue</span> 13543 } 13544 <span class="cov0" title="0">candidates := pf() 13545 if !init </span><span class="cov0" title="0">{ 13546 victims = candidates 13547 init = true 13548 }</span> else<span class="cov0" title="0"> { 13549 var intersection []*api.TaskInfo 13550 // Get intersection of victims and candidates. 
13551 for _, v := range victims </span><span class="cov0" title="0">{ 13552 for _, c := range candidates </span><span class="cov0" title="0">{ 13553 if v.UID == c.UID </span><span class="cov0" title="0">{ 13554 intersection = append(intersection, v) 13555 }</span> 13556 } 13557 } 13558 13559 // Update victims to intersection 13560 <span class="cov0" title="0">victims = intersection</span> 13561 } 13562 } 13563 // Plugins in this tier made decision if victims is not nil 13564 <span class="cov0" title="0">if victims != nil </span><span class="cov0" title="0">{ 13565 return victims 13566 }</span> 13567 } 13568 13569 <span class="cov0" title="0">return victims</span> 13570 } 13571 13572 // ReservedNodes invoke ReservedNodes function of the plugins 13573 func (ssn *Session) ReservedNodes() <span class="cov0" title="0">{ 13574 for _, tier := range ssn.Tiers </span><span class="cov0" title="0">{ 13575 for _, plugin := range tier.Plugins </span><span class="cov0" title="0">{ 13576 if !isEnabled(plugin.EnabledReservedNodes) </span><span class="cov0" title="0">{ 13577 continue</span> 13578 } 13579 <span class="cov0" title="0">fn, found := ssn.reservedNodesFns[plugin.Name] 13580 if !found </span><span class="cov0" title="0">{ 13581 continue</span> 13582 } 13583 <span class="cov0" title="0">fn()</span> 13584 } 13585 } 13586 } 13587 13588 // JobOrderFn invoke joborder function of the plugins 13589 func (ssn *Session) JobOrderFn(l, r interface{}) bool <span class="cov0" title="0">{ 13590 for _, tier := range ssn.Tiers </span><span class="cov0" title="0">{ 13591 for _, plugin := range tier.Plugins </span><span class="cov0" title="0">{ 13592 if !isEnabled(plugin.EnabledJobOrder) </span><span class="cov0" title="0">{ 13593 continue</span> 13594 } 13595 <span class="cov0" title="0">jof, found := ssn.jobOrderFns[plugin.Name] 13596 if !found </span><span class="cov0" title="0">{ 13597 continue</span> 13598 } 13599 <span class="cov0" title="0">if j := jof(l, r); j != 0 </span><span 
class="cov0" title="0">{ 13600 return j < 0 13601 }</span> 13602 } 13603 } 13604 13605 // If no job order funcs, order job by CreationTimestamp first, then by UID. 13606 <span class="cov0" title="0">lv := l.(*api.JobInfo) 13607 rv := r.(*api.JobInfo) 13608 if lv.CreationTimestamp.Equal(&rv.CreationTimestamp) </span><span class="cov0" title="0">{ 13609 return lv.UID < rv.UID 13610 }</span> 13611 <span class="cov0" title="0">return lv.CreationTimestamp.Before(&rv.CreationTimestamp)</span> 13612 } 13613 13614 // NamespaceOrderFn invoke namespaceorder function of the plugins 13615 func (ssn *Session) NamespaceOrderFn(l, r interface{}) bool <span class="cov0" title="0">{ 13616 for _, tier := range ssn.Tiers </span><span class="cov0" title="0">{ 13617 for _, plugin := range tier.Plugins </span><span class="cov0" title="0">{ 13618 if !isEnabled(plugin.EnabledNamespaceOrder) </span><span class="cov0" title="0">{ 13619 continue</span> 13620 } 13621 <span class="cov0" title="0">nof, found := ssn.namespaceOrderFns[plugin.Name] 13622 if !found </span><span class="cov0" title="0">{ 13623 continue</span> 13624 } 13625 <span class="cov0" title="0">if j := nof(l, r); j != 0 </span><span class="cov0" title="0">{ 13626 return j < 0 13627 }</span> 13628 } 13629 } 13630 13631 // TODO(lminzhw): if all NamespaceOrderFn treat these two namespace as the same, 13632 // we should make the job order have its affect among namespaces. 
13633 // or just schedule namespace one by one 13634 <span class="cov0" title="0">lv := l.(api.NamespaceName) 13635 rv := r.(api.NamespaceName) 13636 return lv < rv</span> 13637 } 13638 13639 // ClusterOrderFn invoke ClusterOrderFn function of the plugins 13640 func (ssn *Session) ClusterOrderFn(l, r interface{}) bool <span class="cov0" title="0">{ 13641 for _, tier := range ssn.Tiers </span><span class="cov0" title="0">{ 13642 for _, plugin := range tier.Plugins </span><span class="cov0" title="0">{ 13643 if !isEnabled(plugin.EnabledClusterOrder) </span><span class="cov0" title="0">{ 13644 continue</span> 13645 } 13646 <span class="cov0" title="0">cof, found := ssn.clusterOrderFns[plugin.Name] 13647 if !found </span><span class="cov0" title="0">{ 13648 continue</span> 13649 } 13650 <span class="cov0" title="0">if j := cof(l, r); j != 0 </span><span class="cov0" title="0">{ 13651 return j < 0 13652 }</span> 13653 } 13654 } 13655 13656 // If no cluster order funcs, order cluster by ClusterID 13657 <span class="cov0" title="0">lv := l.(*scheduling.Cluster) 13658 rv := r.(*scheduling.Cluster) 13659 return lv.Name < rv.Name</span> 13660 } 13661 13662 // QueueOrderFn invoke queueorder function of the plugins 13663 func (ssn *Session) QueueOrderFn(l, r interface{}) bool <span class="cov0" title="0">{ 13664 for _, tier := range ssn.Tiers </span><span class="cov0" title="0">{ 13665 for _, plugin := range tier.Plugins </span><span class="cov0" title="0">{ 13666 if !isEnabled(plugin.EnabledQueueOrder) </span><span class="cov0" title="0">{ 13667 continue</span> 13668 } 13669 <span class="cov0" title="0">qof, found := ssn.queueOrderFns[plugin.Name] 13670 if !found </span><span class="cov0" title="0">{ 13671 continue</span> 13672 } 13673 <span class="cov0" title="0">if j := qof(l, r); j != 0 </span><span class="cov0" title="0">{ 13674 return j < 0 13675 }</span> 13676 } 13677 } 13678 13679 // If no queue order funcs, order queue by CreationTimestamp first, then by UID. 
13680 <span class="cov0" title="0">lv := l.(*api.QueueInfo) 13681 rv := r.(*api.QueueInfo) 13682 if lv.Queue.CreationTimestamp.Equal(&rv.Queue.CreationTimestamp) </span><span class="cov0" title="0">{ 13683 return lv.UID < rv.UID 13684 }</span> 13685 <span class="cov0" title="0">return lv.Queue.CreationTimestamp.Before(&rv.Queue.CreationTimestamp)</span> 13686 } 13687 13688 // TaskCompareFns invoke taskorder function of the plugins 13689 func (ssn *Session) TaskCompareFns(l, r interface{}) int <span class="cov0" title="0">{ 13690 for _, tier := range ssn.Tiers </span><span class="cov0" title="0">{ 13691 for _, plugin := range tier.Plugins </span><span class="cov0" title="0">{ 13692 if !isEnabled(plugin.EnabledTaskOrder) </span><span class="cov0" title="0">{ 13693 continue</span> 13694 } 13695 <span class="cov0" title="0">tof, found := ssn.taskOrderFns[plugin.Name] 13696 if !found </span><span class="cov0" title="0">{ 13697 continue</span> 13698 } 13699 <span class="cov0" title="0">if j := tof(l, r); j != 0 </span><span class="cov0" title="0">{ 13700 return j 13701 }</span> 13702 } 13703 } 13704 13705 <span class="cov0" title="0">return 0</span> 13706 } 13707 13708 // TaskOrderFn invoke taskorder function of the plugins 13709 func (ssn *Session) TaskOrderFn(l, r interface{}) bool <span class="cov0" title="0">{ 13710 if res := ssn.TaskCompareFns(l, r); res != 0 </span><span class="cov0" title="0">{ 13711 return res < 0 13712 }</span> 13713 13714 // If no task order funcs, order task by CreationTimestamp first, then by UID. 
13715 <span class="cov0" title="0">lv := l.(*api.TaskInfo) 13716 rv := r.(*api.TaskInfo) 13717 if lv.Pod.CreationTimestamp.Equal(&rv.Pod.CreationTimestamp) </span><span class="cov0" title="0">{ 13718 return lv.UID < rv.UID 13719 }</span> 13720 <span class="cov0" title="0">return lv.Pod.CreationTimestamp.Before(&rv.Pod.CreationTimestamp)</span> 13721 } 13722 13723 // PredicateFn invoke predicate function of the plugins 13724 func (ssn *Session) PredicateFn(task *api.TaskInfo, node *api.NodeInfo) error <span class="cov0" title="0">{ 13725 for _, tier := range ssn.Tiers </span><span class="cov0" title="0">{ 13726 for _, plugin := range tier.Plugins </span><span class="cov0" title="0">{ 13727 if !isEnabled(plugin.EnabledPredicate) </span><span class="cov0" title="0">{ 13728 continue</span> 13729 } 13730 <span class="cov0" title="0">pfn, found := ssn.predicateFns[plugin.Name] 13731 if !found </span><span class="cov0" title="0">{ 13732 continue</span> 13733 } 13734 <span class="cov0" title="0">err := pfn(task, node) 13735 if err != nil </span><span class="cov0" title="0">{ 13736 return err 13737 }</span> 13738 } 13739 } 13740 <span class="cov0" title="0">return nil</span> 13741 } 13742 13743 // BestNodeFn invoke bestNode function of the plugins 13744 func (ssn *Session) BestNodeFn(task *api.TaskInfo, nodeScores map[float64][]*api.NodeInfo) *api.NodeInfo <span class="cov0" title="0">{ 13745 for _, tier := range ssn.Tiers </span><span class="cov0" title="0">{ 13746 for _, plugin := range tier.Plugins </span><span class="cov0" title="0">{ 13747 if !isEnabled(plugin.EnabledBestNode) </span><span class="cov0" title="0">{ 13748 continue</span> 13749 } 13750 <span class="cov0" title="0">pfn, found := ssn.bestNodeFns[plugin.Name] 13751 if !found </span><span class="cov0" title="0">{ 13752 continue</span> 13753 } 13754 // Only the first plugin that enables and realizes bestNodeFn is allowed to choose best node for task 13755 <span class="cov0" title="0">if bestNode := pfn(task, 
nodeScores); bestNode != nil </span><span class="cov0" title="0">{ 13756 return bestNode 13757 }</span> 13758 } 13759 } 13760 <span class="cov0" title="0">return nil</span> 13761 } 13762 13763 // NodeOrderFn invoke node order function of the plugins 13764 func (ssn *Session) NodeOrderFn(task *api.TaskInfo, node *api.NodeInfo) (float64, error) <span class="cov0" title="0">{ 13765 priorityScore := 0.0 13766 for _, tier := range ssn.Tiers </span><span class="cov0" title="0">{ 13767 for _, plugin := range tier.Plugins </span><span class="cov0" title="0">{ 13768 if !isEnabled(plugin.EnabledNodeOrder) </span><span class="cov0" title="0">{ 13769 continue</span> 13770 } 13771 <span class="cov0" title="0">pfn, found := ssn.nodeOrderFns[plugin.Name] 13772 if !found </span><span class="cov0" title="0">{ 13773 continue</span> 13774 } 13775 <span class="cov0" title="0">score, err := pfn(task, node) 13776 if err != nil </span><span class="cov0" title="0">{ 13777 return 0, err 13778 }</span> 13779 <span class="cov0" title="0">priorityScore += score</span> 13780 } 13781 } 13782 <span class="cov0" title="0">return priorityScore, nil</span> 13783 } 13784 13785 // BatchNodeOrderFn invoke node order function of the plugins 13786 func (ssn *Session) BatchNodeOrderFn(task *api.TaskInfo, nodes []*api.NodeInfo) (map[string]float64, error) <span class="cov0" title="0">{ 13787 priorityScore := make(map[string]float64, len(nodes)) 13788 for _, tier := range ssn.Tiers </span><span class="cov0" title="0">{ 13789 for _, plugin := range tier.Plugins </span><span class="cov0" title="0">{ 13790 if !isEnabled(plugin.EnabledNodeOrder) </span><span class="cov0" title="0">{ 13791 continue</span> 13792 } 13793 <span class="cov0" title="0">pfn, found := ssn.batchNodeOrderFns[plugin.Name] 13794 if !found </span><span class="cov0" title="0">{ 13795 continue</span> 13796 } 13797 <span class="cov0" title="0">score, err := pfn(task, nodes) 13798 if err != nil </span><span class="cov0" title="0">{ 13799 
return nil, err 13800 }</span> 13801 <span class="cov0" title="0">for nodeName, score := range score </span><span class="cov0" title="0">{ 13802 priorityScore[nodeName] += score 13803 }</span> 13804 } 13805 } 13806 <span class="cov0" title="0">return priorityScore, nil</span> 13807 } 13808 13809 func isEnabled(enabled *bool) bool <span class="cov0" title="0">{ 13810 return enabled != nil && *enabled 13811 }</span> 13812 13813 // NodeOrderMapFn invoke node order function of the plugins 13814 func (ssn *Session) NodeOrderMapFn(task *api.TaskInfo, node *api.NodeInfo) (map[string]float64, float64, error) <span class="cov0" title="0">{ 13815 nodeScoreMap := map[string]float64{} 13816 var priorityScore float64 13817 for _, tier := range ssn.Tiers </span><span class="cov0" title="0">{ 13818 for _, plugin := range tier.Plugins </span><span class="cov0" title="0">{ 13819 if !isEnabled(plugin.EnabledNodeOrder) </span><span class="cov0" title="0">{ 13820 continue</span> 13821 } 13822 <span class="cov0" title="0">if pfn, found := ssn.nodeOrderFns[plugin.Name]; found </span><span class="cov0" title="0">{ 13823 score, err := pfn(task, node) 13824 if err != nil </span><span class="cov0" title="0">{ 13825 return nodeScoreMap, priorityScore, err 13826 }</span> 13827 <span class="cov0" title="0">priorityScore += score</span> 13828 } 13829 <span class="cov0" title="0">if pfn, found := ssn.nodeMapFns[plugin.Name]; found </span><span class="cov0" title="0">{ 13830 score, err := pfn(task, node) 13831 if err != nil </span><span class="cov0" title="0">{ 13832 return nodeScoreMap, priorityScore, err 13833 }</span> 13834 <span class="cov0" title="0">nodeScoreMap[plugin.Name] = score</span> 13835 } 13836 } 13837 } 13838 <span class="cov0" title="0">return nodeScoreMap, priorityScore, nil</span> 13839 } 13840 13841 // NodeOrderReduceFn invoke node order function of the plugins 13842 func (ssn *Session) NodeOrderReduceFn(task *api.TaskInfo, pluginNodeScoreMap 
map[string]k8sframework.NodeScoreList) (map[string]float64, error) <span class="cov0" title="0">{ 13843 nodeScoreMap := map[string]float64{} 13844 for _, tier := range ssn.Tiers </span><span class="cov0" title="0">{ 13845 for _, plugin := range tier.Plugins </span><span class="cov0" title="0">{ 13846 if !isEnabled(plugin.EnabledNodeOrder) </span><span class="cov0" title="0">{ 13847 continue</span> 13848 } 13849 <span class="cov0" title="0">pfn, found := ssn.nodeReduceFns[plugin.Name] 13850 if !found </span><span class="cov0" title="0">{ 13851 continue</span> 13852 } 13853 <span class="cov0" title="0">if err := pfn(task, pluginNodeScoreMap[plugin.Name]); err != nil </span><span class="cov0" title="0">{ 13854 return nodeScoreMap, err 13855 }</span> 13856 <span class="cov0" title="0">for _, hp := range pluginNodeScoreMap[plugin.Name] </span><span class="cov0" title="0">{ 13857 nodeScoreMap[hp.Name] += float64(hp.Score) 13858 }</span> 13859 } 13860 } 13861 <span class="cov0" title="0">return nodeScoreMap, nil</span> 13862 } 13863 </pre> 13864 13865 <pre class="file" id="file62" style="display: none">/* 13866 Copyright 2018 The Kubernetes Authors. 13867 13868 Licensed under the Apache License, Version 2.0 (the "License"); 13869 you may not use this file except in compliance with the License. 13870 You may obtain a copy of the License at 13871 13872 http://www.apache.org/licenses/LICENSE-2.0 13873 13874 Unless required by applicable law or agreed to in writing, software 13875 distributed under the License is distributed on an "AS IS" BASIS, 13876 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13877 See the License for the specific language governing permissions and 13878 limitations under the License. 
13879 */ 13880 13881 package framework 13882 13883 import ( 13884 "fmt" 13885 13886 "k8s.io/klog" 13887 13888 "volcano.sh/volcano/pkg/scheduler/api" 13889 "volcano.sh/volcano/pkg/scheduler/metrics" 13890 ) 13891 13892 // Operation type 13893 type Operation int8 13894 13895 const ( 13896 // Evict op 13897 Evict = iota 13898 // Pipeline op 13899 Pipeline 13900 // Allocate op 13901 Allocate 13902 ) 13903 13904 type operation struct { 13905 name Operation 13906 task *api.TaskInfo 13907 reason string 13908 } 13909 13910 // Statement structure 13911 type Statement struct { 13912 operations []operation 13913 ssn *Session 13914 } 13915 13916 // NewStatement returns new statement object 13917 func NewStatement(ssn *Session) *Statement <span class="cov0" title="0">{ 13918 return &Statement{ 13919 ssn: ssn, 13920 } 13921 }</span> 13922 13923 // Evict the pod 13924 func (s *Statement) Evict(reclaimee *api.TaskInfo, reason string) error <span class="cov0" title="0">{ 13925 // Update status in session 13926 if job, found := s.ssn.Jobs[reclaimee.Job]; found </span><span class="cov0" title="0">{ 13927 if err := job.UpdateTaskStatus(reclaimee, api.Releasing); err != nil </span><span class="cov0" title="0">{ 13928 klog.Errorf("Failed to update task <%v/%v> status to %v in Session <%v>: %v", 13929 reclaimee.Namespace, reclaimee.Name, api.Releasing, s.ssn.UID, err) 13930 }</span> 13931 } else<span class="cov0" title="0"> { 13932 klog.Errorf("Failed to found Job <%s> in Session <%s> index when binding.", 13933 reclaimee.Job, s.ssn.UID) 13934 }</span> 13935 13936 // Update task in node. 
13937 <span class="cov0" title="0">if node, found := s.ssn.Nodes[reclaimee.NodeName]; found </span><span class="cov0" title="0">{ 13938 err := node.UpdateTask(reclaimee) 13939 if err != nil </span><span class="cov0" title="0">{ 13940 klog.Errorf("Failed to update task <%v/%v> in node %v for: %s", 13941 reclaimee.Namespace, reclaimee.Name, reclaimee.NodeName, err.Error()) 13942 return err 13943 }</span> 13944 } 13945 13946 <span class="cov0" title="0">for _, eh := range s.ssn.eventHandlers </span><span class="cov0" title="0">{ 13947 if eh.DeallocateFunc != nil </span><span class="cov0" title="0">{ 13948 eh.DeallocateFunc(&Event{ 13949 Task: reclaimee, 13950 }) 13951 }</span> 13952 } 13953 13954 <span class="cov0" title="0">s.operations = append(s.operations, operation{ 13955 name: Evict, 13956 task: reclaimee, 13957 reason: reason, 13958 }) 13959 13960 return nil</span> 13961 } 13962 13963 func (s *Statement) evict(reclaimee *api.TaskInfo, reason string) error <span class="cov0" title="0">{ 13964 if err := s.ssn.cache.Evict(reclaimee, reason); err != nil </span><span class="cov0" title="0">{ 13965 if e := s.unevict(reclaimee); e != nil </span><span class="cov0" title="0">{ 13966 klog.Errorf("Faled to unevict task <%v/%v>: %v.", 13967 reclaimee.Namespace, reclaimee.Name, e) 13968 }</span> 13969 <span class="cov0" title="0">return err</span> 13970 } 13971 13972 <span class="cov0" title="0">return nil</span> 13973 } 13974 13975 func (s *Statement) unevict(reclaimee *api.TaskInfo) error <span class="cov0" title="0">{ 13976 // Update status in session 13977 job, found := s.ssn.Jobs[reclaimee.Job] 13978 if found </span><span class="cov0" title="0">{ 13979 if err := job.UpdateTaskStatus(reclaimee, api.Running); err != nil </span><span class="cov0" title="0">{ 13980 klog.Errorf("Failed to update task <%v/%v> status to %v in Session <%v>: %v", 13981 reclaimee.Namespace, reclaimee.Name, api.Releasing, s.ssn.UID, err) 13982 }</span> 13983 } else<span class="cov0" title="0"> { 
13984 klog.Errorf("Failed to found Job <%s> in Session <%s> index when binding.", 13985 reclaimee.Job, s.ssn.UID) 13986 }</span> 13987 13988 // Update task in node. 13989 <span class="cov0" title="0">if node, found := s.ssn.Nodes[reclaimee.NodeName]; found </span><span class="cov0" title="0">{ 13990 err := node.UpdateTask(reclaimee) 13991 if err != nil </span><span class="cov0" title="0">{ 13992 klog.Errorf("Failed to update task <%v/%v> in node %v for: %s", 13993 reclaimee.Namespace, reclaimee.Name, reclaimee.NodeName, err.Error()) 13994 return err 13995 }</span> 13996 } 13997 13998 <span class="cov0" title="0">for _, eh := range s.ssn.eventHandlers </span><span class="cov0" title="0">{ 13999 if eh.AllocateFunc != nil </span><span class="cov0" title="0">{ 14000 eh.AllocateFunc(&Event{ 14001 Task: reclaimee, 14002 }) 14003 }</span> 14004 } 14005 14006 <span class="cov0" title="0">return nil</span> 14007 } 14008 14009 // Pipeline the task for the node 14010 func (s *Statement) Pipeline(task *api.TaskInfo, hostname string) error <span class="cov0" title="0">{ 14011 job, found := s.ssn.Jobs[task.Job] 14012 if found </span><span class="cov0" title="0">{ 14013 if err := job.UpdateTaskStatus(task, api.Pipelined); err != nil </span><span class="cov0" title="0">{ 14014 klog.Errorf("Failed to update task <%v/%v> status to %v in Session <%v>: %v", 14015 task.Namespace, task.Name, api.Pipelined, s.ssn.UID, err) 14016 }</span> 14017 } else<span class="cov0" title="0"> { 14018 klog.Errorf("Failed to found Job <%s> in Session <%s> index when binding.", 14019 task.Job, s.ssn.UID) 14020 }</span> 14021 14022 <span class="cov0" title="0">task.NodeName = hostname 14023 14024 if node, found := s.ssn.Nodes[hostname]; found </span><span class="cov0" title="0">{ 14025 if err := node.AddTask(task); err != nil </span><span class="cov0" title="0">{ 14026 klog.Errorf("Failed to pipeline task <%v/%v> to node <%v> in Session <%v>: %v", 14027 task.Namespace, task.Name, hostname, s.ssn.UID, err) 
14028 }</span> 14029 <span class="cov0" title="0">klog.V(3).Infof("After pipelined Task <%v/%v> to Node <%v>: idle <%v>, used <%v>, releasing <%v>", 14030 task.Namespace, task.Name, node.Name, node.Idle, node.Used, node.Releasing)</span> 14031 } else<span class="cov0" title="0"> { 14032 klog.Errorf("Failed to found Node <%s> in Session <%s> index when binding.", 14033 hostname, s.ssn.UID) 14034 }</span> 14035 14036 <span class="cov0" title="0">for _, eh := range s.ssn.eventHandlers </span><span class="cov0" title="0">{ 14037 if eh.AllocateFunc != nil </span><span class="cov0" title="0">{ 14038 eh.AllocateFunc(&Event{ 14039 Task: task, 14040 }) 14041 }</span> 14042 } 14043 14044 <span class="cov0" title="0">s.operations = append(s.operations, operation{ 14045 name: Pipeline, 14046 task: task, 14047 }) 14048 14049 return nil</span> 14050 } 14051 14052 func (s *Statement) pipeline(task *api.TaskInfo) {<span class="cov0" title="0"> 14053 }</span> 14054 14055 func (s *Statement) unpipeline(task *api.TaskInfo) error <span class="cov0" title="0">{ 14056 job, found := s.ssn.Jobs[task.Job] 14057 if found </span><span class="cov0" title="0">{ 14058 if err := job.UpdateTaskStatus(task, api.Pending); err != nil </span><span class="cov0" title="0">{ 14059 klog.Errorf("Failed to update task <%v/%v> status to %v in Session <%v>: %v", 14060 task.Namespace, task.Name, api.Pipelined, s.ssn.UID, err) 14061 }</span> 14062 } else<span class="cov0" title="0"> { 14063 klog.Errorf("Failed to found Job <%s> in Session <%s> index when binding.", 14064 task.Job, s.ssn.UID) 14065 }</span> 14066 14067 <span class="cov0" title="0">if node, found := s.ssn.Nodes[task.NodeName]; found </span><span class="cov0" title="0">{ 14068 if err := node.RemoveTask(task); err != nil </span><span class="cov0" title="0">{ 14069 klog.Errorf("Failed to pipeline task <%v/%v> to node <%v> in Session <%v>: %v", 14070 task.Namespace, task.Name, task.NodeName, s.ssn.UID, err) 14071 }</span> 14072 <span class="cov0" 
title="0">klog.V(3).Infof("After pipelined Task <%v/%v> to Node <%v>: idle <%v>, used <%v>, releasing <%v>", 14073 task.Namespace, task.Name, node.Name, node.Idle, node.Used, node.Releasing)</span> 14074 } else<span class="cov0" title="0"> { 14075 klog.Errorf("Failed to found Node <%s> in Session <%s> index when binding.", 14076 task.NodeName, s.ssn.UID) 14077 }</span> 14078 14079 <span class="cov0" title="0">for _, eh := range s.ssn.eventHandlers </span><span class="cov0" title="0">{ 14080 if eh.DeallocateFunc != nil </span><span class="cov0" title="0">{ 14081 eh.DeallocateFunc(&Event{ 14082 Task: task, 14083 }) 14084 }</span> 14085 } 14086 <span class="cov0" title="0">task.NodeName = "" 14087 14088 return nil</span> 14089 } 14090 14091 // Allocate the task to node 14092 func (s *Statement) Allocate(task *api.TaskInfo, nodeInfo *api.NodeInfo) error <span class="cov0" title="0">{ 14093 podVolumes, err := s.ssn.cache.GetPodVolumes(task, nodeInfo.Node) 14094 if err != nil </span><span class="cov0" title="0">{ 14095 return err 14096 }</span> 14097 14098 <span class="cov0" title="0">hostname := nodeInfo.Name 14099 if err := s.ssn.cache.AllocateVolumes(task, hostname, podVolumes); err != nil </span><span class="cov0" title="0">{ 14100 return err 14101 }</span> 14102 14103 <span class="cov0" title="0">task.Pod.Spec.NodeName = hostname 14104 task.PodVolumes = podVolumes 14105 14106 // Only update status in session 14107 job, found := s.ssn.Jobs[task.Job] 14108 if found </span><span class="cov0" title="0">{ 14109 if err := job.UpdateTaskStatus(task, api.Allocated); err != nil </span><span class="cov0" title="0">{ 14110 klog.Errorf("Failed to update task <%v/%v> status to %v in Session <%v>: %v", 14111 task.Namespace, task.Name, api.Allocated, s.ssn.UID, err) 14112 return err 14113 }</span> 14114 } else<span class="cov0" title="0"> { 14115 klog.Errorf("Failed to found Job <%s> in Session <%s> index when binding.", 14116 task.Job, s.ssn.UID) 14117 return fmt.Errorf("failed 
to find job %s", task.Job) 14118 }</span> 14119 14120 <span class="cov0" title="0">task.NodeName = hostname 14121 if node, found := s.ssn.Nodes[hostname]; found </span><span class="cov0" title="0">{ 14122 if err := node.AddTask(task); err != nil </span><span class="cov0" title="0">{ 14123 klog.Errorf("Failed to add task <%v/%v> to node <%v> in Session <%v>: %v", 14124 task.Namespace, task.Name, hostname, s.ssn.UID, err) 14125 return err 14126 }</span> 14127 <span class="cov0" title="0">klog.V(3).Infof("After allocated Task <%v/%v> to Node <%v>: idle <%v>, used <%v>, releasing <%v>", 14128 task.Namespace, task.Name, node.Name, node.Idle, node.Used, node.Releasing)</span> 14129 } else<span class="cov0" title="0"> { 14130 klog.Errorf("Failed to found Node <%s> in Session <%s> index when binding.", 14131 hostname, s.ssn.UID) 14132 return fmt.Errorf("failed to find node %s", hostname) 14133 }</span> 14134 14135 // Callbacks 14136 <span class="cov0" title="0">for _, eh := range s.ssn.eventHandlers </span><span class="cov0" title="0">{ 14137 if eh.AllocateFunc != nil </span><span class="cov0" title="0">{ 14138 eh.AllocateFunc(&Event{ 14139 Task: task, 14140 }) 14141 }</span> 14142 } 14143 14144 // Update status in session 14145 <span class="cov0" title="0">klog.V(3).Info("Allocating operations ...") 14146 s.operations = append(s.operations, operation{ 14147 name: Allocate, 14148 task: task, 14149 }) 14150 14151 return nil</span> 14152 } 14153 14154 func (s *Statement) allocate(task *api.TaskInfo) error <span class="cov0" title="0">{ 14155 if err := s.ssn.cache.BindVolumes(task, task.PodVolumes); err != nil </span><span class="cov0" title="0">{ 14156 return err 14157 }</span> 14158 14159 <span class="cov0" title="0">if err := s.ssn.cache.Bind(task, task.NodeName); err != nil </span><span class="cov0" title="0">{ 14160 return err 14161 }</span> 14162 14163 // Update status in session 14164 <span class="cov0" title="0">if job, found := s.ssn.Jobs[task.Job]; found 
</span><span class="cov0" title="0">{ 14165 if err := job.UpdateTaskStatus(task, api.Binding); err != nil </span><span class="cov0" title="0">{ 14166 klog.Errorf("Failed to update task <%v/%v> status to %v in Session <%v>: %v", 14167 task.Namespace, task.Name, api.Binding, s.ssn.UID, err) 14168 return err 14169 }</span> 14170 } else<span class="cov0" title="0"> { 14171 klog.Errorf("Failed to found Job <%s> in Session <%s> index when binding.", 14172 task.Job, s.ssn.UID) 14173 return fmt.Errorf("failed to find job %s", task.Job) 14174 }</span> 14175 14176 <span class="cov0" title="0">metrics.UpdateTaskScheduleDuration(metrics.Duration(task.Pod.CreationTimestamp.Time)) 14177 return nil</span> 14178 } 14179 14180 // unallocate the pod for task 14181 func (s *Statement) unallocate(task *api.TaskInfo) error <span class="cov0" title="0">{ 14182 // Update status in session 14183 job, found := s.ssn.Jobs[task.Job] 14184 if found </span><span class="cov0" title="0">{ 14185 if err := job.UpdateTaskStatus(task, api.Pending); err != nil </span><span class="cov0" title="0">{ 14186 klog.Errorf("Failed to update task <%v/%v> status to %v in Session <%v>: %v", 14187 task.Namespace, task.Name, api.Pending, s.ssn.UID, err) 14188 }</span> 14189 } else<span class="cov0" title="0"> { 14190 klog.Errorf("Failed to find Job <%s> in Session <%s> index when unallocating.", 14191 task.Job, s.ssn.UID) 14192 }</span> 14193 14194 <span class="cov0" title="0">if node, found := s.ssn.Nodes[task.NodeName]; found </span><span class="cov0" title="0">{ 14195 klog.V(3).Infof("Remove Task <%v> on node <%v>", task.Name, task.NodeName) 14196 err := node.RemoveTask(task) 14197 if err != nil </span><span class="cov0" title="0">{ 14198 klog.Errorf("Failed to remove Task <%v> on node <%v>: %s", task.Name, task.NodeName, err.Error()) 14199 }</span> 14200 } 14201 14202 <span class="cov0" title="0">for _, eh := range s.ssn.eventHandlers </span><span class="cov0" title="0">{ 14203 if eh.DeallocateFunc != nil 
</span><span class="cov0" title="0">{ 14204 eh.DeallocateFunc(&Event{ 14205 Task: task, 14206 }) 14207 }</span> 14208 } 14209 <span class="cov0" title="0">task.NodeName = "" 14210 14211 return nil</span> 14212 } 14213 14214 // Discard operation for evict, pipeline and allocate 14215 func (s *Statement) Discard() <span class="cov0" title="0">{ 14216 klog.V(3).Info("Discarding operations ...") 14217 for i := len(s.operations) - 1; i >= 0; i-- </span><span class="cov0" title="0">{ 14218 op := s.operations[i] 14219 op.task.GenerateLastTxContext() 14220 switch op.name </span>{ 14221 case Evict:<span class="cov0" title="0"> 14222 err := s.unevict(op.task) 14223 if err != nil </span><span class="cov0" title="0">{ 14224 klog.Errorf("Failed to unevict task: %s", err.Error()) 14225 }</span> 14226 case Pipeline:<span class="cov0" title="0"> 14227 err := s.unpipeline(op.task) 14228 if err != nil </span><span class="cov0" title="0">{ 14229 klog.Errorf("Failed to unpipeline task: %s", err.Error()) 14230 }</span> 14231 case Allocate:<span class="cov0" title="0"> 14232 err := s.unallocate(op.task) 14233 if err != nil </span><span class="cov0" title="0">{ 14234 klog.Errorf("Failed to unallocate task: %s", err.Error()) 14235 }</span> 14236 } 14237 } 14238 } 14239 14240 // Commit operation for evict and pipeline 14241 func (s *Statement) Commit() <span class="cov0" title="0">{ 14242 klog.V(3).Info("Committing operations ...") 14243 for _, op := range s.operations </span><span class="cov0" title="0">{ 14244 op.task.ClearLastTxContext() 14245 switch op.name </span>{ 14246 case Evict:<span class="cov0" title="0"> 14247 err := s.evict(op.task, op.reason) 14248 if err != nil </span><span class="cov0" title="0">{ 14249 klog.Errorf("Failed to evict task: %s", err.Error()) 14250 }</span> 14251 case Pipeline:<span class="cov0" title="0"> 14252 s.pipeline(op.task)</span> 14253 case Allocate:<span class="cov0" title="0"> 14254 err := s.allocate(op.task) 14255 if err != nil </span><span 
class="cov0" title="0">{ 14256 klog.Errorf("Failed to allocate task: for %s", err.Error()) 14257 }</span> 14258 } 14259 } 14260 } 14261 </pre> 14262 14263 <pre class="file" id="file63" style="display: none">/* 14264 Copyright 2019 The Volcano Authors. 14265 14266 Licensed under the Apache License, Version 2.0 (the "License"); 14267 you may not use this file except in compliance with the License. 14268 You may obtain a copy of the License at 14269 14270 http://www.apache.org/licenses/LICENSE-2.0 14271 14272 Unless required by applicable law or agreed to in writing, software 14273 distributed under the License is distributed on an "AS IS" BASIS, 14274 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14275 See the License for the specific language governing permissions and 14276 limitations under the License. 14277 */ 14278 14279 package binpack 14280 14281 import ( 14282 "fmt" 14283 "strings" 14284 14285 v1 "k8s.io/api/core/v1" 14286 "k8s.io/klog" 14287 "k8s.io/kubernetes/pkg/scheduler/framework/v1alpha1" 14288 14289 "volcano.sh/volcano/pkg/scheduler/api" 14290 "volcano.sh/volcano/pkg/scheduler/framework" 14291 ) 14292 14293 const ( 14294 // PluginName indicates name of volcano scheduler plugin. 14295 PluginName = "binpack" 14296 ) 14297 14298 const ( 14299 // BinpackWeight is the key for providing Binpack Priority Weight in YAML 14300 BinpackWeight = "binpack.weight" 14301 // BinpackCPU is the key for weight of cpu 14302 BinpackCPU = "binpack.cpu" 14303 // BinpackMemory is the key for weight of memory 14304 BinpackMemory = "binpack.memory" 14305 14306 // BinpackResources is the key for additional resource key name 14307 BinpackResources = "binpack.resources" 14308 // BinpackResourcesPrefix is the key prefix for additional resource key name 14309 BinpackResourcesPrefix = BinpackResources + "." 
14310 14311 resourceFmt = "%s[%d]" 14312 ) 14313 14314 type priorityWeight struct { 14315 BinPackingWeight int 14316 BinPackingCPU int 14317 BinPackingMemory int 14318 BinPackingResources map[v1.ResourceName]int 14319 } 14320 14321 func (w *priorityWeight) String() string <span class="cov0" title="0">{ 14322 length := 3 14323 if extendLength := len(w.BinPackingResources); extendLength == 0 </span><span class="cov0" title="0">{ 14324 length++ 14325 }</span> else<span class="cov0" title="0"> { 14326 length += extendLength 14327 }</span> 14328 <span class="cov0" title="0">msg := make([]string, 0, length) 14329 msg = append(msg, 14330 fmt.Sprintf(resourceFmt, BinpackWeight, w.BinPackingWeight), 14331 fmt.Sprintf(resourceFmt, BinpackCPU, w.BinPackingCPU), 14332 fmt.Sprintf(resourceFmt, BinpackMemory, w.BinPackingMemory), 14333 ) 14334 14335 if len(w.BinPackingResources) == 0 </span><span class="cov0" title="0">{ 14336 msg = append(msg, "no extend resources.") 14337 }</span> else<span class="cov0" title="0"> { 14338 for name, weight := range w.BinPackingResources </span><span class="cov0" title="0">{ 14339 msg = append(msg, fmt.Sprintf(resourceFmt, name, weight)) 14340 }</span> 14341 } 14342 <span class="cov0" title="0">return strings.Join(msg, ", ")</span> 14343 } 14344 14345 type binpackPlugin struct { 14346 // Arguments given for the plugin 14347 weight priorityWeight 14348 } 14349 14350 //New function returns prioritizePlugin object 14351 func New(aruguments framework.Arguments) framework.Plugin <span class="cov8" title="1">{ 14352 weight := calculateWeight(aruguments) 14353 return &binpackPlugin{weight: weight} 14354 }</span> 14355 14356 func calculateWeight(args framework.Arguments) priorityWeight <span class="cov8" title="1">{ 14357 /* 14358 User Should give priorityWeight in this format(binpack.weight, binpack.cpu, binpack.memory). 14359 Support change the weight about cpu, memory and additional resource by arguments. 
14360 14361 actions: "enqueue, reclaim, allocate, backfill, preempt" 14362 tiers: 14363 - plugins: 14364 - name: binpack 14365 arguments: 14366 binpack.weight: 10 14367 binpack.cpu: 5 14368 binpack.memory: 1 14369 binpack.resources: nvidia.com/gpu, example.com/foo 14370 binpack.resources.nvidia.com/gpu: 2 14371 binpack.resources.example.com/foo: 3 14372 */ 14373 // Values are initialized to 1. 14374 weight := priorityWeight{ 14375 BinPackingWeight: 1, 14376 BinPackingCPU: 1, 14377 BinPackingMemory: 1, 14378 BinPackingResources: make(map[v1.ResourceName]int), 14379 } 14380 14381 // Checks whether binpack.weight is provided or not, if given, modifies the value in weight struct. 14382 args.GetInt(&weight.BinPackingWeight, BinpackWeight) 14383 // Checks whether binpack.cpu is provided or not, if given, modifies the value in weight struct. 14384 args.GetInt(&weight.BinPackingCPU, BinpackCPU) 14385 if weight.BinPackingCPU < 0 </span><span class="cov0" title="0">{ 14386 weight.BinPackingCPU = 1 14387 }</span> 14388 // Checks whether binpack.memory is provided or not, if given, modifies the value in weight struct. 
14389 <span class="cov8" title="1">args.GetInt(&weight.BinPackingMemory, BinpackMemory) 14390 if weight.BinPackingMemory < 0 </span><span class="cov0" title="0">{ 14391 weight.BinPackingMemory = 1 14392 }</span> 14393 14394 <span class="cov8" title="1">resourcesStr := args[BinpackResources] 14395 resources := strings.Split(resourcesStr, ",") 14396 for _, resource := range resources </span><span class="cov8" title="1">{ 14397 resource = strings.TrimSpace(resource) 14398 if resource == "" </span><span class="cov0" title="0">{ 14399 continue</span> 14400 } 14401 14402 // binpack.resources.[ResourceName] 14403 <span class="cov8" title="1">resourceKey := BinpackResourcesPrefix + resource 14404 resourceWeight := 1 14405 args.GetInt(&resourceWeight, resourceKey) 14406 if resourceWeight < 0 </span><span class="cov8" title="1">{ 14407 resourceWeight = 1 14408 }</span> 14409 <span class="cov8" title="1">weight.BinPackingResources[v1.ResourceName(resource)] = resourceWeight</span> 14410 } 14411 14412 <span class="cov8" title="1">return weight</span> 14413 } 14414 14415 func (bp *binpackPlugin) Name() string <span class="cov8" title="1">{ 14416 return PluginName 14417 }</span> 14418 14419 func (bp *binpackPlugin) OnSessionOpen(ssn *framework.Session) <span class="cov8" title="1">{ 14420 klog.V(4).Infof("Enter binpack plugin ...") 14421 if klog.V(4) </span><span class="cov0" title="0">{ 14422 defer func() </span><span class="cov0" title="0">{ 14423 klog.V(4).Infof("Leaving binpack plugin. 
%s ...", bp.weight.String()) 14424 }</span>() 14425 14426 <span class="cov0" title="0">notFoundResource := []string{} 14427 for resource := range bp.weight.BinPackingResources </span><span class="cov0" title="0">{ 14428 found := false 14429 for _, nodeInfo := range ssn.Nodes </span><span class="cov0" title="0">{ 14430 if nodeInfo.Allocatable.Get(resource) > 0 </span><span class="cov0" title="0">{ 14431 found = true 14432 break</span> 14433 } 14434 } 14435 <span class="cov0" title="0">if !found </span><span class="cov0" title="0">{ 14436 notFoundResource = append(notFoundResource, string(resource)) 14437 }</span> 14438 } 14439 <span class="cov0" title="0">klog.V(4).Infof("resources [%s] record in weight but not found on any node", strings.Join(notFoundResource, ", "))</span> 14440 } 14441 14442 <span class="cov8" title="1">nodeOrderFn := func(task *api.TaskInfo, node *api.NodeInfo) (float64, error) </span><span class="cov8" title="1">{ 14443 binPackingScore := BinPackingScore(task, node, bp.weight) 14444 14445 klog.V(4).Infof("Binpack score for Task %s/%s on node %s is: %v", task.Namespace, task.Name, node.Name, binPackingScore) 14446 return binPackingScore, nil 14447 }</span> 14448 <span class="cov8" title="1">if bp.weight.BinPackingWeight != 0 </span><span class="cov8" title="1">{ 14449 ssn.AddNodeOrderFn(bp.Name(), nodeOrderFn) 14450 }</span> else<span class="cov0" title="0"> { 14451 klog.Infof("binpack weight is zero, skip node order function") 14452 }</span> 14453 } 14454 14455 func (bp *binpackPlugin) OnSessionClose(ssn *framework.Session) {<span class="cov8" title="1"> 14456 }</span> 14457 14458 // BinPackingScore use the best fit polices during scheduling. 
14459 // Goals: 14460 // - Schedule Jobs using BestFit Policy using Resource Bin Packing Priority Function 14461 // - Reduce Fragmentation of scarce resources on the Cluster 14462 func BinPackingScore(task *api.TaskInfo, node *api.NodeInfo, weight priorityWeight) float64 <span class="cov8" title="1">{ 14463 score := 0.0 14464 weightSum := 0 14465 requested := task.Resreq 14466 allocatable := node.Allocatable 14467 used := node.Used 14468 14469 for _, resource := range requested.ResourceNames() </span><span class="cov8" title="1">{ 14470 request := requested.Get(resource) 14471 if request == 0 </span><span class="cov0" title="0">{ 14472 continue</span> 14473 } 14474 <span class="cov8" title="1">allocate := allocatable.Get(resource) 14475 nodeUsed := used.Get(resource) 14476 14477 resourceWeight := 0 14478 found := false 14479 switch resource </span>{ 14480 case v1.ResourceCPU:<span class="cov8" title="1"> 14481 resourceWeight = weight.BinPackingCPU 14482 found = true</span> 14483 case v1.ResourceMemory:<span class="cov8" title="1"> 14484 resourceWeight = weight.BinPackingMemory 14485 found = true</span> 14486 default:<span class="cov8" title="1"> 14487 resourceWeight, found = weight.BinPackingResources[resource]</span> 14488 } 14489 <span class="cov8" title="1">if !found </span><span class="cov8" title="1">{ 14490 continue</span> 14491 } 14492 14493 <span class="cov8" title="1">resourceScore := ResourceBinPackingScore(request, allocate, nodeUsed, resourceWeight) 14494 klog.V(5).Infof("task %s/%s on node %s resource %s, need %f, used %f, allocatable %f, weight %d, score %f", task.Namespace, task.Name, node.Name, resource, request, nodeUsed, allocate, resourceWeight, resourceScore) 14495 14496 score += resourceScore 14497 weightSum += resourceWeight</span> 14498 } 14499 14500 // mapping the result from [0, weightSum] to [0, 10(MaxPriority)] 14501 <span class="cov8" title="1">if weightSum > 0 </span><span class="cov8" title="1">{ 14502 score /= float64(weightSum) 14503 
}</span> 14504 <span class="cov8" title="1">score *= float64(v1alpha1.MaxNodeScore * int64(weight.BinPackingWeight)) 14505 14506 return score</span> 14507 } 14508 14509 // ResourceBinPackingScore calculate the binpack score for resource with provided info 14510 func ResourceBinPackingScore(requested, capacity, used float64, weight int) float64 <span class="cov8" title="1">{ 14511 if capacity == 0 || weight == 0 </span><span class="cov8" title="1">{ 14512 return 0 14513 }</span> 14514 14515 <span class="cov8" title="1">usedFinally := requested + used 14516 if usedFinally > capacity </span><span class="cov8" title="1">{ 14517 return 0 14518 }</span> 14519 14520 <span class="cov8" title="1">score := usedFinally * float64(weight) / capacity 14521 return score</span> 14522 } 14523 </pre> 14524 14525 <pre class="file" id="file64" style="display: none">/* 14526 Copyright 2018 The Kubernetes Authors. 14527 14528 Licensed under the Apache License, Version 2.0 (the "License"); 14529 you may not use this file except in compliance with the License. 14530 You may obtain a copy of the License at 14531 14532 http://www.apache.org/licenses/LICENSE-2.0 14533 14534 Unless required by applicable law or agreed to in writing, software 14535 distributed under the License is distributed on an "AS IS" BASIS, 14536 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14537 See the License for the specific language governing permissions and 14538 limitations under the License. 14539 */ 14540 14541 package drf 14542 14543 import ( 14544 "fmt" 14545 "math" 14546 "strconv" 14547 "strings" 14548 14549 v1 "k8s.io/api/core/v1" 14550 "k8s.io/klog" 14551 14552 "volcano.sh/volcano/pkg/scheduler/api" 14553 "volcano.sh/volcano/pkg/scheduler/api/helpers" 14554 "volcano.sh/volcano/pkg/scheduler/framework" 14555 "volcano.sh/volcano/pkg/scheduler/metrics" 14556 "volcano.sh/volcano/pkg/scheduler/plugins/util" 14557 ) 14558 14559 // PluginName indicates name of volcano scheduler plugin. 
14560 const PluginName = "drf" 14561 14562 var shareDelta = 0.000001 14563 14564 // hierarchicalNode represents the node hierarchy 14565 // and the corresponding weight and drf attribute 14566 type hierarchicalNode struct { 14567 parent *hierarchicalNode 14568 attr *drfAttr 14569 // If the node is a leaf node, 14570 // request represents the request of the job. 14571 request *api.Resource 14572 weight float64 14573 saturated bool 14574 hierarchy string 14575 children map[string]*hierarchicalNode 14576 } 14577 14578 func (node *hierarchicalNode) Clone(parent *hierarchicalNode) *hierarchicalNode <span class="cov0" title="0">{ 14579 newNode := &hierarchicalNode{ 14580 parent: parent, 14581 attr: &drfAttr{ 14582 share: node.attr.share, 14583 dominantResource: node.attr.dominantResource, 14584 allocated: node.attr.allocated.Clone(), 14585 }, 14586 request: node.request.Clone(), 14587 weight: node.weight, 14588 saturated: node.saturated, 14589 hierarchy: node.hierarchy, 14590 children: nil, 14591 } 14592 if node.children != nil </span><span class="cov0" title="0">{ 14593 newNode.children = map[string]*hierarchicalNode{} 14594 for _, child := range node.children </span><span class="cov0" title="0">{ 14595 newNode.children[child.hierarchy] = child.Clone(newNode) 14596 }</span> 14597 } 14598 <span class="cov0" title="0">return newNode</span> 14599 } 14600 14601 // resourceSaturated returns true if any resource of the job is saturated or the job demands fully allocated resource 14602 func resourceSaturated(allocated *api.Resource, 14603 jobRequest *api.Resource, demandingResources map[v1.ResourceName]bool) bool <span class="cov8" title="1">{ 14604 for _, rn := range allocated.ResourceNames() </span><span class="cov8" title="1">{ 14605 if allocated.Get(rn) != 0 && jobRequest.Get(rn) != 0 && 14606 allocated.Get(rn) >= jobRequest.Get(rn) </span><span class="cov0" title="0">{ 14607 return true 14608 }</span> 14609 <span class="cov8" title="1">if !demandingResources[rn] && 
jobRequest.Get(rn) != 0 </span><span class="cov8" title="1">{ 14610 return true 14611 }</span> 14612 } 14613 <span class="cov8" title="1">return false</span> 14614 } 14615 14616 type drfAttr struct { 14617 share float64 14618 dominantResource string 14619 allocated *api.Resource 14620 } 14621 14622 func (attr *drfAttr) String() string <span class="cov8" title="1">{ 14623 return fmt.Sprintf("dominant resource <%s>, dominant share %f, allocated %s", 14624 attr.dominantResource, attr.share, attr.allocated) 14625 }</span> 14626 14627 type drfPlugin struct { 14628 totalResource *api.Resource 14629 totalAllocated *api.Resource 14630 14631 // Key is Job ID 14632 jobAttrs map[api.JobID]*drfAttr 14633 14634 // map[namespaceName]->attr 14635 namespaceOpts map[string]*drfAttr 14636 14637 // hierarchical tree root 14638 hierarchicalRoot *hierarchicalNode 14639 14640 // Arguments given for the plugin 14641 pluginArguments framework.Arguments 14642 } 14643 14644 // New return drf plugin 14645 func New(arguments framework.Arguments) framework.Plugin <span class="cov8" title="1">{ 14646 return &drfPlugin{ 14647 totalResource: api.EmptyResource(), 14648 totalAllocated: api.EmptyResource(), 14649 jobAttrs: map[api.JobID]*drfAttr{}, 14650 namespaceOpts: map[string]*drfAttr{}, 14651 hierarchicalRoot: &hierarchicalNode{ 14652 attr: &drfAttr{allocated: api.EmptyResource()}, 14653 request: api.EmptyResource(), 14654 hierarchy: "root", 14655 weight: 1, 14656 children: map[string]*hierarchicalNode{}, 14657 }, 14658 pluginArguments: arguments, 14659 } 14660 }</span> 14661 14662 func (drf *drfPlugin) Name() string <span class="cov8" title="1">{ 14663 return PluginName 14664 }</span> 14665 14666 // HierarchyEnabled returns if hierarchy is enabled 14667 func (drf *drfPlugin) HierarchyEnabled(ssn *framework.Session) bool <span class="cov8" title="1">{ 14668 for _, tier := range ssn.Tiers </span><span class="cov8" title="1">{ 14669 for _, plugin := range tier.Plugins </span><span class="cov8" 
title="1">{ 14670 if plugin.Name != PluginName </span><span class="cov0" title="0">{ 14671 continue</span> 14672 } 14673 <span class="cov8" title="1">return plugin.EnabledHierarchy != nil && *plugin.EnabledHierarchy</span> 14674 } 14675 } 14676 <span class="cov0" title="0">return false</span> 14677 } 14678 14679 // NamespaceOrderEnabled returns the NamespaceOrder for this plugin is enabled in this session or not 14680 func (drf *drfPlugin) NamespaceOrderEnabled(ssn *framework.Session) bool <span class="cov8" title="1">{ 14681 for _, tier := range ssn.Tiers </span><span class="cov8" title="1">{ 14682 for _, plugin := range tier.Plugins </span><span class="cov8" title="1">{ 14683 if plugin.Name != PluginName </span><span class="cov0" title="0">{ 14684 continue</span> 14685 } 14686 <span class="cov8" title="1">return plugin.EnabledNamespaceOrder != nil && *plugin.EnabledNamespaceOrder</span> 14687 } 14688 } 14689 <span class="cov0" title="0">return false</span> 14690 } 14691 14692 func (drf *drfPlugin) compareQueues(root *hierarchicalNode, lqueue *api.QueueInfo, rqueue *api.QueueInfo) float64 <span class="cov8" title="1">{ 14693 lnode := root 14694 lpaths := strings.Split(lqueue.Hierarchy, "/") 14695 rnode := root 14696 rpaths := strings.Split(rqueue.Hierarchy, "/") 14697 depth := 0 14698 if len(lpaths) < len(rpaths) </span><span class="cov8" title="1">{ 14699 depth = len(lpaths) 14700 }</span> else<span class="cov8" title="1"> { 14701 depth = len(rpaths) 14702 }</span> 14703 <span class="cov8" title="1">for i := 0; i < depth; i++ </span><span class="cov8" title="1">{ 14704 // Saturated nodes have minumun prioirty, 14705 // so that demanding nodes will be poped first. 
14706 if !lnode.saturated && rnode.saturated </span><span class="cov0" title="0">{ 14707 return -1 14708 }</span> 14709 <span class="cov8" title="1">if lnode.saturated && !rnode.saturated </span><span class="cov0" title="0">{ 14710 return 1 14711 }</span> 14712 <span class="cov8" title="1">if lnode.attr.share/lnode.weight == rnode.attr.share/rnode.weight </span><span class="cov8" title="1">{ 14713 if i < depth-1 </span><span class="cov8" title="1">{ 14714 lnode = lnode.children[lpaths[i+1]] 14715 rnode = rnode.children[rpaths[i+1]] 14716 }</span> 14717 } else<span class="cov8" title="1"> { 14718 return lnode.attr.share/lnode.weight - rnode.attr.share/rnode.weight 14719 }</span> 14720 } 14721 <span class="cov8" title="1">return 0</span> 14722 } 14723 14724 func (drf *drfPlugin) OnSessionOpen(ssn *framework.Session) <span class="cov8" title="1">{ 14725 // Prepare scheduling data for this session. 14726 drf.totalResource.Add(ssn.TotalResource) 14727 14728 klog.V(4).Infof("Total Allocatable %s", drf.totalResource) 14729 14730 namespaceOrderEnabled := drf.NamespaceOrderEnabled(ssn) 14731 hierarchyEnabled := drf.HierarchyEnabled(ssn) 14732 14733 for _, job := range ssn.Jobs </span><span class="cov8" title="1">{ 14734 attr := &drfAttr{ 14735 allocated: api.EmptyResource(), 14736 } 14737 14738 for status, tasks := range job.TaskStatusIndex </span><span class="cov8" title="1">{ 14739 if api.AllocatedStatus(status) </span><span class="cov0" title="0">{ 14740 for _, t := range tasks </span><span class="cov0" title="0">{ 14741 attr.allocated.Add(t.Resreq) 14742 }</span> 14743 } 14744 } 14745 14746 // Calculate the init share of Job 14747 <span class="cov8" title="1">drf.updateJobShare(job.Namespace, job.Name, attr) 14748 14749 drf.jobAttrs[job.UID] = attr 14750 14751 if namespaceOrderEnabled </span><span class="cov0" title="0">{ 14752 nsOpts, found := drf.namespaceOpts[job.Namespace] 14753 if !found </span><span class="cov0" title="0">{ 14754 nsOpts = &drfAttr{ 14755 
allocated: api.EmptyResource(), 14756 } 14757 drf.namespaceOpts[job.Namespace] = nsOpts 14758 }</span> 14759 // all task in job should have the same namespace with job 14760 <span class="cov0" title="0">nsOpts.allocated.Add(attr.allocated) 14761 drf.updateNamespaceShare(job.Namespace, nsOpts)</span> 14762 } 14763 <span class="cov8" title="1">if hierarchyEnabled </span><span class="cov8" title="1">{ 14764 queue := ssn.Queues[job.Queue] 14765 drf.totalAllocated.Add(attr.allocated) 14766 drf.UpdateHierarchicalShare(drf.hierarchicalRoot, drf.totalAllocated, job, attr, queue.Hierarchy, queue.Weights) 14767 }</span> 14768 } 14769 14770 <span class="cov8" title="1">preemptableFn := func(preemptor *api.TaskInfo, preemptees []*api.TaskInfo) ([]*api.TaskInfo, int) </span><span class="cov0" title="0">{ 14771 var victims []*api.TaskInfo 14772 14773 addVictim := func(candidate *api.TaskInfo) </span><span class="cov0" title="0">{ 14774 victims = append(victims, candidate) 14775 }</span> 14776 14777 <span class="cov0" title="0">if namespaceOrderEnabled </span><span class="cov0" title="0">{ 14778 // apply the namespace share policy on preemptee firstly 14779 14780 lWeight := ssn.NamespaceInfo[api.NamespaceName(preemptor.Namespace)].GetWeight() 14781 lNsAtt := drf.namespaceOpts[preemptor.Namespace] 14782 lNsAlloc := lNsAtt.allocated.Clone().Add(preemptor.Resreq) 14783 _, lNsShare := drf.calculateShare(lNsAlloc, drf.totalResource) 14784 lNsShareWeighted := lNsShare / float64(lWeight) 14785 14786 namespaceAllocation := map[string]*api.Resource{} 14787 14788 // undecidedPreemptees means this policy could not judge preemptee is preemptable or not 14789 // and left it to next policy 14790 undecidedPreemptees := []*api.TaskInfo{} 14791 14792 for _, preemptee := range preemptees </span><span class="cov0" title="0">{ 14793 if preemptor.Namespace == preemptee.Namespace </span><span class="cov0" title="0">{ 14794 // policy is disabled when they are in the same namespace 14795 
undecidedPreemptees = append(undecidedPreemptees, preemptee) 14796 continue</span> 14797 } 14798 14799 // compute the preemptee namespace weighted share after preemption 14800 <span class="cov0" title="0">nsAllocation, found := namespaceAllocation[preemptee.Namespace] 14801 if !found </span><span class="cov0" title="0">{ 14802 rNsAtt := drf.namespaceOpts[preemptee.Namespace] 14803 nsAllocation = rNsAtt.allocated.Clone() 14804 namespaceAllocation[preemptee.Namespace] = nsAllocation 14805 }</span> 14806 <span class="cov0" title="0">rWeight := ssn.NamespaceInfo[api.NamespaceName(preemptee.Namespace)].GetWeight() 14807 rNsAlloc := nsAllocation.Sub(preemptee.Resreq) 14808 _, rNsShare := drf.calculateShare(rNsAlloc, drf.totalResource) 14809 rNsShareWeighted := rNsShare / float64(rWeight) 14810 14811 // to avoid ping pong actions, the preemptee namespace should 14812 // have the higher weighted share after preemption. 14813 if lNsShareWeighted < rNsShareWeighted </span><span class="cov0" title="0">{ 14814 addVictim(preemptee) 14815 continue</span> 14816 } 14817 <span class="cov0" title="0">if lNsShareWeighted-rNsShareWeighted > shareDelta </span><span class="cov0" title="0">{ 14818 continue</span> 14819 } 14820 14821 // equal namespace order leads to judgement of jobOrder 14822 <span class="cov0" title="0">undecidedPreemptees = append(undecidedPreemptees, preemptee)</span> 14823 } 14824 14825 <span class="cov0" title="0">preemptees = undecidedPreemptees</span> 14826 } 14827 14828 <span class="cov0" title="0">latt := drf.jobAttrs[preemptor.Job] 14829 lalloc := latt.allocated.Clone().Add(preemptor.Resreq) 14830 _, ls := drf.calculateShare(lalloc, drf.totalResource) 14831 14832 allocations := map[api.JobID]*api.Resource{} 14833 14834 for _, preemptee := range preemptees </span><span class="cov0" title="0">{ 14835 if _, found := allocations[preemptee.Job]; !found </span><span class="cov0" title="0">{ 14836 ratt := drf.jobAttrs[preemptee.Job] 14837 allocations[preemptee.Job] = 
ratt.allocated.Clone() 14838 }</span> 14839 <span class="cov0" title="0">ralloc := allocations[preemptee.Job].Sub(preemptee.Resreq) 14840 _, rs := drf.calculateShare(ralloc, drf.totalResource) 14841 14842 if ls < rs || math.Abs(ls-rs) <= shareDelta </span><span class="cov0" title="0">{ 14843 addVictim(preemptee) 14844 }</span> 14845 } 14846 14847 <span class="cov0" title="0">klog.V(4).Infof("Victims from DRF plugins are %+v", victims) 14848 14849 return victims, util.Permit</span> 14850 } 14851 14852 <span class="cov8" title="1">ssn.AddPreemptableFn(drf.Name(), preemptableFn) 14853 14854 if hierarchyEnabled </span><span class="cov8" title="1">{ 14855 queueOrderFn := func(l interface{}, r interface{}) int </span><span class="cov8" title="1">{ 14856 lv := l.(*api.QueueInfo) 14857 rv := r.(*api.QueueInfo) 14858 ret := drf.compareQueues(drf.hierarchicalRoot, lv, rv) 14859 if ret < 0 </span><span class="cov8" title="1">{ 14860 return -1 14861 }</span> 14862 <span class="cov8" title="1">if ret > 0 </span><span class="cov8" title="1">{ 14863 return 1 14864 }</span> 14865 <span class="cov8" title="1">return 0</span> 14866 } 14867 <span class="cov8" title="1">ssn.AddQueueOrderFn(drf.Name(), queueOrderFn) 14868 14869 reclaimFn := func(reclaimer *api.TaskInfo, reclaimees []*api.TaskInfo) ([]*api.TaskInfo, int) </span><span class="cov0" title="0">{ 14870 var victims []*api.TaskInfo 14871 // clone hdrf tree 14872 totalAllocated := drf.totalAllocated.Clone() 14873 root := drf.hierarchicalRoot.Clone(nil) 14874 14875 // update reclaimer hdrf 14876 ljob := ssn.Jobs[reclaimer.Job] 14877 lqueue := ssn.Queues[ljob.Queue] 14878 ljob = ljob.Clone() 14879 attr := drf.jobAttrs[ljob.UID] 14880 lattr := &drfAttr{ 14881 allocated: attr.allocated.Clone(), 14882 } 14883 lattr.allocated.Add(reclaimer.Resreq) 14884 totalAllocated.Add(reclaimer.Resreq) 14885 drf.updateShare(lattr) 14886 drf.UpdateHierarchicalShare(root, totalAllocated, ljob, lattr, lqueue.Hierarchy, lqueue.Weights) 14887 14888 
for _, preemptee := range reclaimees </span><span class="cov0" title="0">{ 14889 rjob := ssn.Jobs[preemptee.Job] 14890 rqueue := ssn.Queues[rjob.Queue] 14891 14892 // update hdrf of reclaimee job 14893 totalAllocated.Sub(preemptee.Resreq) 14894 rjob = rjob.Clone() 14895 attr := drf.jobAttrs[rjob.UID] 14896 rattr := &drfAttr{ 14897 allocated: attr.allocated.Clone(), 14898 } 14899 rattr.allocated.Sub(preemptee.Resreq) 14900 drf.updateShare(rattr) 14901 drf.UpdateHierarchicalShare(root, totalAllocated, rjob, rattr, rqueue.Hierarchy, rqueue.Weights) 14902 14903 // compare hdrf of queues 14904 ret := drf.compareQueues(root, lqueue, rqueue) 14905 14906 // resume hdrf of reclaimee job 14907 totalAllocated.Add(preemptee.Resreq) 14908 rattr.allocated.Add(preemptee.Resreq) 14909 drf.updateShare(rattr) 14910 drf.UpdateHierarchicalShare(root, totalAllocated, rjob, rattr, rqueue.Hierarchy, rqueue.Weights) 14911 14912 if ret < 0 </span><span class="cov0" title="0">{ 14913 victims = append(victims, preemptee) 14914 }</span> 14915 14916 <span class="cov0" title="0">if ret > shareDelta </span><span class="cov0" title="0">{ 14917 continue</span> 14918 } 14919 } 14920 14921 <span class="cov0" title="0">klog.V(4).Infof("Victims from HDRF plugins are %+v", victims) 14922 14923 return victims, util.Permit</span> 14924 } 14925 <span class="cov8" title="1">ssn.AddReclaimableFn(drf.Name(), reclaimFn)</span> 14926 } 14927 14928 <span class="cov8" title="1">jobOrderFn := func(l interface{}, r interface{}) int </span><span class="cov0" title="0">{ 14929 lv := l.(*api.JobInfo) 14930 rv := r.(*api.JobInfo) 14931 14932 klog.V(4).Infof("DRF JobOrderFn: <%v/%v> share state: %v, <%v/%v> share state: %v", 14933 lv.Namespace, lv.Name, drf.jobAttrs[lv.UID].share, rv.Namespace, rv.Name, drf.jobAttrs[rv.UID].share) 14934 14935 if drf.jobAttrs[lv.UID].share == drf.jobAttrs[rv.UID].share </span><span class="cov0" title="0">{ 14936 return 0 14937 }</span> 14938 14939 <span class="cov0" title="0">if 
drf.jobAttrs[lv.UID].share < drf.jobAttrs[rv.UID].share </span><span class="cov0" title="0">{ 14940 return -1 14941 }</span> 14942 14943 <span class="cov0" title="0">return 1</span> 14944 } 14945 14946 <span class="cov8" title="1">ssn.AddJobOrderFn(drf.Name(), jobOrderFn) 14947 14948 namespaceOrderFn := func(l interface{}, r interface{}) int </span><span class="cov0" title="0">{ 14949 lv := l.(api.NamespaceName) 14950 rv := r.(api.NamespaceName) 14951 14952 lOpt := drf.namespaceOpts[string(lv)] 14953 rOpt := drf.namespaceOpts[string(rv)] 14954 14955 lWeight := ssn.NamespaceInfo[lv].GetWeight() 14956 rWeight := ssn.NamespaceInfo[rv].GetWeight() 14957 14958 klog.V(4).Infof("DRF NamespaceOrderFn: <%v> share state: %f, weight %v, <%v> share state: %f, weight %v", 14959 lv, lOpt.share, lWeight, rv, rOpt.share, rWeight) 14960 14961 lWeightedShare := lOpt.share / float64(lWeight) 14962 rWeightedShare := rOpt.share / float64(rWeight) 14963 14964 metrics.UpdateNamespaceWeight(string(lv), lWeight) 14965 metrics.UpdateNamespaceWeight(string(rv), rWeight) 14966 metrics.UpdateNamespaceWeightedShare(string(lv), lWeightedShare) 14967 metrics.UpdateNamespaceWeightedShare(string(rv), rWeightedShare) 14968 14969 if lWeightedShare == rWeightedShare </span><span class="cov0" title="0">{ 14970 return 0 14971 }</span> 14972 14973 <span class="cov0" title="0">if lWeightedShare < rWeightedShare </span><span class="cov0" title="0">{ 14974 return -1 14975 }</span> 14976 14977 <span class="cov0" title="0">return 1</span> 14978 } 14979 14980 <span class="cov8" title="1">if namespaceOrderEnabled </span><span class="cov0" title="0">{ 14981 ssn.AddNamespaceOrderFn(drf.Name(), namespaceOrderFn) 14982 }</span> 14983 14984 // Register event handlers. 
14985 <span class="cov8" title="1">ssn.AddEventHandler(&framework.EventHandler{ 14986 AllocateFunc: func(event *framework.Event) </span><span class="cov8" title="1">{ 14987 attr := drf.jobAttrs[event.Task.Job] 14988 attr.allocated.Add(event.Task.Resreq) 14989 14990 job := ssn.Jobs[event.Task.Job] 14991 drf.updateJobShare(job.Namespace, job.Name, attr) 14992 14993 nsShare := -1.0 14994 if namespaceOrderEnabled </span><span class="cov0" title="0">{ 14995 nsOpt := drf.namespaceOpts[event.Task.Namespace] 14996 nsOpt.allocated.Add(event.Task.Resreq) 14997 14998 drf.updateNamespaceShare(event.Task.Namespace, nsOpt) 14999 nsShare = nsOpt.share 15000 }</span> 15001 <span class="cov8" title="1">if hierarchyEnabled </span><span class="cov8" title="1">{ 15002 queue := ssn.Queues[job.Queue] 15003 15004 drf.totalAllocated.Add(event.Task.Resreq) 15005 drf.UpdateHierarchicalShare(drf.hierarchicalRoot, drf.totalAllocated, job, attr, queue.Hierarchy, queue.Weights) 15006 }</span> 15007 15008 <span class="cov8" title="1">klog.V(4).Infof("DRF AllocateFunc: task <%v/%v>, resreq <%v>, share <%v>, namespace share <%v>", 15009 event.Task.Namespace, event.Task.Name, event.Task.Resreq, attr.share, nsShare)</span> 15010 }, 15011 DeallocateFunc: func(event *framework.Event) <span class="cov0" title="0">{ 15012 attr := drf.jobAttrs[event.Task.Job] 15013 attr.allocated.Sub(event.Task.Resreq) 15014 15015 job := ssn.Jobs[event.Task.Job] 15016 drf.updateJobShare(job.Namespace, job.Name, attr) 15017 15018 nsShare := -1.0 15019 if namespaceOrderEnabled </span><span class="cov0" title="0">{ 15020 nsOpt := drf.namespaceOpts[event.Task.Namespace] 15021 nsOpt.allocated.Sub(event.Task.Resreq) 15022 15023 drf.updateNamespaceShare(event.Task.Namespace, nsOpt) 15024 nsShare = nsOpt.share 15025 }</span> 15026 15027 <span class="cov0" title="0">if hierarchyEnabled </span><span class="cov0" title="0">{ 15028 queue := ssn.Queues[job.Queue] 15029 drf.totalAllocated.Sub(event.Task.Resreq) 15030 
drf.UpdateHierarchicalShare(drf.hierarchicalRoot, drf.totalAllocated, job, attr, queue.Hierarchy, queue.Weights) 15031 }</span> 15032 15033 <span class="cov0" title="0">klog.V(4).Infof("DRF EvictFunc: task <%v/%v>, resreq <%v>, share <%v>, namespace share <%v>", 15034 event.Task.Namespace, event.Task.Name, event.Task.Resreq, attr.share, nsShare)</span> 15035 }, 15036 }) 15037 } 15038 15039 func (drf *drfPlugin) updateNamespaceShare(namespaceName string, attr *drfAttr) <span class="cov0" title="0">{ 15040 drf.updateShare(attr) 15041 metrics.UpdateNamespaceShare(namespaceName, attr.share) 15042 }</span> 15043 15044 // build hierarchy if the node does not exist 15045 func (drf *drfPlugin) buildHierarchy(root *hierarchicalNode, job *api.JobInfo, attr *drfAttr, 15046 hierarchy, hierarchicalWeights string) <span class="cov8" title="1">{ 15047 inode := root 15048 paths := strings.Split(hierarchy, "/") 15049 weights := strings.Split(hierarchicalWeights, "/") 15050 15051 for i := 1; i < len(paths); i++ </span><span class="cov8" title="1">{ 15052 if child, ok := inode.children[paths[i]]; ok </span><span class="cov8" title="1">{ 15053 inode = child 15054 }</span> else<span class="cov8" title="1"> { 15055 fweight, _ := strconv.ParseFloat(weights[i], 64) 15056 if fweight < 1 </span><span class="cov0" title="0">{ 15057 fweight = 1 15058 }</span> 15059 <span class="cov8" title="1">child = &hierarchicalNode{ 15060 weight: fweight, 15061 hierarchy: paths[i], 15062 request: api.EmptyResource(), 15063 attr: &drfAttr{ 15064 allocated: api.EmptyResource(), 15065 }, 15066 children: make(map[string]*hierarchicalNode), 15067 } 15068 klog.V(4).Infof("Node %s added to %s, weight %f", 15069 child.hierarchy, inode.hierarchy, fweight) 15070 inode.children[paths[i]] = child 15071 child.parent = inode 15072 inode = child</span> 15073 } 15074 } 15075 15076 <span class="cov8" title="1">child := &hierarchicalNode{ 15077 weight: 1, 15078 attr: attr, 15079 hierarchy: string(job.UID), 15080 request: 
job.TotalRequest.Clone(), 15081 children: nil, 15082 } 15083 inode.children[string(job.UID)] = child 15084 // update drf attribute bottom up 15085 klog.V(4).Infof("Job <%s/%s> added to %s, weights %s, attr %v, total request: %s", 15086 job.Namespace, job.Name, inode.hierarchy, hierarchicalWeights, child.attr, job.TotalRequest)</span> 15087 } 15088 15089 // updateNamespaceShare updates the node attribute recursively 15090 func (drf *drfPlugin) updateHierarchicalShare(node *hierarchicalNode, 15091 demandingResources map[v1.ResourceName]bool) <span class="cov8" title="1">{ 15092 if node.children == nil </span><span class="cov8" title="1">{ 15093 node.saturated = resourceSaturated(node.attr.allocated, 15094 node.request, demandingResources) 15095 klog.V(4).Infof("Update hierarchical node %s, share %f, dominant %s, resource %v, saturated: %t", 15096 node.hierarchy, node.attr.share, node.attr.dominantResource, node.attr.allocated, node.saturated) 15097 }</span> else<span class="cov8" title="1"> { 15098 var mdr float64 = 1 15099 // get minimun dominant resource share 15100 for _, child := range node.children </span><span class="cov8" title="1">{ 15101 drf.updateHierarchicalShare(child, demandingResources) 15102 // skip empty child and saturated child 15103 if child.attr.share != 0 && !child.saturated </span><span class="cov8" title="1">{ 15104 _, resShare := drf.calculateShare(child.attr.allocated, drf.totalResource) 15105 if resShare < mdr </span><span class="cov8" title="1">{ 15106 mdr = resShare 15107 }</span> 15108 } 15109 } 15110 15111 <span class="cov8" title="1">node.attr.allocated = api.EmptyResource() 15112 saturated := true 15113 for _, child := range node.children </span><span class="cov8" title="1">{ 15114 if !child.saturated </span><span class="cov8" title="1">{ 15115 saturated = false 15116 }</span> 15117 // only consider non-empty children 15118 <span class="cov8" title="1">if child.attr.share != 0 </span><span class="cov8" title="1">{ 15119 // saturated 
child is not scaled 15120 if child.saturated </span><span class="cov8" title="1">{ 15121 t := child.attr.allocated 15122 node.attr.allocated.Add(t) 15123 }</span> else<span class="cov8" title="1"> { 15124 t := child.attr.allocated.Clone().Multi(mdr / child.attr.share) 15125 node.attr.allocated.Add(t) 15126 }</span> 15127 } 15128 } 15129 <span class="cov8" title="1">node.attr.dominantResource, node.attr.share = drf.calculateShare( 15130 node.attr.allocated, drf.totalResource) 15131 node.saturated = saturated 15132 klog.V(4).Infof("Update hierarchical node %s, share %f, dominant resource %s, resource %v, saturated: %t", 15133 node.hierarchy, node.attr.share, node.attr.dominantResource, node.attr.allocated, node.saturated)</span> 15134 } 15135 } 15136 15137 func (drf *drfPlugin) UpdateHierarchicalShare(root *hierarchicalNode, totalAllocated *api.Resource, job *api.JobInfo, attr *drfAttr, hierarchy, hierarchicalWeights string) <span class="cov8" title="1">{ 15138 // filter out demanding resources 15139 demandingResources := map[v1.ResourceName]bool{} 15140 for _, rn := range drf.totalResource.ResourceNames() </span><span class="cov8" title="1">{ 15141 if totalAllocated.Get(rn) < drf.totalResource.Get(rn) </span><span class="cov8" title="1">{ 15142 demandingResources[rn] = true 15143 }</span> 15144 } 15145 <span class="cov8" title="1">drf.buildHierarchy(root, job, attr, hierarchy, hierarchicalWeights) 15146 drf.updateHierarchicalShare(root, demandingResources)</span> 15147 } 15148 15149 func (drf *drfPlugin) updateJobShare(jobNs, jobName string, attr *drfAttr) <span class="cov8" title="1">{ 15150 drf.updateShare(attr) 15151 metrics.UpdateJobShare(jobNs, jobName, attr.share) 15152 }</span> 15153 15154 func (drf *drfPlugin) updateShare(attr *drfAttr) <span class="cov8" title="1">{ 15155 attr.dominantResource, attr.share = drf.calculateShare(attr.allocated, drf.totalResource) 15156 }</span> 15157 15158 func (drf *drfPlugin) calculateShare(allocated, totalResource 
*api.Resource) (string, float64) <span class="cov8" title="1">{ 15159 res := float64(0) 15160 dominantResource := "" 15161 for _, rn := range totalResource.ResourceNames() </span><span class="cov8" title="1">{ 15162 share := helpers.Share(allocated.Get(rn), totalResource.Get(rn)) 15163 if share > res </span><span class="cov8" title="1">{ 15164 res = share 15165 dominantResource = string(rn) 15166 }</span> 15167 } 15168 15169 <span class="cov8" title="1">return dominantResource, res</span> 15170 } 15171 15172 func (drf *drfPlugin) OnSessionClose(session *framework.Session) <span class="cov8" title="1">{ 15173 // Clean schedule data. 15174 drf.totalResource = api.EmptyResource() 15175 drf.totalAllocated = api.EmptyResource() 15176 drf.jobAttrs = map[api.JobID]*drfAttr{} 15177 }</span> 15178 </pre> 15179 15180 <pre class="file" id="file65" style="display: none">/* 15181 Copyright 2021 The Volcano Authors. 15182 15183 Licensed under the Apache License, Version 2.0 (the "License"); 15184 you may not use this file except in compliance with the License. 15185 You may obtain a copy of the License at 15186 15187 http://www.apache.org/licenses/LICENSE-2.0 15188 15189 Unless required by applicable law or agreed to in writing, software 15190 distributed under the License is distributed on an "AS IS" BASIS, 15191 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15192 See the License for the specific language governing permissions and 15193 limitations under the License. 
15194 */ 15195 15196 package policy 15197 15198 import ( 15199 v1 "k8s.io/api/core/v1" 15200 "k8s.io/kubernetes/pkg/kubelet/cm/cpuset" 15201 "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask" 15202 15203 batch "volcano.sh/apis/pkg/apis/batch/v1alpha1" 15204 nodeinfov1alpha1 "volcano.sh/apis/pkg/apis/nodeinfo/v1alpha1" 15205 "volcano.sh/volcano/pkg/scheduler/api" 15206 ) 15207 15208 // TopologyHint is a struct containing the NUMANodeAffinity for a Container 15209 type TopologyHint struct { 15210 NUMANodeAffinity bitmask.BitMask 15211 // Preferred is set to true when the NUMANodeAffinity encodes a preferred 15212 // allocation for the Container. It is set to false otherwise. 15213 Preferred bool 15214 } 15215 15216 // Policy is an interface for topology manager policy 15217 type Policy interface { 15218 // Predicate Get the best hit. 15219 Predicate(providersHints []map[string][]TopologyHint) (TopologyHint, bool) 15220 } 15221 15222 // HintProvider is an interface for components that want to collaborate to 15223 // achieve globally optimal concrete resource alignment with respect to 15224 // NUMA locality. 15225 type HintProvider interface { 15226 // Name returns provider name used for register and logging. 
15227 Name() string 15228 // GetTopologyHints returns hints if this hint provider has a preference, 15229 GetTopologyHints(container *v1.Container, topoInfo *api.NumatopoInfo, resNumaSets api.ResNumaSets) map[string][]TopologyHint 15230 Allocate(container *v1.Container, bestHit *TopologyHint, topoInfo *api.NumatopoInfo, resNumaSets api.ResNumaSets) map[string]cpuset.CPUSet 15231 } 15232 15233 // GetPolicy return the interface matched the input task topology config 15234 func GetPolicy(node *api.NodeInfo, numaNodes []int) Policy <span class="cov0" title="0">{ 15235 switch batch.NumaPolicy(node.NumaSchedulerInfo.Policies[nodeinfov1alpha1.TopologyManagerPolicy]) </span>{ 15236 case batch.None:<span class="cov0" title="0"> 15237 return NewPolicyNone(numaNodes)</span> 15238 case batch.BestEffort:<span class="cov0" title="0"> 15239 return NewPolicyBestEffort(numaNodes)</span> 15240 case batch.Restricted:<span class="cov0" title="0"> 15241 return NewPolicyRestricted(numaNodes)</span> 15242 case batch.SingleNumaNode:<span class="cov0" title="0"> 15243 return NewPolicySingleNumaNode(numaNodes)</span> 15244 } 15245 15246 <span class="cov0" title="0">return &policyNone{}</span> 15247 } 15248 15249 // AccumulateProvidersHints return all TopologyHint collection from different providers 15250 func AccumulateProvidersHints(container *v1.Container, 15251 topoInfo *api.NumatopoInfo, resNumaSets api.ResNumaSets, 15252 hintProviders []HintProvider) (providersHints []map[string][]TopologyHint) <span class="cov0" title="0">{ 15253 for _, provider := range hintProviders </span><span class="cov0" title="0">{ 15254 hints := provider.GetTopologyHints(container, topoInfo, resNumaSets) 15255 providersHints = append(providersHints, hints) 15256 }</span> 15257 15258 <span class="cov0" title="0">return providersHints</span> 15259 } 15260 15261 // Allocate return all resource assignment collection from different providers 15262 func Allocate(container *v1.Container, bestHit *TopologyHint, 15263 
topoInfo *api.NumatopoInfo, resNumaSets api.ResNumaSets, hintProviders []HintProvider) map[string]cpuset.CPUSet <span class="cov0" title="0">{ 15264 allResAlloc := make(map[string]cpuset.CPUSet) 15265 for _, provider := range hintProviders </span><span class="cov0" title="0">{ 15266 resAlloc := provider.Allocate(container, bestHit, topoInfo, resNumaSets) 15267 for resName, assign := range resAlloc </span><span class="cov0" title="0">{ 15268 allResAlloc[resName] = assign 15269 }</span> 15270 } 15271 15272 <span class="cov0" title="0">return allResAlloc</span> 15273 } 15274 </pre> 15275 15276 <pre class="file" id="file66" style="display: none">/* 15277 Copyright 2021 The Volcano Authors. 15278 15279 Licensed under the Apache License, Version 2.0 (the "License"); 15280 you may not use this file except in compliance with the License. 15281 You may obtain a copy of the License at 15282 15283 http://www.apache.org/licenses/LICENSE-2.0 15284 15285 Unless required by applicable law or agreed to in writing, software 15286 distributed under the License is distributed on an "AS IS" BASIS, 15287 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15288 See the License for the specific language governing permissions and 15289 limitations under the License. 15290 */ 15291 15292 package policy 15293 15294 import ( 15295 "k8s.io/klog" 15296 "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask" 15297 ) 15298 15299 func filterProvidersHints(providersHints []map[string][]TopologyHint) [][]TopologyHint <span class="cov8" title="1">{ 15300 var allProviderHints [][]TopologyHint 15301 for _, hints := range providersHints </span><span class="cov8" title="1">{ 15302 // If hints is nil, insert a single, preferred any-numa hint into allProviderHints. 
15303 if len(hints) == 0 </span><span class="cov0" title="0">{ 15304 klog.Infof("[numatopo] Hint Provider has no preference for NUMA affinity with any resource") 15305 allProviderHints = append(allProviderHints, []TopologyHint{{nil, true}}) 15306 continue</span> 15307 } 15308 15309 // Otherwise, accumulate the hints for each resource type into allProviderHints. 15310 <span class="cov8" title="1">for resource := range hints </span><span class="cov8" title="1">{ 15311 if hints[resource] == nil </span><span class="cov0" title="0">{ 15312 klog.Infof("[numatopo] Hint Provider has no preference for NUMA affinity with resource '%s'", resource) 15313 allProviderHints = append(allProviderHints, []TopologyHint{{nil, true}}) 15314 continue</span> 15315 } 15316 15317 <span class="cov8" title="1">if len(hints[resource]) == 0 </span><span class="cov0" title="0">{ 15318 klog.Infof("[numatopo] Hint Provider has no possible NUMA affinities for resource '%s'", resource) 15319 allProviderHints = append(allProviderHints, []TopologyHint{{nil, false}}) 15320 continue</span> 15321 } 15322 15323 <span class="cov8" title="1">allProviderHints = append(allProviderHints, hints[resource])</span> 15324 } 15325 } 15326 <span class="cov8" title="1">return allProviderHints</span> 15327 } 15328 15329 func mergeFilteredHints(numaNodes []int, filteredHints [][]TopologyHint) TopologyHint <span class="cov8" title="1">{ 15330 // Set the default affinity as an any-numa affinity containing the list 15331 // of NUMA Nodes available on this machine. 15332 defaultAffinity, _ := bitmask.NewBitMask(numaNodes...) 15333 15334 // Set the bestHint to return from this function as {nil false}. 15335 // This will only be returned if no better hint can be found when 15336 // merging hints from each hint provider. 
15337 bestHint := TopologyHint{defaultAffinity, false} 15338 iterateAllProviderTopologyHints(filteredHints, func(permutation []TopologyHint) </span><span class="cov8" title="1">{ 15339 // Get the NUMANodeAffinity from each hint in the permutation and see if any 15340 // of them encode unpreferred allocations. 15341 mergedHint := mergePermutation(numaNodes, permutation) 15342 // Only consider mergedHints that result in a NUMANodeAffinity > 0 to 15343 // replace the current bestHint. 15344 if mergedHint.NUMANodeAffinity.Count() == 0 </span><span class="cov8" title="1">{ 15345 return 15346 }</span> 15347 15348 // If the current bestHint is non-preferred and the new mergedHint is 15349 // preferred, always choose the preferred hint over the non-preferred one. 15350 <span class="cov8" title="1">if mergedHint.Preferred && !bestHint.Preferred </span><span class="cov8" title="1">{ 15351 bestHint = mergedHint 15352 return 15353 }</span> 15354 15355 // If the current bestHint is preferred and the new mergedHint is 15356 // non-preferred, never update bestHint, regardless of mergedHint's 15357 // narowness. 15358 <span class="cov8" title="1">if !mergedHint.Preferred && bestHint.Preferred </span><span class="cov8" title="1">{ 15359 return 15360 }</span> 15361 15362 // If mergedHint and bestHint has the same preference, only consider 15363 // mergedHints that have a narrower NUMANodeAffinity than the 15364 // NUMANodeAffinity in the current bestHint. 15365 <span class="cov8" title="1">if !mergedHint.NUMANodeAffinity.IsNarrowerThan(bestHint.NUMANodeAffinity) </span><span class="cov8" title="1">{ 15366 return 15367 }</span> 15368 15369 // In all other cases, update bestHint to the current mergedHint 15370 <span class="cov8" title="1">bestHint = mergedHint</span> 15371 }) 15372 15373 <span class="cov8" title="1">return bestHint</span> 15374 } 15375 15376 // Iterate over all permutations of hints in 'allProviderHints [][]TopologyHint'. 
15377 // 15378 // This procedure is implemented as a recursive function over the set of hints 15379 // in 'allproviderHints[i]'. It applies the function 'callback' to each 15380 // permutation as it is found. It is the equivalent of: 15381 // 15382 // for i := 0; i < len(providerHints[0]); i++ 15383 // for j := 0; j < len(providerHints[1]); j++ 15384 // for k := 0; k < len(providerHints[2]); k++ 15385 // ... 15386 // for z := 0; z < len(providerHints[-1]); z++ 15387 // permutation := []TopologyHint{ 15388 // providerHints[0][i], 15389 // providerHints[1][j], 15390 // providerHints[2][k], 15391 // ... 15392 // providerHints[-1][z] 15393 // } 15394 // callback(permutation) 15395 func iterateAllProviderTopologyHints(allProviderHints [][]TopologyHint, callback func([]TopologyHint)) <span class="cov8" title="1">{ 15396 // Internal helper function to accumulate the permutation before calling the callback. 15397 var iterate func(i int, accum []TopologyHint) 15398 iterate = func(i int, accum []TopologyHint) </span><span class="cov8" title="1">{ 15399 // Base case: we have looped through all providers and have a full permutation. 15400 if i == len(allProviderHints) </span><span class="cov8" title="1">{ 15401 callback(accum) 15402 return 15403 }</span> 15404 15405 // Loop through all hints for provider 'i', and recurse to build the 15406 // the permutation of this hint with all hints from providers 'i++'. 15407 <span class="cov8" title="1">for j := range allProviderHints[i] </span><span class="cov8" title="1">{ 15408 iterate(i+1, append(accum, allProviderHints[i][j])) 15409 }</span> 15410 } 15411 <span class="cov8" title="1">iterate(0, []TopologyHint{})</span> 15412 } 15413 15414 // Merge a TopologyHints permutation to a single hint by performing a bitwise-AND 15415 // of their affinity masks. The hint shall be preferred if all hits in the permutation 15416 // are preferred. 
15417 func mergePermutation(numaNodes []int, permutation []TopologyHint) TopologyHint <span class="cov8" title="1">{ 15418 // Get the NUMANodeAffinity from each hint in the permutation and see if any 15419 // of them encode unpreferred allocations. 15420 preferred := true 15421 defaultAffinity, _ := bitmask.NewBitMask(numaNodes...) 15422 var numaAffinities []bitmask.BitMask 15423 for _, hint := range permutation </span><span class="cov8" title="1">{ 15424 // Only consider hints that have an actual NUMANodeAffinity set. 15425 if hint.NUMANodeAffinity == nil </span><span class="cov0" title="0">{ 15426 numaAffinities = append(numaAffinities, defaultAffinity) 15427 }</span> else<span class="cov8" title="1"> { 15428 numaAffinities = append(numaAffinities, hint.NUMANodeAffinity) 15429 }</span> 15430 15431 <span class="cov8" title="1">if !hint.Preferred </span><span class="cov8" title="1">{ 15432 preferred = false 15433 }</span> 15434 } 15435 15436 // Merge the affinities using a bitwise-and operation. 15437 <span class="cov8" title="1">mergedAffinity := bitmask.And(defaultAffinity, numaAffinities...) 15438 // Build a mergedHint from the merged affinity mask, indicating if an 15439 // preferred allocation was used to generate the affinity mask or not. 15440 return TopologyHint{mergedAffinity, preferred}</span> 15441 } 15442 </pre> 15443 15444 <pre class="file" id="file67" style="display: none">/* 15445 Copyright 2021 The Volcano Authors. 15446 15447 Licensed under the Apache License, Version 2.0 (the "License"); 15448 you may not use this file except in compliance with the License. 15449 You may obtain a copy of the License at 15450 15451 http://www.apache.org/licenses/LICENSE-2.0 15452 15453 Unless required by applicable law or agreed to in writing, software 15454 distributed under the License is distributed on an "AS IS" BASIS, 15455 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
15456 See the License for the specific language governing permissions and 15457 limitations under the License. 15458 */ 15459 15460 package policy 15461 15462 import "k8s.io/klog" 15463 15464 type policyBestEffort struct { 15465 numaNodes []int 15466 } 15467 15468 // NewPolicyBestEffort return a new policy interface 15469 func NewPolicyBestEffort(numaNodes []int) Policy <span class="cov8" title="1">{ 15470 return &policyBestEffort{numaNodes: numaNodes} 15471 }</span> 15472 15473 func (p *policyBestEffort) canAdmitPodResult(hint *TopologyHint) bool <span class="cov8" title="1">{ 15474 return true 15475 }</span> 15476 15477 func (p *policyBestEffort) Predicate(providersHints []map[string][]TopologyHint) (TopologyHint, bool) <span class="cov8" title="1">{ 15478 filteredProvidersHints := filterProvidersHints(providersHints) 15479 bestHint := mergeFilteredHints(p.numaNodes, filteredProvidersHints) 15480 admit := p.canAdmitPodResult(&bestHint) 15481 15482 klog.V(4).Infof("bestHint: %v admit %v\n", bestHint, admit) 15483 return bestHint, admit 15484 }</span> 15485 </pre> 15486 15487 <pre class="file" id="file68" style="display: none">/* 15488 Copyright 2021 The Volcano Authors. 15489 15490 Licensed under the Apache License, Version 2.0 (the "License"); 15491 you may not use this file except in compliance with the License. 15492 You may obtain a copy of the License at 15493 15494 http://www.apache.org/licenses/LICENSE-2.0 15495 15496 Unless required by applicable law or agreed to in writing, software 15497 distributed under the License is distributed on an "AS IS" BASIS, 15498 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15499 See the License for the specific language governing permissions and 15500 limitations under the License. 
15501 */ 15502 15503 package policy 15504 15505 type policyNone struct { 15506 numaNodes []int 15507 } 15508 15509 // NewPolicyNone return a new policy interface 15510 func NewPolicyNone(numaNodes []int) Policy <span class="cov0" title="0">{ 15511 return &policyNone{numaNodes: numaNodes} 15512 }</span> 15513 15514 func (policy *policyNone) canAdmitPodResult(hint *TopologyHint) bool <span class="cov0" title="0">{ 15515 return true 15516 }</span> 15517 15518 func (policy *policyNone) Predicate(providersHints []map[string][]TopologyHint) (TopologyHint, bool) <span class="cov0" title="0">{ 15519 return TopologyHint{}, policy.canAdmitPodResult(nil) 15520 }</span> 15521 </pre> 15522 15523 <pre class="file" id="file69" style="display: none">/* 15524 Copyright 2021 The Volcano Authors. 15525 15526 Licensed under the Apache License, Version 2.0 (the "License"); 15527 you may not use this file except in compliance with the License. 15528 You may obtain a copy of the License at 15529 15530 http://www.apache.org/licenses/LICENSE-2.0 15531 15532 Unless required by applicable law or agreed to in writing, software 15533 distributed under the License is distributed on an "AS IS" BASIS, 15534 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15535 See the License for the specific language governing permissions and 15536 limitations under the License. 
15537 */ 15538 15539 package policy 15540 15541 import "k8s.io/klog" 15542 15543 type policyRestricted struct { 15544 numaNodes []int 15545 } 15546 15547 // NewPolicyRestricted return a new policy interface 15548 func NewPolicyRestricted(numaNodes []int) Policy <span class="cov8" title="1">{ 15549 return &policyRestricted{numaNodes: numaNodes} 15550 }</span> 15551 15552 func (p *policyRestricted) canAdmitPodResult(hint *TopologyHint) bool <span class="cov8" title="1">{ 15553 return hint.Preferred 15554 }</span> 15555 15556 func (p *policyRestricted) Predicate(providersHints []map[string][]TopologyHint) (TopologyHint, bool) <span class="cov8" title="1">{ 15557 filteredHints := filterProvidersHints(providersHints) 15558 bestHint := mergeFilteredHints(p.numaNodes, filteredHints) 15559 admit := p.canAdmitPodResult(&bestHint) 15560 15561 klog.V(4).Infof("bestHint: %v admit %v\n", bestHint, admit) 15562 return bestHint, admit 15563 }</span> 15564 </pre> 15565 15566 <pre class="file" id="file70" style="display: none">/* 15567 Copyright 2021 The Volcano Authors. 15568 15569 Licensed under the Apache License, Version 2.0 (the "License"); 15570 you may not use this file except in compliance with the License. 15571 You may obtain a copy of the License at 15572 15573 http://www.apache.org/licenses/LICENSE-2.0 15574 15575 Unless required by applicable law or agreed to in writing, software 15576 distributed under the License is distributed on an "AS IS" BASIS, 15577 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15578 See the License for the specific language governing permissions and 15579 limitations under the License. 
15580 */ 15581 15582 package policy 15583 15584 import "k8s.io/klog" 15585 15586 type policySingleNumaNode struct { 15587 numaNodes []int 15588 } 15589 15590 // NewPolicySingleNumaNode return a new policy interface 15591 func NewPolicySingleNumaNode(numaNodes []int) Policy <span class="cov8" title="1">{ 15592 return &policySingleNumaNode{numaNodes: numaNodes} 15593 }</span> 15594 15595 func (policy *policySingleNumaNode) canAdmitPodResult(hint *TopologyHint) bool <span class="cov8" title="1">{ 15596 return hint.Preferred 15597 }</span> 15598 15599 // Return hints that have valid bitmasks with exactly one bit set. 15600 func filterSingleNumaHints(allResourcesHints [][]TopologyHint) [][]TopologyHint <span class="cov8" title="1">{ 15601 var filteredResourcesHints [][]TopologyHint 15602 for _, oneResourceHints := range allResourcesHints </span><span class="cov8" title="1">{ 15603 var filtered []TopologyHint 15604 for _, hint := range oneResourceHints </span><span class="cov8" title="1">{ 15605 if hint.NUMANodeAffinity == nil && hint.Preferred </span><span class="cov0" title="0">{ 15606 filtered = append(filtered, hint) 15607 }</span> 15608 <span class="cov8" title="1">if hint.NUMANodeAffinity != nil && hint.NUMANodeAffinity.Count() == 1 && hint.Preferred </span><span class="cov8" title="1">{ 15609 filtered = append(filtered, hint) 15610 }</span> 15611 } 15612 <span class="cov8" title="1">filteredResourcesHints = append(filteredResourcesHints, filtered)</span> 15613 } 15614 <span class="cov8" title="1">return filteredResourcesHints</span> 15615 } 15616 15617 func (policy *policySingleNumaNode) Predicate(providersHints []map[string][]TopologyHint) (TopologyHint, bool) <span class="cov8" title="1">{ 15618 filteredHints := filterProvidersHints(providersHints) 15619 singleNumaHints := filterSingleNumaHints(filteredHints) 15620 bestHint := mergeFilteredHints(policy.numaNodes, singleNumaHints) 15621 klog.V(4).Infof("bestHint: %v\n", bestHint) 15622 admit := 
policy.canAdmitPodResult(&bestHint) 15623 return bestHint, admit 15624 }</span> 15625 </pre> 15626 15627 <pre class="file" id="file71" style="display: none">/* 15628 Copyright 2021 The Volcano Authors. 15629 15630 Licensed under the Apache License, Version 2.0 (the "License"); 15631 you may not use this file except in compliance with the License. 15632 You may obtain a copy of the License at 15633 15634 http://www.apache.org/licenses/LICENSE-2.0 15635 15636 Unless required by applicable law or agreed to in writing, software 15637 distributed under the License is distributed on an "AS IS" BASIS, 15638 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15639 See the License for the specific language governing permissions and 15640 limitations under the License. 15641 */ 15642 15643 package cpumanager 15644 15645 import ( 15646 "fmt" 15647 "sort" 15648 15649 "k8s.io/klog" 15650 "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/topology" 15651 "k8s.io/kubernetes/pkg/kubelet/cm/cpuset" 15652 ) 15653 15654 type cpuAccumulator struct { 15655 topo *topology.CPUTopology 15656 details topology.CPUDetails 15657 numCPUsNeeded int 15658 result cpuset.CPUSet 15659 } 15660 15661 func newCPUAccumulator(topo *topology.CPUTopology, availableCPUs cpuset.CPUSet, numCPUs int) *cpuAccumulator <span class="cov8" title="1">{ 15662 return &cpuAccumulator{ 15663 topo: topo, 15664 details: topo.CPUDetails.KeepOnly(availableCPUs), 15665 numCPUsNeeded: numCPUs, 15666 result: cpuset.NewCPUSet(), 15667 } 15668 }</span> 15669 15670 func (a *cpuAccumulator) take(cpus cpuset.CPUSet) <span class="cov8" title="1">{ 15671 a.result = a.result.Union(cpus) 15672 a.details = a.details.KeepOnly(a.details.CPUs().Difference(a.result)) 15673 a.numCPUsNeeded -= cpus.Size() 15674 }</span> 15675 15676 // isSocketFree Returns true if the supplied socket is fully available in `topoDetails`. 
15677 func (a *cpuAccumulator) isSocketFree(socketID int) bool <span class="cov8" title="1">{ 15678 return a.details.CPUsInSockets(socketID).Size() == a.topo.CPUsPerSocket() 15679 }</span> 15680 15681 // isCoreFree Returns true if the supplied core is fully available in `topoDetails`. 15682 func (a *cpuAccumulator) isCoreFree(coreID int) bool <span class="cov8" title="1">{ 15683 return a.details.CPUsInCores(coreID).Size() == a.topo.CPUsPerCore() 15684 }</span> 15685 15686 // freeSockets Returns free socket IDs as a slice sorted by: 15687 // - socket ID, ascending. 15688 func (a *cpuAccumulator) freeSockets() []int <span class="cov8" title="1">{ 15689 return a.details.Sockets().Filter(a.isSocketFree).ToSlice() 15690 }</span> 15691 15692 // freeCores Returns core IDs as a slice sorted by: 15693 // - the number of whole available cores on the socket, ascending 15694 // - socket ID, ascending 15695 // - core ID, ascending 15696 func (a *cpuAccumulator) freeCores() []int <span class="cov8" title="1">{ 15697 socketIDs := a.details.Sockets().ToSliceNoSort() 15698 sort.Slice(socketIDs, 15699 func(i, j int) bool </span><span class="cov0" title="0">{ 15700 iCores := a.details.CoresInSockets(socketIDs[i]).Filter(a.isCoreFree) 15701 jCores := a.details.CoresInSockets(socketIDs[j]).Filter(a.isCoreFree) 15702 return iCores.Size() < jCores.Size() || socketIDs[i] < socketIDs[j] 15703 }</span>) 15704 15705 <span class="cov8" title="1">coreIDs := []int{} 15706 for _, s := range socketIDs </span><span class="cov8" title="1">{ 15707 coreIDs = append(coreIDs, a.details.CoresInSockets(s).Filter(a.isCoreFree).ToSlice()...) 15708 }</span> 15709 <span class="cov8" title="1">return coreIDs</span> 15710 } 15711 15712 // freeCPUs Returns CPU IDs as a slice sorted by: 15713 // - socket affinity with result 15714 // - number of CPUs available on the same socket 15715 // - number of CPUs available on the same core 15716 // - socket ID. 15717 // - core ID. 
15718 func (a *cpuAccumulator) freeCPUs() []int <span class="cov8" title="1">{ 15719 result := []int{} 15720 cores := a.details.Cores().ToSlice() 15721 15722 sort.Slice( 15723 cores, 15724 func(i, j int) bool </span><span class="cov8" title="1">{ 15725 iCore := cores[i] 15726 jCore := cores[j] 15727 15728 iCPUs := a.topo.CPUDetails.CPUsInCores(iCore).ToSlice() 15729 jCPUs := a.topo.CPUDetails.CPUsInCores(jCore).ToSlice() 15730 15731 iSocket := a.topo.CPUDetails[iCPUs[0]].SocketID 15732 jSocket := a.topo.CPUDetails[jCPUs[0]].SocketID 15733 15734 // Compute the number of CPUs in the result reside on the same socket 15735 // as each core. 15736 iSocketColoScore := a.topo.CPUDetails.CPUsInSockets(iSocket).Intersection(a.result).Size() 15737 jSocketColoScore := a.topo.CPUDetails.CPUsInSockets(jSocket).Intersection(a.result).Size() 15738 15739 // Compute the number of available CPUs available on the same socket 15740 // as each core. 15741 iSocketFreeScore := a.details.CPUsInSockets(iSocket).Size() 15742 jSocketFreeScore := a.details.CPUsInSockets(jSocket).Size() 15743 15744 // Compute the number of available CPUs on each core. 15745 iCoreFreeScore := a.details.CPUsInCores(iCore).Size() 15746 jCoreFreeScore := a.details.CPUsInCores(jCore).Size() 15747 15748 return iSocketColoScore > jSocketColoScore || 15749 iSocketFreeScore < jSocketFreeScore || 15750 iCoreFreeScore < jCoreFreeScore || 15751 iSocket < jSocket || 15752 iCore < jCore 15753 }</span>) 15754 15755 // For each core, append sorted CPU IDs to result. 15756 <span class="cov8" title="1">for _, core := range cores </span><span class="cov8" title="1">{ 15757 result = append(result, a.details.CPUsInCores(core).ToSlice()...) 
15758 }</span> 15759 <span class="cov8" title="1">return result</span> 15760 } 15761 15762 func (a *cpuAccumulator) needs(n int) bool <span class="cov8" title="1">{ 15763 return a.numCPUsNeeded >= n 15764 }</span> 15765 15766 func (a *cpuAccumulator) isSatisfied() bool <span class="cov8" title="1">{ 15767 return a.numCPUsNeeded < 1 15768 }</span> 15769 15770 func (a *cpuAccumulator) isFailed() bool <span class="cov8" title="1">{ 15771 return a.numCPUsNeeded > a.details.CPUs().Size() 15772 }</span> 15773 15774 // takeByTopology return the assigned cpuset 15775 func takeByTopology(topo *topology.CPUTopology, availableCPUs cpuset.CPUSet, numCPUs int) (cpuset.CPUSet, error) <span class="cov8" title="1">{ 15776 acc := newCPUAccumulator(topo, availableCPUs, numCPUs) 15777 if acc.isSatisfied() </span><span class="cov8" title="1">{ 15778 return acc.result, nil 15779 }</span> 15780 <span class="cov8" title="1">if acc.isFailed() </span><span class="cov8" title="1">{ 15781 return cpuset.NewCPUSet(), fmt.Errorf("not enough cpus available to satisfy request") 15782 }</span> 15783 15784 // Algorithm: topology-aware best-fit 15785 // 1. Acquire whole sockets, if available and the container requires at 15786 // least a socket's-worth of CPUs. 15787 <span class="cov8" title="1">if acc.needs(acc.topo.CPUsPerSocket()) </span><span class="cov8" title="1">{ 15788 for _, s := range acc.freeSockets() </span><span class="cov8" title="1">{ 15789 klog.V(4).Infof("[cpumanager] takeByTopology: claiming socket [%d]", s) 15790 acc.take(acc.details.CPUsInSockets(s)) 15791 if acc.isSatisfied() </span><span class="cov8" title="1">{ 15792 return acc.result, nil 15793 }</span> 15794 <span class="cov8" title="1">if !acc.needs(acc.topo.CPUsPerSocket()) </span><span class="cov8" title="1">{ 15795 break</span> 15796 } 15797 } 15798 } 15799 15800 // 2. Acquire whole cores, if available and the container requires at least 15801 // a core's-worth of CPUs. 
15802 <span class="cov8" title="1">if acc.needs(acc.topo.CPUsPerCore()) </span><span class="cov8" title="1">{ 15803 for _, c := range acc.freeCores() </span><span class="cov8" title="1">{ 15804 klog.V(4).Infof("[cpumanager] takeByTopology: claiming core [%d]", c) 15805 acc.take(acc.details.CPUsInCores(c)) 15806 if acc.isSatisfied() </span><span class="cov8" title="1">{ 15807 return acc.result, nil 15808 }</span> 15809 <span class="cov8" title="1">if !acc.needs(acc.topo.CPUsPerCore()) </span><span class="cov0" title="0">{ 15810 break</span> 15811 } 15812 } 15813 } 15814 15815 // 3. Acquire single threads, preferring to fill partially-allocated cores 15816 // on the same sockets as the whole cores we have already taken in this 15817 // allocation. 15818 <span class="cov8" title="1">for _, c := range acc.freeCPUs() </span><span class="cov8" title="1">{ 15819 klog.V(4).Infof("[cpumanager] takeByTopology: claiming CPU [%d]", c) 15820 if acc.needs(1) </span><span class="cov8" title="1">{ 15821 acc.take(cpuset.NewCPUSet(c)) 15822 }</span> 15823 <span class="cov8" title="1">if acc.isSatisfied() </span><span class="cov8" title="1">{ 15824 return acc.result, nil 15825 }</span> 15826 } 15827 15828 <span class="cov0" title="0">return cpuset.NewCPUSet(), fmt.Errorf("failed to allocate cpus")</span> 15829 } 15830 </pre> 15831 15832 <pre class="file" id="file72" style="display: none">/* 15833 Copyright 2021 The Volcano Authors. 15834 15835 Licensed under the Apache License, Version 2.0 (the "License"); 15836 you may not use this file except in compliance with the License. 15837 You may obtain a copy of the License at 15838 15839 http://www.apache.org/licenses/LICENSE-2.0 15840 15841 Unless required by applicable law or agreed to in writing, software 15842 distributed under the License is distributed on an "AS IS" BASIS, 15843 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
15844 See the License for the specific language governing permissions and 15845 limitations under the License. 15846 */ 15847 15848 package cpumanager 15849 15850 import ( 15851 "math" 15852 15853 v1 "k8s.io/api/core/v1" 15854 "k8s.io/klog" 15855 "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/topology" 15856 "k8s.io/kubernetes/pkg/kubelet/cm/cpuset" 15857 "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask" 15858 15859 "volcano.sh/volcano/pkg/scheduler/api" 15860 "volcano.sh/volcano/pkg/scheduler/plugins/numaaware/policy" 15861 ) 15862 15863 type cpuMng struct { 15864 } 15865 15866 // NewProvider return a new provider 15867 func NewProvider() policy.HintProvider <span class="cov8" title="1">{ 15868 return &cpuMng{} 15869 }</span> 15870 15871 // Name return the cpu manager name 15872 func (mng *cpuMng) Name() string <span class="cov0" title="0">{ 15873 return "cpuMng" 15874 }</span> 15875 15876 // guaranteedCPUs return the intger num of request cpu 15877 func guaranteedCPUs(container *v1.Container) int <span class="cov8" title="1">{ 15878 cpuQuantity := container.Resources.Requests[v1.ResourceCPU] 15879 if cpuQuantity.Value()*1000 != cpuQuantity.MilliValue() </span><span class="cov0" title="0">{ 15880 return 0 15881 }</span> 15882 15883 <span class="cov8" title="1">return int(cpuQuantity.Value())</span> 15884 } 15885 15886 // generateCPUTopologyHints return the numa topology hints based on 15887 // - availableCPUs 15888 func generateCPUTopologyHints(availableCPUs cpuset.CPUSet, CPUDetails topology.CPUDetails, request int) []policy.TopologyHint <span class="cov8" title="1">{ 15889 minAffinitySize := CPUDetails.NUMANodes().Size() 15890 hints := []policy.TopologyHint{} 15891 bitmask.IterateBitMasks(CPUDetails.NUMANodes().ToSlice(), func(mask bitmask.BitMask) </span><span class="cov8" title="1">{ 15892 // First, update minAffinitySize for the current request size. 
15893 cpusInMask := CPUDetails.CPUsInNUMANodes(mask.GetBits()...).Size() 15894 if cpusInMask >= request && mask.Count() < minAffinitySize </span><span class="cov8" title="1">{ 15895 minAffinitySize = mask.Count() 15896 }</span> 15897 15898 // Then check to see if we have enough CPUs available on the current 15899 // numa node bitmask to satisfy the CPU request. 15900 <span class="cov8" title="1">numMatching := 0 15901 // Finally, check to see if enough available CPUs remain on the current 15902 // NUMA node combination to satisfy the CPU request. 15903 for _, c := range availableCPUs.ToSlice() </span><span class="cov8" title="1">{ 15904 if mask.IsSet(CPUDetails[c].NUMANodeID) </span><span class="cov8" title="1">{ 15905 numMatching++ 15906 }</span> 15907 } 15908 15909 // If they don't, then move onto the next combination. 15910 <span class="cov8" title="1">if numMatching < request </span><span class="cov8" title="1">{ 15911 return 15912 }</span> 15913 15914 // Otherwise, create a new hint from the numa node bitmask and add it to the 15915 // list of hints. We set all hint preferences to 'false' on the first 15916 // pass through. 15917 <span class="cov8" title="1">hints = append(hints, policy.TopologyHint{ 15918 NUMANodeAffinity: mask, 15919 Preferred: false, 15920 })</span> 15921 }) 15922 15923 // Loop back through all hints and update the 'Preferred' field based on 15924 // counting the number of bits sets in the affinity mask and comparing it 15925 // to the minAffinitySize. Only those with an equal number of bits set (and 15926 // with a minimal set of numa nodes) will be considered preferred. 
15927 <span class="cov8" title="1">for i := range hints </span><span class="cov8" title="1">{ 15928 if hints[i].NUMANodeAffinity.Count() == minAffinitySize </span><span class="cov8" title="1">{ 15929 hints[i].Preferred = true 15930 }</span> 15931 } 15932 15933 <span class="cov8" title="1">return hints</span> 15934 } 15935 15936 func (mng *cpuMng) GetTopologyHints(container *v1.Container, 15937 topoInfo *api.NumatopoInfo, resNumaSets api.ResNumaSets) map[string][]policy.TopologyHint <span class="cov8" title="1">{ 15938 if _, ok := container.Resources.Requests[v1.ResourceCPU]; !ok </span><span class="cov0" title="0">{ 15939 klog.Warningf("container %s has no cpu request", container.Name) 15940 return nil 15941 }</span> 15942 15943 <span class="cov8" title="1">requestNum := guaranteedCPUs(container) 15944 if requestNum == 0 </span><span class="cov0" title="0">{ 15945 klog.Warningf(" the cpu request isn't integer in container %s", container.Name) 15946 return nil 15947 }</span> 15948 15949 <span class="cov8" title="1">cputopo := &topology.CPUTopology{ 15950 NumCPUs: topoInfo.CPUDetail.CPUs().Size(), 15951 NumCores: topoInfo.CPUDetail.Cores().Size() * topoInfo.CPUDetail.Sockets().Size(), 15952 NumSockets: topoInfo.CPUDetail.Sockets().Size(), 15953 CPUDetails: topoInfo.CPUDetail, 15954 } 15955 15956 reserved := cpuset.NewCPUSet() 15957 reservedCPUs, ok := topoInfo.ResReserved[v1.ResourceCPU] 15958 if ok </span><span class="cov0" title="0">{ 15959 // Take the ceiling of the reservation, since fractional CPUs cannot be 15960 // exclusively allocated. 
15961 reservedCPUsFloat := float64(reservedCPUs.MilliValue()) / 1000 15962 numReservedCPUs := int(math.Ceil(reservedCPUsFloat)) 15963 reserved, _ = takeByTopology(cputopo, cputopo.CPUDetails.CPUs(), numReservedCPUs) 15964 klog.V(4).Infof("[cpumanager] reserve cpuset :%v", reserved) 15965 }</span> 15966 15967 <span class="cov8" title="1">availableCPUSet, ok := resNumaSets[string(v1.ResourceCPU)] 15968 if !ok </span><span class="cov0" title="0">{ 15969 klog.Warningf("no cpu resource") 15970 return nil 15971 }</span> 15972 15973 <span class="cov8" title="1">availableCPUSet = availableCPUSet.Difference(reserved) 15974 klog.V(4).Infof("requested: %d, availableCPUSet: %v", requestNum, availableCPUSet) 15975 return map[string][]policy.TopologyHint{ 15976 string(v1.ResourceCPU): generateCPUTopologyHints(availableCPUSet, topoInfo.CPUDetail, requestNum), 15977 }</span> 15978 } 15979 15980 func (mng *cpuMng) Allocate(container *v1.Container, bestHit *policy.TopologyHint, 15981 topoInfo *api.NumatopoInfo, resNumaSets api.ResNumaSets) map[string]cpuset.CPUSet <span class="cov8" title="1">{ 15982 cputopo := &topology.CPUTopology{ 15983 NumCPUs: topoInfo.CPUDetail.CPUs().Size(), 15984 NumCores: topoInfo.CPUDetail.Cores().Size() * topoInfo.CPUDetail.Sockets().Size(), 15985 NumSockets: topoInfo.CPUDetail.Sockets().Size(), 15986 CPUDetails: topoInfo.CPUDetail, 15987 } 15988 15989 reserved := cpuset.NewCPUSet() 15990 reservedCPUs, ok := topoInfo.ResReserved[v1.ResourceCPU] 15991 if ok </span><span class="cov0" title="0">{ 15992 // Take the ceiling of the reservation, since fractional CPUs cannot be 15993 // exclusively allocated. 
15994 reservedCPUsFloat := float64(reservedCPUs.MilliValue()) / 1000 15995 numReservedCPUs := int(math.Ceil(reservedCPUsFloat)) 15996 reserved, _ = takeByTopology(cputopo, cputopo.CPUDetails.CPUs(), numReservedCPUs) 15997 klog.V(3).Infof("[cpumanager] reserve cpuset :%v", reserved) 15998 }</span> 15999 16000 <span class="cov8" title="1">requestNum := guaranteedCPUs(container) 16001 availableCPUSet := resNumaSets[string(v1.ResourceCPU)] 16002 availableCPUSet = availableCPUSet.Difference(reserved) 16003 16004 klog.V(4).Infof("alignedCPUs: %v requestNum: %v bestHit %v", availableCPUSet, requestNum, bestHit) 16005 16006 result := cpuset.NewCPUSet() 16007 if bestHit.NUMANodeAffinity != nil </span><span class="cov8" title="1">{ 16008 alignedCPUs := cpuset.NewCPUSet() 16009 for _, numaNodeID := range bestHit.NUMANodeAffinity.GetBits() </span><span class="cov8" title="1">{ 16010 alignedCPUs = alignedCPUs.Union(availableCPUSet.Intersection(cputopo.CPUDetails.CPUsInNUMANodes(numaNodeID))) 16011 }</span> 16012 16013 <span class="cov8" title="1">numAlignedToAlloc := alignedCPUs.Size() 16014 if requestNum < numAlignedToAlloc </span><span class="cov8" title="1">{ 16015 numAlignedToAlloc = requestNum 16016 }</span> 16017 16018 <span class="cov8" title="1">alignedCPUs, err := takeByTopology(cputopo, alignedCPUs, numAlignedToAlloc) 16019 if err != nil </span><span class="cov0" title="0">{ 16020 return map[string]cpuset.CPUSet{ 16021 string(v1.ResourceCPU): cpuset.NewCPUSet(), 16022 } 16023 }</span> 16024 16025 <span class="cov8" title="1">result = result.Union(alignedCPUs)</span> 16026 } 16027 16028 // Get any remaining CPUs from what's leftover after attempting to grab aligned ones. 
16029 <span class="cov8" title="1">remainingCPUs, err := takeByTopology(cputopo, availableCPUSet.Difference(result), requestNum-result.Size()) 16030 if err != nil </span><span class="cov8" title="1">{ 16031 return map[string]cpuset.CPUSet{ 16032 string(v1.ResourceCPU): cpuset.NewCPUSet(), 16033 } 16034 }</span> 16035 16036 <span class="cov8" title="1">result = result.Union(remainingCPUs) 16037 16038 return map[string]cpuset.CPUSet{ 16039 string(v1.ResourceCPU): result, 16040 }</span> 16041 } 16042 </pre> 16043 16044 <pre class="file" id="file73" style="display: none">/* 16045 Copyright 2020 The Volcano Authors. 16046 16047 Licensed under the Apache License, Version 2.0 (the "License"); 16048 you may not use this file except in compliance with the License. 16049 You may obtain a copy of the License at 16050 16051 http://www.apache.org/licenses/LICENSE-2.0 16052 16053 Unless required by applicable law or agreed to in writing, software 16054 distributed under the License is distributed on an "AS IS" BASIS, 16055 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16056 See the License for the specific language governing permissions and 16057 limitations under the License. 
16058 */ 16059 16060 package predicates 16061 16062 import ( 16063 "fmt" 16064 "sync" 16065 16066 v1 "k8s.io/api/core/v1" 16067 "k8s.io/klog" 16068 16069 batch "volcano.sh/apis/pkg/apis/batch/v1alpha1" 16070 ) 16071 16072 type predicateCache struct { 16073 sync.RWMutex 16074 cache map[string]map[string]bool //key_1: nodename key_2:pod uid 16075 } 16076 16077 // predicateCacheNew return cache map 16078 func predicateCacheNew() *predicateCache <span class="cov8" title="1">{ 16079 return &predicateCache{ 16080 cache: make(map[string]map[string]bool), 16081 } 16082 }</span> 16083 16084 // getPodTemplateUID return pod template key 16085 func getPodTemplateUID(pod *v1.Pod) string <span class="cov0" title="0">{ 16086 uid, found := pod.Annotations[batch.PodTemplateKey] 16087 if !found </span><span class="cov0" title="0">{ 16088 return "" 16089 }</span> 16090 16091 <span class="cov0" title="0">return uid</span> 16092 } 16093 16094 // PredicateWithCache: check the predicate result existed in cache 16095 func (pc *predicateCache) PredicateWithCache(nodeName string, pod *v1.Pod) (bool, error) <span class="cov0" title="0">{ 16096 podTemplateUID := getPodTemplateUID(pod) 16097 if podTemplateUID == "" </span><span class="cov0" title="0">{ 16098 return false, fmt.Errorf("no anonation of volcano.sh/template-uid in pod %s", pod.Name) 16099 }</span> 16100 16101 <span class="cov0" title="0">pc.RLock() 16102 defer pc.RUnlock() 16103 if nodeCache, exist := pc.cache[nodeName]; exist </span><span class="cov0" title="0">{ 16104 if result, exist := nodeCache[podTemplateUID]; exist </span><span class="cov0" title="0">{ 16105 klog.V(4).Infof("Predicate node %s and pod %s result %v", nodeName, pod.Name, result) 16106 return result, nil 16107 }</span> 16108 } 16109 16110 <span class="cov0" title="0">return false, fmt.Errorf("no information of node %s and pod %s in predicate cache", nodeName, pod.Name)</span> 16111 } 16112 16113 // UpdateCache update cache data 16114 func (pc *predicateCache) 
UpdateCache(nodeName string, pod *v1.Pod, fit bool) <span class="cov0" title="0">{ 16115 podTemplateUID := getPodTemplateUID(pod) 16116 if podTemplateUID == "" </span><span class="cov0" title="0">{ 16117 klog.V(3).Infof("Don't find pod %s template uid", pod.Name) 16118 return 16119 }</span> 16120 16121 <span class="cov0" title="0">pc.Lock() 16122 defer pc.Unlock() 16123 16124 if _, exist := pc.cache[nodeName]; !exist </span><span class="cov0" title="0">{ 16125 podCache := make(map[string]bool) 16126 podCache[podTemplateUID] = fit 16127 pc.cache[nodeName] = podCache 16128 }</span> else<span class="cov0" title="0"> { 16129 pc.cache[nodeName][podTemplateUID] = fit 16130 }</span> 16131 } 16132 </pre> 16133 16134 <pre class="file" id="file74" style="display: none">/* 16135 Copyright 2020 The Kubernetes Authors. 16136 16137 Licensed under the Apache License, Version 2.0 (the "License"); 16138 you may not use this file except in compliance with the License. 16139 You may obtain a copy of the License at 16140 16141 http://www.apache.org/licenses/LICENSE-2.0 16142 16143 Unless required by applicable law or agreed to in writing, software 16144 distributed under the License is distributed on an "AS IS" BASIS, 16145 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16146 See the License for the specific language governing permissions and 16147 limitations under the License. 16148 */ 16149 16150 package predicates 16151 16152 import ( 16153 "fmt" 16154 16155 v1 "k8s.io/api/core/v1" 16156 16157 "volcano.sh/volcano/pkg/scheduler/api" 16158 ) 16159 16160 // checkNodeGPUSharingPredicate checks if a gpu sharing pod can be scheduled on a node. 
16161 func checkNodeGPUSharingPredicate(pod *v1.Pod, nodeInfo *api.NodeInfo) (bool, error) <span class="cov0" title="0">{ 16162 // no gpu sharing request 16163 if api.GetGPUResourceOfPod(pod) <= 0 </span><span class="cov0" title="0">{ 16164 return true, nil 16165 }</span> 16166 16167 <span class="cov0" title="0">id := predicateGPU(pod, nodeInfo) 16168 if id < 0 </span><span class="cov0" title="0">{ 16169 return false, fmt.Errorf("no enough gpu memory on single device of node %s", nodeInfo.Name) 16170 }</span> 16171 <span class="cov0" title="0">return true, nil</span> 16172 } 16173 16174 // predicateGPU returns the available GPU ID 16175 func predicateGPU(pod *v1.Pod, node *api.NodeInfo) int <span class="cov0" title="0">{ 16176 gpuRequest := api.GetGPUResourceOfPod(pod) 16177 allocatableGPUs := node.GetDevicesIdleGPUMemory() 16178 16179 for devID := 0; devID < len(allocatableGPUs); devID++ </span><span class="cov0" title="0">{ 16180 availableGPU, ok := allocatableGPUs[devID] 16181 if ok </span><span class="cov0" title="0">{ 16182 if availableGPU >= gpuRequest </span><span class="cov0" title="0">{ 16183 return devID 16184 }</span> 16185 } 16186 } 16187 16188 <span class="cov0" title="0">return -1</span> 16189 } 16190 </pre> 16191 16192 <pre class="file" id="file75" style="display: none">/* 16193 Copyright 2018 The Kubernetes Authors. 16194 16195 Licensed under the Apache License, Version 2.0 (the "License"); 16196 you may not use this file except in compliance with the License. 16197 You may obtain a copy of the License at 16198 16199 http://www.apache.org/licenses/LICENSE-2.0 16200 16201 Unless required by applicable law or agreed to in writing, software 16202 distributed under the License is distributed on an "AS IS" BASIS, 16203 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16204 See the License for the specific language governing permissions and 16205 limitations under the License. 
16206 */ 16207 16208 package predicates 16209 16210 import ( 16211 "context" 16212 "fmt" 16213 "strings" 16214 16215 v1 "k8s.io/api/core/v1" 16216 16217 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 16218 "k8s.io/apimachinery/pkg/types" 16219 "k8s.io/klog" 16220 "k8s.io/kubernetes/pkg/scheduler/apis/config" 16221 "k8s.io/kubernetes/pkg/scheduler/framework/plugins/interpodaffinity" 16222 "k8s.io/kubernetes/pkg/scheduler/framework/plugins/nodeaffinity" 16223 "k8s.io/kubernetes/pkg/scheduler/framework/plugins/nodeports" 16224 "k8s.io/kubernetes/pkg/scheduler/framework/plugins/nodeunschedulable" 16225 "k8s.io/kubernetes/pkg/scheduler/framework/plugins/tainttoleration" 16226 k8sframework "k8s.io/kubernetes/pkg/scheduler/framework/v1alpha1" 16227 16228 "volcano.sh/volcano/pkg/scheduler/api" 16229 "volcano.sh/volcano/pkg/scheduler/framework" 16230 "volcano.sh/volcano/pkg/scheduler/plugins/util" 16231 "volcano.sh/volcano/pkg/scheduler/plugins/util/k8s" 16232 ) 16233 16234 const ( 16235 // PluginName indicates name of volcano scheduler plugin. 16236 PluginName = "predicates" 16237 16238 // GPUSharingPredicate is the key for enabling GPU Sharing Predicate in YAML 16239 GPUSharingPredicate = "predicate.GPUSharingEnable" 16240 16241 // CachePredicate control cache predicate feature 16242 CachePredicate = "predicate.CacheEnable" 16243 16244 // ProportionalPredicate is the key for enabling Proportional Predicate in YAML 16245 ProportionalPredicate = "predicate.ProportionalEnable" 16246 // ProportionalResource is the key for additional resource key name 16247 ProportionalResource = "predicate.resources" 16248 // ProportionalResourcesPrefix is the key prefix for additional resource key name 16249 ProportionalResourcesPrefix = ProportionalResource + "." 
16250 ) 16251 16252 type predicatesPlugin struct { 16253 // Arguments given for the plugin 16254 pluginArguments framework.Arguments 16255 } 16256 16257 // New return predicate plugin 16258 func New(arguments framework.Arguments) framework.Plugin <span class="cov8" title="1">{ 16259 return &predicatesPlugin{pluginArguments: arguments} 16260 }</span> 16261 16262 func (pp *predicatesPlugin) Name() string <span class="cov8" title="1">{ 16263 return PluginName 16264 }</span> 16265 16266 type baseResource struct { 16267 CPU float64 16268 Memory float64 16269 } 16270 16271 type predicateEnable struct { 16272 gpuSharingEnable bool 16273 cacheEnable bool 16274 proportionalEnable bool 16275 proportional map[v1.ResourceName]baseResource 16276 } 16277 16278 func enablePredicate(args framework.Arguments) predicateEnable <span class="cov8" title="1">{ 16279 /* 16280 User Should give predicatesEnable in this format(predicate.GPUSharingEnable). 16281 Currently supported only GPUSharing predicate checks. 16282 16283 actions: "reclaim, allocate, backfill, preempt" 16284 tiers: 16285 - plugins: 16286 - name: priority 16287 - name: gang 16288 - name: conformance 16289 - plugins: 16290 - name: drf 16291 - name: predicates 16292 arguments: 16293 predicate.GPUSharingEnable: true 16294 predicate.CacheEnable: true 16295 predicate.ProportionalEnable: true 16296 predicate.resources: nvidia.com/gpu 16297 predicate.resources.nvidia.com/gpu.cpu: 4 16298 predicate.resources.nvidia.com/gpu.memory: 8 16299 - name: proportion 16300 - name: nodeorder 16301 */ 16302 16303 predicate := predicateEnable{ 16304 gpuSharingEnable: false, 16305 cacheEnable: false, 16306 proportionalEnable: false, 16307 } 16308 16309 // Checks whether predicate.GPUSharingEnable is provided or not, if given, modifies the value in predicateEnable struct. 
16310 args.GetBool(&predicate.gpuSharingEnable, GPUSharingPredicate) 16311 args.GetBool(&predicate.cacheEnable, CachePredicate) 16312 // Checks whether predicate.ProportionalEnable is provided or not, if given, modifies the value in predicateEnable struct. 16313 args.GetBool(&predicate.proportionalEnable, ProportionalPredicate) 16314 resourcesProportional := make(map[v1.ResourceName]baseResource) 16315 resourcesStr := args[ProportionalResource] 16316 resources := strings.Split(resourcesStr, ",") 16317 for _, resource := range resources </span><span class="cov8" title="1">{ 16318 resource = strings.TrimSpace(resource) 16319 if resource == "" </span><span class="cov8" title="1">{ 16320 continue</span> 16321 } 16322 // proportional.resources.[ResourceName] 16323 <span class="cov0" title="0">cpuResourceKey := ProportionalResourcesPrefix + resource + ".cpu" 16324 cpuResourceRate := 1.0 16325 args.GetFloat64(&cpuResourceRate, cpuResourceKey) 16326 if cpuResourceRate < 0 </span><span class="cov0" title="0">{ 16327 cpuResourceRate = 1.0 16328 }</span> 16329 <span class="cov0" title="0">memoryResourceKey := ProportionalResourcesPrefix + resource + ".memory" 16330 memoryResourceRate := 1.0 16331 args.GetFloat64(&memoryResourceRate, memoryResourceKey) 16332 if memoryResourceRate < 0 </span><span class="cov0" title="0">{ 16333 memoryResourceRate = 1.0 16334 }</span> 16335 <span class="cov0" title="0">r := baseResource{ 16336 CPU: cpuResourceRate, 16337 Memory: memoryResourceRate, 16338 } 16339 resourcesProportional[v1.ResourceName(resource)] = r</span> 16340 } 16341 <span class="cov8" title="1">predicate.proportional = resourcesProportional 16342 16343 return predicate</span> 16344 } 16345 16346 func (pp *predicatesPlugin) OnSessionOpen(ssn *framework.Session) <span class="cov8" title="1">{ 16347 pl := util.NewPodListerFromNode(ssn) 16348 nodeMap := util.GenerateNodeMapAndSlice(ssn.Nodes) 16349 16350 pCache := predicateCacheNew() 16351 predicate := 
enablePredicate(pp.pluginArguments) 16352 16353 kubeClient := ssn.KubeClient() 16354 // Register event handlers to update task info in PodLister & nodeMap 16355 ssn.AddEventHandler(&framework.EventHandler{ 16356 AllocateFunc: func(event *framework.Event) </span><span class="cov8" title="1">{ 16357 pod := pl.UpdateTask(event.Task, event.Task.NodeName) 16358 16359 nodeName := event.Task.NodeName 16360 node, found := nodeMap[nodeName] 16361 if !found </span><span class="cov0" title="0">{ 16362 klog.Errorf("predicates, update pod %s/%s allocate to NOT EXIST node [%s]", pod.Namespace, pod.Name, nodeName) 16363 return 16364 }</span> 16365 16366 <span class="cov8" title="1">if predicate.gpuSharingEnable && api.GetGPUResourceOfPod(pod) > 0 </span><span class="cov0" title="0">{ 16367 nodeInfo, ok := ssn.Nodes[nodeName] 16368 if !ok </span><span class="cov0" title="0">{ 16369 klog.Errorf("Failed to get node %s info from cache", nodeName) 16370 return 16371 }</span> 16372 16373 <span class="cov0" title="0">id := predicateGPU(pod, nodeInfo) 16374 if id < 0 </span><span class="cov0" title="0">{ 16375 klog.Errorf("The node %s can't place the pod %s in ns %s", pod.Spec.NodeName, pod.Name, pod.Namespace) 16376 return 16377 }</span> 16378 <span class="cov0" title="0">patch := api.AddGPUIndexPatch(id) 16379 pod, err := kubeClient.CoreV1().Pods(pod.Namespace).Patch(context.TODO(), pod.Name, types.JSONPatchType, []byte(patch), metav1.PatchOptions{}) 16380 if err != nil </span><span class="cov0" title="0">{ 16381 klog.Errorf("Patch pod %s failed with patch %s: %v", pod.Name, patch, err) 16382 return 16383 }</span> 16384 <span class="cov0" title="0">dev, ok := nodeInfo.GPUDevices[id] 16385 if !ok </span><span class="cov0" title="0">{ 16386 klog.Errorf("Failed to get GPU %d from node %s", id, nodeName) 16387 return 16388 }</span> 16389 <span class="cov0" title="0">dev.PodMap[string(pod.UID)] = pod 16390 klog.V(4).Infof("predicates with gpu sharing, update pod %s/%s allocate to node 
[%s]", pod.Namespace, pod.Name, nodeName)</span> 16391 } 16392 16393 <span class="cov8" title="1">node.AddPod(pod) 16394 klog.V(4).Infof("predicates, update pod %s/%s allocate to node [%s]", pod.Namespace, pod.Name, nodeName)</span> 16395 }, 16396 DeallocateFunc: func(event *framework.Event) <span class="cov8" title="1">{ 16397 pod := pl.UpdateTask(event.Task, "") 16398 nodeName := event.Task.NodeName 16399 node, found := nodeMap[nodeName] 16400 if !found </span><span class="cov0" title="0">{ 16401 klog.Errorf("predicates, update pod %s/%s allocate from NOT EXIST node [%s]", pod.Namespace, pod.Name, nodeName) 16402 return 16403 }</span> 16404 16405 <span class="cov8" title="1">if predicate.gpuSharingEnable && api.GetGPUResourceOfPod(pod) > 0 </span><span class="cov0" title="0">{ 16406 // deallocate pod gpu id 16407 id := api.GetGPUIndex(pod) 16408 patch := api.RemoveGPUIndexPatch() 16409 _, err := kubeClient.CoreV1().Pods(pod.Namespace).Patch(context.TODO(), pod.Name, types.JSONPatchType, []byte(patch), metav1.PatchOptions{}) 16410 if err != nil </span><span class="cov0" title="0">{ 16411 klog.Errorf("Patch pod %s failed with patch %s: %v", pod.Name, patch, err) 16412 return 16413 }</span> 16414 16415 <span class="cov0" title="0">nodeInfo, ok := ssn.Nodes[nodeName] 16416 if !ok </span><span class="cov0" title="0">{ 16417 klog.Errorf("Failed to get node %s info from cache", nodeName) 16418 return 16419 }</span> 16420 <span class="cov0" title="0">if dev, ok := nodeInfo.GPUDevices[id]; ok </span><span class="cov0" title="0">{ 16421 delete(dev.PodMap, string(pod.UID)) 16422 }</span> 16423 16424 <span class="cov0" title="0">klog.V(4).Infof("predicates with gpu sharing, update pod %s/%s deallocate from node [%s]", pod.Namespace, pod.Name, nodeName)</span> 16425 } 16426 16427 <span class="cov8" title="1">err := node.RemovePod(pod) 16428 if err != nil </span><span class="cov0" title="0">{ 16429 klog.Errorf("predicates, remove pod %s/%s from node [%s] error: %v", 
pod.Namespace, pod.Name, nodeName, err) 16430 return 16431 }</span> 16432 <span class="cov8" title="1">klog.V(4).Infof("predicates, update pod %s/%s deallocate from node [%s]", pod.Namespace, pod.Name, nodeName)</span> 16433 }, 16434 }) 16435 16436 // Initialize k8s plugins 16437 // TODO: Add more predicates, k8s.io/kubernetes/pkg/scheduler/framework/plugins/legacy_registry.go 16438 <span class="cov8" title="1">handle := k8s.NewFrameworkHandle(nodeMap, ssn.KubeClient(), ssn.InformerFactory()) 16439 // 1. NodeUnschedulable 16440 plugin, _ := nodeunschedulable.New(nil, handle) 16441 nodeUnscheduleFilter := plugin.(*nodeunschedulable.NodeUnschedulable) 16442 // 2. NodeAffinity 16443 plugin, _ = nodeaffinity.New(nil, handle) 16444 nodeAffinityFilter := plugin.(*nodeaffinity.NodeAffinity) 16445 // 3. NodePorts 16446 plugin, _ = nodeports.New(nil, handle) 16447 nodePortFilter := plugin.(*nodeports.NodePorts) 16448 // 4. TaintToleration 16449 plugin, _ = tainttoleration.New(nil, handle) 16450 tolerationFilter := plugin.(*tainttoleration.TaintToleration) 16451 // 5. 
InterPodAffinity 16452 plArgs := &config.InterPodAffinityArgs{} 16453 plugin, _ = interpodaffinity.New(plArgs, handle) 16454 podAffinityFilter := plugin.(*interpodaffinity.InterPodAffinity) 16455 16456 ssn.AddPredicateFn(pp.Name(), func(task *api.TaskInfo, node *api.NodeInfo) error </span><span class="cov8" title="1">{ 16457 nodeInfo, found := nodeMap[node.Name] 16458 if !found </span><span class="cov0" title="0">{ 16459 return fmt.Errorf("failed to predicates, node info for %s not found", node.Name) 16460 }</span> 16461 16462 <span class="cov8" title="1">if node.Allocatable.MaxTaskNum <= len(nodeInfo.Pods) </span><span class="cov0" title="0">{ 16463 klog.V(4).Infof("NodePodNumber predicates Task <%s/%s> on Node <%s> failed", 16464 task.Namespace, task.Name, node.Name) 16465 return api.NewFitError(task, node, api.NodePodNumberExceeded) 16466 }</span> 16467 16468 <span class="cov8" title="1">state := k8sframework.NewCycleState() 16469 predicateByStablefilter := func(pod *v1.Pod, nodeInfo *k8sframework.NodeInfo) (bool, error) </span><span class="cov8" title="1">{ 16470 // CheckNodeUnschedulable 16471 status := nodeUnscheduleFilter.Filter(context.TODO(), state, task.Pod, nodeInfo) 16472 if !status.IsSuccess() </span><span class="cov0" title="0">{ 16473 return false, fmt.Errorf("plugin %s predicates failed %s", nodeunschedulable.Name, status.Message()) 16474 }</span> 16475 16476 // Check NodeAffinity 16477 <span class="cov8" title="1">status = nodeAffinityFilter.Filter(context.TODO(), state, task.Pod, nodeInfo) 16478 if !status.IsSuccess() </span><span class="cov8" title="1">{ 16479 return false, fmt.Errorf("plugin %s predicates failed %s", nodeaffinity.Name, status.Message()) 16480 }</span> 16481 16482 // PodToleratesNodeTaints: TaintToleration 16483 <span class="cov8" title="1">status = tolerationFilter.Filter(context.TODO(), state, task.Pod, nodeInfo) 16484 if !status.IsSuccess() </span><span class="cov0" title="0">{ 16485 return false, fmt.Errorf("plugin %s 
predicates failed %s", tainttoleration.Name, status.Message()) 16486 }</span> 16487 16488 <span class="cov8" title="1">return true, nil</span> 16489 } 16490 16491 // Check PredicateWithCache 16492 <span class="cov8" title="1">{ 16493 var err error 16494 var fit bool 16495 if predicate.cacheEnable </span><span class="cov0" title="0">{ 16496 fit, err = pCache.PredicateWithCache(node.Name, task.Pod) 16497 if err != nil </span><span class="cov0" title="0">{ 16498 fit, err = predicateByStablefilter(task.Pod, nodeInfo) 16499 pCache.UpdateCache(node.Name, task.Pod, fit) 16500 }</span> else<span class="cov0" title="0"> { 16501 if !fit </span><span class="cov0" title="0">{ 16502 err = fmt.Errorf("plugin equivalence cache predicates failed") 16503 }</span> 16504 } 16505 } else<span class="cov8" title="1"> { 16506 fit, err = predicateByStablefilter(task.Pod, nodeInfo) 16507 }</span> 16508 16509 <span class="cov8" title="1">if !fit </span><span class="cov8" title="1">{ 16510 return err 16511 }</span> 16512 } 16513 16514 // Check NodePorts 16515 <span class="cov8" title="1">nodePortFilter.PreFilter(context.TODO(), state, task.Pod) 16516 status := nodePortFilter.Filter(context.TODO(), state, nil, nodeInfo) 16517 if !status.IsSuccess() </span><span class="cov0" title="0">{ 16518 return fmt.Errorf("plugin %s predicates failed %s", nodeaffinity.Name, status.Message()) 16519 }</span> 16520 16521 // InterPodAffinity Predicate 16522 <span class="cov8" title="1">status = podAffinityFilter.PreFilter(context.TODO(), state, task.Pod) 16523 if !status.IsSuccess() </span><span class="cov0" title="0">{ 16524 return fmt.Errorf("plugin %s pre-predicates failed %s", interpodaffinity.Name, status.Message()) 16525 }</span> 16526 16527 <span class="cov8" title="1">status = podAffinityFilter.Filter(context.TODO(), state, task.Pod, nodeInfo) 16528 if !status.IsSuccess() </span><span class="cov0" title="0">{ 16529 return fmt.Errorf("plugin %s predicates failed %s", interpodaffinity.Name, 
status.Message()) 16530 }</span> 16531 16532 <span class="cov8" title="1">if predicate.gpuSharingEnable </span><span class="cov0" title="0">{ 16533 // CheckGPUSharingPredicate 16534 fit, err := checkNodeGPUSharingPredicate(task.Pod, node) 16535 if err != nil </span><span class="cov0" title="0">{ 16536 return err 16537 }</span> 16538 16539 <span class="cov0" title="0">klog.V(4).Infof("checkNodeGPUSharingPredicate predicates Task <%s/%s> on Node <%s>: fit %v", 16540 task.Namespace, task.Name, node.Name, fit)</span> 16541 } 16542 <span class="cov8" title="1">if predicate.proportionalEnable </span><span class="cov0" title="0">{ 16543 // Check ProportionalPredicate 16544 fit, err := checkNodeResourceIsProportional(task, node, predicate.proportional) 16545 if err != nil </span><span class="cov0" title="0">{ 16546 return err 16547 }</span> 16548 <span class="cov0" title="0">klog.V(4).Infof("checkNodeResourceIsProportional predicates Task <%s/%s> on Node <%s>: fit %v", 16549 task.Namespace, task.Name, node.Name, fit)</span> 16550 } 16551 <span class="cov8" title="1">return nil</span> 16552 }) 16553 } 16554 16555 func (pp *predicatesPlugin) OnSessionClose(ssn *framework.Session) {<span class="cov8" title="1">}</span> 16556 </pre> 16557 16558 <pre class="file" id="file76" style="display: none">/* 16559 Copyright 2018 The Kubernetes Authors. 16560 16561 Licensed under the Apache License, Version 2.0 (the "License"); 16562 you may not use this file except in compliance with the License. 16563 You may obtain a copy of the License at 16564 16565 http://www.apache.org/licenses/LICENSE-2.0 16566 16567 Unless required by applicable law or agreed to in writing, software 16568 distributed under the License is distributed on an "AS IS" BASIS, 16569 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16570 See the License for the specific language governing permissions and 16571 limitations under the License. 
16572 */ 16573 16574 package predicates 16575 16576 import ( 16577 "fmt" 16578 16579 v1 "k8s.io/api/core/v1" 16580 16581 "volcano.sh/volcano/pkg/scheduler/api" 16582 ) 16583 16584 // checkNodeResourceIsProportional checks if a gpu:cpu:memory is Proportional 16585 func checkNodeResourceIsProportional(task *api.TaskInfo, node *api.NodeInfo, proportional map[v1.ResourceName]baseResource) (bool, error) <span class="cov8" title="1">{ 16586 for resourceName := range proportional </span><span class="cov8" title="1">{ 16587 if value, found := task.Resreq.ScalarResources[resourceName]; found && value > 0 </span><span class="cov8" title="1">{ 16588 return true, nil 16589 }</span> 16590 } 16591 <span class="cov8" title="1">for resourceName, resourceRate := range proportional </span><span class="cov8" title="1">{ 16592 if value, found := node.Idle.ScalarResources[resourceName]; found </span><span class="cov8" title="1">{ 16593 cpuReserved := value * resourceRate.CPU 16594 memoryReserved := value * resourceRate.Memory * 1000 * 1000 16595 r := node.Idle.Clone() 16596 r = r.Sub(task.Resreq) 16597 if r.MilliCPU < cpuReserved || r.Memory < memoryReserved </span><span class="cov8" title="1">{ 16598 return false, fmt.Errorf("proportional of resource %s check failed", resourceName) 16599 }</span> 16600 } 16601 } 16602 <span class="cov8" title="1">return true, nil</span> 16603 } 16604 </pre> 16605 16606 <pre class="file" id="file77" style="display: none">/* 16607 Copyright 2021 The Volcano Authors. 16608 16609 Licensed under the Apache License, Version 2.0 (the "License"); 16610 you may not use this file except in compliance with the License. 16611 You may obtain a copy of the License at 16612 16613 http://www.apache.org/licenses/LICENSE-2.0 16614 16615 Unless required by applicable law or agreed to in writing, software 16616 distributed under the License is distributed on an "AS IS" BASIS, 16617 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
16618 See the License for the specific language governing permissions and 16619 limitations under the License. 16620 */ 16621 16622 package tasktopology 16623 16624 import ( 16625 "k8s.io/apimachinery/pkg/types" 16626 "k8s.io/klog" 16627 16628 "volcano.sh/volcano/pkg/scheduler/api" 16629 ) 16630 16631 type reqAction int 16632 16633 const ( 16634 reqSub reqAction = iota 16635 reqAdd 16636 ) 16637 16638 // Bucket is struct used to classify tasks by affinity and anti-affinity 16639 type Bucket struct { 16640 index int 16641 tasks map[types.UID]*api.TaskInfo 16642 taskNameSet map[string]int 16643 16644 // reqScore is score of resource 16645 // now, we regard 1 CPU and 1 GPU and 1Gi memory as the same score. 16646 reqScore float64 16647 request *api.Resource 16648 16649 boundTask int 16650 node map[string]int 16651 } 16652 16653 // NewBucket create a new empty bucket 16654 func NewBucket() *Bucket <span class="cov0" title="0">{ 16655 return &Bucket{ 16656 index: 0, 16657 tasks: make(map[types.UID]*api.TaskInfo), 16658 taskNameSet: make(map[string]int), 16659 16660 reqScore: 0, 16661 request: api.EmptyResource(), 16662 16663 boundTask: 0, 16664 node: make(map[string]int), 16665 } 16666 }</span> 16667 16668 // CalcResReq calculates task resources request 16669 func (b *Bucket) CalcResReq(req *api.Resource, action reqAction) <span class="cov0" title="0">{ 16670 if req == nil </span><span class="cov0" title="0">{ 16671 return 16672 }</span> 16673 16674 <span class="cov0" title="0">cpu := req.MilliCPU 16675 // treat 1Mi the same as 1m cpu 1m gpu 16676 mem := req.Memory / 1024 / 1024 16677 score := cpu + mem 16678 for _, request := range req.ScalarResources </span><span class="cov0" title="0">{ 16679 score += request 16680 }</span> 16681 16682 <span class="cov0" title="0">switch action </span>{ 16683 case reqSub:<span class="cov0" title="0"> 16684 b.reqScore -= score 16685 b.request.Sub(req)</span> 16686 case reqAdd:<span class="cov0" title="0"> 16687 b.reqScore += score 
16688 b.request.Add(req)</span> 16689 default:<span class="cov0" title="0"> 16690 klog.V(3).Infof("Invalid action <%v> for resource <%v>", action, req)</span> 16691 } 16692 } 16693 16694 // AddTask adds task into bucket 16695 func (b *Bucket) AddTask(taskName string, task *api.TaskInfo) <span class="cov0" title="0">{ 16696 b.taskNameSet[taskName]++ 16697 if task.NodeName != "" </span><span class="cov0" title="0">{ 16698 b.node[task.NodeName]++ 16699 b.boundTask++ 16700 return 16701 }</span> 16702 16703 <span class="cov0" title="0">b.tasks[task.Pod.UID] = task 16704 b.CalcResReq(task.Resreq, reqAdd)</span> 16705 } 16706 16707 // TaskBound binds task to bucket 16708 func (b *Bucket) TaskBound(task *api.TaskInfo) <span class="cov0" title="0">{ 16709 b.node[task.NodeName]++ 16710 b.boundTask++ 16711 16712 delete(b.tasks, task.Pod.UID) 16713 b.CalcResReq(task.Resreq, reqSub) 16714 }</span> 16715 </pre> 16716 16717 <pre class="file" id="file78" style="display: none">/* 16718 Copyright 2021 The Volcano Authors. 16719 16720 Licensed under the Apache License, Version 2.0 (the "License"); 16721 you may not use this file except in compliance with the License. 16722 You may obtain a copy of the License at 16723 16724 http://www.apache.org/licenses/LICENSE-2.0 16725 16726 Unless required by applicable law or agreed to in writing, software 16727 distributed under the License is distributed on an "AS IS" BASIS, 16728 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16729 See the License for the specific language governing permissions and 16730 limitations under the License. 
16731 */ 16732 16733 package tasktopology 16734 16735 import ( 16736 "fmt" 16737 "math" 16738 "sort" 16739 "strings" 16740 16741 "k8s.io/apimachinery/pkg/types" 16742 "k8s.io/klog" 16743 16744 "volcano.sh/volcano/pkg/scheduler/api" 16745 ) 16746 16747 type topologyType int 16748 16749 const ( 16750 selfAntiAffinity topologyType = iota 16751 interAntiAffinity 16752 selfAffinity 16753 interAffinity 16754 ) 16755 16756 // map[topologyType]priority, the larger number means the higher priority 16757 var affinityPriority = map[topologyType]int{ 16758 selfAntiAffinity: 4, 16759 interAffinity: 3, 16760 selfAffinity: 2, 16761 interAntiAffinity: 1, 16762 } 16763 16764 // JobManager is struct used to save infos about affinity and buckets of a job 16765 type JobManager struct { 16766 jobID api.JobID 16767 16768 buckets []*Bucket 16769 podInBucket map[types.UID]int 16770 podInTask map[types.UID]string 16771 taskOverPod map[string]map[types.UID]struct{} 16772 16773 taskAffinityPriority map[string]int // [taskName] -> priority 16774 taskExistOrder map[string]int 16775 interAffinity map[string]map[string]struct{} // [taskName]->[taskName] 16776 selfAffinity map[string]struct{} 16777 interAntiAffinity map[string]map[string]struct{} // [taskName]->[taskName] 16778 selfAntiAffinity map[string]struct{} 16779 16780 bucketMaxSize int 16781 nodeTaskSet map[string]map[string]int // [nodeName]->[taskName] 16782 } 16783 16784 // NewJobManager creates a new job manager for job 16785 func NewJobManager(jobID api.JobID) *JobManager <span class="cov0" title="0">{ 16786 return &JobManager{ 16787 jobID: jobID, 16788 16789 buckets: make([]*Bucket, 0), 16790 podInBucket: make(map[types.UID]int), 16791 podInTask: make(map[types.UID]string), 16792 taskOverPod: make(map[string]map[types.UID]struct{}), 16793 16794 taskAffinityPriority: make(map[string]int), 16795 taskExistOrder: make(map[string]int), 16796 interAffinity: make(map[string]map[string]struct{}), 16797 interAntiAffinity: 
make(map[string]map[string]struct{}), 16798 selfAffinity: make(map[string]struct{}), 16799 selfAntiAffinity: make(map[string]struct{}), 16800 16801 bucketMaxSize: 0, 16802 nodeTaskSet: make(map[string]map[string]int), 16803 } 16804 }</span> 16805 16806 // MarkOutOfBucket indicates task is outside of any bucket 16807 func (jm *JobManager) MarkOutOfBucket(uid types.UID) <span class="cov0" title="0">{ 16808 jm.podInBucket[uid] = OutOfBucket 16809 }</span> 16810 16811 // MarkTaskHasTopology indicates task has topology settings 16812 func (jm *JobManager) MarkTaskHasTopology(taskName string, topoType topologyType) <span class="cov0" title="0">{ 16813 priority := affinityPriority[topoType] 16814 if priority > jm.taskAffinityPriority[taskName] </span><span class="cov0" title="0">{ 16815 jm.taskAffinityPriority[taskName] = priority 16816 }</span> 16817 } 16818 16819 // ApplyTaskTopology transforms taskTopology to matrix 16820 // affinity: [[a, b], [c]] 16821 // interAffinity: 16822 // a b c 16823 // a - x - 16824 // b x - - 16825 // c - - - 16826 // selfAffinity: 16827 // a b c 16828 // - - x 16829 func (jm *JobManager) ApplyTaskTopology(topo *TaskTopology) <span class="cov0" title="0">{ 16830 for _, aff := range topo.Affinity </span><span class="cov0" title="0">{ 16831 if len(aff) == 1 </span><span class="cov0" title="0">{ 16832 taskName := aff[0] 16833 jm.selfAffinity[taskName] = struct{}{} 16834 jm.MarkTaskHasTopology(taskName, selfAffinity) 16835 continue</span> 16836 } 16837 <span class="cov0" title="0">for index, src := range aff </span><span class="cov0" title="0">{ 16838 for _, dst := range aff[:index] </span><span class="cov0" title="0">{ 16839 addAffinity(jm.interAffinity, src, dst) 16840 addAffinity(jm.interAffinity, dst, src) 16841 }</span> 16842 <span class="cov0" title="0">jm.MarkTaskHasTopology(src, interAffinity)</span> 16843 } 16844 } 16845 16846 <span class="cov0" title="0">for _, aff := range topo.AntiAffinity </span><span class="cov0" title="0">{ 16847 
if len(aff) == 1 </span><span class="cov0" title="0">{ 16848 taskName := aff[0] 16849 jm.selfAntiAffinity[taskName] = struct{}{} 16850 jm.MarkTaskHasTopology(taskName, selfAntiAffinity) 16851 continue</span> 16852 } 16853 <span class="cov0" title="0">for index, src := range aff </span><span class="cov0" title="0">{ 16854 for _, dst := range aff[:index] </span><span class="cov0" title="0">{ 16855 addAffinity(jm.interAntiAffinity, src, dst) 16856 addAffinity(jm.interAntiAffinity, dst, src) 16857 }</span> 16858 <span class="cov0" title="0">jm.MarkTaskHasTopology(src, interAntiAffinity)</span> 16859 } 16860 } 16861 16862 <span class="cov0" title="0">length := len(topo.TaskOrder) 16863 for index, taskName := range topo.TaskOrder </span><span class="cov0" title="0">{ 16864 jm.taskExistOrder[taskName] = length - index 16865 }</span> 16866 } 16867 16868 // NewBucket creates a new bucket 16869 func (jm *JobManager) NewBucket() *Bucket <span class="cov0" title="0">{ 16870 bucket := NewBucket() 16871 bucket.index = len(jm.buckets) 16872 jm.buckets = append(jm.buckets, bucket) 16873 return bucket 16874 }</span> 16875 16876 // AddTaskToBucket adds task into bucket 16877 func (jm *JobManager) AddTaskToBucket(bucketIndex int, taskName string, task *api.TaskInfo) <span class="cov0" title="0">{ 16878 bucket := jm.buckets[bucketIndex] 16879 jm.podInBucket[task.Pod.UID] = bucketIndex 16880 bucket.AddTask(taskName, task) 16881 if size := len(bucket.tasks) + bucket.boundTask; size > jm.bucketMaxSize </span><span class="cov0" title="0">{ 16882 jm.bucketMaxSize = size 16883 }</span> 16884 } 16885 16886 // L compared with R, -1 for L < R, 0 for L == R, 1 for L > R 16887 func (jm *JobManager) taskAffinityOrder(L, R *api.TaskInfo) int <span class="cov0" title="0">{ 16888 LTaskName := jm.podInTask[L.Pod.UID] 16889 RTaskName := jm.podInTask[R.Pod.UID] 16890 16891 // in the same vk task, they are equal 16892 if LTaskName == RTaskName </span><span class="cov0" title="0">{ 16893 return 0 16894 
}</span> 16895 16896 // use user defined order firstly 16897 <span class="cov0" title="0">LOrder := jm.taskExistOrder[LTaskName] 16898 ROrder := jm.taskExistOrder[RTaskName] 16899 if LOrder != ROrder </span><span class="cov0" title="0">{ 16900 if LOrder > ROrder </span><span class="cov0" title="0">{ 16901 return 1 16902 }</span> 16903 <span class="cov0" title="0">return -1</span> 16904 } 16905 16906 <span class="cov0" title="0">LPriority := jm.taskAffinityPriority[LTaskName] 16907 RPriority := jm.taskAffinityPriority[RTaskName] 16908 if LPriority != RPriority </span><span class="cov0" title="0">{ 16909 if LPriority > RPriority </span><span class="cov0" title="0">{ 16910 return 1 16911 }</span> 16912 <span class="cov0" title="0">return -1</span> 16913 } 16914 16915 // all affinity setting of L and R are the same, they are equal 16916 <span class="cov0" title="0">return 0</span> 16917 } 16918 16919 func (jm *JobManager) buildTaskInfo(tasks map[api.TaskID]*api.TaskInfo) []*api.TaskInfo <span class="cov0" title="0">{ 16920 taskWithoutBucket := make([]*api.TaskInfo, 0, len(tasks)) 16921 for _, task := range tasks </span><span class="cov0" title="0">{ 16922 pod := task.Pod 16923 16924 taskName := getTaskName(task) 16925 if taskName == "" </span><span class="cov0" title="0">{ 16926 jm.MarkOutOfBucket(pod.UID) 16927 continue</span> 16928 } 16929 <span class="cov0" title="0">if _, hasTopology := jm.taskAffinityPriority[taskName]; !hasTopology </span><span class="cov0" title="0">{ 16930 jm.MarkOutOfBucket(pod.UID) 16931 continue</span> 16932 } 16933 16934 <span class="cov0" title="0">jm.podInTask[pod.UID] = taskName 16935 taskSet, ok := jm.taskOverPod[taskName] 16936 if !ok </span><span class="cov0" title="0">{ 16937 taskSet = make(map[types.UID]struct{}) 16938 jm.taskOverPod[taskName] = taskSet 16939 }</span> 16940 <span class="cov0" title="0">taskSet[pod.UID] = struct{}{} 16941 taskWithoutBucket = append(taskWithoutBucket, task)</span> 16942 } 16943 <span class="cov0" 
title="0">return taskWithoutBucket</span> 16944 } 16945 16946 func (jm *JobManager) checkTaskSetAffinity(taskName string, taskNameSet map[string]int, onlyAnti bool) int <span class="cov0" title="0">{ 16947 bucketPodAff := 0 16948 16949 if taskName == "" </span><span class="cov0" title="0">{ 16950 return bucketPodAff 16951 }</span> 16952 16953 <span class="cov0" title="0">for taskNameInBucket, count := range taskNameSet </span><span class="cov0" title="0">{ 16954 theSameTask := taskNameInBucket == taskName 16955 16956 if !onlyAnti </span><span class="cov0" title="0">{ 16957 affinity := false 16958 if theSameTask </span><span class="cov0" title="0">{ 16959 _, affinity = jm.selfAffinity[taskName] 16960 }</span> else<span class="cov0" title="0"> { 16961 _, affinity = jm.interAffinity[taskName][taskNameInBucket] 16962 }</span> 16963 <span class="cov0" title="0">if affinity </span><span class="cov0" title="0">{ 16964 bucketPodAff += count 16965 }</span> 16966 } 16967 16968 <span class="cov0" title="0">antiAffinity := false 16969 if theSameTask </span><span class="cov0" title="0">{ 16970 _, antiAffinity = jm.selfAntiAffinity[taskName] 16971 }</span> else<span class="cov0" title="0"> { 16972 _, antiAffinity = jm.interAntiAffinity[taskName][taskNameInBucket] 16973 }</span> 16974 <span class="cov0" title="0">if antiAffinity </span><span class="cov0" title="0">{ 16975 bucketPodAff -= count 16976 }</span> 16977 } 16978 16979 <span class="cov0" title="0">return bucketPodAff</span> 16980 } 16981 16982 func (jm *JobManager) buildBucket(taskWithOrder []*api.TaskInfo) <span class="cov0" title="0">{ 16983 nodeBucketMapping := make(map[string]*Bucket) 16984 16985 for _, task := range taskWithOrder </span><span class="cov0" title="0">{ 16986 klog.V(5).Infof("jobID %s task with order task %s/%s", jm.jobID, task.Namespace, task.Name) 16987 16988 var selectedBucket *Bucket 16989 maxAffinity := math.MinInt32 16990 16991 taskName := getTaskName(task) 16992 16993 if task.NodeName != "" 
</span><span class="cov0" title="0">{ 16994 // generate bucket by node 16995 maxAffinity = 0 16996 selectedBucket = nodeBucketMapping[task.NodeName] 16997 }</span> else<span class="cov0" title="0"> { 16998 for _, bucket := range jm.buckets </span><span class="cov0" title="0">{ 16999 bucketPodAff := jm.checkTaskSetAffinity(taskName, bucket.taskNameSet, false) 17000 17001 // choose the best fit affinity, or balance resource between bucket 17002 if bucketPodAff > maxAffinity </span><span class="cov0" title="0">{ 17003 maxAffinity = bucketPodAff 17004 selectedBucket = bucket 17005 }</span> else<span class="cov0" title="0"> if bucketPodAff == maxAffinity && selectedBucket != nil && 17006 bucket.reqScore < selectedBucket.reqScore </span><span class="cov0" title="0">{ 17007 selectedBucket = bucket 17008 }</span> 17009 } 17010 } 17011 17012 <span class="cov0" title="0">if maxAffinity < 0 || selectedBucket == nil </span><span class="cov0" title="0">{ 17013 selectedBucket = jm.NewBucket() 17014 if task.NodeName != "" </span><span class="cov0" title="0">{ 17015 nodeBucketMapping[task.NodeName] = selectedBucket 17016 }</span> 17017 } 17018 17019 <span class="cov0" title="0">jm.AddTaskToBucket(selectedBucket.index, taskName, task)</span> 17020 } 17021 } 17022 17023 // ConstructBucket builds bucket for tasks 17024 func (jm *JobManager) ConstructBucket(tasks map[api.TaskID]*api.TaskInfo) <span class="cov0" title="0">{ 17025 taskWithoutBucket := jm.buildTaskInfo(tasks) 17026 17027 o := TaskOrder{ 17028 tasks: taskWithoutBucket, 17029 17030 manager: jm, 17031 } 17032 sort.Sort(sort.Reverse(&o)) 17033 17034 jm.buildBucket(o.tasks) 17035 }</span> 17036 17037 // TaskBound binds task to bucket 17038 func (jm *JobManager) TaskBound(task *api.TaskInfo) <span class="cov0" title="0">{ 17039 if taskName := getTaskName(task); taskName != "" </span><span class="cov0" title="0">{ 17040 set, ok := jm.nodeTaskSet[task.NodeName] 17041 if !ok </span><span class="cov0" title="0">{ 17042 set = 
make(map[string]int) 17043 jm.nodeTaskSet[task.NodeName] = set 17044 }</span> 17045 <span class="cov0" title="0">set[taskName]++</span> 17046 } 17047 17048 <span class="cov0" title="0">bucket := jm.GetBucket(task) 17049 if bucket != nil </span><span class="cov0" title="0">{ 17050 bucket.TaskBound(task) 17051 }</span> 17052 } 17053 17054 // GetBucket get bucket inside which task has been 17055 func (jm *JobManager) GetBucket(task *api.TaskInfo) *Bucket <span class="cov0" title="0">{ 17056 index, ok := jm.podInBucket[task.Pod.UID] 17057 if !ok || index == OutOfBucket </span><span class="cov0" title="0">{ 17058 return nil 17059 }</span> 17060 17061 <span class="cov0" title="0">bucket := jm.buckets[index] 17062 return bucket</span> 17063 } 17064 17065 func (jm *JobManager) String() string <span class="cov0" title="0">{ 17066 // saa: selfAntiAffinity 17067 // iaa: interAntiAffinity 17068 // sa: selfAffinity 17069 // ia: interAffinity 17070 msg := []string{ 17071 fmt.Sprintf("%s - job %s max %d || saa: %v - iaa: %v - sa: %v - ia: %v || priority: %v - order: %v || ", 17072 PluginName, jm.jobID, jm.bucketMaxSize, 17073 jm.selfAntiAffinity, jm.interAntiAffinity, 17074 jm.selfAffinity, jm.interAffinity, 17075 jm.taskAffinityPriority, jm.taskExistOrder, 17076 ), 17077 } 17078 17079 for _, bucket := range jm.buckets </span><span class="cov0" title="0">{ 17080 bucketMsg := fmt.Sprintf("b:%d -- ", bucket.index) 17081 var info []string 17082 for _, task := range bucket.tasks </span><span class="cov0" title="0">{ 17083 info = append(info, task.Pod.Name) 17084 }</span> 17085 <span class="cov0" title="0">bucketMsg += strings.Join(info, ", ") 17086 bucketMsg += "|" 17087 17088 info = nil 17089 for nodeName, count := range bucket.node </span><span class="cov0" title="0">{ 17090 info = append(info, fmt.Sprintf("n%s-%d", nodeName, count)) 17091 }</span> 17092 <span class="cov0" title="0">bucketMsg += strings.Join(info, ", ") 17093 17094 msg = append(msg, "["+bucketMsg+"]")</span> 17095 
} 17096 <span class="cov0" title="0">return strings.Join(msg, " ")</span> 17097 } 17098 </pre> 17099 17100 <pre class="file" id="file79" style="display: none">/* 17101 Copyright 2021 The Volcano Authors. 17102 17103 Licensed under the Apache License, Version 2.0 (the "License"); 17104 you may not use this file except in compliance with the License. 17105 You may obtain a copy of the License at 17106 17107 http://www.apache.org/licenses/LICENSE-2.0 17108 17109 Unless required by applicable law or agreed to in writing, software 17110 distributed under the License is distributed on an "AS IS" BASIS, 17111 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17112 See the License for the specific language governing permissions and 17113 limitations under the License. 17114 */ 17115 17116 package tasktopology 17117 17118 import ( 17119 "fmt" 17120 "strings" 17121 "time" 17122 17123 "k8s.io/klog" 17124 "k8s.io/kubernetes/pkg/scheduler/framework/v1alpha1" 17125 17126 "volcano.sh/volcano/pkg/scheduler/api" 17127 "volcano.sh/volcano/pkg/scheduler/framework" 17128 ) 17129 17130 type taskTopologyPlugin struct { 17131 arguments framework.Arguments 17132 17133 weight int 17134 managers map[api.JobID]*JobManager 17135 } 17136 17137 // New function returns taskTopologyPlugin object 17138 func New(arguments framework.Arguments) framework.Plugin <span class="cov0" title="0">{ 17139 return &taskTopologyPlugin{ 17140 arguments: arguments, 17141 17142 weight: calculateWeight(arguments), 17143 managers: make(map[api.JobID]*JobManager), 17144 } 17145 }</span> 17146 17147 func (p *taskTopologyPlugin) Name() string <span class="cov0" title="0">{ 17148 return PluginName 17149 }</span> 17150 17151 // TaskOrderFn returns -1 to make l prior to r. 
17152 // 17153 // for example: 17154 // A: 17155 // | bucket1 | bucket2 | out of bucket 17156 // | a1 a3 | a2 | a4 17157 // B: 17158 // | bucket1 | out of bucket 17159 // | b1 b2 | b3 17160 // the right task order should be: 17161 // a1 a3 a2 b1 b2 a4 b3 17162 func (p *taskTopologyPlugin) TaskOrderFn(l interface{}, r interface{}) int <span class="cov0" title="0">{ 17163 lv, ok := l.(*api.TaskInfo) 17164 if !ok </span><span class="cov0" title="0">{ 17165 klog.Errorf("Object is not a taskinfo") 17166 }</span> 17167 <span class="cov0" title="0">rv, ok := r.(*api.TaskInfo) 17168 if !ok </span><span class="cov0" title="0">{ 17169 klog.Errorf("Object is not a taskinfo") 17170 }</span> 17171 17172 <span class="cov0" title="0">lvJobManager := p.managers[lv.Job] 17173 rvJobManager := p.managers[rv.Job] 17174 17175 var lvBucket, rvBucket *Bucket 17176 if lvJobManager != nil </span><span class="cov0" title="0">{ 17177 lvBucket = lvJobManager.GetBucket(lv) 17178 }</span> else<span class="cov0" title="0"> { 17179 klog.V(4).Infof("No job manager for job <ID: %s>, do not return task order.", lv.Job) 17180 return 0 17181 }</span> 17182 <span class="cov0" title="0">if rvJobManager != nil </span><span class="cov0" title="0">{ 17183 rvBucket = rvJobManager.GetBucket(rv) 17184 }</span> else<span class="cov0" title="0"> { 17185 klog.V(4).Infof("No job manager for job <ID: %s>, do not return task order.", rv.Job) 17186 return 0 17187 }</span> 17188 17189 // the one have bucket would always prior to another 17190 <span class="cov0" title="0">lvInBucket := lvBucket != nil 17191 rvInBucket := rvBucket != nil 17192 if lvInBucket != rvInBucket </span><span class="cov0" title="0">{ 17193 if lvInBucket </span><span class="cov0" title="0">{ 17194 return -1 17195 }</span> 17196 <span class="cov0" title="0">return 1</span> 17197 } 17198 17199 // comparison between job is not the duty of this plugin 17200 <span class="cov0" title="0">if lv.Job != rv.Job </span><span class="cov0" title="0">{ 17201 
return 0 17202 }</span> 17203 17204 // task out of bucket have no order 17205 <span class="cov0" title="0">if !lvInBucket && !rvInBucket </span><span class="cov0" title="0">{ 17206 return 0 17207 }</span> 17208 17209 // the big bucket should prior to small one 17210 <span class="cov0" title="0">lvHasTask := len(lvBucket.tasks) 17211 rvHasTask := len(rvBucket.tasks) 17212 if lvHasTask != rvHasTask </span><span class="cov0" title="0">{ 17213 if lvHasTask > rvHasTask </span><span class="cov0" title="0">{ 17214 return -1 17215 }</span> 17216 <span class="cov0" title="0">return 1</span> 17217 } 17218 17219 <span class="cov0" title="0">lvBucketIndex := lvBucket.index 17220 rvBucketIndex := rvBucket.index 17221 // in the same bucket, the affinityOrder is ok 17222 if lvBucketIndex == rvBucketIndex </span><span class="cov0" title="0">{ 17223 affinityOrder := lvJobManager.taskAffinityOrder(lv, rv) 17224 return -affinityOrder 17225 }</span> 17226 17227 // the old bucket should prior to young one 17228 <span class="cov0" title="0">if lvBucketIndex < rvBucketIndex </span><span class="cov0" title="0">{ 17229 return -1 17230 }</span> 17231 <span class="cov0" title="0">return 1</span> 17232 } 17233 17234 func (p *taskTopologyPlugin) calcBucketScore(task *api.TaskInfo, node *api.NodeInfo) (int, *JobManager, error) <span class="cov0" title="0">{ 17235 // task could never fits the node 17236 maxResource := node.Idle.Clone().Add(node.Releasing) 17237 if req := task.Resreq; req != nil && maxResource.LessPartly(req, api.Zero) </span><span class="cov0" title="0">{ 17238 return 0, nil, nil 17239 }</span> 17240 17241 <span class="cov0" title="0">jobManager, hasManager := p.managers[task.Job] 17242 if !hasManager </span><span class="cov0" title="0">{ 17243 return 0, nil, nil 17244 }</span> 17245 17246 <span class="cov0" title="0">bucket := jobManager.GetBucket(task) 17247 // task out of bucket 17248 if bucket == nil </span><span class="cov0" title="0">{ 17249 return 0, jobManager, nil 17250 
}</span> 17251 17252 // 1. bound task in bucket is the base score of this node 17253 <span class="cov0" title="0">score := bucket.node[node.Name] 17254 17255 // 2. task inter/self anti-affinity should be calculated 17256 if nodeTaskSet := jobManager.nodeTaskSet[node.Name]; nodeTaskSet != nil </span><span class="cov0" title="0">{ 17257 taskName := getTaskName(task) 17258 affinityScore := jobManager.checkTaskSetAffinity(taskName, nodeTaskSet, true) 17259 if affinityScore < 0 </span><span class="cov0" title="0">{ 17260 score += affinityScore 17261 }</span> 17262 } 17263 <span class="cov0" title="0">klog.V(4).Infof("task %s/%s, node %s, additional score %d, task %d", 17264 task.Namespace, task.Name, node.Name, score, len(bucket.tasks)) 17265 17266 // 3. the other tasks in bucket take into considering 17267 score += len(bucket.tasks) 17268 if bucket.request == nil || bucket.request.LessEqual(maxResource, api.Zero) </span><span class="cov0" title="0">{ 17269 return score, jobManager, nil 17270 }</span> 17271 17272 <span class="cov0" title="0">remains := bucket.request.Clone() 17273 // randomly (by map) take out task to make the bucket fits the node 17274 for bucketTaskID, bucketTask := range bucket.tasks </span><span class="cov0" title="0">{ 17275 // current task should kept in bucket 17276 if bucketTaskID == task.Pod.UID || bucketTask.Resreq == nil </span><span class="cov0" title="0">{ 17277 continue</span> 17278 } 17279 <span class="cov0" title="0">remains.Sub(bucketTask.Resreq) 17280 score-- 17281 if remains.LessEqual(maxResource, api.Zero) </span><span class="cov0" title="0">{ 17282 break</span> 17283 } 17284 } 17285 // here, the bucket remained request will always fit the maxResource 17286 <span class="cov0" title="0">return score, jobManager, nil</span> 17287 } 17288 17289 func (p *taskTopologyPlugin) NodeOrderFn(task *api.TaskInfo, node *api.NodeInfo) (float64, error) <span class="cov0" title="0">{ 17290 score, jobManager, err := p.calcBucketScore(task, node) 
17291 if err != nil </span><span class="cov0" title="0">{ 17292 return 0, err 17293 }</span> 17294 <span class="cov0" title="0">fScore := float64(score * p.weight) 17295 if jobManager != nil && jobManager.bucketMaxSize != 0 </span><span class="cov0" title="0">{ 17296 fScore = fScore * float64(v1alpha1.MaxNodeScore) / float64(jobManager.bucketMaxSize) 17297 }</span> 17298 <span class="cov0" title="0">klog.V(4).Infof("task %s/%s at node %s has bucket score %d, score %f", 17299 task.Namespace, task.Name, node.Name, score, fScore) 17300 return fScore, nil</span> 17301 } 17302 17303 func (p *taskTopologyPlugin) AllocateFunc(event *framework.Event) <span class="cov0" title="0">{ 17304 task := event.Task 17305 17306 jobManager, hasManager := p.managers[task.Job] 17307 if !hasManager </span><span class="cov0" title="0">{ 17308 return 17309 }</span> 17310 <span class="cov0" title="0">jobManager.TaskBound(task)</span> 17311 } 17312 17313 func (p *taskTopologyPlugin) initBucket(ssn *framework.Session) <span class="cov0" title="0">{ 17314 for jobID, job := range ssn.Jobs </span><span class="cov0" title="0">{ 17315 if noPendingTasks(job) </span><span class="cov0" title="0">{ 17316 klog.V(4).Infof("No pending tasks in job <%s/%s> by plugin %s.", 17317 job.Namespace, job.Name, PluginName) 17318 continue</span> 17319 } 17320 17321 <span class="cov0" title="0">jobTopology, err := readTopologyFromPgAnnotations(job) 17322 if err != nil </span><span class="cov0" title="0">{ 17323 klog.V(4).Infof("Failed to read task topology from job <%s/%s> annotations, error: %s.", 17324 job.Namespace, job.Name, err.Error()) 17325 continue</span> 17326 } 17327 <span class="cov0" title="0">if jobTopology == nil </span><span class="cov0" title="0">{ 17328 continue</span> 17329 } 17330 17331 <span class="cov0" title="0">manager := NewJobManager(jobID) 17332 manager.ApplyTaskTopology(jobTopology) 17333 manager.ConstructBucket(job.Tasks) 17334 17335 p.managers[job.UID] = manager</span> 17336 } 17337 } 
17338 17339 func affinityCheck(job *api.JobInfo, affinity [][]string) error <span class="cov8" title="1">{ 17340 if job == nil || affinity == nil </span><span class="cov0" title="0">{ 17341 return fmt.Errorf("empty input, job: %v, affinity: %v", job, affinity) 17342 }</span> 17343 17344 <span class="cov8" title="1">var taskNumber = len(job.Tasks) 17345 var taskRef = make(map[string]bool, taskNumber) 17346 for _, task := range job.Tasks </span><span class="cov8" title="1">{ 17347 tmpStrings := strings.Split(task.Name, "-") 17348 if _, exist := taskRef[tmpStrings[len(tmpStrings)-2]]; !exist </span><span class="cov8" title="1">{ 17349 taskRef[tmpStrings[len(tmpStrings)-2]] = true 17350 }</span> 17351 } 17352 17353 <span class="cov8" title="1">for _, aff := range affinity </span><span class="cov8" title="1">{ 17354 affTasks := make(map[string]bool, len(aff)) 17355 for _, task := range aff </span><span class="cov8" title="1">{ 17356 if len(task) == 0 </span><span class="cov8" title="1">{ 17357 continue</span> 17358 } 17359 <span class="cov8" title="1">if _, exist := taskRef[task]; !exist </span><span class="cov8" title="1">{ 17360 return fmt.Errorf("task %s do not exist in job <%s/%s>", task, job.Namespace, job.Name) 17361 }</span> 17362 <span class="cov8" title="1">if _, exist := affTasks[task]; exist </span><span class="cov8" title="1">{ 17363 return fmt.Errorf("task %s is duplicated in job <%s/%s>", task, job.Namespace, job.Name) 17364 }</span> 17365 <span class="cov8" title="1">affTasks[task] = true</span> 17366 } 17367 } 17368 17369 <span class="cov8" title="1">return nil</span> 17370 } 17371 17372 func splitAnnotations(job *api.JobInfo, annotation string) ([][]string, error) <span class="cov8" title="1">{ 17373 affinityStr := strings.Split(annotation, ";") 17374 if len(affinityStr) == 0 </span><span class="cov0" title="0">{ 17375 return nil, nil 17376 }</span> 17377 <span class="cov8" title="1">var affinity = make([][]string, len(affinityStr)) 17378 for i, str := 
range affinityStr </span><span class="cov8" title="1">{ 17379 affinity[i] = strings.Split(str, ",") 17380 }</span> 17381 <span class="cov8" title="1">if err := affinityCheck(job, affinity); err != nil </span><span class="cov8" title="1">{ 17382 klog.V(4).Infof("Job <%s/%s> affinity key invalid: %s.", 17383 job.Namespace, job.Name, err.Error()) 17384 return nil, err 17385 }</span> 17386 <span class="cov8" title="1">return affinity, nil</span> 17387 } 17388 17389 func readTopologyFromPgAnnotations(job *api.JobInfo) (*TaskTopology, error) <span class="cov8" title="1">{ 17390 jobAffinityStr, affinityExist := job.PodGroup.Annotations[JobAffinityAnnotations] 17391 jobAntiAffinityStr, antiAffinityExist := job.PodGroup.Annotations[JobAntiAffinityAnnotations] 17392 taskOrderStr, taskOrderExist := job.PodGroup.Annotations[TaskOrderAnnotations] 17393 17394 if !(affinityExist || antiAffinityExist || taskOrderExist) </span><span class="cov8" title="1">{ 17395 return nil, nil 17396 }</span> 17397 17398 <span class="cov8" title="1">var jobTopology = TaskTopology{ 17399 Affinity: nil, 17400 AntiAffinity: nil, 17401 TaskOrder: nil, 17402 } 17403 17404 if affinityExist </span><span class="cov8" title="1">{ 17405 affinities, err := splitAnnotations(job, jobAffinityStr) 17406 if err != nil </span><span class="cov8" title="1">{ 17407 klog.V(4).Infof("Job <%s/%s> affinity key invalid: %s.", 17408 job.Namespace, job.Name, err.Error()) 17409 return nil, err 17410 }</span> 17411 <span class="cov8" title="1">jobTopology.Affinity = affinities</span> 17412 } 17413 17414 <span class="cov8" title="1">if antiAffinityExist </span><span class="cov8" title="1">{ 17415 affinities, err := splitAnnotations(job, jobAntiAffinityStr) 17416 if err != nil </span><span class="cov8" title="1">{ 17417 klog.V(4).Infof("Job <%s/%s> anti affinity key invalid: %s.", 17418 job.Namespace, job.Name, err.Error()) 17419 return nil, err 17420 }</span> 17421 <span class="cov8" title="1">jobTopology.AntiAffinity = 
affinities</span> 17422 } 17423 17424 <span class="cov8" title="1">if taskOrderExist </span><span class="cov8" title="1">{ 17425 jobTopology.TaskOrder = strings.Split(taskOrderStr, ",") 17426 if err := affinityCheck(job, [][]string{jobTopology.TaskOrder}); err != nil </span><span class="cov8" title="1">{ 17427 klog.V(4).Infof("Job <%s/%s> task order key invalid: %s.", 17428 job.Namespace, job.Name, err.Error()) 17429 return nil, err 17430 }</span> 17431 } 17432 17433 <span class="cov8" title="1">return &jobTopology, nil</span> 17434 } 17435 17436 func (p *taskTopologyPlugin) OnSessionOpen(ssn *framework.Session) <span class="cov0" title="0">{ 17437 start := time.Now() 17438 klog.V(3).Infof("start to init task topology plugin, weight[%d], defined order %v", p.weight, affinityPriority) 17439 17440 p.initBucket(ssn) 17441 17442 ssn.AddTaskOrderFn(p.Name(), p.TaskOrderFn) 17443 17444 ssn.AddNodeOrderFn(p.Name(), p.NodeOrderFn) 17445 17446 ssn.AddEventHandler(&framework.EventHandler{ 17447 AllocateFunc: p.AllocateFunc, 17448 }) 17449 17450 klog.V(3).Infof("finished to init task topology plugin, using time %v", time.Since(start)) 17451 }</span> 17452 17453 func (p *taskTopologyPlugin) OnSessionClose(ssn *framework.Session) <span class="cov0" title="0">{ 17454 p.managers = nil 17455 }</span> 17456 </pre> 17457 17458 <pre class="file" id="file80" style="display: none">/* 17459 Copyright 2021 The Volcano Authors. 17460 17461 Licensed under the Apache License, Version 2.0 (the "License"); 17462 you may not use this file except in compliance with the License. 17463 You may obtain a copy of the License at 17464 17465 http://www.apache.org/licenses/LICENSE-2.0 17466 17467 Unless required by applicable law or agreed to in writing, software 17468 distributed under the License is distributed on an "AS IS" BASIS, 17469 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
17470 See the License for the specific language governing permissions and 17471 limitations under the License. 17472 */ 17473 17474 package tasktopology 17475 17476 import ( 17477 "volcano.sh/apis/pkg/apis/batch/v1alpha1" 17478 "volcano.sh/volcano/pkg/scheduler/api" 17479 "volcano.sh/volcano/pkg/scheduler/framework" 17480 ) 17481 17482 const ( 17483 // PluginName indicates name of volcano scheduler plugin 17484 PluginName = "task-topology" 17485 // PluginWeight is task-topology plugin weight in nodeOrderFn 17486 PluginWeight = "task-topology.weight" 17487 // JobAffinityKey is the key to read in task-topology arguments from job annotations 17488 JobAffinityKey = "volcano.sh/task-topology" 17489 // OutOfBucket indicates task is outside of any bucket 17490 OutOfBucket = -1 17491 17492 // JobAffinityAnnotations is the key to read in task-topology affinity arguments from podgroup annotations 17493 JobAffinityAnnotations = "volcano.sh/task-topology-affinity" 17494 // JobAntiAffinityAnnotations is the key to read in task-topology anti-affinity arguments from podgroup annotations 17495 JobAntiAffinityAnnotations = "volcano.sh/task-topology-anti-affinity" 17496 // TaskOrderAnnotations is the key to read in task-topology task order arguments from podgroup annotations 17497 TaskOrderAnnotations = "volcano.sh/task-topology-task-order" 17498 ) 17499 17500 // TaskTopology is struct used to save affinity infos of a job read from job plugin or annotations 17501 type TaskTopology struct { 17502 Affinity [][]string `json:"affinity,omitempty"` 17503 AntiAffinity [][]string `json:"antiAffinity,omitempty"` 17504 TaskOrder []string `json:"taskOrder,omitempty"` 17505 } 17506 17507 func calculateWeight(args framework.Arguments) int <span class="cov0" title="0">{ 17508 /* 17509 User Should give taskTopologyWeight in this format(task-topology.weight). 
17510 17511 actions: "enqueue, reclaim, allocate, backfill, preempt" 17512 tiers: 17513 - plugins: 17514 - name: task-topology 17515 arguments: 17516 task-topology.weight: 10 17517 */ 17518 // Values are initialized to 1. 17519 weight := 1 17520 17521 args.GetInt(&weight, PluginWeight) 17522 17523 return weight 17524 }</span> 17525 17526 func getTaskName(task *api.TaskInfo) string <span class="cov0" title="0">{ 17527 return task.Pod.Annotations[v1alpha1.TaskSpecKey] 17528 }</span> 17529 17530 func addAffinity(m map[string]map[string]struct{}, src, dst string) <span class="cov0" title="0">{ 17531 srcMap, ok := m[src] 17532 if !ok </span><span class="cov0" title="0">{ 17533 srcMap = make(map[string]struct{}) 17534 m[src] = srcMap 17535 }</span> 17536 <span class="cov0" title="0">srcMap[dst] = struct{}{}</span> 17537 } 17538 17539 func noPendingTasks(job *api.JobInfo) bool <span class="cov0" title="0">{ 17540 return len(job.TaskStatusIndex[api.Pending]) == 0 17541 }</span> 17542 17543 // TaskOrder is struct used to save task order 17544 type TaskOrder struct { 17545 tasks []*api.TaskInfo 17546 manager *JobManager 17547 } 17548 17549 func (p *TaskOrder) Len() int <span class="cov0" title="0">{ return len(p.tasks) }</span> 17550 17551 func (p *TaskOrder) Swap(l, r int) <span class="cov0" title="0">{ 17552 p.tasks[l], p.tasks[r] = p.tasks[r], p.tasks[l] 17553 }</span> 17554 17555 func (p *TaskOrder) Less(l, r int) bool <span class="cov0" title="0">{ 17556 L := p.tasks[l] 17557 R := p.tasks[r] 17558 17559 LHasNode := L.NodeName != "" 17560 RHasNode := R.NodeName != "" 17561 if LHasNode || RHasNode </span><span class="cov0" title="0">{ 17562 // the task bounded would have high priority 17563 if LHasNode != RHasNode </span><span class="cov0" title="0">{ 17564 return !LHasNode 17565 }</span> 17566 // all bound, any order is alright 17567 <span class="cov0" title="0">return L.NodeName > R.NodeName</span> 17568 } 17569 17570 <span class="cov0" title="0">result := 
p.manager.taskAffinityOrder(L, R) 17571 // they have the same taskAffinity order, any order is alright 17572 if result == 0 </span><span class="cov0" title="0">{ 17573 return L.Name > R.Name 17574 }</span> 17575 <span class="cov0" title="0">return result < 0</span> 17576 } 17577 </pre> 17578 17579 <pre class="file" id="file81" style="display: none">/* 17580 Copyright 2021 The Volcano Authors. 17581 17582 Licensed under the Apache License, Version 2.0 (the "License"); 17583 you may not use this file except in compliance with the License. 17584 You may obtain a copy of the License at 17585 17586 http://www.apache.org/licenses/LICENSE-2.0 17587 17588 Unless required by applicable law or agreed to in writing, software 17589 distributed under the License is distributed on an "AS IS" BASIS, 17590 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17591 See the License for the specific language governing permissions and 17592 limitations under the License. 17593 */ 17594 17595 package tdm 17596 17597 import ( 17598 "fmt" 17599 "strings" 17600 "time" 17601 17602 "k8s.io/apimachinery/pkg/util/intstr" 17603 "k8s.io/klog" 17604 "k8s.io/kubernetes/pkg/scheduler/framework/v1alpha1" 17605 17606 "volcano.sh/volcano/pkg/scheduler/api" 17607 "volcano.sh/volcano/pkg/scheduler/framework" 17608 tutil "volcano.sh/volcano/pkg/scheduler/plugins/util" 17609 "volcano.sh/volcano/pkg/scheduler/util" 17610 ) 17611 17612 const ( 17613 // PluginName indicates name of volcano scheduler plugin. 17614 PluginName = "tdm" 17615 // revocableZoneLayout revocable zone layout 17616 revocableZoneLayout = "15:04" 17617 revocableZoneLabelPrefix = "tdm.revocable-zone." 
17618 evictPeriodLabel = "tdm.evict.period" 17619 defaultPodEvictNum = 1 17620 ) 17621 17622 var lastEvictAt time.Time 17623 17624 /* 17625 actions: "enqueue, reclaim, allocate, preempt" 17626 tiers: 17627 - plugins: 17628 - name: tdm 17629 arguments: 17630 tdm.revocable-zone.rz1: 10:00-21:00 17631 tdm.revocable-zone.rz2: 12:00-14:00 17632 tdm.evict.period: 1m 17633 */ 17634 17635 type tdmPlugin struct { 17636 revocableZone map[string]string 17637 // evictPeriod 17638 // default 1m 17639 evictPeriod time.Duration 17640 } 17641 17642 // New function returns prioritizePlugin object 17643 func New(args framework.Arguments) framework.Plugin <span class="cov8" title="1">{ 17644 revocableZone := make(map[string]string) 17645 evictPeriod := time.Minute 17646 17647 for k, v := range args </span><span class="cov8" title="1">{ 17648 if strings.Contains(k, revocableZoneLabelPrefix) </span><span class="cov8" title="1">{ 17649 revocableZone[strings.Replace(k, revocableZoneLabelPrefix, "", 1)] = v 17650 }</span> 17651 } 17652 17653 <span class="cov8" title="1">if period, ok := args[evictPeriodLabel]; ok </span><span class="cov8" title="1">{ 17654 if d, err := time.ParseDuration(period); err == nil </span><span class="cov8" title="1">{ 17655 evictPeriod = d 17656 }</span> 17657 } 17658 17659 <span class="cov8" title="1">return &tdmPlugin{revocableZone, evictPeriod}</span> 17660 } 17661 17662 func (tp *tdmPlugin) Name() string <span class="cov8" title="1">{ 17663 return PluginName 17664 }</span> 17665 17666 func parseRevocableZone(rzRaw string) (start, end time.Time, err error) <span class="cov8" title="1">{ 17667 rzValues := strings.Split(strings.TrimSpace(rzRaw), "-") 17668 17669 if len(rzValues) != 2 </span><span class="cov8" title="1">{ 17670 err = fmt.Errorf("revocable zone %v format error", rzRaw) 17671 return 17672 }</span> 17673 17674 <span class="cov8" title="1">t1, err := time.Parse(revocableZoneLayout, rzValues[0]) 17675 if err != nil </span><span class="cov8" 
title="1">{ 17676 return 17677 }</span> 17678 17679 <span class="cov8" title="1">t2, err := time.Parse(revocableZoneLayout, rzValues[1]) 17680 if err != nil </span><span class="cov0" title="0">{ 17681 return 17682 }</span> 17683 17684 <span class="cov8" title="1">now := time.Now() 17685 17686 start = time.Date(now.Year(), now.Month(), now.Day(), t1.Hour(), t1.Minute(), 0, 0, now.Location()) 17687 if t1.After(t2) || t1.Equal(t2) </span><span class="cov8" title="1">{ 17688 end = time.Date(now.Year(), now.Month(), now.Day()+1, t2.Hour(), t2.Minute(), 0, 0, now.Location()) 17689 }</span> else<span class="cov8" title="1"> { 17690 end = time.Date(now.Year(), now.Month(), now.Day(), t2.Hour(), t2.Minute(), 0, 0, now.Location()) 17691 }</span> 17692 17693 <span class="cov8" title="1">return</span> 17694 } 17695 17696 func (tp *tdmPlugin) availableRevocableZone(rz string) error <span class="cov8" title="1">{ 17697 // rzRaw format 00:00-23:59 17698 rzRaw, ok := tp.revocableZone[rz] 17699 if !ok </span><span class="cov0" title="0">{ 17700 return fmt.Errorf("revocable zone %v not support", rz) 17701 }</span> 17702 17703 <span class="cov8" title="1">now := time.Now() 17704 17705 start, end, err := parseRevocableZone(rzRaw) 17706 if err != nil </span><span class="cov0" title="0">{ 17707 return err 17708 }</span> 17709 17710 <span class="cov8" title="1">if now.Unix() < start.Unix() || now.Unix() > end.Unix() </span><span class="cov8" title="1">{ 17711 return fmt.Errorf("current time beyond revocable zone %v:%v", rz, rzRaw) 17712 }</span> 17713 17714 <span class="cov8" title="1">return nil</span> 17715 } 17716 17717 func (tp *tdmPlugin) OnSessionOpen(ssn *framework.Session) <span class="cov8" title="1">{ 17718 klog.V(4).Infof("Enter tdm plugin ...") 17719 if klog.V(4) </span><span class="cov0" title="0">{ 17720 defer func() </span><span class="cov0" title="0">{ 17721 klog.V(4).Infof("Leaving tdm plugin.") 17722 }</span>() 17723 } 17724 17725 // tdm plugin just handle revocable 
node 17726 <span class="cov8" title="1">predicateFn := func(task *api.TaskInfo, node *api.NodeInfo) error </span><span class="cov8" title="1">{ 17727 if node.RevocableZone == "" </span><span class="cov8" title="1">{ 17728 return nil 17729 }</span> 17730 17731 <span class="cov8" title="1">if err := tp.availableRevocableZone(node.RevocableZone); err != nil </span><span class="cov8" title="1">{ 17732 return fmt.Errorf("plugin %s predicates %w", tp.Name(), err) 17733 }</span> 17734 17735 <span class="cov8" title="1">klog.V(4).Infof("TDM node %v revocable zone %v:%v is active", node.Name, node.RevocableZone, tp.revocableZone[node.RevocableZone]) 17736 17737 if len(task.RevocableZone) == 0 </span><span class="cov8" title="1">{ 17738 msg := fmt.Sprintf("task %s/%s is not allow to dispatch to revocable node %s", task.Namespace, task.Name, node.Name) 17739 return fmt.Errorf("plugin %s predicates %s", tp.Name(), msg) 17740 }</span> 17741 17742 <span class="cov8" title="1">klog.V(4).Infof("TDM filter for Task %s/%s on node %s pass.", task.Namespace, task.Name, node.Name) 17743 return nil</span> 17744 } 17745 17746 // tdm plugin just handle revocable node 17747 <span class="cov8" title="1">nodeOrderFn := func(task *api.TaskInfo, node *api.NodeInfo) (float64, error) </span><span class="cov8" title="1">{ 17748 score := 0.0 17749 17750 if node.RevocableZone == "" </span><span class="cov8" title="1">{ 17751 return score, nil 17752 }</span> 17753 17754 <span class="cov8" title="1">if err := tp.availableRevocableZone(node.RevocableZone); err != nil </span><span class="cov0" title="0">{ 17755 klog.V(4).Infof("TDM not available %s", err) 17756 return score, err 17757 }</span> 17758 17759 <span class="cov8" title="1">if len(task.RevocableZone) == 0 </span><span class="cov0" title="0">{ 17760 klog.V(4).Infof("TDM task %s/%s is not allow to dispatch to revocable node %s", task.Namespace, task.Name, node.Name) 17761 return score, nil 17762 }</span> 17763 17764 <span class="cov8" 
title="1">score = float64(v1alpha1.MaxNodeScore) 17765 17766 klog.V(4).Infof("TDM score for Task %s/%s on node %s is: %v", task.Namespace, task.Name, node.Name, score) 17767 return score, nil</span> 17768 } 17769 17770 <span class="cov8" title="1">preemptableFn := func(preemptor *api.TaskInfo, preemptees []*api.TaskInfo) ([]*api.TaskInfo, int) </span><span class="cov0" title="0">{ 17771 // for the preemptable or can use revocablezone workload, they can not preempt other tasks. 17772 if preemptor.Preemptable || len(preemptor.RevocableZone) > 0 </span><span class="cov0" title="0">{ 17773 klog.V(4).Infof("TDM task %s/%s is preemptable, do nothing skip", preemptor.Namespace, preemptor.Name) 17774 return nil, tutil.Reject 17775 }</span> 17776 17777 <span class="cov0" title="0">var victims []*api.TaskInfo 17778 tasksMap := make(map[api.JobID][]*api.TaskInfo) 17779 17780 // find preemptable tasks which appear on none revocable node 17781 for _, task := range preemptees </span><span class="cov0" title="0">{ 17782 if !task.Preemptable || task.Status != api.Running </span><span class="cov0" title="0">{ 17783 continue</span> 17784 } 17785 17786 <span class="cov0" title="0">node, ok := ssn.Nodes[task.NodeName] 17787 if !ok </span><span class="cov0" title="0">{ 17788 continue</span> 17789 } 17790 17791 <span class="cov0" title="0">if node.RevocableZone != "" </span><span class="cov0" title="0">{ 17792 continue</span> 17793 } 17794 17795 <span class="cov0" title="0">tasksMap[task.Job] = append(tasksMap[task.Job], task)</span> 17796 } 17797 17798 <span class="cov0" title="0">for jobID, preemptableTasks := range tasksMap </span><span class="cov0" title="0">{ 17799 if job, ok := ssn.Jobs[jobID]; ok </span><span class="cov0" title="0">{ 17800 victims = append(victims, tp.maxVictims(job, preemptableTasks)...) 
17801 }</span> 17802 } 17803 17804 <span class="cov0" title="0">klog.V(4).Infof("TDM victims are %+v", victims) 17805 17806 return victims, tutil.Permit</span> 17807 } 17808 17809 <span class="cov8" title="1">victimsFn := func() []*api.TaskInfo </span><span class="cov8" title="1">{ 17810 if lastEvictAt.Add(tp.evictPeriod).After(time.Now()) </span><span class="cov0" title="0">{ 17811 klog.V(4).Infof("TDM next evict time at %v", lastEvictAt) 17812 return nil 17813 }</span> 17814 17815 <span class="cov8" title="1">klog.V(4).Infof("TDM start to find victims") 17816 17817 // find preemptable task on timeout revocable zone node 17818 victims := make([]*api.TaskInfo, 0) 17819 for rz := range tp.revocableZone </span><span class="cov8" title="1">{ 17820 if err := tp.availableRevocableZone(rz); err != nil </span><span class="cov8" title="1">{ 17821 klog.V(4).Infof("TDM revocable zone %v disactive, %v", rz, err) 17822 // rz disactive, then evict preemptable tasks by job from the revocable node 17823 for jobID, preemtableTasks := range tp.revocableNodePreemptableTask(rz, ssn) </span><span class="cov8" title="1">{ 17824 if job, ok := ssn.Jobs[jobID]; ok </span><span class="cov8" title="1">{ 17825 victims = append(victims, tp.maxVictims(job, preemtableTasks)...) 17826 }</span> 17827 } 17828 } 17829 } 17830 17831 // need to consider concurrency? 
17832 <span class="cov8" title="1">lastEvictAt = time.Now() 17833 17834 klog.V(4).Infof("TDM got %v victims", len(victims)) 17835 17836 return victims</span> 17837 } 17838 17839 <span class="cov8" title="1">jobOrderFn := func(l, r interface{}) int </span><span class="cov0" title="0">{ 17840 lv := l.(*api.JobInfo) 17841 rv := r.(*api.JobInfo) 17842 17843 if lv.Preemptable == rv.Preemptable </span><span class="cov0" title="0">{ 17844 return 0 17845 }</span> 17846 17847 <span class="cov0" title="0">if !lv.Preemptable </span><span class="cov0" title="0">{ 17848 return -1 17849 }</span> 17850 17851 <span class="cov0" title="0">return 1</span> 17852 } 17853 17854 <span class="cov8" title="1">jobPipelinedFn := func(obj interface{}) int </span><span class="cov0" title="0">{ 17855 jobInfo := obj.(*api.JobInfo) 17856 occupied := jobInfo.WaitingTaskNum() + jobInfo.ReadyTaskNum() 17857 if occupied >= jobInfo.MinAvailable </span><span class="cov0" title="0">{ 17858 return tutil.Permit 17859 }</span> 17860 <span class="cov0" title="0">return tutil.Reject</span> 17861 } 17862 17863 <span class="cov8" title="1">jobStarvingFn := func(obj interface{}) bool </span><span class="cov0" title="0">{ 17864 jobInfo := obj.(*api.JobInfo) 17865 // allow none preemptable elastic job (deployment) preempt task 17866 if jobInfo.Preemptable </span><span class="cov0" title="0">{ 17867 return false 17868 }</span> 17869 <span class="cov0" title="0">return len(jobInfo.TaskStatusIndex[api.Pending]) > 0</span> 17870 } 17871 17872 <span class="cov8" title="1">ssn.AddPredicateFn(tp.Name(), predicateFn) 17873 ssn.AddNodeOrderFn(tp.Name(), nodeOrderFn) 17874 ssn.AddPreemptableFn(tp.Name(), preemptableFn) 17875 ssn.AddVictimTasksFns(tp.Name(), victimsFn) 17876 ssn.AddJobOrderFn(tp.Name(), jobOrderFn) 17877 ssn.AddJobPipelinedFn(tp.Name(), jobPipelinedFn) 17878 ssn.AddJobStarvingFns(tp.Name(), jobStarvingFn)</span> 17879 } 17880 17881 func (tp *tdmPlugin) maxVictims(job *api.JobInfo, victims []*api.TaskInfo) 
[]*api.TaskInfo <span class="cov8" title="1">{ 17882 maxPodEvictNum := tp.getMaxPodEvictNum(job) 17883 targetNum := util.GetMinInt(maxPodEvictNum, len(victims)) 17884 klog.V(3).Infof("Job <%s/%s> max evict:%v, potential victims number:%v, max victims number:%v", 17885 job.Namespace, job.Name, maxPodEvictNum, len(victims), targetNum) 17886 17887 return victims[:targetNum] 17888 }</span> 17889 17890 // get max pod evict number from job budget configure 17891 func (tp *tdmPlugin) getMaxPodEvictNum(job *api.JobInfo) int <span class="cov8" title="1">{ 17892 jobRunningTaskNum := len(job.TaskStatusIndex[api.Running]) 17893 if job.Budget.MaxUnavilable != "" </span><span class="cov8" title="1">{ 17894 maxUnavilable := tp.parseIntStr(job.Budget.MaxUnavilable, len(job.Tasks)) 17895 finalTaskNum := len(job.TaskStatusIndex[api.Succeeded]) + len(job.TaskStatusIndex[api.Failed]) 17896 realUnavilable := len(job.Tasks) - finalTaskNum - jobRunningTaskNum 17897 if realUnavilable >= maxUnavilable </span><span class="cov0" title="0">{ 17898 return 0 17899 }</span> 17900 <span class="cov8" title="1">return maxUnavilable - realUnavilable</span> 17901 } 17902 17903 <span class="cov8" title="1">if job.Budget.MinAvailable != "" </span><span class="cov8" title="1">{ 17904 minAvailable := tp.parseIntStr(job.Budget.MinAvailable, len(job.Tasks)) 17905 if jobRunningTaskNum >= minAvailable </span><span class="cov8" title="1">{ 17906 return jobRunningTaskNum - minAvailable 17907 }</span> 17908 } 17909 17910 <span class="cov0" title="0">return defaultPodEvictNum</span> 17911 } 17912 17913 func (tp *tdmPlugin) parseIntStr(input string, taskNum int) int <span class="cov8" title="1">{ 17914 resultValue := 0 17915 tmp := intstr.Parse(input) 17916 switch tmp.Type </span>{ 17917 case intstr.Int:<span class="cov8" title="1"> 17918 resultValue = tmp.IntValue()</span> 17919 case intstr.String:<span class="cov8" title="1"> 17920 if v, err := intstr.GetValueFromIntOrPercent(&tmp, taskNum, true); err == nil 
</span><span class="cov8" title="1">{ 17921 resultValue = v 17922 }</span> else<span class="cov0" title="0"> { 17923 klog.Warningf("TDM get percent value err: %v", err) 17924 }</span> 17925 } 17926 17927 <span class="cov8" title="1">return resultValue</span> 17928 } 17929 17930 func (tp *tdmPlugin) revocableNodePreemptableTask(rz string, ssn *framework.Session) map[api.JobID][]*api.TaskInfo <span class="cov8" title="1">{ 17931 tasksMap := make(map[api.JobID][]*api.TaskInfo) 17932 for _, node := range ssn.RevocableNodes </span><span class="cov8" title="1">{ 17933 if node.RevocableZone != rz </span><span class="cov0" title="0">{ 17934 continue</span> 17935 } 17936 17937 <span class="cov8" title="1">for _, task := range node.Tasks </span><span class="cov8" title="1">{ 17938 if task.Preemptable </span><span class="cov8" title="1">{ 17939 if task.Status == api.Running </span><span class="cov8" title="1">{ 17940 tasksMap[task.Job] = append(tasksMap[task.Job], task) 17941 }</span> 17942 } 17943 } 17944 } 17945 17946 <span class="cov8" title="1">return tasksMap</span> 17947 } 17948 17949 func (tp *tdmPlugin) OnSessionClose(ssn *framework.Session) {<span class="cov8" title="1">}</span> 17950 </pre> 17951 17952 <pre class="file" id="file82" style="display: none">/* 17953 Copyright 2017 The Kubernetes Authors. 17954 17955 Licensed under the Apache License, Version 2.0 (the "License"); 17956 you may not use this file except in compliance with the License. 17957 You may obtain a copy of the License at 17958 17959 http://www.apache.org/licenses/LICENSE-2.0 17960 17961 Unless required by applicable law or agreed to in writing, software 17962 distributed under the License is distributed on an "AS IS" BASIS, 17963 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17964 See the License for the specific language governing permissions and 17965 limitations under the License. 
17966 */ 17967 17968 package scheduler 17969 17970 import ( 17971 "fmt" 17972 "path/filepath" 17973 "sync" 17974 "time" 17975 17976 "github.com/fsnotify/fsnotify" 17977 "k8s.io/apimachinery/pkg/util/wait" 17978 "k8s.io/client-go/rest" 17979 "k8s.io/klog" 17980 17981 "volcano.sh/volcano/pkg/filewatcher" 17982 schedcache "volcano.sh/volcano/pkg/scheduler/cache" 17983 "volcano.sh/volcano/pkg/scheduler/conf" 17984 "volcano.sh/volcano/pkg/scheduler/framework" 17985 "volcano.sh/volcano/pkg/scheduler/metrics" 17986 ) 17987 17988 // Scheduler watches for new unscheduled pods for volcano. It attempts to find 17989 // nodes that they fit on and writes bindings back to the api server. 17990 type Scheduler struct { 17991 cache schedcache.Cache 17992 schedulerConf string 17993 fileWatcher filewatcher.FileWatcher 17994 schedulePeriod time.Duration 17995 once sync.Once 17996 17997 mutex sync.Mutex 17998 actions []framework.Action 17999 plugins []conf.Tier 18000 configurations []conf.Configuration 18001 } 18002 18003 // NewScheduler returns a scheduler 18004 func NewScheduler( 18005 config *rest.Config, 18006 schedulerName string, 18007 schedulerConf string, 18008 period time.Duration, 18009 defaultQueue string, 18010 ) (*Scheduler, error) <span class="cov0" title="0">{ 18011 var watcher filewatcher.FileWatcher 18012 if schedulerConf != "" </span><span class="cov0" title="0">{ 18013 var err error 18014 path := filepath.Dir(schedulerConf) 18015 watcher, err = filewatcher.NewFileWatcher(path) 18016 if err != nil </span><span class="cov0" title="0">{ 18017 return nil, fmt.Errorf("failed creating filewatcher for %s: %v", schedulerConf, err) 18018 }</span> 18019 } 18020 18021 <span class="cov0" title="0">scheduler := &Scheduler{ 18022 schedulerConf: schedulerConf, 18023 fileWatcher: watcher, 18024 cache: schedcache.New(config, schedulerName, defaultQueue), 18025 schedulePeriod: period, 18026 } 18027 18028 return scheduler, nil</span> 18029 } 18030 18031 // Run runs the Scheduler 18032 
func (pc *Scheduler) Run(stopCh <-chan struct{}) <span class="cov0" title="0">{ 18033 pc.loadSchedulerConf() 18034 go pc.watchSchedulerConf(stopCh) 18035 // Start cache for policy. 18036 go pc.cache.Run(stopCh) 18037 pc.cache.WaitForCacheSync(stopCh) 18038 go wait.Until(pc.runOnce, pc.schedulePeriod, stopCh) 18039 }</span> 18040 18041 func (pc *Scheduler) runOnce() <span class="cov0" title="0">{ 18042 klog.V(4).Infof("Start scheduling ...") 18043 scheduleStartTime := time.Now() 18044 defer klog.V(4).Infof("End scheduling ...") 18045 18046 pc.mutex.Lock() 18047 actions := pc.actions 18048 plugins := pc.plugins 18049 configurations := pc.configurations 18050 pc.mutex.Unlock() 18051 18052 ssn := framework.OpenSession(pc.cache, plugins, configurations) 18053 defer framework.CloseSession(ssn) 18054 18055 for _, action := range actions </span><span class="cov0" title="0">{ 18056 actionStartTime := time.Now() 18057 action.Execute(ssn) 18058 metrics.UpdateActionDuration(action.Name(), metrics.Duration(actionStartTime)) 18059 }</span> 18060 <span class="cov0" title="0">metrics.UpdateE2eDuration(metrics.Duration(scheduleStartTime))</span> 18061 } 18062 18063 func (pc *Scheduler) loadSchedulerConf() <span class="cov0" title="0">{ 18064 var err error 18065 pc.once.Do(func() </span><span class="cov0" title="0">{ 18066 pc.actions, pc.plugins, pc.configurations, err = unmarshalSchedulerConf(defaultSchedulerConf) 18067 if err != nil </span><span class="cov0" title="0">{ 18068 klog.Errorf("unmarshal scheduler config %s failed: %v", defaultSchedulerConf, err) 18069 panic("invalid default configuration")</span> 18070 } 18071 }) 18072 18073 <span class="cov0" title="0">var config string 18074 if len(pc.schedulerConf) != 0 </span><span class="cov0" title="0">{ 18075 if config, err = readSchedulerConf(pc.schedulerConf); err != nil </span><span class="cov0" title="0">{ 18076 klog.Errorf("Failed to read scheduler configuration '%s', using previous configuration: %v", 18077 
pc.schedulerConf, err) 18078 return 18079 }</span> 18080 } 18081 18082 <span class="cov0" title="0">actions, plugins, configurations, err := unmarshalSchedulerConf(config) 18083 if err != nil </span><span class="cov0" title="0">{ 18084 klog.Errorf("scheduler config %s is invalid: %v", config, err) 18085 return 18086 }</span> 18087 18088 <span class="cov0" title="0">pc.mutex.Lock() 18089 // If it is valid, use the new configuration 18090 pc.actions = actions 18091 pc.plugins = plugins 18092 pc.configurations = configurations 18093 pc.mutex.Unlock()</span> 18094 } 18095 18096 func (pc *Scheduler) watchSchedulerConf(stopCh <-chan struct{}) <span class="cov0" title="0">{ 18097 if pc.fileWatcher == nil </span><span class="cov0" title="0">{ 18098 return 18099 }</span> 18100 <span class="cov0" title="0">eventCh := pc.fileWatcher.Events() 18101 errCh := pc.fileWatcher.Errors() 18102 for </span><span class="cov0" title="0">{ 18103 select </span>{ 18104 case event, ok := <-eventCh:<span class="cov0" title="0"> 18105 if !ok </span><span class="cov0" title="0">{ 18106 return 18107 }</span> 18108 <span class="cov0" title="0">klog.V(4).Infof("watch %s event: %v", pc.schedulerConf, event) 18109 if event.Op&fsnotify.Write == fsnotify.Write || event.Op&fsnotify.Create == fsnotify.Create </span><span class="cov0" title="0">{ 18110 pc.loadSchedulerConf() 18111 }</span> 18112 case err, ok := <-errCh:<span class="cov0" title="0"> 18113 if !ok </span><span class="cov0" title="0">{ 18114 return 18115 }</span> 18116 <span class="cov0" title="0">klog.Infof("watch %s error: %v", pc.schedulerConf, err)</span> 18117 case <-stopCh:<span class="cov0" title="0"> 18118 return</span> 18119 } 18120 } 18121 } 18122 </pre> 18123 18124 <pre class="file" id="file83" style="display: none">/* 18125 Copyright 2018 The Kubernetes Authors. 18126 18127 Licensed under the Apache License, Version 2.0 (the "License"); 18128 you may not use this file except in compliance with the License. 
18129 You may obtain a copy of the License at 18130 18131 http://www.apache.org/licenses/LICENSE-2.0 18132 18133 Unless required by applicable law or agreed to in writing, software 18134 distributed under the License is distributed on an "AS IS" BASIS, 18135 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18136 See the License for the specific language governing permissions and 18137 limitations under the License. 18138 */ 18139 18140 package scheduler 18141 18142 import ( 18143 "fmt" 18144 "io/ioutil" 18145 "strings" 18146 18147 "gopkg.in/yaml.v2" 18148 18149 "volcano.sh/volcano/pkg/scheduler/conf" 18150 "volcano.sh/volcano/pkg/scheduler/framework" 18151 "volcano.sh/volcano/pkg/scheduler/plugins" 18152 ) 18153 18154 var defaultSchedulerConf = ` 18155 actions: "enqueue, allocate, backfill" 18156 tiers: 18157 - plugins: 18158 - name: priority 18159 - name: gang 18160 - name: conformance 18161 - plugins: 18162 - name: overcommit 18163 - name: drf 18164 - name: predicates 18165 - name: proportion 18166 - name: nodeorder 18167 ` 18168 18169 func unmarshalSchedulerConf(confStr string) ([]framework.Action, []conf.Tier, []conf.Configuration, error) <span class="cov8" title="1">{ 18170 var actions []framework.Action 18171 18172 schedulerConf := &conf.SchedulerConfiguration{} 18173 18174 if err := yaml.Unmarshal([]byte(confStr), schedulerConf); err != nil </span><span class="cov0" title="0">{ 18175 return nil, nil, nil, err 18176 }</span> 18177 // Set default settings for each plugin if not set 18178 <span class="cov8" title="1">for i, tier := range schedulerConf.Tiers </span><span class="cov8" title="1">{ 18179 // drf with hierarchy enabled 18180 hdrf := false 18181 // proportion enabled 18182 proportion := false 18183 for j := range tier.Plugins </span><span class="cov8" title="1">{ 18184 if tier.Plugins[j].Name == "drf" && 18185 tier.Plugins[j].EnabledHierarchy != nil && 18186 *tier.Plugins[j].EnabledHierarchy </span><span class="cov0" 
title="0">{ 18187 hdrf = true 18188 }</span> 18189 <span class="cov8" title="1">if tier.Plugins[j].Name == "proportion" </span><span class="cov8" title="1">{ 18190 proportion = true 18191 }</span> 18192 <span class="cov8" title="1">plugins.ApplyPluginConfDefaults(&schedulerConf.Tiers[i].Plugins[j])</span> 18193 } 18194 <span class="cov8" title="1">if hdrf && proportion </span><span class="cov0" title="0">{ 18195 return nil, nil, nil, fmt.Errorf("proportion and drf with hierarchy enabled conflicts") 18196 }</span> 18197 } 18198 18199 <span class="cov8" title="1">actionNames := strings.Split(schedulerConf.Actions, ",") 18200 for _, actionName := range actionNames </span><span class="cov8" title="1">{ 18201 if action, found := framework.GetAction(strings.TrimSpace(actionName)); found </span><span class="cov8" title="1">{ 18202 actions = append(actions, action) 18203 }</span> else<span class="cov0" title="0"> { 18204 return nil, nil, nil, fmt.Errorf("failed to found Action %s, ignore it", actionName) 18205 }</span> 18206 } 18207 18208 <span class="cov8" title="1">return actions, schedulerConf.Tiers, schedulerConf.Configurations, nil</span> 18209 } 18210 18211 func readSchedulerConf(confPath string) (string, error) <span class="cov0" title="0">{ 18212 dat, err := ioutil.ReadFile(confPath) 18213 if err != nil </span><span class="cov0" title="0">{ 18214 return "", err 18215 }</span> 18216 <span class="cov0" title="0">return string(dat), nil</span> 18217 } 18218 </pre> 18219 18220 <pre class="file" id="file84" style="display: none">/* 18221 Copyright 2017 The Kubernetes Authors. 18222 18223 Licensed under the Apache License, Version 2.0 (the "License"); 18224 you may not use this file except in compliance with the License. 
18225 You may obtain a copy of the License at 18226 18227 http://www.apache.org/licenses/LICENSE-2.0 18228 18229 Unless required by applicable law or agreed to in writing, software 18230 distributed under the License is distributed on an "AS IS" BASIS, 18231 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18232 See the License for the specific language governing permissions and 18233 limitations under the License. 18234 */ 18235 18236 package util 18237 18238 import ( 18239 "container/heap" 18240 18241 "volcano.sh/volcano/pkg/scheduler/api" 18242 ) 18243 18244 //PriorityQueue implements a scheduling queue. 18245 type PriorityQueue struct { 18246 queue priorityQueue 18247 } 18248 18249 type priorityQueue struct { 18250 items []interface{} 18251 lessFn api.LessFn 18252 } 18253 18254 // NewPriorityQueue returns a PriorityQueue 18255 func NewPriorityQueue(lessFn api.LessFn) *PriorityQueue <span class="cov0" title="0">{ 18256 return &PriorityQueue{ 18257 queue: priorityQueue{ 18258 items: make([]interface{}, 0), 18259 lessFn: lessFn, 18260 }, 18261 } 18262 }</span> 18263 18264 // Push pushes element in the priority Queue 18265 func (q *PriorityQueue) Push(it interface{}) <span class="cov0" title="0">{ 18266 heap.Push(&q.queue, it) 18267 }</span> 18268 18269 // Pop pops element in the priority Queue 18270 func (q *PriorityQueue) Pop() interface{} <span class="cov0" title="0">{ 18271 if q.Len() == 0 </span><span class="cov0" title="0">{ 18272 return nil 18273 }</span> 18274 18275 <span class="cov0" title="0">return heap.Pop(&q.queue)</span> 18276 } 18277 18278 // Empty check if queue is empty 18279 func (q *PriorityQueue) Empty() bool <span class="cov0" title="0">{ 18280 return q.queue.Len() == 0 18281 }</span> 18282 18283 // Len returns Len of the priority queue 18284 func (q *PriorityQueue) Len() int <span class="cov0" title="0">{ 18285 return q.queue.Len() 18286 }</span> 18287 18288 func (pq *priorityQueue) Len() int <span class="cov0" 
title="0">{ return len(pq.items) }</span> 18289 18290 func (pq *priorityQueue) Less(i, j int) bool <span class="cov0" title="0">{ 18291 if pq.lessFn == nil </span><span class="cov0" title="0">{ 18292 return i < j 18293 }</span> 18294 18295 // We want Pop to give us the highest, not lowest, priority so we use greater than here. 18296 <span class="cov0" title="0">return pq.lessFn(pq.items[i], pq.items[j])</span> 18297 } 18298 18299 func (pq priorityQueue) Swap(i, j int) <span class="cov0" title="0">{ 18300 pq.items[i], pq.items[j] = pq.items[j], pq.items[i] 18301 }</span> 18302 18303 func (pq *priorityQueue) Push(x interface{}) <span class="cov0" title="0">{ 18304 (*pq).items = append((*pq).items, x) 18305 }</span> 18306 18307 func (pq *priorityQueue) Pop() interface{} <span class="cov0" title="0">{ 18308 old := (*pq).items 18309 n := len(old) 18310 item := old[n-1] 18311 (*pq).items = old[0 : n-1] 18312 return item 18313 }</span> 18314 </pre> 18315 18316 <pre class="file" id="file85" style="display: none">/* 18317 Copyright 2019 The Kubernetes Authors. 18318 18319 Licensed under the Apache License, Version 2.0 (the "License"); 18320 you may not use this file except in compliance with the License. 18321 You may obtain a copy of the License at 18322 18323 http://www.apache.org/licenses/LICENSE-2.0 18324 18325 Unless required by applicable law or agreed to in writing, software 18326 distributed under the License is distributed on an "AS IS" BASIS, 18327 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18328 See the License for the specific language governing permissions and 18329 limitations under the License. 
18330 */ 18331 18332 package util 18333 18334 import ( 18335 "context" 18336 "fmt" 18337 "math" 18338 "math/rand" 18339 "sort" 18340 "sync" 18341 "sync/atomic" 18342 18343 "k8s.io/client-go/util/workqueue" 18344 "k8s.io/klog" 18345 k8sframework "k8s.io/kubernetes/pkg/scheduler/framework/v1alpha1" 18346 18347 "volcano.sh/volcano/cmd/scheduler/app/options" 18348 "volcano.sh/volcano/pkg/scheduler/api" 18349 ) 18350 18351 const baselinePercentageOfNodesToFind = 50 18352 18353 var lastProcessedNodeIndex int 18354 18355 // Reservation is used to record target job and locked nodes 18356 var Reservation *ResourceReservation 18357 18358 func init() <span class="cov8" title="1">{ 18359 Reservation = NewResourceReservation() 18360 }</span> 18361 18362 // CalculateNumOfFeasibleNodesToFind returns the number of feasible nodes that once found, 18363 // the scheduler stops its search for more feasible nodes. 18364 func CalculateNumOfFeasibleNodesToFind(numAllNodes int32) (numNodes int32) <span class="cov0" title="0">{ 18365 opts := options.ServerOpts 18366 if numAllNodes <= opts.MinNodesToFind || opts.PercentageOfNodesToFind >= 100 </span><span class="cov0" title="0">{ 18367 return numAllNodes 18368 }</span> 18369 18370 <span class="cov0" title="0">adaptivePercentage := opts.PercentageOfNodesToFind 18371 if adaptivePercentage <= 0 </span><span class="cov0" title="0">{ 18372 adaptivePercentage = baselinePercentageOfNodesToFind - numAllNodes/125 18373 if adaptivePercentage < opts.MinPercentageOfNodesToFind </span><span class="cov0" title="0">{ 18374 adaptivePercentage = opts.MinPercentageOfNodesToFind 18375 }</span> 18376 } 18377 18378 <span class="cov0" title="0">numNodes = numAllNodes * adaptivePercentage / 100 18379 if numNodes < opts.MinNodesToFind </span><span class="cov0" title="0">{ 18380 numNodes = opts.MinNodesToFind 18381 }</span> 18382 <span class="cov0" title="0">return numNodes</span> 18383 } 18384 18385 // PredicateNodes returns the specified number of nodes that fit 
a task 18386 func PredicateNodes(task *api.TaskInfo, nodes []*api.NodeInfo, fn api.PredicateFn) ([]*api.NodeInfo, *api.FitErrors) <span class="cov0" title="0">{ 18387 //var workerLock sync.Mutex 18388 18389 var errorLock sync.Mutex 18390 fe := api.NewFitErrors() 18391 18392 allNodes := len(nodes) 18393 if allNodes == 0 </span><span class="cov0" title="0">{ 18394 return make([]*api.NodeInfo, 0), fe 18395 }</span> 18396 <span class="cov0" title="0">numNodesToFind := CalculateNumOfFeasibleNodesToFind(int32(allNodes)) 18397 18398 //allocate enough space to avoid growing it 18399 predicateNodes := make([]*api.NodeInfo, numNodesToFind) 18400 18401 numFoundNodes := int32(0) 18402 processedNodes := int32(0) 18403 18404 //create a context with cancellation 18405 ctx, cancel := context.WithCancel(context.Background()) 18406 18407 checkNode := func(index int) </span><span class="cov0" title="0">{ 18408 // Check the nodes starting from where is left off in the previous scheduling cycle, 18409 // to make sure all nodes have the same chance of being examined across pods. 18410 node := nodes[(lastProcessedNodeIndex+index)%allNodes] 18411 atomic.AddInt32(&processedNodes, 1) 18412 klog.V(4).Infof("Considering Task <%v/%v> on node <%v>: <%v> vs. <%v>", 18413 task.Namespace, task.Name, node.Name, task.Resreq, node.Idle) 18414 18415 // TODO (k82cn): Enable eCache for performance improvement. 
18416 if err := fn(task, node); err != nil </span><span class="cov0" title="0">{ 18417 klog.V(3).Infof("Predicates failed for task <%s/%s> on node <%s>: %v", 18418 task.Namespace, task.Name, node.Name, err) 18419 errorLock.Lock() 18420 fe.SetNodeError(node.Name, err) 18421 errorLock.Unlock() 18422 return 18423 }</span> 18424 18425 //check if the number of found nodes is more than the numNodesTofind 18426 <span class="cov0" title="0">length := atomic.AddInt32(&numFoundNodes, 1) 18427 if length > numNodesToFind </span><span class="cov0" title="0">{ 18428 cancel() 18429 atomic.AddInt32(&numFoundNodes, -1) 18430 }</span> else<span class="cov0" title="0"> { 18431 predicateNodes[length-1] = node 18432 }</span> 18433 } 18434 18435 //workqueue.ParallelizeUntil(context.TODO(), 16, len(nodes), checkNode) 18436 <span class="cov0" title="0">workqueue.ParallelizeUntil(ctx, 16, allNodes, checkNode) 18437 18438 //processedNodes := int(numFoundNodes) + len(filteredNodesStatuses) + len(failedPredicateMap) 18439 lastProcessedNodeIndex = (lastProcessedNodeIndex + int(processedNodes)) % allNodes 18440 predicateNodes = predicateNodes[:numFoundNodes] 18441 return predicateNodes, fe</span> 18442 } 18443 18444 // PrioritizeNodes returns a map whose key is node's score and value are corresponding nodes 18445 func PrioritizeNodes(task *api.TaskInfo, nodes []*api.NodeInfo, batchFn api.BatchNodeOrderFn, mapFn api.NodeOrderMapFn, reduceFn api.NodeOrderReduceFn) map[float64][]*api.NodeInfo <span class="cov0" title="0">{ 18446 pluginNodeScoreMap := map[string]k8sframework.NodeScoreList{} 18447 nodeOrderScoreMap := map[string]float64{} 18448 nodeScores := map[float64][]*api.NodeInfo{} 18449 var workerLock sync.Mutex 18450 scoreNode := func(index int) </span><span class="cov0" title="0">{ 18451 node := nodes[index] 18452 mapScores, orderScore, err := mapFn(task, node) 18453 if err != nil </span><span class="cov0" title="0">{ 18454 klog.Errorf("Error in Calculating Priority for the node:%v", err) 
18455 return 18456 }</span> 18457 18458 <span class="cov0" title="0">workerLock.Lock() 18459 for plugin, score := range mapScores </span><span class="cov0" title="0">{ 18460 nodeScoreMap, ok := pluginNodeScoreMap[plugin] 18461 if !ok </span><span class="cov0" title="0">{ 18462 nodeScoreMap = k8sframework.NodeScoreList{} 18463 }</span> 18464 <span class="cov0" title="0">hp := k8sframework.NodeScore{} 18465 hp.Name = node.Name 18466 hp.Score = int64(math.Floor(score)) 18467 pluginNodeScoreMap[plugin] = append(nodeScoreMap, hp)</span> 18468 } 18469 <span class="cov0" title="0">nodeOrderScoreMap[node.Name] = orderScore 18470 workerLock.Unlock()</span> 18471 } 18472 <span class="cov0" title="0">workqueue.ParallelizeUntil(context.TODO(), 16, len(nodes), scoreNode) 18473 reduceScores, err := reduceFn(task, pluginNodeScoreMap) 18474 if err != nil </span><span class="cov0" title="0">{ 18475 klog.Errorf("Error in Calculating Priority for the node:%v", err) 18476 return nodeScores 18477 }</span> 18478 18479 <span class="cov0" title="0">batchNodeScore, err := batchFn(task, nodes) 18480 if err != nil </span><span class="cov0" title="0">{ 18481 klog.Errorf("Error in Calculating batch Priority for the node, err %v", err) 18482 return nodeScores 18483 }</span> 18484 18485 <span class="cov0" title="0">for _, node := range nodes </span><span class="cov0" title="0">{ 18486 if score, found := reduceScores[node.Name]; found </span><span class="cov0" title="0">{ 18487 if orderScore, ok := nodeOrderScoreMap[node.Name]; ok </span><span class="cov0" title="0">{ 18488 score += orderScore 18489 }</span> 18490 <span class="cov0" title="0">if batchScore, ok := batchNodeScore[node.Name]; ok </span><span class="cov0" title="0">{ 18491 score += batchScore 18492 }</span> 18493 <span class="cov0" title="0">nodeScores[score] = append(nodeScores[score], node)</span> 18494 } else<span class="cov0" title="0"> { 18495 // If no plugin is applied to this node, the default is 0.0 18496 score = 0.0 18497 if 
orderScore, ok := nodeOrderScoreMap[node.Name]; ok </span><span class="cov0" title="0">{ 18498 score += orderScore 18499 }</span> 18500 <span class="cov0" title="0">if batchScore, ok := batchNodeScore[node.Name]; ok </span><span class="cov0" title="0">{ 18501 score += batchScore 18502 }</span> 18503 <span class="cov0" title="0">nodeScores[score] = append(nodeScores[score], node)</span> 18504 } 18505 } 18506 <span class="cov0" title="0">return nodeScores</span> 18507 } 18508 18509 // SortNodes returns nodes by order of score 18510 func SortNodes(nodeScores map[float64][]*api.NodeInfo) []*api.NodeInfo <span class="cov0" title="0">{ 18511 var nodesInorder []*api.NodeInfo 18512 var keys []float64 18513 for key := range nodeScores </span><span class="cov0" title="0">{ 18514 keys = append(keys, key) 18515 }</span> 18516 <span class="cov0" title="0">sort.Sort(sort.Reverse(sort.Float64Slice(keys))) 18517 for _, key := range keys </span><span class="cov0" title="0">{ 18518 nodes := nodeScores[key] 18519 nodesInorder = append(nodesInorder, nodes...) 18520 }</span> 18521 <span class="cov0" title="0">return nodesInorder</span> 18522 } 18523 18524 // SelectBestNode returns best node whose score is highest, pick one randomly if there are many nodes with same score. 
18525 func SelectBestNode(nodeScores map[float64][]*api.NodeInfo) *api.NodeInfo <span class="cov8" title="1">{ 18526 var bestNodes []*api.NodeInfo 18527 maxScore := -1.0 18528 for score, nodes := range nodeScores </span><span class="cov8" title="1">{ 18529 if score > maxScore </span><span class="cov8" title="1">{ 18530 maxScore = score 18531 bestNodes = nodes 18532 }</span> 18533 } 18534 18535 <span class="cov8" title="1">if len(bestNodes) == 0 </span><span class="cov8" title="1">{ 18536 return nil 18537 }</span> 18538 18539 <span class="cov8" title="1">return bestNodes[rand.Intn(len(bestNodes))]</span> 18540 } 18541 18542 // GetNodeList returns values of the map 'nodes' 18543 func GetNodeList(nodes map[string]*api.NodeInfo, nodeList []string) []*api.NodeInfo <span class="cov0" title="0">{ 18544 result := make([]*api.NodeInfo, 0, len(nodeList)) 18545 for _, nodename := range nodeList </span><span class="cov0" title="0">{ 18546 if ni, ok := nodes[nodename]; ok </span><span class="cov0" title="0">{ 18547 result = append(result, ni) 18548 }</span> 18549 } 18550 <span class="cov0" title="0">return result</span> 18551 } 18552 18553 // ValidateVictims returns an error if the resources of the victims can't satisfy the preemptor 18554 func ValidateVictims(preemptor *api.TaskInfo, node *api.NodeInfo, victims []*api.TaskInfo) error <span class="cov0" title="0">{ 18555 if len(victims) == 0 </span><span class="cov0" title="0">{ 18556 return fmt.Errorf("no victims") 18557 }</span> 18558 <span class="cov0" title="0">futureIdle := node.FutureIdle() 18559 for _, victim := range victims </span><span class="cov0" title="0">{ 18560 futureIdle.Add(victim.Resreq) 18561 }</span> 18562 // Every resource of the preemptor needs to be less or equal than corresponding 18563 // idle resource after preemption. 
18564 <span class="cov0" title="0">if !preemptor.InitResreq.LessEqual(futureIdle, api.Zero) </span><span class="cov0" title="0">{ 18565 return fmt.Errorf("not enough resources: requested <%v>, but future idle <%v>", 18566 preemptor.InitResreq, futureIdle) 18567 }</span> 18568 <span class="cov0" title="0">return nil</span> 18569 } 18570 18571 // ResourceReservation is struct used for resource reservation 18572 type ResourceReservation struct { 18573 TargetJob *api.JobInfo 18574 LockedNodes map[string]*api.NodeInfo 18575 } 18576 18577 // NewResourceReservation is used to create global instance 18578 func NewResourceReservation() *ResourceReservation <span class="cov8" title="1">{ 18579 return &ResourceReservation{ 18580 TargetJob: nil, 18581 LockedNodes: map[string]*api.NodeInfo{}, 18582 } 18583 }</span> 18584 18585 // GetMinInt return minimum int from vals 18586 func GetMinInt(vals ...int) int <span class="cov8" title="1">{ 18587 if len(vals) == 0 </span><span class="cov8" title="1">{ 18588 return 0 18589 }</span> 18590 18591 <span class="cov8" title="1">min := vals[0] 18592 for _, val := range vals </span><span class="cov8" title="1">{ 18593 if val <= min </span><span class="cov8" title="1">{ 18594 min = val 18595 }</span> 18596 } 18597 <span class="cov8" title="1">return min</span> 18598 } 18599 </pre> 18600 18601 <pre class="file" id="file86" style="display: none">/* 18602 Copyright 2019 The Kubernetes Authors. 18603 18604 Licensed under the Apache License, Version 2.0 (the "License"); 18605 you may not use this file except in compliance with the License. 18606 You may obtain a copy of the License at 18607 18608 http://www.apache.org/licenses/LICENSE-2.0 18609 18610 Unless required by applicable law or agreed to in writing, software 18611 distributed under the License is distributed on an "AS IS" BASIS, 18612 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
18613 See the License for the specific language governing permissions and 18614 limitations under the License. 18615 */ 18616 18617 package util 18618 18619 import ( 18620 "fmt" 18621 "sync" 18622 18623 v1 "k8s.io/api/core/v1" 18624 "k8s.io/apimachinery/pkg/api/resource" 18625 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 18626 "k8s.io/apimachinery/pkg/types" 18627 volumescheduling "k8s.io/kubernetes/pkg/controller/volume/scheduling" 18628 18629 schedulingv2 "volcano.sh/apis/pkg/apis/scheduling/v1beta1" 18630 "volcano.sh/volcano/pkg/scheduler/api" 18631 ) 18632 18633 // BuildResourceList builts resource list object 18634 func BuildResourceList(cpu string, memory string) v1.ResourceList <span class="cov0" title="0">{ 18635 return v1.ResourceList{ 18636 v1.ResourceCPU: resource.MustParse(cpu), 18637 v1.ResourceMemory: resource.MustParse(memory), 18638 api.GPUResourceName: resource.MustParse("0"), 18639 } 18640 }</span> 18641 18642 // BuildResourceListWithGPU builts resource list with GPU 18643 func BuildResourceListWithGPU(cpu string, memory string, GPU string) v1.ResourceList <span class="cov0" title="0">{ 18644 return v1.ResourceList{ 18645 v1.ResourceCPU: resource.MustParse(cpu), 18646 v1.ResourceMemory: resource.MustParse(memory), 18647 api.GPUResourceName: resource.MustParse(GPU), 18648 } 18649 }</span> 18650 18651 // BuildNode builts node object 18652 func BuildNode(name string, alloc v1.ResourceList, labels map[string]string) *v1.Node <span class="cov0" title="0">{ 18653 return &v1.Node{ 18654 ObjectMeta: metav1.ObjectMeta{ 18655 Name: name, 18656 Labels: labels, 18657 Annotations: map[string]string{}, 18658 }, 18659 Status: v1.NodeStatus{ 18660 Capacity: alloc, 18661 Allocatable: alloc, 18662 }, 18663 } 18664 }</span> 18665 18666 // BuildPod builts Pod object 18667 func BuildPod(namespace, name, nodename string, p v1.PodPhase, req v1.ResourceList, groupName string, labels map[string]string, selector map[string]string) *v1.Pod <span class="cov0" title="0">{ 
18668 return &v1.Pod{ 18669 ObjectMeta: metav1.ObjectMeta{ 18670 UID: types.UID(fmt.Sprintf("%v-%v", namespace, name)), 18671 Name: name, 18672 Namespace: namespace, 18673 Labels: labels, 18674 Annotations: map[string]string{ 18675 schedulingv2.KubeGroupNameAnnotationKey: groupName, 18676 }, 18677 }, 18678 Status: v1.PodStatus{ 18679 Phase: p, 18680 }, 18681 Spec: v1.PodSpec{ 18682 NodeName: nodename, 18683 NodeSelector: selector, 18684 Containers: []v1.Container{ 18685 { 18686 Resources: v1.ResourceRequirements{ 18687 Requests: req, 18688 }, 18689 }, 18690 }, 18691 }, 18692 } 18693 }</span> 18694 18695 // FakeBinder is used as fake binder 18696 type FakeBinder struct { 18697 sync.Mutex 18698 Binds map[string]string 18699 Channel chan string 18700 } 18701 18702 // Bind used by fake binder struct to bind pods 18703 func (fb *FakeBinder) Bind(p *v1.Pod, hostname string) error <span class="cov0" title="0">{ 18704 fb.Lock() 18705 defer fb.Unlock() 18706 18707 key := fmt.Sprintf("%v/%v", p.Namespace, p.Name) 18708 fb.Binds[key] = hostname 18709 18710 fb.Channel <- key 18711 18712 return nil 18713 }</span> 18714 18715 // FakeEvictor is used as fake evictor 18716 type FakeEvictor struct { 18717 sync.Mutex 18718 evicts []string 18719 Channel chan string 18720 } 18721 18722 // Evicts returns copy of evicted pods. 18723 func (fe *FakeEvictor) Evicts() []string <span class="cov0" title="0">{ 18724 fe.Lock() 18725 defer fe.Unlock() 18726 return append([]string{}, fe.evicts...) 
18727 }</span> 18728 18729 // Evict is used by fake evictor to evict pods 18730 func (fe *FakeEvictor) Evict(p *v1.Pod, reason string) error <span class="cov0" title="0">{ 18731 fe.Lock() 18732 defer fe.Unlock() 18733 18734 fmt.Println("PodName: ", p.Name) 18735 key := fmt.Sprintf("%v/%v", p.Namespace, p.Name) 18736 fe.evicts = append(fe.evicts, key) 18737 18738 fe.Channel <- key 18739 18740 return nil 18741 }</span> 18742 18743 // FakeStatusUpdater is used for fake status update 18744 type FakeStatusUpdater struct { 18745 } 18746 18747 // UpdatePodCondition is a empty function 18748 func (ftsu *FakeStatusUpdater) UpdatePodCondition(pod *v1.Pod, podCondition *v1.PodCondition) (*v1.Pod, error) <span class="cov0" title="0">{ 18749 // do nothing here 18750 return nil, nil 18751 }</span> 18752 18753 // UpdatePodGroup is a empty function 18754 func (ftsu *FakeStatusUpdater) UpdatePodGroup(pg *api.PodGroup) (*api.PodGroup, error) <span class="cov0" title="0">{ 18755 // do nothing here 18756 return nil, nil 18757 }</span> 18758 18759 // FakeVolumeBinder is used as fake volume binder 18760 type FakeVolumeBinder struct { 18761 } 18762 18763 // AllocateVolumes is a empty function 18764 func (fvb *FakeVolumeBinder) AllocateVolumes(task *api.TaskInfo, hostname string, podVolumes *volumescheduling.PodVolumes) error <span class="cov0" title="0">{ 18765 return nil 18766 }</span> 18767 18768 // BindVolumes is a empty function 18769 func (fvb *FakeVolumeBinder) BindVolumes(task *api.TaskInfo, podVolumes *volumescheduling.PodVolumes) error <span class="cov0" title="0">{ 18770 return nil 18771 }</span> 18772 18773 // GetPodVolumes is a empty function 18774 func (fvb *FakeVolumeBinder) GetPodVolumes(task *api.TaskInfo, node *v1.Node) (*volumescheduling.PodVolumes, error) <span class="cov0" title="0">{ 18775 return nil, nil 18776 }</span> 18777 </pre> 18778 18779 <pre class="file" id="file87" style="display: none">/* 18780 Copyright 2018 The Volcano Authors. 
18781 18782 Licensed under the Apache License, Version 2.0 (the "License"); 18783 you may not use this file except in compliance with the License. 18784 You may obtain a copy of the License at 18785 18786 http://www.apache.org/licenses/LICENSE-2.0 18787 18788 Unless required by applicable law or agreed to in writing, software 18789 distributed under the License is distributed on an "AS IS" BASIS, 18790 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18791 See the License for the specific language governing permissions and 18792 limitations under the License. 18793 */ 18794 18795 package mutate 18796 18797 import ( 18798 "encoding/json" 18799 "fmt" 18800 "strconv" 18801 18802 "k8s.io/api/admission/v1beta1" 18803 whv1beta1 "k8s.io/api/admissionregistration/v1beta1" 18804 v1 "k8s.io/api/core/v1" 18805 "k8s.io/klog" 18806 18807 "volcano.sh/apis/pkg/apis/batch/v1alpha1" 18808 "volcano.sh/volcano/pkg/webhooks/router" 18809 "volcano.sh/volcano/pkg/webhooks/schema" 18810 "volcano.sh/volcano/pkg/webhooks/util" 18811 ) 18812 18813 const ( 18814 // DefaultQueue constant stores the name of the queue as "default" 18815 DefaultQueue = "default" 18816 // DefaultMaxRetry is the default number of retries. 
18817 DefaultMaxRetry = 3 18818 18819 defaultSchedulerName = "volcano" 18820 18821 defaultMaxRetry int32 = 3 18822 ) 18823 18824 func init() <span class="cov8" title="1">{ 18825 router.RegisterAdmission(service) 18826 }</span> 18827 18828 var service = &router.AdmissionService{ 18829 Path: "/jobs/mutate", 18830 Func: Jobs, 18831 18832 MutatingConfig: &whv1beta1.MutatingWebhookConfiguration{ 18833 Webhooks: []whv1beta1.MutatingWebhook{{ 18834 Name: "mutatejob.volcano.sh", 18835 Rules: []whv1beta1.RuleWithOperations{ 18836 { 18837 Operations: []whv1beta1.OperationType{whv1beta1.Create}, 18838 Rule: whv1beta1.Rule{ 18839 APIGroups: []string{"batch.volcano.sh"}, 18840 APIVersions: []string{"v1alpha1"}, 18841 Resources: []string{"jobs"}, 18842 }, 18843 }, 18844 }, 18845 }}, 18846 }, 18847 } 18848 18849 type patchOperation struct { 18850 Op string `json:"op"` 18851 Path string `json:"path"` 18852 Value interface{} `json:"value,omitempty"` 18853 } 18854 18855 // Jobs mutate jobs. 18856 func Jobs(ar v1beta1.AdmissionReview) *v1beta1.AdmissionResponse <span class="cov0" title="0">{ 18857 klog.V(3).Infof("mutating jobs") 18858 18859 job, err := schema.DecodeJob(ar.Request.Object, ar.Request.Resource) 18860 if err != nil </span><span class="cov0" title="0">{ 18861 return util.ToAdmissionResponse(err) 18862 }</span> 18863 18864 <span class="cov0" title="0">var patchBytes []byte 18865 switch ar.Request.Operation </span>{ 18866 case v1beta1.Create:<span class="cov0" title="0"> 18867 patchBytes, _ = createPatch(job)</span> 18868 default:<span class="cov0" title="0"> 18869 err = fmt.Errorf("expect operation to be 'CREATE' ") 18870 return util.ToAdmissionResponse(err)</span> 18871 } 18872 18873 <span class="cov0" title="0">klog.V(3).Infof("AdmissionResponse: patch=%v", string(patchBytes)) 18874 reviewResponse := v1beta1.AdmissionResponse{ 18875 Allowed: true, 18876 Patch: patchBytes, 18877 } 18878 pt := v1beta1.PatchTypeJSONPatch 18879 reviewResponse.PatchType = &pt 18880 18881 
return &reviewResponse</span> 18882 } 18883 18884 func createPatch(job *v1alpha1.Job) ([]byte, error) <span class="cov0" title="0">{ 18885 var patch []patchOperation 18886 pathQueue := patchDefaultQueue(job) 18887 if pathQueue != nil </span><span class="cov0" title="0">{ 18888 patch = append(patch, *pathQueue) 18889 }</span> 18890 <span class="cov0" title="0">pathScheduler := patchDefaultScheduler(job) 18891 if pathScheduler != nil </span><span class="cov0" title="0">{ 18892 patch = append(patch, *pathScheduler) 18893 }</span> 18894 <span class="cov0" title="0">pathMaxRetry := patchDefaultMaxRetry(job) 18895 if pathMaxRetry != nil </span><span class="cov0" title="0">{ 18896 patch = append(patch, *pathMaxRetry) 18897 }</span> 18898 <span class="cov0" title="0">pathSpec := mutateSpec(job.Spec.Tasks, "/spec/tasks") 18899 if pathSpec != nil </span><span class="cov0" title="0">{ 18900 patch = append(patch, *pathSpec) 18901 }</span> 18902 <span class="cov0" title="0">pathMinAvailable := patchDefaultMinAvailable(job) 18903 if pathMinAvailable != nil </span><span class="cov0" title="0">{ 18904 patch = append(patch, *pathMinAvailable) 18905 }</span> 18906 <span class="cov0" title="0">return json.Marshal(patch)</span> 18907 } 18908 18909 func patchDefaultQueue(job *v1alpha1.Job) *patchOperation <span class="cov0" title="0">{ 18910 //Add default queue if not specified. 18911 if job.Spec.Queue == "" </span><span class="cov0" title="0">{ 18912 return &patchOperation{Op: "add", Path: "/spec/queue", Value: DefaultQueue} 18913 }</span> 18914 <span class="cov0" title="0">return nil</span> 18915 } 18916 18917 func patchDefaultScheduler(job *v1alpha1.Job) *patchOperation <span class="cov0" title="0">{ 18918 // Add default scheduler name if not specified. 
18919 if job.Spec.SchedulerName == "" </span><span class="cov0" title="0">{ 18920 return &patchOperation{Op: "add", Path: "/spec/schedulerName", Value: defaultSchedulerName} 18921 }</span> 18922 <span class="cov0" title="0">return nil</span> 18923 } 18924 18925 func patchDefaultMaxRetry(job *v1alpha1.Job) *patchOperation <span class="cov0" title="0">{ 18926 // Add default maxRetry if maxRetry is zero. 18927 if job.Spec.MaxRetry == 0 </span><span class="cov0" title="0">{ 18928 return &patchOperation{Op: "add", Path: "/spec/maxRetry", Value: DefaultMaxRetry} 18929 }</span> 18930 <span class="cov0" title="0">return nil</span> 18931 } 18932 18933 func patchDefaultMinAvailable(job *v1alpha1.Job) *patchOperation <span class="cov0" title="0">{ 18934 // Add default minAvailable if minAvailable is zero. 18935 if job.Spec.MinAvailable == 0 </span><span class="cov0" title="0">{ 18936 var jobMinAvailable int32 18937 for _, task := range job.Spec.Tasks </span><span class="cov0" title="0">{ 18938 if task.MinAvailable != nil </span><span class="cov0" title="0">{ 18939 jobMinAvailable += *task.MinAvailable 18940 }</span> else<span class="cov0" title="0"> { 18941 jobMinAvailable += task.Replicas 18942 }</span> 18943 } 18944 18945 <span class="cov0" title="0">return &patchOperation{Op: "add", Path: "/spec/minAvailable", Value: jobMinAvailable}</span> 18946 } 18947 <span class="cov0" title="0">return nil</span> 18948 } 18949 18950 func mutateSpec(tasks []v1alpha1.TaskSpec, basePath string) *patchOperation <span class="cov8" title="1">{ 18951 patched := false 18952 for index := range tasks </span><span class="cov8" title="1">{ 18953 // add default task name 18954 taskName := tasks[index].Name 18955 if len(taskName) == 0 </span><span class="cov8" title="1">{ 18956 patched = true 18957 tasks[index].Name = v1alpha1.DefaultTaskSpec + strconv.Itoa(index) 18958 }</span> 18959 18960 <span class="cov8" title="1">if tasks[index].Template.Spec.HostNetwork && tasks[index].Template.Spec.DNSPolicy 
== "" </span><span class="cov0" title="0">{ 18961 patched = true 18962 tasks[index].Template.Spec.DNSPolicy = v1.DNSClusterFirstWithHostNet 18963 }</span> 18964 18965 <span class="cov8" title="1">if tasks[index].MinAvailable == nil </span><span class="cov8" title="1">{ 18966 patched = true 18967 minAvailable := tasks[index].Replicas 18968 tasks[index].MinAvailable = &minAvailable 18969 }</span> 18970 18971 <span class="cov8" title="1">if tasks[index].MaxRetry == 0 </span><span class="cov8" title="1">{ 18972 patched = true 18973 tasks[index].MaxRetry = defaultMaxRetry 18974 }</span> 18975 } 18976 <span class="cov8" title="1">if !patched </span><span class="cov0" title="0">{ 18977 return nil 18978 }</span> 18979 <span class="cov8" title="1">return &patchOperation{ 18980 Op: "replace", 18981 Path: basePath, 18982 Value: tasks, 18983 }</span> 18984 } 18985 </pre> 18986 18987 <pre class="file" id="file88" style="display: none">/* 18988 Copyright 2018 The Volcano Authors. 18989 18990 Licensed under the Apache License, Version 2.0 (the "License"); 18991 you may not use this file except in compliance with the License. 18992 You may obtain a copy of the License at 18993 18994 http://www.apache.org/licenses/LICENSE-2.0 18995 18996 Unless required by applicable law or agreed to in writing, software 18997 distributed under the License is distributed on an "AS IS" BASIS, 18998 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18999 See the License for the specific language governing permissions and 19000 limitations under the License. 
19001 */ 19002 19003 package validate 19004 19005 import ( 19006 "context" 19007 "fmt" 19008 "strings" 19009 19010 "k8s.io/api/admission/v1beta1" 19011 whv1beta1 "k8s.io/api/admissionregistration/v1beta1" 19012 v1 "k8s.io/api/core/v1" 19013 apiequality "k8s.io/apimachinery/pkg/api/equality" 19014 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 19015 "k8s.io/apimachinery/pkg/util/validation" 19016 "k8s.io/apimachinery/pkg/util/validation/field" 19017 "k8s.io/klog" 19018 k8score "k8s.io/kubernetes/pkg/apis/core" 19019 k8scorev1 "k8s.io/kubernetes/pkg/apis/core/v1" 19020 v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos" 19021 k8scorevalid "k8s.io/kubernetes/pkg/apis/core/validation" 19022 19023 "volcano.sh/apis/pkg/apis/batch/v1alpha1" 19024 schedulingv1beta1 "volcano.sh/apis/pkg/apis/scheduling/v1beta1" 19025 jobhelpers "volcano.sh/volcano/pkg/controllers/job/helpers" 19026 "volcano.sh/volcano/pkg/controllers/job/plugins" 19027 "volcano.sh/volcano/pkg/webhooks/router" 19028 "volcano.sh/volcano/pkg/webhooks/schema" 19029 "volcano.sh/volcano/pkg/webhooks/util" 19030 ) 19031 19032 func init() <span class="cov8" title="1">{ 19033 router.RegisterAdmission(service) 19034 }</span> 19035 19036 var service = &router.AdmissionService{ 19037 Path: "/jobs/validate", 19038 Func: AdmitJobs, 19039 19040 Config: config, 19041 19042 ValidatingConfig: &whv1beta1.ValidatingWebhookConfiguration{ 19043 Webhooks: []whv1beta1.ValidatingWebhook{{ 19044 Name: "validatejob.volcano.sh", 19045 Rules: []whv1beta1.RuleWithOperations{ 19046 { 19047 Operations: []whv1beta1.OperationType{whv1beta1.Create, whv1beta1.Update}, 19048 Rule: whv1beta1.Rule{ 19049 APIGroups: []string{"batch.volcano.sh"}, 19050 APIVersions: []string{"v1alpha1"}, 19051 Resources: []string{"jobs"}, 19052 }, 19053 }, 19054 }, 19055 }}, 19056 }, 19057 } 19058 19059 var config = &router.AdmissionServiceConfig{} 19060 19061 // AdmitJobs is to admit jobs and return response. 
19062 func AdmitJobs(ar v1beta1.AdmissionReview) *v1beta1.AdmissionResponse <span class="cov0" title="0">{ 19063 klog.V(3).Infof("admitting jobs -- %s", ar.Request.Operation) 19064 19065 job, err := schema.DecodeJob(ar.Request.Object, ar.Request.Resource) 19066 if err != nil </span><span class="cov0" title="0">{ 19067 return util.ToAdmissionResponse(err) 19068 }</span> 19069 <span class="cov0" title="0">var msg string 19070 reviewResponse := v1beta1.AdmissionResponse{} 19071 reviewResponse.Allowed = true 19072 19073 switch ar.Request.Operation </span>{ 19074 case v1beta1.Create:<span class="cov0" title="0"> 19075 msg = validateJobCreate(job, &reviewResponse)</span> 19076 case v1beta1.Update:<span class="cov0" title="0"> 19077 oldJob, err := schema.DecodeJob(ar.Request.OldObject, ar.Request.Resource) 19078 if err != nil </span><span class="cov0" title="0">{ 19079 return util.ToAdmissionResponse(err) 19080 }</span> 19081 <span class="cov0" title="0">err = validateJobUpdate(oldJob, job) 19082 if err != nil </span><span class="cov0" title="0">{ 19083 return util.ToAdmissionResponse(err) 19084 }</span> 19085 default:<span class="cov0" title="0"> 19086 err := fmt.Errorf("expect operation to be 'CREATE' or 'UPDATE'") 19087 return util.ToAdmissionResponse(err)</span> 19088 } 19089 19090 <span class="cov0" title="0">if !reviewResponse.Allowed </span><span class="cov0" title="0">{ 19091 reviewResponse.Result = &metav1.Status{Message: strings.TrimSpace(msg)} 19092 }</span> 19093 <span class="cov0" title="0">return &reviewResponse</span> 19094 } 19095 19096 func validateJobCreate(job *v1alpha1.Job, reviewResponse *v1beta1.AdmissionResponse) string <span class="cov8" title="1">{ 19097 var msg string 19098 taskNames := map[string]string{} 19099 var totalReplicas int32 19100 19101 if job.Spec.MinAvailable < 0 </span><span class="cov8" title="1">{ 19102 reviewResponse.Allowed = false 19103 return "job 'minAvailable' must be >= 0." 
19104 }</span> 19105 19106 <span class="cov8" title="1">if job.Spec.MaxRetry < 0 </span><span class="cov8" title="1">{ 19107 reviewResponse.Allowed = false 19108 return "'maxRetry' cannot be less than zero." 19109 }</span> 19110 19111 <span class="cov8" title="1">if job.Spec.TTLSecondsAfterFinished != nil && *job.Spec.TTLSecondsAfterFinished < 0 </span><span class="cov8" title="1">{ 19112 reviewResponse.Allowed = false 19113 return "'ttlSecondsAfterFinished' cannot be less than zero." 19114 }</span> 19115 19116 <span class="cov8" title="1">if len(job.Spec.Tasks) == 0 </span><span class="cov8" title="1">{ 19117 reviewResponse.Allowed = false 19118 return "No task specified in job spec" 19119 }</span> 19120 19121 <span class="cov8" title="1">for index, task := range job.Spec.Tasks </span><span class="cov8" title="1">{ 19122 if task.Replicas < 0 </span><span class="cov8" title="1">{ 19123 msg += fmt.Sprintf(" 'replicas' < 0 in task: %s;", task.Name) 19124 }</span> 19125 19126 <span class="cov8" title="1">if task.MinAvailable != nil && *task.MinAvailable > task.Replicas </span><span class="cov0" title="0">{ 19127 msg += fmt.Sprintf(" 'minAvailable' is greater than 'replicas' in task: %s, job: %s", task.Name, job.Name) 19128 }</span> 19129 19130 // count replicas 19131 <span class="cov8" title="1">totalReplicas += task.Replicas 19132 19133 // validate task name 19134 if errMsgs := validation.IsDNS1123Label(task.Name); len(errMsgs) > 0 </span><span class="cov8" title="1">{ 19135 msg += fmt.Sprintf(" %v;", errMsgs) 19136 }</span> 19137 19138 // duplicate task name 19139 <span class="cov8" title="1">if _, found := taskNames[task.Name]; found </span><span class="cov8" title="1">{ 19140 msg += fmt.Sprintf(" duplicated task name %s;", task.Name) 19141 break</span> 19142 } else<span class="cov8" title="1"> { 19143 taskNames[task.Name] = task.Name 19144 }</span> 19145 19146 <span class="cov8" title="1">if err := validatePolicies(task.Policies, 
field.NewPath("spec.tasks.policies")); err != nil </span><span class="cov8" title="1">{ 19147 msg += err.Error() + fmt.Sprintf(" valid events are %v, valid actions are %v", 19148 getValidEvents(), getValidActions()) 19149 }</span> 19150 <span class="cov8" title="1">podName := jobhelpers.MakePodName(job.Name, task.Name, index) 19151 msg += validateK8sPodNameLength(podName) 19152 msg += validateTaskTemplate(task, job, index)</span> 19153 } 19154 19155 <span class="cov8" title="1">msg += validateJobName(job) 19156 19157 if totalReplicas < job.Spec.MinAvailable </span><span class="cov8" title="1">{ 19158 msg += "job 'minAvailable' should not be greater than total replicas in tasks;" 19159 }</span> 19160 19161 <span class="cov8" title="1">if err := validatePolicies(job.Spec.Policies, field.NewPath("spec.policies")); err != nil </span><span class="cov8" title="1">{ 19162 msg = msg + err.Error() + fmt.Sprintf(" valid events are %v, valid actions are %v;", 19163 getValidEvents(), getValidActions()) 19164 }</span> 19165 19166 // invalid job plugins 19167 <span class="cov8" title="1">if len(job.Spec.Plugins) != 0 </span><span class="cov8" title="1">{ 19168 for name := range job.Spec.Plugins </span><span class="cov8" title="1">{ 19169 if _, found := plugins.GetPluginBuilder(name); !found </span><span class="cov8" title="1">{ 19170 msg += fmt.Sprintf(" unable to find job plugin: %s", name) 19171 }</span> 19172 } 19173 } 19174 19175 <span class="cov8" title="1">if err := validateIO(job.Spec.Volumes); err != nil </span><span class="cov8" title="1">{ 19176 msg += err.Error() 19177 }</span> 19178 19179 <span class="cov8" title="1">queue, err := config.VolcanoClient.SchedulingV1beta1().Queues().Get(context.TODO(), job.Spec.Queue, metav1.GetOptions{}) 19180 if err != nil </span><span class="cov8" title="1">{ 19181 msg += fmt.Sprintf(" unable to find job queue: %v", err) 19182 }</span> else<span class="cov8" title="1"> if queue.Status.State != schedulingv1beta1.QueueStateOpen 
</span><span class="cov0" title="0">{ 19183 msg += fmt.Sprintf("can only submit job to queue with state `Open`, "+ 19184 "queue `%s` status is `%s`", queue.Name, queue.Status.State) 19185 }</span> 19186 19187 <span class="cov8" title="1">if msg != "" </span><span class="cov8" title="1">{ 19188 reviewResponse.Allowed = false 19189 }</span> 19190 19191 <span class="cov8" title="1">return msg</span> 19192 } 19193 19194 func validateJobUpdate(old, new *v1alpha1.Job) error <span class="cov8" title="1">{ 19195 var totalReplicas int32 19196 for _, task := range new.Spec.Tasks </span><span class="cov8" title="1">{ 19197 if task.Replicas < 0 </span><span class="cov0" title="0">{ 19198 return fmt.Errorf("'replicas' must be >= 0 in task: %s", task.Name) 19199 }</span> 19200 19201 <span class="cov8" title="1">if task.MinAvailable != nil && *task.MinAvailable > task.Replicas </span><span class="cov0" title="0">{ 19202 return fmt.Errorf("'minAvailable' must be <= 'replicas' in task: %s;", task.Name) 19203 }</span> 19204 // count replicas 19205 <span class="cov8" title="1">totalReplicas += task.Replicas</span> 19206 } 19207 <span class="cov8" title="1">if new.Spec.MinAvailable > totalReplicas </span><span class="cov8" title="1">{ 19208 return fmt.Errorf("job 'minAvailable' must not be greater than total replicas") 19209 }</span> 19210 <span class="cov8" title="1">if new.Spec.MinAvailable < 0 </span><span class="cov8" title="1">{ 19211 return fmt.Errorf("job 'minAvailable' must be >= 0") 19212 }</span> 19213 19214 <span class="cov8" title="1">if len(old.Spec.Tasks) != len(new.Spec.Tasks) </span><span class="cov8" title="1">{ 19215 return fmt.Errorf("job updates may not add or remove tasks") 19216 }</span> 19217 // other fields under spec are not allowed to mutate 19218 <span class="cov8" title="1">new.Spec.MinAvailable = old.Spec.MinAvailable 19219 new.Spec.PriorityClassName = old.Spec.PriorityClassName 19220 for i := range new.Spec.Tasks </span><span class="cov8" title="1">{ 
19221 new.Spec.Tasks[i].Replicas = old.Spec.Tasks[i].Replicas 19222 new.Spec.Tasks[i].MinAvailable = old.Spec.Tasks[i].MinAvailable 19223 }</span> 19224 19225 // job controller will update the pvc name if not provided 19226 <span class="cov8" title="1">for i := range new.Spec.Volumes </span><span class="cov0" title="0">{ 19227 if new.Spec.Volumes[i].VolumeClaim != nil </span><span class="cov0" title="0">{ 19228 new.Spec.Volumes[i].VolumeClaimName = "" 19229 }</span> 19230 } 19231 <span class="cov8" title="1">for i := range old.Spec.Volumes </span><span class="cov0" title="0">{ 19232 if old.Spec.Volumes[i].VolumeClaim != nil </span><span class="cov0" title="0">{ 19233 old.Spec.Volumes[i].VolumeClaimName = "" 19234 }</span> 19235 } 19236 19237 <span class="cov8" title="1">if !apiequality.Semantic.DeepEqual(new.Spec, old.Spec) </span><span class="cov8" title="1">{ 19238 return fmt.Errorf("job updates may not change fields other than `minAvailable`, `tasks[*].replicas under spec`") 19239 }</span> 19240 19241 <span class="cov8" title="1">return nil</span> 19242 } 19243 19244 func validateTaskTemplate(task v1alpha1.TaskSpec, job *v1alpha1.Job, index int) string <span class="cov8" title="1">{ 19245 var v1PodTemplate v1.PodTemplate 19246 v1PodTemplate.Template = *task.Template.DeepCopy() 19247 k8scorev1.SetObjectDefaults_PodTemplate(&v1PodTemplate) 19248 19249 var coreTemplateSpec k8score.PodTemplateSpec 19250 k8scorev1.Convert_v1_PodTemplateSpec_To_core_PodTemplateSpec(&v1PodTemplate.Template, &coreTemplateSpec, nil) 19251 19252 // Skip verify container SecurityContex.Privileged as it depends on 19253 // the kube-apiserver `allow-privileged` flag. 
19254 for i, container := range coreTemplateSpec.Spec.Containers </span><span class="cov8" title="1">{ 19255 if container.SecurityContext != nil && container.SecurityContext.Privileged != nil </span><span class="cov8" title="1">{ 19256 coreTemplateSpec.Spec.Containers[i].SecurityContext.Privileged = nil 19257 }</span> 19258 } 19259 19260 <span class="cov8" title="1">corePodTemplate := k8score.PodTemplate{ 19261 ObjectMeta: metav1.ObjectMeta{ 19262 Name: task.Name, 19263 Namespace: job.Namespace, 19264 }, 19265 Template: coreTemplateSpec, 19266 } 19267 19268 if allErrs := k8scorevalid.ValidatePodTemplate(&corePodTemplate); len(allErrs) > 0 </span><span class="cov8" title="1">{ 19269 msg := fmt.Sprintf("spec.task[%d].", index) 19270 for index := range allErrs </span><span class="cov8" title="1">{ 19271 msg += allErrs[index].Error() + ". " 19272 }</span> 19273 <span class="cov8" title="1">return msg</span> 19274 } 19275 19276 <span class="cov8" title="1">msg := validateTaskTopoPolicy(task, index) 19277 if msg != "" </span><span class="cov0" title="0">{ 19278 return msg 19279 }</span> 19280 19281 <span class="cov8" title="1">return ""</span> 19282 } 19283 19284 func validateK8sPodNameLength(podName string) string <span class="cov8" title="1">{ 19285 if errMsgs := validation.IsQualifiedName(podName); len(errMsgs) > 0 </span><span class="cov0" title="0">{ 19286 return fmt.Sprintf("create pod with name %s validate failed %v;", podName, errMsgs) 19287 }</span> 19288 <span class="cov8" title="1">return ""</span> 19289 } 19290 19291 func validateJobName(job *v1alpha1.Job) string <span class="cov8" title="1">{ 19292 if errMsgs := validation.IsQualifiedName(job.Name); len(errMsgs) > 0 </span><span class="cov0" title="0">{ 19293 return fmt.Sprintf("create job with name %s validate failed %v", job.Name, errMsgs) 19294 }</span> 19295 <span class="cov8" title="1">return ""</span> 19296 } 19297 19298 func validateTaskTopoPolicy(task v1alpha1.TaskSpec, index int) string <span 
class="cov8" title="1">{ 19299 if task.TopologyPolicy == "" || task.TopologyPolicy == v1alpha1.None </span><span class="cov8" title="1">{ 19300 return "" 19301 }</span> 19302 19303 <span class="cov8" title="1">template := task.Template.DeepCopy() 19304 19305 for id, container := range template.Spec.Containers </span><span class="cov8" title="1">{ 19306 if len(container.Resources.Requests) == 0 </span><span class="cov8" title="1">{ 19307 template.Spec.Containers[id].Resources.Requests = container.Resources.Limits.DeepCopy() 19308 }</span> 19309 } 19310 19311 <span class="cov8" title="1">for id, container := range template.Spec.InitContainers </span><span class="cov0" title="0">{ 19312 if len(container.Resources.Requests) == 0 </span><span class="cov0" title="0">{ 19313 template.Spec.InitContainers[id].Resources.Requests = container.Resources.Limits.DeepCopy() 19314 }</span> 19315 } 19316 19317 <span class="cov8" title="1">pod := &v1.Pod{ 19318 Spec: template.Spec, 19319 } 19320 19321 if v1qos.GetPodQOS(pod) != v1.PodQOSGuaranteed </span><span class="cov8" title="1">{ 19322 return fmt.Sprintf("spec.task[%d] isn't Guaranteed pod, kind=%v", index, v1qos.GetPodQOS(pod)) 19323 }</span> 19324 19325 <span class="cov8" title="1">for id, container := range append(template.Spec.Containers, template.Spec.InitContainers...) 
</span><span class="cov8" title="1">{ 19326 requestNum := guaranteedCPUs(container) 19327 if requestNum == 0 </span><span class="cov8" title="1">{ 19328 return fmt.Sprintf("the cpu request isn't an integer in spec.task[%d] container[%d].", 19329 index, id) 19330 }</span> 19331 } 19332 19333 <span class="cov8" title="1">return ""</span> 19334 } 19335 19336 func guaranteedCPUs(container v1.Container) int <span class="cov8" title="1">{ 19337 cpuQuantity := container.Resources.Requests[v1.ResourceCPU] 19338 if cpuQuantity.Value()*1000 != cpuQuantity.MilliValue() </span><span class="cov8" title="1">{ 19339 return 0 19340 }</span> 19341 19342 <span class="cov8" title="1">return int(cpuQuantity.Value())</span> 19343 } 19344 </pre> 19345 19346 <pre class="file" id="file89" style="display: none">/* 19347 Copyright 2018 The Volcano Authors. 19348 19349 Licensed under the Apache License, Version 2.0 (the "License"); 19350 you may not use this file except in compliance with the License. 19351 You may obtain a copy of the License at 19352 19353 http://www.apache.org/licenses/LICENSE-2.0 19354 19355 Unless required by applicable law or agreed to in writing, software 19356 distributed under the License is distributed on an "AS IS" BASIS, 19357 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 19358 See the License for the specific language governing permissions and 19359 limitations under the License. 19360 */ 19361 19362 package validate 19363 19364 import ( 19365 "fmt" 19366 19367 "github.com/hashicorp/go-multierror" 19368 19369 "k8s.io/apimachinery/pkg/util/validation/field" 19370 "k8s.io/kubernetes/pkg/apis/core/validation" 19371 19372 batchv1alpha1 "volcano.sh/apis/pkg/apis/batch/v1alpha1" 19373 busv1alpha1 "volcano.sh/apis/pkg/apis/bus/v1alpha1" 19374 ) 19375 19376 // policyEventMap defines all policy events and whether to allow external use. 
19377 var policyEventMap = map[busv1alpha1.Event]bool{ 19378 busv1alpha1.AnyEvent: true, 19379 busv1alpha1.PodFailedEvent: true, 19380 busv1alpha1.PodEvictedEvent: true, 19381 busv1alpha1.JobUnknownEvent: true, 19382 busv1alpha1.TaskCompletedEvent: true, 19383 busv1alpha1.TaskFailedEvent: true, 19384 busv1alpha1.OutOfSyncEvent: false, 19385 busv1alpha1.CommandIssuedEvent: false, 19386 busv1alpha1.JobUpdatedEvent: true, 19387 } 19388 19389 // policyActionMap defines all policy actions and whether to allow external use. 19390 var policyActionMap = map[busv1alpha1.Action]bool{ 19391 busv1alpha1.AbortJobAction: true, 19392 busv1alpha1.RestartJobAction: true, 19393 busv1alpha1.RestartTaskAction: true, 19394 busv1alpha1.TerminateJobAction: true, 19395 busv1alpha1.CompleteJobAction: true, 19396 busv1alpha1.ResumeJobAction: true, 19397 busv1alpha1.SyncJobAction: false, 19398 busv1alpha1.EnqueueAction: false, 19399 busv1alpha1.SyncQueueAction: false, 19400 busv1alpha1.OpenQueueAction: false, 19401 busv1alpha1.CloseQueueAction: false, 19402 } 19403 19404 func validatePolicies(policies []batchv1alpha1.LifecyclePolicy, fldPath *field.Path) error <span class="cov8" title="1">{ 19405 var err error 19406 policyEvents := map[busv1alpha1.Event]struct{}{} 19407 exitCodes := map[int32]struct{}{} 19408 19409 for _, policy := range policies </span><span class="cov8" title="1">{ 19410 if (policy.Event != "" || len(policy.Events) != 0) && policy.ExitCode != nil </span><span class="cov8" title="1">{ 19411 err = multierror.Append(err, fmt.Errorf("must not specify event and exitCode simultaneously")) 19412 break</span> 19413 } 19414 19415 <span class="cov8" title="1">if policy.Event == "" && len(policy.Events) == 0 && policy.ExitCode == nil </span><span class="cov8" title="1">{ 19416 err = multierror.Append(err, fmt.Errorf("either event and exitCode should be specified")) 19417 break</span> 19418 } 19419 19420 <span class="cov8" title="1">if len(policy.Event) != 0 || len(policy.Events) != 0 
</span><span class="cov8" title="1">{ 19421 bFlag := false 19422 policyEventsList := getEventList(policy) 19423 for _, event := range policyEventsList </span><span class="cov8" title="1">{ 19424 if allow, ok := policyEventMap[event]; !ok || !allow </span><span class="cov8" title="1">{ 19425 err = multierror.Append(err, field.Invalid(fldPath, event, "invalid policy event")) 19426 bFlag = true 19427 break</span> 19428 } 19429 19430 <span class="cov8" title="1">if allow, ok := policyActionMap[policy.Action]; !ok || !allow </span><span class="cov8" title="1">{ 19431 err = multierror.Append(err, field.Invalid(fldPath, policy.Action, "invalid policy action")) 19432 bFlag = true 19433 break</span> 19434 } 19435 <span class="cov8" title="1">if _, found := policyEvents[event]; found </span><span class="cov8" title="1">{ 19436 err = multierror.Append(err, fmt.Errorf("duplicate event %v across different policy", event)) 19437 bFlag = true 19438 break</span> 19439 } else<span class="cov8" title="1"> { 19440 policyEvents[event] = struct{}{} 19441 }</span> 19442 } 19443 <span class="cov8" title="1">if bFlag </span><span class="cov8" title="1">{ 19444 break</span> 19445 } 19446 } else<span class="cov8" title="1"> { 19447 if *policy.ExitCode == 0 </span><span class="cov8" title="1">{ 19448 err = multierror.Append(err, fmt.Errorf("0 is not a valid error code")) 19449 break</span> 19450 } 19451 <span class="cov8" title="1">if _, found := exitCodes[*policy.ExitCode]; found </span><span class="cov8" title="1">{ 19452 err = multierror.Append(err, fmt.Errorf("duplicate exitCode %v", *policy.ExitCode)) 19453 break</span> 19454 } else<span class="cov8" title="1"> { 19455 exitCodes[*policy.ExitCode] = struct{}{} 19456 }</span> 19457 } 19458 } 19459 19460 <span class="cov8" title="1">if _, found := policyEvents[busv1alpha1.AnyEvent]; found && len(policyEvents) > 1 </span><span class="cov8" title="1">{ 19461 err = multierror.Append(err, fmt.Errorf("if there's * here, no other policy should 
be here")) 19462 }</span> 19463 19464 <span class="cov8" title="1">return err</span> 19465 } 19466 19467 func getEventList(policy batchv1alpha1.LifecyclePolicy) []busv1alpha1.Event <span class="cov8" title="1">{ 19468 policyEventsList := policy.Events 19469 if len(policy.Event) > 0 </span><span class="cov8" title="1">{ 19470 policyEventsList = append(policyEventsList, policy.Event) 19471 }</span> 19472 <span class="cov8" title="1">uniquePolicyEventlist := removeDuplicates(policyEventsList) 19473 return uniquePolicyEventlist</span> 19474 } 19475 19476 func removeDuplicates(eventList []busv1alpha1.Event) []busv1alpha1.Event <span class="cov8" title="1">{ 19477 keys := make(map[busv1alpha1.Event]bool) 19478 list := []busv1alpha1.Event{} 19479 for _, val := range eventList </span><span class="cov8" title="1">{ 19480 if _, value := keys[val]; !value </span><span class="cov8" title="1">{ 19481 keys[val] = true 19482 list = append(list, val) 19483 }</span> 19484 } 19485 <span class="cov8" title="1">return list</span> 19486 } 19487 19488 func getValidEvents() []busv1alpha1.Event <span class="cov8" title="1">{ 19489 var events []busv1alpha1.Event 19490 for e, allow := range policyEventMap </span><span class="cov8" title="1">{ 19491 if allow </span><span class="cov8" title="1">{ 19492 events = append(events, e) 19493 }</span> 19494 } 19495 19496 <span class="cov8" title="1">return events</span> 19497 } 19498 19499 func getValidActions() []busv1alpha1.Action <span class="cov8" title="1">{ 19500 var actions []busv1alpha1.Action 19501 for a, allow := range policyActionMap </span><span class="cov8" title="1">{ 19502 if allow </span><span class="cov8" title="1">{ 19503 actions = append(actions, a) 19504 }</span> 19505 } 19506 19507 <span class="cov8" title="1">return actions</span> 19508 } 19509 19510 // validateIO validates IO configuration. 
19511 func validateIO(volumes []batchv1alpha1.VolumeSpec) error <span class="cov8" title="1">{ 19512 volumeMap := map[string]bool{} 19513 for _, volume := range volumes </span><span class="cov8" title="1">{ 19514 if len(volume.MountPath) == 0 </span><span class="cov8" title="1">{ 19515 return fmt.Errorf(" mountPath is required;") 19516 }</span> 19517 <span class="cov8" title="1">if _, found := volumeMap[volume.MountPath]; found </span><span class="cov8" title="1">{ 19518 return fmt.Errorf(" duplicated mountPath: %s;", volume.MountPath) 19519 }</span> 19520 <span class="cov8" title="1">if volume.VolumeClaim == nil && volume.VolumeClaimName == "" </span><span class="cov8" title="1">{ 19521 return fmt.Errorf(" either VolumeClaim or VolumeClaimName must be specified;") 19522 }</span> 19523 <span class="cov8" title="1">if len(volume.VolumeClaimName) != 0 </span><span class="cov8" title="1">{ 19524 if volume.VolumeClaim != nil </span><span class="cov0" title="0">{ 19525 return fmt.Errorf("conflict: If you want to use an existing PVC, just specify VolumeClaimName." + 19526 "If you want to create a new PVC, you do not need to specify VolumeClaimName") 19527 }</span> 19528 <span class="cov8" title="1">if errMsgs := validation.ValidatePersistentVolumeName(volume.VolumeClaimName, false); len(errMsgs) > 0 </span><span class="cov0" title="0">{ 19529 return fmt.Errorf("invalid VolumeClaimName %s : %v", volume.VolumeClaimName, errMsgs) 19530 }</span> 19531 } 19532 19533 <span class="cov8" title="1">volumeMap[volume.MountPath] = true</span> 19534 } 19535 <span class="cov8" title="1">return nil</span> 19536 } 19537 </pre> 19538 19539 <pre class="file" id="file90" style="display: none">/* 19540 Copyright 2021 The Volcano Authors. 19541 19542 Licensed under the Apache License, Version 2.0 (the "License"); 19543 you may not use this file except in compliance with the License. 
19544 You may obtain a copy of the License at 19545 19546 http://www.apache.org/licenses/LICENSE-2.0 19547 19548 Unless required by applicable law or agreed to in writing, software 19549 distributed under the License is distributed on an "AS IS" BASIS, 19550 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 19551 See the License for the specific language governing permissions and 19552 limitations under the License. 19553 */ 19554 19555 package mutate 19556 19557 import ( 19558 "github.com/imdario/mergo" 19559 "gopkg.in/yaml.v2" 19560 "k8s.io/klog" 19561 19562 v1 "k8s.io/api/core/v1" 19563 19564 wkconfig "volcano.sh/volcano/pkg/webhooks/config" 19565 ) 19566 19567 type annotationResGroup struct{} 19568 19569 const ( 19570 // defaultAnnotationKey: default annotation key 19571 defaultAnnotationKey = "volcano.sh/resource-group" 19572 ) 19573 19574 // NewAnnotationResGroup create a new structure 19575 func NewAnnotationResGroup() ResGroup <span class="cov8" title="1">{ 19576 return &annotationResGroup{} 19577 }</span> 19578 19579 // getAnnotation get annotations from the resource group 19580 func getAnnotation(resGroupConfig wkconfig.ResGroupConfig) map[string]string <span class="cov8" title="1">{ 19581 annotations := make(map[string]string) 19582 for _, val := range resGroupConfig.Object.Value </span><span class="cov8" title="1">{ 19583 tmp := make(map[string]string) 19584 err := yaml.Unmarshal([]byte(val), &tmp) 19585 if err != nil </span><span class="cov0" title="0">{ 19586 continue</span> 19587 } 19588 19589 <span class="cov8" title="1">if err := mergo.Merge(&annotations, &tmp); err != nil </span><span class="cov0" title="0">{ 19590 klog.Errorf("annotations merge failed, err=%v", err) 19591 continue</span> 19592 } 19593 } 19594 19595 <span class="cov8" title="1">return annotations</span> 19596 } 19597 19598 // IsBelongResGroup adjust whether pod is belong to the resource group 19599 func (resGroup *annotationResGroup) IsBelongResGroup(pod 
*v1.Pod, resGroupConfig wkconfig.ResGroupConfig) bool <span class="cov8" title="1">{ 19600 if resGroupConfig.Object.Key != "" && resGroupConfig.Object.Key != "annotation" </span><span class="cov0" title="0">{ 19601 return false 19602 }</span> 19603 19604 <span class="cov8" title="1">annotations := getAnnotation(resGroupConfig) 19605 klog.V(3).Infof("annotations : %v", annotations) 19606 for key, annotation := range annotations </span><span class="cov8" title="1">{ 19607 if pod.Annotations[key] == annotation </span><span class="cov8" title="1">{ 19608 return true 19609 }</span> 19610 } 19611 19612 <span class="cov8" title="1">if resGroupConfig.Object.Key == "" && pod.Annotations[defaultAnnotationKey] == resGroupConfig.ResourceGroup </span><span class="cov8" title="1">{ 19613 return true 19614 }</span> 19615 19616 <span class="cov8" title="1">return false</span> 19617 } 19618 </pre> 19619 19620 <pre class="file" id="file91" style="display: none">/* 19621 Copyright 2021 The Volcano Authors. 19622 19623 Licensed under the Apache License, Version 2.0 (the "License"); 19624 you may not use this file except in compliance with the License. 19625 You may obtain a copy of the License at 19626 19627 http://www.apache.org/licenses/LICENSE-2.0 19628 19629 Unless required by applicable law or agreed to in writing, software 19630 distributed under the License is distributed on an "AS IS" BASIS, 19631 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 19632 See the License for the specific language governing permissions and 19633 limitations under the License. 
19634 */ 19635 19636 package mutate 19637 19638 import ( 19639 v1 "k8s.io/api/core/v1" 19640 19641 wkconfig "volcano.sh/volcano/pkg/webhooks/config" 19642 ) 19643 19644 // ResGroup interface for resource group 19645 type ResGroup interface { 19646 IsBelongResGroup(pod *v1.Pod, resGroupConfig wkconfig.ResGroupConfig) bool 19647 } 19648 19649 // GetResGroup return the interface besed on resourceGroup.Object.Key 19650 func GetResGroup(resourceGroup wkconfig.ResGroupConfig) ResGroup <span class="cov8" title="1">{ 19651 switch resourceGroup.Object.Key </span>{ 19652 case "namespace":<span class="cov8" title="1"> 19653 return NewNamespaceResGroup()</span> 19654 case "annotation":<span class="cov8" title="1"> 19655 return NewAnnotationResGroup()</span> 19656 } 19657 <span class="cov8" title="1">return NewAnnotationResGroup()</span> 19658 } 19659 </pre> 19660 19661 <pre class="file" id="file92" style="display: none">/* 19662 Copyright 2021 The Volcano Authors. 19663 19664 Licensed under the Apache License, Version 2.0 (the "License"); 19665 you may not use this file except in compliance with the License. 19666 You may obtain a copy of the License at 19667 19668 http://www.apache.org/licenses/LICENSE-2.0 19669 19670 Unless required by applicable law or agreed to in writing, software 19671 distributed under the License is distributed on an "AS IS" BASIS, 19672 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 19673 See the License for the specific language governing permissions and 19674 limitations under the License. 
19675 */ 19676 19677 package mutate 19678 19679 import ( 19680 "encoding/json" 19681 "fmt" 19682 19683 "k8s.io/api/admission/v1beta1" 19684 whv1beta1 "k8s.io/api/admissionregistration/v1beta1" 19685 v1 "k8s.io/api/core/v1" 19686 "k8s.io/klog" 19687 19688 wkconfig "volcano.sh/volcano/pkg/webhooks/config" 19689 "volcano.sh/volcano/pkg/webhooks/router" 19690 "volcano.sh/volcano/pkg/webhooks/schema" 19691 "volcano.sh/volcano/pkg/webhooks/util" 19692 ) 19693 19694 // patchOperation define the patch operation structure 19695 type patchOperation struct { 19696 Op string `json:"op"` 19697 Path string `json:"path"` 19698 Value interface{} `json:"value,omitempty"` 19699 } 19700 19701 // init register mutate pod 19702 func init() <span class="cov8" title="1">{ 19703 router.RegisterAdmission(service) 19704 }</span> 19705 19706 var service = &router.AdmissionService{ 19707 Path: "/pods/mutate", 19708 Func: Pods, 19709 Config: config, 19710 MutatingConfig: &whv1beta1.MutatingWebhookConfiguration{ 19711 Webhooks: []whv1beta1.MutatingWebhook{{ 19712 Name: "mutatepod.volcano.sh", 19713 Rules: []whv1beta1.RuleWithOperations{ 19714 { 19715 Operations: []whv1beta1.OperationType{whv1beta1.Create}, 19716 Rule: whv1beta1.Rule{ 19717 APIGroups: []string{""}, 19718 APIVersions: []string{"v1"}, 19719 Resources: []string{"pods"}, 19720 }, 19721 }, 19722 }, 19723 }}, 19724 }, 19725 } 19726 19727 var config = &router.AdmissionServiceConfig{} 19728 19729 // Pods mutate pods. 
19730 func Pods(ar v1beta1.AdmissionReview) *v1beta1.AdmissionResponse <span class="cov0" title="0">{ 19731 klog.V(3).Infof("mutating pods -- %s", ar.Request.Operation) 19732 pod, err := schema.DecodePod(ar.Request.Object, ar.Request.Resource) 19733 if err != nil </span><span class="cov0" title="0">{ 19734 return util.ToAdmissionResponse(err) 19735 }</span> 19736 19737 <span class="cov0" title="0">if pod.Namespace == "" </span><span class="cov0" title="0">{ 19738 pod.Namespace = ar.Request.Namespace 19739 }</span> 19740 19741 <span class="cov0" title="0">var patchBytes []byte 19742 switch ar.Request.Operation </span>{ 19743 case v1beta1.Create:<span class="cov0" title="0"> 19744 patchBytes, _ = createPatch(pod)</span> 19745 default:<span class="cov0" title="0"> 19746 err = fmt.Errorf("expect operation to be 'CREATE' ") 19747 return util.ToAdmissionResponse(err)</span> 19748 } 19749 19750 <span class="cov0" title="0">reviewResponse := v1beta1.AdmissionResponse{ 19751 Allowed: true, 19752 Patch: patchBytes, 19753 } 19754 pt := v1beta1.PatchTypeJSONPatch 19755 reviewResponse.PatchType = &pt 19756 19757 return &reviewResponse</span> 19758 } 19759 19760 // createPatch patch pod 19761 func createPatch(pod *v1.Pod) ([]byte, error) <span class="cov8" title="1">{ 19762 if config.ConfigData == nil </span><span class="cov0" title="0">{ 19763 klog.V(5).Infof("admission configuration is empty.") 19764 return nil, nil 19765 }</span> 19766 19767 <span class="cov8" title="1">var patch []patchOperation 19768 config.ConfigData.Lock() 19769 defer config.ConfigData.Unlock() 19770 19771 for _, resourceGroup := range config.ConfigData.ResGroupsConfig </span><span class="cov8" title="1">{ 19772 klog.V(3).Infof("resourceGroup %s", resourceGroup.ResourceGroup) 19773 group := GetResGroup(resourceGroup) 19774 if !group.IsBelongResGroup(pod, resourceGroup) </span><span class="cov8" title="1">{ 19775 continue</span> 19776 } 19777 19778 <span class="cov8" title="1">patchLabel := 
patchLabels(pod, resourceGroup) 19779 if patchLabel != nil </span><span class="cov8" title="1">{ 19780 patch = append(patch, *patchLabel) 19781 }</span> 19782 19783 <span class="cov8" title="1">patchToleration := patchTaintToleration(pod, resourceGroup) 19784 if patchToleration != nil </span><span class="cov8" title="1">{ 19785 patch = append(patch, *patchToleration) 19786 }</span> 19787 <span class="cov8" title="1">patchScheduler := patchSchedulerName(resourceGroup) 19788 if patchScheduler != nil </span><span class="cov8" title="1">{ 19789 patch = append(patch, *patchScheduler) 19790 }</span> 19791 19792 <span class="cov8" title="1">klog.V(5).Infof("pod patch %v", patch) 19793 return json.Marshal(patch)</span> 19794 } 19795 19796 <span class="cov8" title="1">return json.Marshal(patch)</span> 19797 } 19798 19799 // patchLabels patch label 19800 func patchLabels(pod *v1.Pod, resGroupConfig wkconfig.ResGroupConfig) *patchOperation <span class="cov8" title="1">{ 19801 if len(resGroupConfig.Labels) == 0 </span><span class="cov0" title="0">{ 19802 return nil 19803 }</span> 19804 19805 <span class="cov8" title="1">nodeSelector := make(map[string]string) 19806 for key, label := range pod.Spec.NodeSelector </span><span class="cov0" title="0">{ 19807 nodeSelector[key] = label 19808 }</span> 19809 19810 <span class="cov8" title="1">for key, label := range resGroupConfig.Labels </span><span class="cov8" title="1">{ 19811 nodeSelector[key] = label 19812 }</span> 19813 19814 <span class="cov8" title="1">return &patchOperation{Op: "add", Path: "/spec/nodeSelector", Value: nodeSelector}</span> 19815 } 19816 19817 // patchTaintToleration patch taint toleration 19818 func patchTaintToleration(pod *v1.Pod, resGroupConfig wkconfig.ResGroupConfig) *patchOperation <span class="cov8" title="1">{ 19819 if len(resGroupConfig.Tolerations) == 0 </span><span class="cov8" title="1">{ 19820 return nil 19821 }</span> 19822 19823 <span class="cov8" title="1">var dst []v1.Toleration 19824 dst = 
append(dst, pod.Spec.Tolerations...) 19825 dst = append(dst, resGroupConfig.Tolerations...) 19826 19827 return &patchOperation{Op: "add", Path: "/spec/tolerations", Value: dst}</span> 19828 } 19829 19830 // patchSchedulerName patch scheduler 19831 func patchSchedulerName(resGroupConfig wkconfig.ResGroupConfig) *patchOperation <span class="cov8" title="1">{ 19832 if resGroupConfig.SchedulerName == "" </span><span class="cov0" title="0">{ 19833 return nil 19834 }</span> 19835 19836 <span class="cov8" title="1">return &patchOperation{Op: "add", Path: "/spec/schedulerName", Value: resGroupConfig.SchedulerName}</span> 19837 } 19838 </pre> 19839 19840 <pre class="file" id="file93" style="display: none">/* 19841 Copyright 2021 The Volcano Authors. 19842 19843 Licensed under the Apache License, Version 2.0 (the "License"); 19844 you may not use this file except in compliance with the License. 19845 You may obtain a copy of the License at 19846 19847 http://www.apache.org/licenses/LICENSE-2.0 19848 19849 Unless required by applicable law or agreed to in writing, software 19850 distributed under the License is distributed on an "AS IS" BASIS, 19851 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 19852 See the License for the specific language governing permissions and 19853 limitations under the License. 
19854 */ 19855 19856 package mutate 19857 19858 import ( 19859 v1 "k8s.io/api/core/v1" 19860 19861 wkconfig "volcano.sh/volcano/pkg/webhooks/config" 19862 ) 19863 19864 type namespaceResGroup struct{} 19865 19866 // NewNamespaceResGroup create a new structure 19867 func NewNamespaceResGroup() ResGroup <span class="cov8" title="1">{ 19868 return &namespaceResGroup{} 19869 }</span> 19870 19871 // IsBelongResGroup adjust whether pod is belong to the resource group 19872 func (resGroup *namespaceResGroup) IsBelongResGroup(pod *v1.Pod, resGroupConfig wkconfig.ResGroupConfig) bool <span class="cov8" title="1">{ 19873 if resGroupConfig.Object.Key != "namespace" </span><span class="cov0" title="0">{ 19874 return false 19875 }</span> 19876 19877 <span class="cov8" title="1">for _, val := range resGroupConfig.Object.Value </span><span class="cov8" title="1">{ 19878 if pod.Namespace == val </span><span class="cov8" title="1">{ 19879 return true 19880 }</span> 19881 } 19882 19883 <span class="cov8" title="1">return false</span> 19884 } 19885 </pre> 19886 19887 <pre class="file" id="file94" style="display: none">/* 19888 Copyright 2019 The Volcano Authors. 19889 19890 Licensed under the Apache License, Version 2.0 (the "License"); 19891 you may not use this file except in compliance with the License. 19892 You may obtain a copy of the License at 19893 19894 http://www.apache.org/licenses/LICENSE-2.0 19895 19896 Unless required by applicable law or agreed to in writing, software 19897 distributed under the License is distributed on an "AS IS" BASIS, 19898 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 19899 See the License for the specific language governing permissions and 19900 limitations under the License. 
19901 */ 19902 19903 package validate 19904 19905 import ( 19906 "context" 19907 "fmt" 19908 "strconv" 19909 19910 "strings" 19911 19912 "k8s.io/api/admission/v1beta1" 19913 whv1beta1 "k8s.io/api/admissionregistration/v1beta1" 19914 v1 "k8s.io/api/core/v1" 19915 apierrors "k8s.io/apimachinery/pkg/api/errors" 19916 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 19917 "k8s.io/apimachinery/pkg/util/intstr" 19918 "k8s.io/klog" 19919 19920 "volcano.sh/apis/pkg/apis/helpers" 19921 vcv1beta1 "volcano.sh/apis/pkg/apis/scheduling/v1beta1" 19922 "volcano.sh/volcano/pkg/webhooks/router" 19923 "volcano.sh/volcano/pkg/webhooks/schema" 19924 "volcano.sh/volcano/pkg/webhooks/util" 19925 ) 19926 19927 func init() <span class="cov8" title="1">{ 19928 router.RegisterAdmission(service) 19929 }</span> 19930 19931 var service = &router.AdmissionService{ 19932 Path: "/pods/validate", 19933 Func: AdmitPods, 19934 19935 Config: config, 19936 19937 ValidatingConfig: &whv1beta1.ValidatingWebhookConfiguration{ 19938 Webhooks: []whv1beta1.ValidatingWebhook{{ 19939 Name: "validatepod.volcano.sh", 19940 Rules: []whv1beta1.RuleWithOperations{ 19941 { 19942 Operations: []whv1beta1.OperationType{whv1beta1.Create}, 19943 Rule: whv1beta1.Rule{ 19944 APIGroups: []string{""}, 19945 APIVersions: []string{"v1"}, 19946 Resources: []string{"pods"}, 19947 }, 19948 }, 19949 }, 19950 }}, 19951 }, 19952 } 19953 19954 var config = &router.AdmissionServiceConfig{} 19955 19956 // AdmitPods is to admit pods and return response. 
19957 func AdmitPods(ar v1beta1.AdmissionReview) *v1beta1.AdmissionResponse <span class="cov0" title="0">{ 19958 klog.V(3).Infof("admitting pods -- %s", ar.Request.Operation) 19959 19960 pod, err := schema.DecodePod(ar.Request.Object, ar.Request.Resource) 19961 if err != nil </span><span class="cov0" title="0">{ 19962 return util.ToAdmissionResponse(err) 19963 }</span> 19964 19965 <span class="cov0" title="0">var msg string 19966 reviewResponse := v1beta1.AdmissionResponse{} 19967 reviewResponse.Allowed = true 19968 19969 switch ar.Request.Operation </span>{ 19970 case v1beta1.Create:<span class="cov0" title="0"> 19971 msg = validatePod(pod, &reviewResponse)</span> 19972 default:<span class="cov0" title="0"> 19973 err := fmt.Errorf("expect operation to be 'CREATE'") 19974 return util.ToAdmissionResponse(err)</span> 19975 } 19976 19977 <span class="cov0" title="0">if !reviewResponse.Allowed </span><span class="cov0" title="0">{ 19978 reviewResponse.Result = &metav1.Status{Message: strings.TrimSpace(msg)} 19979 }</span> 19980 <span class="cov0" title="0">return &reviewResponse</span> 19981 } 19982 19983 /* 19984 allow pods to create when 19985 1. schedulerName of pod isn't volcano 19986 2. pod has Podgroup whose phase isn't Pending 19987 3. normal pods whose schedulerName is volcano don't have podgroup. 19988 4. 
check pod budget annotations configure 19989 */ 19990 func validatePod(pod *v1.Pod, reviewResponse *v1beta1.AdmissionResponse) string <span class="cov8" title="1">{ 19991 if pod.Spec.SchedulerName != config.SchedulerName </span><span class="cov8" title="1">{ 19992 return "" 19993 }</span> 19994 19995 <span class="cov8" title="1">pgName := "" 19996 msg := "" 19997 19998 // vc-job, SN == volcano 19999 if pod.Annotations != nil </span><span class="cov8" title="1">{ 20000 pgName = pod.Annotations[vcv1beta1.KubeGroupNameAnnotationKey] 20001 }</span> 20002 <span class="cov8" title="1">if pgName != "" </span><span class="cov8" title="1">{ 20003 if err := checkPGPhase(pod, pgName, true); err != nil </span><span class="cov8" title="1">{ 20004 msg = err.Error() 20005 reviewResponse.Allowed = false 20006 }</span> 20007 <span class="cov8" title="1">return msg</span> 20008 } 20009 20010 // normal pod, SN == volcano 20011 <span class="cov8" title="1">pgName = helpers.GeneratePodgroupName(pod) 20012 if err := checkPGPhase(pod, pgName, false); err != nil </span><span class="cov8" title="1">{ 20013 msg = err.Error() 20014 reviewResponse.Allowed = false 20015 }</span> 20016 20017 // check pod annotatations 20018 <span class="cov8" title="1">if err := validateAnnotation(pod); err != nil </span><span class="cov0" title="0">{ 20019 msg = err.Error() 20020 reviewResponse.Allowed = false 20021 }</span> 20022 20023 <span class="cov8" title="1">return msg</span> 20024 } 20025 20026 func checkPGPhase(pod *v1.Pod, pgName string, isVCJob bool) error <span class="cov8" title="1">{ 20027 pg, err := config.VolcanoClient.SchedulingV1beta1().PodGroups(pod.Namespace).Get(context.TODO(), pgName, metav1.GetOptions{}) 20028 if err != nil </span><span class="cov8" title="1">{ 20029 if isVCJob || (!isVCJob && !apierrors.IsNotFound(err)) </span><span class="cov8" title="1">{ 20030 return fmt.Errorf("failed to get PodGroup for pod <%s/%s>: %v", pod.Namespace, pod.Name, err) 20031 }</span> 20032 <span 
class="cov0" title="0">return nil</span> 20033 } 20034 <span class="cov8" title="1">if pg.Status.Phase != vcv1beta1.PodGroupPending </span><span class="cov0" title="0">{ 20035 return nil 20036 }</span> 20037 <span class="cov8" title="1">return fmt.Errorf("failed to create pod <%s/%s> as the podgroup phase is Pending", 20038 pod.Namespace, pod.Name)</span> 20039 } 20040 20041 func validateAnnotation(pod *v1.Pod) error <span class="cov8" title="1">{ 20042 num := 0 20043 if len(pod.Annotations) > 0 </span><span class="cov0" title="0">{ 20044 keys := []string{ 20045 vcv1beta1.JDBMinAvailable, 20046 vcv1beta1.JDBMaxUnavailable, 20047 } 20048 for _, key := range keys </span><span class="cov0" title="0">{ 20049 if value, found := pod.Annotations[key]; found </span><span class="cov0" title="0">{ 20050 num++ 20051 if err := validateIntPercentageStr(key, value); err != nil </span><span class="cov0" title="0">{ 20052 recordEvent(err) 20053 return err 20054 }</span> 20055 } 20056 } 20057 <span class="cov0" title="0">if num > 1 </span><span class="cov0" title="0">{ 20058 return fmt.Errorf("not allow configure multiple annotations <%v> at same time", keys) 20059 }</span> 20060 } 20061 <span class="cov8" title="1">return nil</span> 20062 } 20063 20064 func recordEvent(err error) <span class="cov0" title="0">{ 20065 config.Recorder.Eventf(nil, v1.EventTypeWarning, "Admit", "Create pod failed due to %v", err) 20066 }</span> 20067 20068 func validateIntPercentageStr(key, value string) error <span class="cov0" title="0">{ 20069 tmp := intstr.Parse(value) 20070 switch tmp.Type </span>{ 20071 case intstr.Int:<span class="cov0" title="0"> 20072 if tmp.IntValue() <= 0 </span><span class="cov0" title="0">{ 20073 return fmt.Errorf("invalid value <%q> for %v, it must be a positive integer", value, key) 20074 }</span> 20075 <span class="cov0" title="0">return nil</span> 20076 case intstr.String:<span class="cov0" title="0"> 20077 s := strings.Replace(tmp.StrVal, "%", "", -1) 20078 v, err := 
strconv.Atoi(s) 20079 if err != nil </span><span class="cov0" title="0">{ 20080 return fmt.Errorf("invalid value %v for %v", err, key) 20081 }</span> 20082 <span class="cov0" title="0">if v <= 0 || v >= 100 </span><span class="cov0" title="0">{ 20083 return fmt.Errorf("invalid value <%q> for %v, it must be a valid percentage which between 1%% ~ 99%%", tmp.StrVal, key) 20084 }</span> 20085 <span class="cov0" title="0">return nil</span> 20086 } 20087 <span class="cov0" title="0">return fmt.Errorf("invalid type: neither int nor percentage for %v", key)</span> 20088 } 20089 </pre> 20090 20091 <pre class="file" id="file95" style="display: none">/* 20092 Copyright 2018 The Volcano Authors. 20093 20094 Licensed under the Apache License, Version 2.0 (the "License"); 20095 you may not use this file except in compliance with the License. 20096 You may obtain a copy of the License at 20097 20098 http://www.apache.org/licenses/LICENSE-2.0 20099 20100 Unless required by applicable law or agreed to in writing, software 20101 distributed under the License is distributed on an "AS IS" BASIS, 20102 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 20103 See the License for the specific language governing permissions and 20104 limitations under the License. 
20105 */ 20106 20107 package mutate 20108 20109 import ( 20110 "encoding/json" 20111 "fmt" 20112 "strings" 20113 20114 "k8s.io/api/admission/v1beta1" 20115 whv1beta1 "k8s.io/api/admissionregistration/v1beta1" 20116 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 20117 "k8s.io/klog" 20118 20119 schedulingv1beta1 "volcano.sh/apis/pkg/apis/scheduling/v1beta1" 20120 "volcano.sh/volcano/pkg/webhooks/router" 20121 "volcano.sh/volcano/pkg/webhooks/schema" 20122 "volcano.sh/volcano/pkg/webhooks/util" 20123 ) 20124 20125 func init() <span class="cov8" title="1">{ 20126 router.RegisterAdmission(service) 20127 }</span> 20128 20129 var service = &router.AdmissionService{ 20130 Path: "/queues/mutate", 20131 Func: Queues, 20132 20133 MutatingConfig: &whv1beta1.MutatingWebhookConfiguration{ 20134 Webhooks: []whv1beta1.MutatingWebhook{{ 20135 Name: "mutatequeue.volcano.sh", 20136 Rules: []whv1beta1.RuleWithOperations{ 20137 { 20138 Operations: []whv1beta1.OperationType{whv1beta1.Create}, 20139 Rule: whv1beta1.Rule{ 20140 APIGroups: []string{schedulingv1beta1.SchemeGroupVersion.Group}, 20141 APIVersions: []string{schedulingv1beta1.SchemeGroupVersion.Version}, 20142 Resources: []string{"queues"}, 20143 }, 20144 }, 20145 }, 20146 }}, 20147 }, 20148 } 20149 20150 type patchOperation struct { 20151 Op string `json:"op"` 20152 Path string `json:"path"` 20153 Value interface{} `json:"value,omitempty"` 20154 } 20155 20156 // Queues mutate queues. 
20157 func Queues(ar v1beta1.AdmissionReview) *v1beta1.AdmissionResponse <span class="cov8" title="1">{ 20158 klog.V(3).Infof("Mutating %s queue %s.", ar.Request.Operation, ar.Request.Name) 20159 20160 queue, err := schema.DecodeQueue(ar.Request.Object, ar.Request.Resource) 20161 if err != nil </span><span class="cov0" title="0">{ 20162 return util.ToAdmissionResponse(err) 20163 }</span> 20164 20165 <span class="cov8" title="1">var patchBytes []byte 20166 switch ar.Request.Operation </span>{ 20167 case v1beta1.Create:<span class="cov8" title="1"> 20168 patchBytes, err = createQueuePatch(queue)</span> 20169 default:<span class="cov8" title="1"> 20170 return util.ToAdmissionResponse(fmt.Errorf("invalid operation `%s`, "+ 20171 "expect operation to be `CREATE`", ar.Request.Operation))</span> 20172 } 20173 20174 <span class="cov8" title="1">if err != nil </span><span class="cov0" title="0">{ 20175 return &v1beta1.AdmissionResponse{ 20176 Allowed: false, 20177 Result: &metav1.Status{Message: err.Error()}, 20178 } 20179 }</span> 20180 20181 <span class="cov8" title="1">pt := v1beta1.PatchTypeJSONPatch 20182 return &v1beta1.AdmissionResponse{ 20183 Allowed: true, 20184 Patch: patchBytes, 20185 PatchType: &pt, 20186 }</span> 20187 } 20188 20189 func createQueuePatch(queue *schedulingv1beta1.Queue) ([]byte, error) <span class="cov8" title="1">{ 20190 var patch []patchOperation 20191 20192 // add root node if the root node not specified 20193 hierarchy := queue.Annotations[schedulingv1beta1.KubeHierarchyAnnotationKey] 20194 hierarchicalWeights := queue.Annotations[schedulingv1beta1.KubeHierarchyWeightAnnotationKey] 20195 20196 if hierarchy != "" && hierarchicalWeights != "" && !strings.HasPrefix(hierarchy, "root") </span><span class="cov8" title="1">{ 20197 // based on https://tools.ietf.org/html/rfc6901#section-3 20198 // escape "/" with "~1" 20199 patch = append(patch, patchOperation{ 20200 Op: "add", 20201 Path: fmt.Sprintf("/metadata/annotations/%s", 
strings.ReplaceAll(schedulingv1beta1.KubeHierarchyAnnotationKey, "/", "~1")), 20202 Value: fmt.Sprintf("root/%s", hierarchy), 20203 }) 20204 patch = append(patch, patchOperation{ 20205 Op: "add", 20206 Path: fmt.Sprintf("/metadata/annotations/%s", strings.ReplaceAll(schedulingv1beta1.KubeHierarchyWeightAnnotationKey, "/", "~1")), 20207 Value: fmt.Sprintf("1/%s", hierarchicalWeights), 20208 }) 20209 }</span> 20210 20211 <span class="cov8" title="1">trueValue := true 20212 if queue.Spec.Reclaimable == nil </span><span class="cov8" title="1">{ 20213 patch = append(patch, patchOperation{ 20214 Op: "add", 20215 Path: "/spec/reclaimable", 20216 Value: &trueValue, 20217 }) 20218 }</span> 20219 20220 <span class="cov8" title="1">defaultWeight := 1 20221 if queue.Spec.Weight == 0 </span><span class="cov0" title="0">{ 20222 patch = append(patch, patchOperation{ 20223 Op: "add", 20224 Path: "/spec/weight", 20225 Value: &defaultWeight, 20226 }) 20227 }</span> 20228 20229 <span class="cov8" title="1">return json.Marshal(patch)</span> 20230 } 20231 </pre> 20232 20233 <pre class="file" id="file96" style="display: none">/* 20234 Copyright 2018 The Volcano Authors. 20235 20236 Licensed under the Apache License, Version 2.0 (the "License"); 20237 you may not use this file except in compliance with the License. 20238 You may obtain a copy of the License at 20239 20240 http://www.apache.org/licenses/LICENSE-2.0 20241 20242 Unless required by applicable law or agreed to in writing, software 20243 distributed under the License is distributed on an "AS IS" BASIS, 20244 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 20245 See the License for the specific language governing permissions and 20246 limitations under the License. 
20247 */ 20248 20249 package validate 20250 20251 import ( 20252 "context" 20253 "fmt" 20254 "strconv" 20255 "strings" 20256 20257 "k8s.io/api/admission/v1beta1" 20258 whv1beta1 "k8s.io/api/admissionregistration/v1beta1" 20259 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 20260 "k8s.io/apimachinery/pkg/util/validation/field" 20261 "k8s.io/klog" 20262 20263 schedulingv1beta1 "volcano.sh/apis/pkg/apis/scheduling/v1beta1" 20264 "volcano.sh/volcano/pkg/webhooks/router" 20265 "volcano.sh/volcano/pkg/webhooks/schema" 20266 "volcano.sh/volcano/pkg/webhooks/util" 20267 ) 20268 20269 func init() <span class="cov8" title="1">{ 20270 router.RegisterAdmission(service) 20271 }</span> 20272 20273 var service = &router.AdmissionService{ 20274 Path: "/queues/validate", 20275 Func: AdmitQueues, 20276 20277 Config: config, 20278 20279 ValidatingConfig: &whv1beta1.ValidatingWebhookConfiguration{ 20280 Webhooks: []whv1beta1.ValidatingWebhook{{ 20281 Name: "validatequeue.volcano.sh", 20282 Rules: []whv1beta1.RuleWithOperations{ 20283 { 20284 Operations: []whv1beta1.OperationType{whv1beta1.Create, whv1beta1.Update, whv1beta1.Delete}, 20285 Rule: whv1beta1.Rule{ 20286 APIGroups: []string{schedulingv1beta1.SchemeGroupVersion.Group}, 20287 APIVersions: []string{schedulingv1beta1.SchemeGroupVersion.Version}, 20288 Resources: []string{"queues"}, 20289 }, 20290 }, 20291 }, 20292 }}, 20293 }, 20294 } 20295 20296 var config = &router.AdmissionServiceConfig{} 20297 20298 // AdmitQueues is to admit queues and return response. 
func AdmitQueues(ar v1beta1.AdmissionReview) *v1beta1.AdmissionResponse <span class="cov8" title="1">{
        klog.V(3).Infof("Admitting %s queue %s.", ar.Request.Operation, ar.Request.Name)

        queue, err := schema.DecodeQueue(ar.Request.Object, ar.Request.Resource)
        if err != nil </span><span class="cov0" title="0">{
                return util.ToAdmissionResponse(err)
        }</span>

        <span class="cov8" title="1">switch ar.Request.Operation </span>{
        case v1beta1.Create, v1beta1.Update:<span class="cov8" title="1">
                err = validateQueue(queue)</span>
        case v1beta1.Delete:<span class="cov8" title="1">
                err = validateQueueDeleting(ar.Request.Name)</span>
        default:<span class="cov8" title="1">
                return util.ToAdmissionResponse(fmt.Errorf("invalid operation `%s`, "+
                        "expect operation to be `CREATE`, `UPDATE` or `DELETE`", ar.Request.Operation))</span>
        }

        <span class="cov8" title="1">if err != nil </span><span class="cov8" title="1">{
                return &amp;v1beta1.AdmissionResponse{
                        Allowed: false,
                        Result:  &amp;metav1.Status{Message: err.Error()},
                }
        }</span>

        <span class="cov8" title="1">return &amp;v1beta1.AdmissionResponse{
                Allowed: true,
        }</span>
}

func validateQueue(queue *schedulingv1beta1.Queue) error <span class="cov8" title="1">{
        errs := field.ErrorList{}
        resourcePath := field.NewPath("requestBody")

        errs = append(errs, validateStateOfQueue(queue.Status.State, resourcePath.Child("spec").Child("state"))...)
        errs = append(errs, validateWeightOfQueue(queue.Spec.Weight, resourcePath.Child("spec").Child("weight"))...)
        errs = append(errs, validateHierarchicalAttributes(queue, resourcePath.Child("metadata").Child("annotations"))...)

        if len(errs) &gt; 0 </span><span class="cov8" title="1">{
                return errs.ToAggregate()
        }</span>

        <span class="cov8" title="1">return nil</span>
}
func validateHierarchicalAttributes(queue *schedulingv1beta1.Queue, fldPath *field.Path) field.ErrorList <span class="cov8" title="1">{
        errs := field.ErrorList{}
        hierarchy := queue.Annotations[schedulingv1beta1.KubeHierarchyAnnotationKey]
        hierarchicalWeights := queue.Annotations[schedulingv1beta1.KubeHierarchyWeightAnnotationKey]
        if hierarchy != "" || hierarchicalWeights != "" </span><span class="cov8" title="1">{
                paths := strings.Split(hierarchy, "/")
                weights := strings.Split(hierarchicalWeights, "/")
                // path length must be the same with weights length
                if len(paths) != len(weights) </span><span class="cov8" title="1">{
                        return append(errs, field.Invalid(fldPath, hierarchy,
                                fmt.Sprintf("%s must have the same length with %s",
                                        schedulingv1beta1.KubeHierarchyAnnotationKey,
                                        schedulingv1beta1.KubeHierarchyWeightAnnotationKey,
                                )))
                }</span>

                // check weights format
                <span class="cov8" title="1">for _, weight := range weights </span><span class="cov8" title="1">{
                        weightFloat, err := strconv.ParseFloat(weight, 64)
                        if err != nil </span><span class="cov8" title="1">{
                                return append(errs, field.Invalid(fldPath, hierarchicalWeights,
                                        fmt.Sprintf("%s in the %s is invalid number: %v",
                                                weight, hierarchicalWeights, err,
                                        )))
                        }</span>
                        <span class="cov8" title="1">if weightFloat &lt;= 0 </span><span class="cov8" title="1">{
                                return append(errs, field.Invalid(fldPath, hierarchicalWeights,
                                        fmt.Sprintf("%s in the %s must be larger than 0",
                                                weight, hierarchicalWeights,
                                        )))
                        }</span>
                }

                // The node is not allowed to be in the sub path of a node.
                // For example, a queue with "root/sci" conflicts with a queue with "root/sci/dev"
                <span class="cov8" title="1">queueList, err := config.VolcanoClient.SchedulingV1beta1().Queues().List(context.TODO(), metav1.ListOptions{})
                if err != nil </span><span class="cov0" title="0">{
                        return append(errs, field.Invalid(fldPath, hierarchy,
                                fmt.Sprintf("checking %s, list queues failed: %v",
                                        schedulingv1beta1.KubeHierarchyAnnotationKey,
                                        err,
                                )))
                }</span>
                <span class="cov8" title="1">for _, queueInTree := range queueList.Items </span><span class="cov8" title="1">{
                        hierarchyInTree := queueInTree.Annotations[schedulingv1beta1.KubeHierarchyAnnotationKey]
                        if hierarchyInTree != "" &amp;&amp; queue.Name != queueInTree.Name &amp;&amp;
                                strings.HasPrefix(hierarchyInTree, hierarchy) </span><span class="cov8" title="1">{
                                return append(errs, field.Invalid(fldPath, hierarchy,
                                        fmt.Sprintf("%s is not allowed to be in the sub path of %s of queue %s",
                                                hierarchy, hierarchyInTree, queueInTree.Name)))
                        }</span>
                }
        }
        <span class="cov8" title="1">return errs</span>
}

func validateStateOfQueue(value schedulingv1beta1.QueueState, fldPath *field.Path) field.ErrorList <span class="cov8" title="1">{
        errs := field.ErrorList{}

        if len(value) == 0 </span><span class="cov8" title="1">{
                return errs
        }</span>

        <span class="cov8" title="1">validQueueStates := []schedulingv1beta1.QueueState{
                schedulingv1beta1.QueueStateOpen,
                schedulingv1beta1.QueueStateClosed,
        }

        for _, validQueue := range validQueueStates </span><span class="cov8" title="1">{
                if value == validQueue </span><span class="cov8" title="1">{
                        return errs
                }</span>
        }

        <span class="cov8" title="1">return append(errs, field.Invalid(fldPath, value, fmt.Sprintf("queue state must be in %v", validQueueStates)))</span>
}

func validateWeightOfQueue(value int32, fldPath *field.Path) field.ErrorList <span class="cov8" title="1">{
        errs := field.ErrorList{}
        if value &gt; 0 </span><span class="cov8" title="1">{
                return errs
        }</span>
        <span class="cov8" title="1">return append(errs, field.Invalid(fldPath, value, "queue weight must be a positive integer"))</span>
}

func validateQueueDeleting(queue string) error <span class="cov8" title="1">{
        if queue == "default" </span><span class="cov8" title="1">{
                return fmt.Errorf("`%s` queue can not be deleted", "default")
        }</span>

        <span class="cov8" title="1">q, err := config.VolcanoClient.SchedulingV1beta1().Queues().Get(context.TODO(), queue, metav1.GetOptions{})
        if err != nil </span><span class="cov0" title="0">{
                return err
        }</span>

        <span class="cov8" title="1">if q.Status.State != schedulingv1beta1.QueueStateClosed </span><span class="cov8" title="1">{
                return fmt.Errorf("only queue with state `%s` can be deleted, queue `%s` state is `%s`",
                        schedulingv1beta1.QueueStateClosed, q.Name, q.Status.State)
        }</span>

        <span class="cov8" title="1">return nil</span>
}
</pre>
		<!--
			End of the annotated source listings. Inside each listing the Go
			source is plain preformatted text (with &, < and > written as
			entities) and is split into spans: class "cov0" marks the regions
			whose title is "0" (styled red by the page stylesheet), "cov8"
			marks the regions whose title is "1". The title value is presumably
			the recorded execution count; confirm against the cover tool.

			NOTE(review): the listing mirrors the measured Go source and must
			not be edited here. One thing worth checking in that source:
			validateHierarchicalAttributes compares hierarchies with a bare
			strings.HasPrefix, so "root/sci" also matches "root/science", not
			only true sub paths such as "root/sci/dev".
		-->

		</div>
		<!-- The script lives inside body: a script element after the closing body tag is a parse error. -->
		<script>
		(function() {
			// The file picker in the top bar; each option value is the id of one listing.
			var files = document.getElementById('files');
			// The listing element currently shown, or undefined/null when none is shown.
			var visible;
			files.addEventListener('change', onChange, false);

			// Hide the current listing and show the one whose element id is `part`.
			// When no element has that id, nothing is shown and `visible` stays
			// empty so the caller can fall back to a default. On success the
			// picker and the URL fragment are both synced to `part`.
			function select(part) {
				if (visible) {
					visible.style.display = 'none';
				}
				visible = document.getElementById(part);
				if (!visible) {
					return;
				}
				files.value = part;
				visible.style.display = 'block';
				location.hash = part;
			}

			// Picker change handler: show the chosen listing from its top.
			function onChange() {
				select(files.value);
				window.scrollTo(0, 0);
			}

			// Honor a fragment such as "#file3" so links to one listing work;
			// slice(1) drops the leading "#".
			if (location.hash !== "") {
				select(location.hash.slice(1));
			}
			// No fragment, or a fragment naming no listing: show the first file.
			if (!visible) {
				select("file0");
			}
		})();
		</script>
	</body>
</html>