volcano.sh/volcano@v1.9.0/docs/ut_coverage/UT_coverage_v1.5.0.html (about) 1 2 <!DOCTYPE html> 3 <html> 4 <head> 5 <meta http-equiv="Content-Type" content="text/html; charset=utf-8"> 6 <title>job: Go Coverage Report</title> 7 <style> 8 body { 9 background: black; 10 color: rgb(80, 80, 80); 11 } 12 body, pre, #legend span { 13 font-family: Menlo, monospace; 14 font-weight: bold; 15 } 16 #topbar { 17 background: black; 18 position: fixed; 19 top: 0; left: 0; right: 0; 20 height: 42px; 21 border-bottom: 1px solid rgb(80, 80, 80); 22 } 23 #content { 24 margin-top: 50px; 25 } 26 #nav, #legend { 27 float: left; 28 margin-left: 10px; 29 } 30 #legend { 31 margin-top: 12px; 32 } 33 #nav { 34 margin-top: 10px; 35 } 36 #legend span { 37 margin: 0 5px; 38 } 39 .cov0 { color: rgb(192, 0, 0) } 40 .cov1 { color: rgb(128, 128, 128) } 41 .cov2 { color: rgb(116, 140, 131) } 42 .cov3 { color: rgb(104, 152, 134) } 43 .cov4 { color: rgb(92, 164, 137) } 44 .cov5 { color: rgb(80, 176, 140) } 45 .cov6 { color: rgb(68, 188, 143) } 46 .cov7 { color: rgb(56, 200, 146) } 47 .cov8 { color: rgb(44, 212, 149) } 48 .cov9 { color: rgb(32, 224, 152) } 49 .cov10 { color: rgb(20, 236, 155) } 50 51 </style> 52 </head> 53 <body> 54 <div id="topbar"> 55 <div id="nav"> 56 <select id="files"> 57 58 <option value="file0">volcano.sh/volcano/pkg/cli/job/common.go (100.0%)</option> 59 60 <option value="file1">volcano.sh/volcano/pkg/cli/job/delete.go (73.3%)</option> 61 62 <option value="file2">volcano.sh/volcano/pkg/cli/job/list.go (78.8%)</option> 63 64 <option value="file3">volcano.sh/volcano/pkg/cli/job/resume.go (70.0%)</option> 65 66 <option value="file4">volcano.sh/volcano/pkg/cli/job/run.go (66.0%)</option> 67 68 <option value="file5">volcano.sh/volcano/pkg/cli/job/suspend.go (70.0%)</option> 69 70 <option value="file6">volcano.sh/volcano/pkg/cli/job/util.go (90.2%)</option> 71 72 <option value="file7">volcano.sh/volcano/pkg/cli/job/view.go (88.9%)</option> 73 74 <option 
value="file8">volcano.sh/volcano/pkg/cli/queue/common.go (100.0%)</option> 75 76 <option value="file9">volcano.sh/volcano/pkg/cli/queue/create.go (83.3%)</option> 77 78 <option value="file10">volcano.sh/volcano/pkg/cli/queue/delete.go (88.9%)</option> 79 80 <option value="file11">volcano.sh/volcano/pkg/cli/queue/get.go (80.0%)</option> 81 82 <option value="file12">volcano.sh/volcano/pkg/cli/queue/list.go (80.0%)</option> 83 84 <option value="file13">volcano.sh/volcano/pkg/cli/queue/operate.go (95.5%)</option> 85 86 <option value="file14">volcano.sh/volcano/pkg/cli/queue/util.go (76.9%)</option> 87 88 <option value="file15">volcano.sh/volcano/pkg/cli/util/util.go (40.8%)</option> 89 90 <option value="file16">volcano.sh/volcano/pkg/cli/vcancel/cancel.go (73.3%)</option> 91 92 <option value="file17">volcano.sh/volcano/pkg/cli/vresume/resume.go (70.0%)</option> 93 94 <option value="file18">volcano.sh/volcano/pkg/cli/vsuspend/suspend.go (70.0%)</option> 95 96 <option value="file19">volcano.sh/volcano/pkg/controllers/apis/job_info.go (75.0%)</option> 97 98 <option value="file20">volcano.sh/volcano/pkg/controllers/apis/request.go (100.0%)</option> 99 100 <option value="file21">volcano.sh/volcano/pkg/controllers/cache/cache.go (58.2%)</option> 101 102 <option value="file22">volcano.sh/volcano/pkg/controllers/garbagecollector/garbagecollector.go (37.4%)</option> 103 104 <option value="file23">volcano.sh/volcano/pkg/controllers/job/helpers/helpers.go (50.0%)</option> 105 106 <option value="file24">volcano.sh/volcano/pkg/controllers/job/job_controller.go (45.7%)</option> 107 108 <option value="file25">volcano.sh/volcano/pkg/controllers/job/job_controller_actions.go (53.3%)</option> 109 110 <option value="file26">volcano.sh/volcano/pkg/controllers/job/job_controller_handler.go (49.0%)</option> 111 112 <option value="file27">volcano.sh/volcano/pkg/controllers/job/job_controller_plugins.go (72.2%)</option> 113 114 <option 
value="file28">volcano.sh/volcano/pkg/controllers/job/job_controller_resync.go (3.8%)</option> 115 116 <option value="file29">volcano.sh/volcano/pkg/controllers/job/job_controller_util.go (81.5%)</option> 117 118 <option value="file30">volcano.sh/volcano/pkg/controllers/job/plugins/distributed-framework/tensorflow/tensorflow.go (67.3%)</option> 119 120 <option value="file31">volcano.sh/volcano/pkg/controllers/job/plugins/ssh/ssh.go (10.6%)</option> 121 122 <option value="file32">volcano.sh/volcano/pkg/controllers/podgroup/pg_controller.go (31.9%)</option> 123 124 <option value="file33">volcano.sh/volcano/pkg/controllers/podgroup/pg_controller_handler.go (51.6%)</option> 125 126 <option value="file34">volcano.sh/volcano/pkg/controllers/queue/queue_controller.go (33.9%)</option> 127 128 <option value="file35">volcano.sh/volcano/pkg/controllers/queue/queue_controller_action.go (25.0%)</option> 129 130 <option value="file36">volcano.sh/volcano/pkg/controllers/queue/queue_controller_handler.go (57.8%)</option> 131 132 <option value="file37">volcano.sh/volcano/pkg/controllers/queue/queue_controller_util.go (0.0%)</option> 133 134 <option value="file38">volcano.sh/volcano/pkg/scheduler/actions/allocate/allocate.go (77.2%)</option> 135 136 <option value="file39">volcano.sh/volcano/pkg/scheduler/actions/elect/elect.go (71.4%)</option> 137 138 <option value="file40">volcano.sh/volcano/pkg/scheduler/actions/preempt/preempt.go (85.9%)</option> 139 140 <option value="file41">volcano.sh/volcano/pkg/scheduler/actions/reclaim/reclaim.go (78.4%)</option> 141 142 <option value="file42">volcano.sh/volcano/pkg/scheduler/api/cluster_info.go (0.0%)</option> 143 144 <option value="file43">volcano.sh/volcano/pkg/scheduler/api/device_info.go (43.8%)</option> 145 146 <option value="file44">volcano.sh/volcano/pkg/scheduler/api/helpers.go (40.0%)</option> 147 148 <option value="file45">volcano.sh/volcano/pkg/scheduler/api/helpers/helpers.go (46.9%)</option> 149 150 <option 
value="file46">volcano.sh/volcano/pkg/scheduler/api/job_info.go (39.0%)</option> 151 152 <option value="file47">volcano.sh/volcano/pkg/scheduler/api/namespace_info.go (83.3%)</option> 153 154 <option value="file48">volcano.sh/volcano/pkg/scheduler/api/node_info.go (44.6%)</option> 155 156 <option value="file49">volcano.sh/volcano/pkg/scheduler/api/numa_info.go (0.0%)</option> 157 158 <option value="file50">volcano.sh/volcano/pkg/scheduler/api/pod_group_info.go (0.0%)</option> 159 160 <option value="file51">volcano.sh/volcano/pkg/scheduler/api/pod_info.go (37.5%)</option> 161 162 <option value="file52">volcano.sh/volcano/pkg/scheduler/api/queue_info.go (0.0%)</option> 163 164 <option value="file53">volcano.sh/volcano/pkg/scheduler/api/resource_info.go (69.1%)</option> 165 166 <option value="file54">volcano.sh/volcano/pkg/scheduler/api/silo_cluster_info.go (0.0%)</option> 167 168 <option value="file55">volcano.sh/volcano/pkg/scheduler/api/test_utils.go (100.0%)</option> 169 170 <option value="file56">volcano.sh/volcano/pkg/scheduler/api/types.go (31.2%)</option> 171 172 <option value="file57">volcano.sh/volcano/pkg/scheduler/api/unschedule_info.go (51.9%)</option> 173 174 <option value="file58">volcano.sh/volcano/pkg/scheduler/cache/cache.go (5.5%)</option> 175 176 <option value="file59">volcano.sh/volcano/pkg/scheduler/cache/event_handlers.go (29.7%)</option> 177 178 <option value="file60">volcano.sh/volcano/pkg/scheduler/cache/factory.go (66.7%)</option> 179 180 <option value="file61">volcano.sh/volcano/pkg/scheduler/cache/util.go (0.0%)</option> 181 182 <option value="file62">volcano.sh/volcano/pkg/scheduler/framework/arguments.go (67.6%)</option> 183 184 <option value="file63">volcano.sh/volcano/pkg/scheduler/framework/framework.go (0.0%)</option> 185 186 <option value="file64">volcano.sh/volcano/pkg/scheduler/framework/job_updater.go (0.0%)</option> 187 188 <option value="file65">volcano.sh/volcano/pkg/scheduler/framework/plugins.go (2.7%)</option> 189 190 
<option value="file66">volcano.sh/volcano/pkg/scheduler/framework/session.go (0.0%)</option> 191 192 <option value="file67">volcano.sh/volcano/pkg/scheduler/framework/session_plugins.go (0.0%)</option> 193 194 <option value="file68">volcano.sh/volcano/pkg/scheduler/framework/statement.go (0.0%)</option> 195 196 <option value="file69">volcano.sh/volcano/pkg/scheduler/plugins/binpack/binpack.go (69.6%)</option> 197 198 <option value="file70">volcano.sh/volcano/pkg/scheduler/plugins/drf/drf.go (48.4%)</option> 199 200 <option value="file71">volcano.sh/volcano/pkg/scheduler/plugins/numaaware/policy/factory.go (0.0%)</option> 201 202 <option value="file72">volcano.sh/volcano/pkg/scheduler/plugins/numaaware/policy/policy.go (80.4%)</option> 203 204 <option value="file73">volcano.sh/volcano/pkg/scheduler/plugins/numaaware/policy/policy_best_effort.go (100.0%)</option> 205 206 <option value="file74">volcano.sh/volcano/pkg/scheduler/plugins/numaaware/policy/policy_none.go (0.0%)</option> 207 208 <option value="file75">volcano.sh/volcano/pkg/scheduler/plugins/numaaware/policy/policy_restricted.go (100.0%)</option> 209 210 <option value="file76">volcano.sh/volcano/pkg/scheduler/plugins/numaaware/policy/policy_single_numa_node.go (94.4%)</option> 211 212 <option value="file77">volcano.sh/volcano/pkg/scheduler/plugins/numaaware/provider/cpumanager/cpu_assignment.go (92.4%)</option> 213 214 <option value="file78">volcano.sh/volcano/pkg/scheduler/plugins/numaaware/provider/cpumanager/cpu_mng.go (77.0%)</option> 215 216 <option value="file79">volcano.sh/volcano/pkg/scheduler/plugins/predicates/cache.go (3.8%)</option> 217 218 <option value="file80">volcano.sh/volcano/pkg/scheduler/plugins/predicates/gpu.go (0.0%)</option> 219 220 <option value="file81">volcano.sh/volcano/pkg/scheduler/plugins/predicates/predicates.go (52.0%)</option> 221 222 <option value="file82">volcano.sh/volcano/pkg/scheduler/plugins/predicates/proportional.go (100.0%)</option> 223 224 <option 
value="file83">volcano.sh/volcano/pkg/scheduler/plugins/task-topology/bucket.go (0.0%)</option> 225 226 <option value="file84">volcano.sh/volcano/pkg/scheduler/plugins/task-topology/manager.go (0.0%)</option> 227 228 <option value="file85">volcano.sh/volcano/pkg/scheduler/plugins/task-topology/topology.go (32.1%)</option> 229 230 <option value="file86">volcano.sh/volcano/pkg/scheduler/plugins/task-topology/util.go (0.0%)</option> 231 232 <option value="file87">volcano.sh/volcano/pkg/scheduler/plugins/tdm/tdm.go (68.6%)</option> 233 234 <option value="file88">volcano.sh/volcano/pkg/scheduler/scheduler.go (0.0%)</option> 235 236 <option value="file89">volcano.sh/volcano/pkg/scheduler/util.go (68.0%)</option> 237 238 <option value="file90">volcano.sh/volcano/pkg/scheduler/util/predicate_helper.go (0.0%)</option> 239 240 <option value="file91">volcano.sh/volcano/pkg/scheduler/util/priority_queue.go (0.0%)</option> 241 242 <option value="file92">volcano.sh/volcano/pkg/scheduler/util/scheduler_helper.go (18.8%)</option> 243 244 <option value="file93">volcano.sh/volcano/pkg/scheduler/util/test_utils.go (0.0%)</option> 245 246 <option value="file94">volcano.sh/volcano/pkg/webhooks/admission/jobs/mutate/mutate_job.go (21.2%)</option> 247 248 <option value="file95">volcano.sh/volcano/pkg/webhooks/admission/jobs/validate/admit_job.go (78.1%)</option> 249 250 <option value="file96">volcano.sh/volcano/pkg/webhooks/admission/jobs/validate/util.go (98.2%)</option> 251 252 <option value="file97">volcano.sh/volcano/pkg/webhooks/admission/pods/mutate/annotation.go (81.0%)</option> 253 254 <option value="file98">volcano.sh/volcano/pkg/webhooks/admission/pods/mutate/factory.go (100.0%)</option> 255 256 <option value="file99">volcano.sh/volcano/pkg/webhooks/admission/pods/mutate/mutate_pod.go (64.3%)</option> 257 258 <option value="file100">volcano.sh/volcano/pkg/webhooks/admission/pods/mutate/namespace.go (85.7%)</option> 259 260 <option 
value="file101">volcano.sh/volcano/pkg/webhooks/admission/pods/validate/admit_pod.go (39.7%)</option> 261 262 <option value="file102">volcano.sh/volcano/pkg/webhooks/admission/queues/mutate/mutate_queue.go (88.5%)</option> 263 264 <option value="file103">volcano.sh/volcano/pkg/webhooks/admission/queues/validate/validate_queue.go (95.2%)</option> 265 266 </select> 267 </div> 268 <div id="legend"> 269 <span>not tracked</span> 270 271 <span class="cov0">not covered</span> 272 <span class="cov8">covered</span> 273 274 </div> 275 </div> 276 <div id="content"> 277 278 <pre class="file" id="file0" style="display: none">/* 279 Copyright 2018 The Volcano Authors. 280 281 Licensed under the Apache License, Version 2.0 (the "License"); 282 you may not use this file except in compliance with the License. 283 You may obtain a copy of the License at 284 285 http://www.apache.org/licenses/LICENSE-2.0 286 287 Unless required by applicable law or agreed to in writing, software 288 distributed under the License is distributed on an "AS IS" BASIS, 289 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 290 See the License for the specific language governing permissions and 291 limitations under the License. 
292 */ 293 294 package job 295 296 import ( 297 "os" 298 "path/filepath" 299 300 "github.com/spf13/cobra" 301 ) 302 303 type commonFlags struct { 304 Master string 305 Kubeconfig string 306 } 307 308 func initFlags(cmd *cobra.Command, cf *commonFlags) <span class="cov8" title="1">{ 309 cmd.Flags().StringVarP(&cf.Master, "master", "s", "", "the address of apiserver") 310 311 kubeConfFile := os.Getenv("KUBECONFIG") 312 if kubeConfFile == "" </span><span class="cov8" title="1">{ 313 if home := homeDir(); home != "" </span><span class="cov8" title="1">{ 314 kubeConfFile = filepath.Join(home, ".kube", "config") 315 }</span> 316 } 317 <span class="cov8" title="1">cmd.Flags().StringVarP(&cf.Kubeconfig, "kubeconfig", "k", kubeConfFile, "(optional) absolute path to the kubeconfig file")</span> 318 } 319 </pre> 320 321 <pre class="file" id="file1" style="display: none">/* 322 Copyright 2019 The Volcano Authors. 323 324 Licensed under the Apache License, Version 2.0 (the "License"); 325 you may not use this file except in compliance with the License. 326 You may obtain a copy of the License at 327 328 http://www.apache.org/licenses/LICENSE-2.0 329 330 Unless required by applicable law or agreed to in writing, software 331 distributed under the License is distributed on an "AS IS" BASIS, 332 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 333 See the License for the specific language governing permissions and 334 limitations under the License. 335 */ 336 337 package job 338 339 import ( 340 "context" 341 "fmt" 342 343 "github.com/spf13/cobra" 344 345 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 346 347 "volcano.sh/apis/pkg/client/clientset/versioned" 348 "volcano.sh/volcano/pkg/cli/util" 349 ) 350 351 type deleteFlags struct { 352 commonFlags 353 354 Namespace string 355 JobName string 356 } 357 358 var deleteJobFlags = &deleteFlags{} 359 360 // InitDeleteFlags init the delete command flags. 
361 func InitDeleteFlags(cmd *cobra.Command) <span class="cov8" title="1">{ 362 initFlags(cmd, &deleteJobFlags.commonFlags) 363 364 cmd.Flags().StringVarP(&deleteJobFlags.Namespace, "namespace", "n", "default", "the namespace of job") 365 cmd.Flags().StringVarP(&deleteJobFlags.JobName, "name", "N", "", "the name of job") 366 }</span> 367 368 // DeleteJob delete the job. 369 func DeleteJob() error <span class="cov8" title="1">{ 370 config, err := util.BuildConfig(deleteJobFlags.Master, deleteJobFlags.Kubeconfig) 371 if err != nil </span><span class="cov0" title="0">{ 372 return err 373 }</span> 374 375 <span class="cov8" title="1">if deleteJobFlags.JobName == "" </span><span class="cov0" title="0">{ 376 err := fmt.Errorf("job name is mandatory to delete a particular job") 377 return err 378 }</span> 379 380 <span class="cov8" title="1">jobClient := versioned.NewForConfigOrDie(config) 381 err = jobClient.BatchV1alpha1().Jobs(deleteJobFlags.Namespace).Delete(context.TODO(), deleteJobFlags.JobName, metav1.DeleteOptions{}) 382 if err != nil </span><span class="cov0" title="0">{ 383 return err 384 }</span> 385 <span class="cov8" title="1">fmt.Printf("delete job %v successfully\n", deleteJobFlags.JobName) 386 return nil</span> 387 } 388 </pre> 389 390 <pre class="file" id="file2" style="display: none">/* 391 Copyright 2018 The Volcano Authors. 392 393 Licensed under the Apache License, Version 2.0 (the "License"); 394 you may not use this file except in compliance with the License. 395 You may obtain a copy of the License at 396 397 http://www.apache.org/licenses/LICENSE-2.0 398 399 Unless required by applicable law or agreed to in writing, software 400 distributed under the License is distributed on an "AS IS" BASIS, 401 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 402 See the License for the specific language governing permissions and 403 limitations under the License. 
404 */ 405 406 package job 407 408 import ( 409 "context" 410 "fmt" 411 "io" 412 "os" 413 "strings" 414 415 "github.com/spf13/cobra" 416 417 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 418 419 "volcano.sh/apis/pkg/apis/batch/v1alpha1" 420 "volcano.sh/apis/pkg/client/clientset/versioned" 421 "volcano.sh/volcano/pkg/cli/util" 422 ) 423 424 type listFlags struct { 425 commonFlags 426 427 Namespace string 428 SchedulerName string 429 allNamespace bool 430 selector string 431 } 432 433 const ( 434 435 // Name name etc below key words are used in job print format 436 Name string = "Name" 437 // Creation create 438 Creation string = "Creation" 439 // Phase phase 440 Phase string = "Phase" 441 // Replicas replicas 442 Replicas string = "Replicas" 443 // Min minimum 444 Min string = "Min" 445 // Scheduler scheduler 446 Scheduler string = "Scheduler" 447 // Pending pending 448 Pending string = "Pending" 449 // Running running 450 Running string = "Running" 451 // Succeeded success 452 Succeeded string = "Succeeded" 453 // Terminating terminating 454 Terminating string = "Terminating" 455 // Version version 456 Version string = "Version" 457 // Failed failed 458 Failed string = "Failed" 459 // Unknown pod 460 Unknown string = "Unknown" 461 // RetryCount retry count 462 RetryCount string = "RetryCount" 463 // JobType job type 464 JobType string = "JobType" 465 // Namespace job namespace 466 Namespace string = "Namespace" 467 ) 468 469 var listJobFlags = &listFlags{} 470 471 // InitListFlags init list command flags. 
472 func InitListFlags(cmd *cobra.Command) <span class="cov8" title="1">{ 473 initFlags(cmd, &listJobFlags.commonFlags) 474 475 cmd.Flags().StringVarP(&listJobFlags.Namespace, "namespace", "n", "default", "the namespace of job") 476 cmd.Flags().StringVarP(&listJobFlags.SchedulerName, "scheduler", "S", "", "list job with specified scheduler name") 477 cmd.Flags().BoolVarP(&listJobFlags.allNamespace, "all-namespaces", "", false, "list jobs in all namespaces") 478 cmd.Flags().StringVarP(&listJobFlags.selector, "selector", "", "", "fuzzy matching jobName") 479 }</span> 480 481 // ListJobs lists all jobs details. 482 func ListJobs() error <span class="cov8" title="1">{ 483 config, err := util.BuildConfig(listJobFlags.Master, listJobFlags.Kubeconfig) 484 if err != nil </span><span class="cov0" title="0">{ 485 return err 486 }</span> 487 <span class="cov8" title="1">if listJobFlags.allNamespace </span><span class="cov8" title="1">{ 488 listJobFlags.Namespace = "" 489 }</span> 490 <span class="cov8" title="1">jobClient := versioned.NewForConfigOrDie(config) 491 jobs, err := jobClient.BatchV1alpha1().Jobs(listJobFlags.Namespace).List(context.TODO(), metav1.ListOptions{}) 492 if err != nil </span><span class="cov0" title="0">{ 493 return err 494 }</span> 495 496 <span class="cov8" title="1">if len(jobs.Items) == 0 </span><span class="cov0" title="0">{ 497 fmt.Printf("No resources found\n") 498 return nil 499 }</span> 500 <span class="cov8" title="1">PrintJobs(jobs, os.Stdout) 501 502 return nil</span> 503 } 504 505 // PrintJobs prints all jobs details. 
506 func PrintJobs(jobs *v1alpha1.JobList, writer io.Writer) <span class="cov8" title="1">{ 507 maxLenInfo := getMaxLen(jobs) 508 509 titleFormat := "%%-%ds%%-15s%%-12s%%-12s%%-12s%%-6s%%-10s%%-10s%%-12s%%-10s%%-12s%%-10s\n" 510 contentFormat := "%%-%ds%%-15s%%-12s%%-12s%%-12d%%-6d%%-10d%%-10d%%-12d%%-10d%%-12d%%-10d\n" 511 512 var err error 513 if listJobFlags.allNamespace </span><span class="cov8" title="1">{ 514 _, err = fmt.Fprintf(writer, fmt.Sprintf("%%-%ds"+titleFormat, maxLenInfo[1], maxLenInfo[0]), 515 Namespace, Name, Creation, Phase, JobType, Replicas, Min, Pending, Running, Succeeded, Failed, Unknown, RetryCount) 516 }</span> else<span class="cov8" title="1"> { 517 _, err = fmt.Fprintf(writer, fmt.Sprintf(titleFormat, maxLenInfo[0]), 518 Name, Creation, Phase, JobType, Replicas, Min, Pending, Running, Succeeded, Failed, Unknown, RetryCount) 519 }</span> 520 <span class="cov8" title="1">if err != nil </span><span class="cov0" title="0">{ 521 fmt.Printf("Failed to print list command result: %s.\n", err) 522 }</span> 523 524 <span class="cov8" title="1">for _, job := range jobs.Items </span><span class="cov8" title="1">{ 525 if listJobFlags.SchedulerName != "" && listJobFlags.SchedulerName != job.Spec.SchedulerName </span><span class="cov0" title="0">{ 526 continue</span> 527 } 528 <span class="cov8" title="1">if !strings.Contains(job.Name, listJobFlags.selector) </span><span class="cov0" title="0">{ 529 continue</span> 530 } 531 <span class="cov8" title="1">replicas := int32(0) 532 for _, ts := range job.Spec.Tasks </span><span class="cov0" title="0">{ 533 replicas += ts.Replicas 534 }</span> 535 <span class="cov8" title="1">jobType := job.ObjectMeta.Labels[v1alpha1.JobTypeKey] 536 if jobType == "" </span><span class="cov8" title="1">{ 537 jobType = "Batch" 538 }</span> 539 540 <span class="cov8" title="1">if listJobFlags.allNamespace </span><span class="cov8" title="1">{ 541 _, err = fmt.Fprintf(writer, fmt.Sprintf("%%-%ds"+contentFormat, maxLenInfo[1], 
maxLenInfo[0]), 542 job.Namespace, job.Name, job.CreationTimestamp.Format("2006-01-02"), job.Status.State.Phase, jobType, replicas, 543 job.Status.MinAvailable, job.Status.Pending, job.Status.Running, job.Status.Succeeded, job.Status.Failed, job.Status.Unknown, job.Status.RetryCount) 544 }</span> else<span class="cov8" title="1"> { 545 _, err = fmt.Fprintf(writer, fmt.Sprintf(contentFormat, maxLenInfo[0]), 546 job.Name, job.CreationTimestamp.Format("2006-01-02"), job.Status.State.Phase, jobType, replicas, 547 job.Status.MinAvailable, job.Status.Pending, job.Status.Running, job.Status.Succeeded, job.Status.Failed, job.Status.Unknown, job.Status.RetryCount) 548 }</span> 549 <span class="cov8" title="1">if err != nil </span><span class="cov0" title="0">{ 550 fmt.Printf("Failed to print list command result: %s.\n", err) 551 }</span> 552 } 553 } 554 555 func getMaxLen(jobs *v1alpha1.JobList) []int <span class="cov8" title="1">{ 556 maxNameLen := len(Name) 557 maxNamespaceLen := len(Namespace) 558 for _, job := range jobs.Items </span><span class="cov8" title="1">{ 559 if len(job.Name) > maxNameLen </span><span class="cov0" title="0">{ 560 maxNameLen = len(job.Name) 561 }</span> 562 <span class="cov8" title="1">if len(job.Namespace) > maxNamespaceLen </span><span class="cov0" title="0">{ 563 maxNamespaceLen = len(job.Namespace) 564 }</span> 565 } 566 567 <span class="cov8" title="1">return []int{maxNameLen + 3, maxNamespaceLen + 3}</span> 568 } 569 </pre> 570 571 <pre class="file" id="file3" style="display: none">/* 572 Copyright 2018 The Volcano Authors. 573 574 Licensed under the Apache License, Version 2.0 (the "License"); 575 you may not use this file except in compliance with the License. 
576 You may obtain a copy of the License at 577 578 http://www.apache.org/licenses/LICENSE-2.0 579 580 Unless required by applicable law or agreed to in writing, software 581 distributed under the License is distributed on an "AS IS" BASIS, 582 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 583 See the License for the specific language governing permissions and 584 limitations under the License. 585 */ 586 587 package job 588 589 import ( 590 "fmt" 591 592 "github.com/spf13/cobra" 593 594 "volcano.sh/apis/pkg/apis/bus/v1alpha1" 595 "volcano.sh/volcano/pkg/cli/util" 596 ) 597 598 type resumeFlags struct { 599 commonFlags 600 601 Namespace string 602 JobName string 603 } 604 605 var resumeJobFlags = &resumeFlags{} 606 607 // InitResumeFlags init resume command flags. 608 func InitResumeFlags(cmd *cobra.Command) <span class="cov8" title="1">{ 609 initFlags(cmd, &resumeJobFlags.commonFlags) 610 611 cmd.Flags().StringVarP(&resumeJobFlags.Namespace, "namespace", "n", "default", "the namespace of job") 612 cmd.Flags().StringVarP(&resumeJobFlags.JobName, "name", "N", "", "the name of job") 613 }</span> 614 615 // ResumeJob resumes the job. 616 func ResumeJob() error <span class="cov8" title="1">{ 617 config, err := util.BuildConfig(resumeJobFlags.Master, resumeJobFlags.Kubeconfig) 618 if err != nil </span><span class="cov0" title="0">{ 619 return err 620 }</span> 621 <span class="cov8" title="1">if resumeJobFlags.JobName == "" </span><span class="cov0" title="0">{ 622 err := fmt.Errorf("job name is mandatory to resume a particular job") 623 return err 624 }</span> 625 626 <span class="cov8" title="1">return createJobCommand(config, 627 resumeJobFlags.Namespace, resumeJobFlags.JobName, 628 v1alpha1.ResumeJobAction)</span> 629 } 630 </pre> 631 632 <pre class="file" id="file4" style="display: none">/* 633 Copyright 2018 The Volcano Authors. 
634 635 Licensed under the Apache License, Version 2.0 (the "License"); 636 you may not use this file except in compliance with the License. 637 You may obtain a copy of the License at 638 639 http://www.apache.org/licenses/LICENSE-2.0 640 641 Unless required by applicable law or agreed to in writing, software 642 distributed under the License is distributed on an "AS IS" BASIS, 643 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 644 See the License for the specific language governing permissions and 645 limitations under the License. 646 */ 647 648 package job 649 650 import ( 651 "context" 652 "fmt" 653 "io/ioutil" 654 "strings" 655 656 "github.com/spf13/cobra" 657 658 v1 "k8s.io/api/core/v1" 659 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 660 "sigs.k8s.io/yaml" 661 662 vcbatch "volcano.sh/apis/pkg/apis/batch/v1alpha1" 663 "volcano.sh/apis/pkg/client/clientset/versioned" 664 "volcano.sh/volcano/pkg/cli/util" 665 ) 666 667 type runFlags struct { 668 commonFlags 669 670 Name string 671 Namespace string 672 Image string 673 674 MinAvailable int 675 Replicas int 676 Requests string 677 Limits string 678 SchedulerName string 679 FileName string 680 } 681 682 var launchJobFlags = &runFlags{} 683 684 // InitRunFlags init the run flags. 
685 func InitRunFlags(cmd *cobra.Command) <span class="cov8" title="1">{ 686 initFlags(cmd, &launchJobFlags.commonFlags) 687 688 cmd.Flags().StringVarP(&launchJobFlags.Image, "image", "i", "busybox", "the container image of job") 689 cmd.Flags().StringVarP(&launchJobFlags.Namespace, "namespace", "n", "default", "the namespace of job") 690 cmd.Flags().StringVarP(&launchJobFlags.Name, "name", "N", "", "the name of job") 691 cmd.Flags().IntVarP(&launchJobFlags.MinAvailable, "min", "m", 1, "the minimal available tasks of job") 692 cmd.Flags().IntVarP(&launchJobFlags.Replicas, "replicas", "r", 1, "the total tasks of job") 693 cmd.Flags().StringVarP(&launchJobFlags.Requests, "requests", "R", "cpu=1000m,memory=100Mi", "the resource request of the task") 694 cmd.Flags().StringVarP(&launchJobFlags.Limits, "limits", "L", "cpu=1000m,memory=100Mi", "the resource limit of the task") 695 cmd.Flags().StringVarP(&launchJobFlags.SchedulerName, "scheduler", "S", "volcano", "the scheduler for this job") 696 cmd.Flags().StringVarP(&launchJobFlags.FileName, "filename", "f", "", "the yaml file of job") 697 }</span> 698 699 var jobName = "job.volcano.sh" 700 701 // RunJob creates the job. 
702 func RunJob() error <span class="cov8" title="1">{ 703 config, err := util.BuildConfig(launchJobFlags.Master, launchJobFlags.Kubeconfig) 704 if err != nil </span><span class="cov0" title="0">{ 705 return err 706 }</span> 707 708 <span class="cov8" title="1">if launchJobFlags.Name == "" && launchJobFlags.FileName == "" </span><span class="cov0" title="0">{ 709 err = fmt.Errorf("job name cannot be left blank") 710 return err 711 }</span> 712 713 <span class="cov8" title="1">req, err := populateResourceListV1(launchJobFlags.Requests) 714 if err != nil </span><span class="cov0" title="0">{ 715 return err 716 }</span> 717 718 <span class="cov8" title="1">limit, err := populateResourceListV1(launchJobFlags.Limits) 719 if err != nil </span><span class="cov0" title="0">{ 720 return err 721 }</span> 722 723 <span class="cov8" title="1">job, err := readFile(launchJobFlags.FileName) 724 if err != nil </span><span class="cov0" title="0">{ 725 return err 726 }</span> 727 728 <span class="cov8" title="1">if job == nil </span><span class="cov8" title="1">{ 729 job = constructLaunchJobFlagsJob(launchJobFlags, req, limit) 730 }</span> 731 732 <span class="cov8" title="1">jobClient := versioned.NewForConfigOrDie(config) 733 newJob, err := jobClient.BatchV1alpha1().Jobs(launchJobFlags.Namespace).Create(context.TODO(), job, metav1.CreateOptions{}) 734 if err != nil </span><span class="cov0" title="0">{ 735 return err 736 }</span> 737 738 <span class="cov8" title="1">if newJob.Spec.Queue == "" </span><span class="cov8" title="1">{ 739 newJob.Spec.Queue = "default" 740 }</span> 741 742 <span class="cov8" title="1">fmt.Printf("run job %v successfully\n", newJob.Name) 743 744 return nil</span> 745 } 746 747 func readFile(filename string) (*vcbatch.Job, error) <span class="cov8" title="1">{ 748 if filename == "" </span><span class="cov8" title="1">{ 749 return nil, nil 750 }</span> 751 752 <span class="cov0" title="0">if !strings.Contains(filename, ".yaml") && 
!strings.Contains(filename, ".yml") </span><span class="cov0" title="0">{ 753 return nil, fmt.Errorf("only support yaml file") 754 }</span> 755 756 <span class="cov0" title="0">file, err := ioutil.ReadFile(filename) 757 if err != nil </span><span class="cov0" title="0">{ 758 return nil, fmt.Errorf("failed to read file, err: %v", err) 759 }</span> 760 761 <span class="cov0" title="0">var job vcbatch.Job 762 if err := yaml.Unmarshal(file, &job); err != nil </span><span class="cov0" title="0">{ 763 return nil, fmt.Errorf("failed to unmarshal file, err: %v", err) 764 }</span> 765 766 <span class="cov0" title="0">return &job, nil</span> 767 } 768 769 func constructLaunchJobFlagsJob(launchJobFlags *runFlags, req, limit v1.ResourceList) *vcbatch.Job <span class="cov8" title="1">{ 770 return &vcbatch.Job{ 771 ObjectMeta: metav1.ObjectMeta{ 772 Name: launchJobFlags.Name, 773 Namespace: launchJobFlags.Namespace, 774 }, 775 Spec: vcbatch.JobSpec{ 776 MinAvailable: int32(launchJobFlags.MinAvailable), 777 SchedulerName: launchJobFlags.SchedulerName, 778 Tasks: []vcbatch.TaskSpec{ 779 { 780 Replicas: int32(launchJobFlags.Replicas), 781 782 Template: v1.PodTemplateSpec{ 783 ObjectMeta: metav1.ObjectMeta{ 784 Name: launchJobFlags.Name, 785 Labels: map[string]string{jobName: launchJobFlags.Name}, 786 }, 787 Spec: v1.PodSpec{ 788 RestartPolicy: v1.RestartPolicyNever, 789 Containers: []v1.Container{ 790 { 791 Image: launchJobFlags.Image, 792 Name: launchJobFlags.Name, 793 ImagePullPolicy: v1.PullIfNotPresent, 794 Resources: v1.ResourceRequirements{ 795 Limits: limit, 796 Requests: req, 797 }, 798 }, 799 }, 800 }, 801 }, 802 }, 803 }, 804 }, 805 } 806 }</span> 807 </pre> 808 809 <pre class="file" id="file5" style="display: none">/* 810 Copyright 2018 The Volcano Authors. 811 812 Licensed under the Apache License, Version 2.0 (the "License"); 813 you may not use this file except in compliance with the License. 
814 You may obtain a copy of the License at 815 816 http://www.apache.org/licenses/LICENSE-2.0 817 818 Unless required by applicable law or agreed to in writing, software 819 distributed under the License is distributed on an "AS IS" BASIS, 820 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 821 See the License for the specific language governing permissions and 822 limitations under the License. 823 */ 824 825 package job 826 827 import ( 828 "fmt" 829 830 "github.com/spf13/cobra" 831 832 "volcano.sh/apis/pkg/apis/bus/v1alpha1" 833 "volcano.sh/volcano/pkg/cli/util" 834 ) 835 836 type suspendFlags struct { 837 commonFlags 838 839 Namespace string 840 JobName string 841 } 842 843 var suspendJobFlags = &suspendFlags{} 844 845 // InitSuspendFlags init suspend related flags. 846 func InitSuspendFlags(cmd *cobra.Command) <span class="cov8" title="1">{ 847 initFlags(cmd, &suspendJobFlags.commonFlags) 848 849 cmd.Flags().StringVarP(&suspendJobFlags.Namespace, "namespace", "n", "default", "the namespace of job") 850 cmd.Flags().StringVarP(&suspendJobFlags.JobName, "name", "N", "", "the name of job") 851 }</span> 852 853 // SuspendJob suspends the job. 854 func SuspendJob() error <span class="cov8" title="1">{ 855 config, err := util.BuildConfig(suspendJobFlags.Master, suspendJobFlags.Kubeconfig) 856 if err != nil </span><span class="cov0" title="0">{ 857 return err 858 }</span> 859 860 <span class="cov8" title="1">if suspendJobFlags.JobName == "" </span><span class="cov0" title="0">{ 861 err := fmt.Errorf("job name is mandatory to suspend a particular job") 862 return err 863 }</span> 864 865 <span class="cov8" title="1">return createJobCommand(config, 866 suspendJobFlags.Namespace, suspendJobFlags.JobName, 867 v1alpha1.AbortJobAction)</span> 868 } 869 </pre> 870 871 <pre class="file" id="file6" style="display: none">/* 872 Copyright 2018 The Volcano Authors. 
873 874 Licensed under the Apache License, Version 2.0 (the "License"); 875 you may not use this file except in compliance with the License. 876 You may obtain a copy of the License at 877 878 http://www.apache.org/licenses/LICENSE-2.0 879 880 Unless required by applicable law or agreed to in writing, software 881 distributed under the License is distributed on an "AS IS" BASIS, 882 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 883 See the License for the specific language governing permissions and 884 limitations under the License. 885 */ 886 887 package job 888 889 import ( 890 "context" 891 "fmt" 892 "os" 893 "strings" 894 "time" 895 896 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 897 898 v1 "k8s.io/api/core/v1" 899 "k8s.io/apimachinery/pkg/api/resource" 900 "k8s.io/client-go/rest" 901 902 vcbus "volcano.sh/apis/pkg/apis/bus/v1alpha1" 903 "volcano.sh/apis/pkg/apis/helpers" 904 "volcano.sh/apis/pkg/client/clientset/versioned" 905 ) 906 907 func homeDir() string <span class="cov8" title="1">{ 908 if h := os.Getenv("HOME"); h != "" </span><span class="cov8" title="1">{ 909 return h 910 }</span> 911 <span class="cov0" title="0">return os.Getenv("USERPROFILE")</span> // windows 912 } 913 914 // populateResourceListV1 takes strings of form <resourceName1>=<value1>,<resourceName1>=<value2> 915 // and returns ResourceList. 
916 func populateResourceListV1(spec string) (v1.ResourceList, error) <span class="cov8" title="1">{ 917 // empty input gets a nil response to preserve generator test expected behaviors 918 if spec == "" </span><span class="cov8" title="1">{ 919 return nil, nil 920 }</span> 921 922 <span class="cov8" title="1">result := v1.ResourceList{} 923 resourceStatements := strings.Split(spec, ",") 924 for _, resourceStatement := range resourceStatements </span><span class="cov8" title="1">{ 925 parts := strings.Split(resourceStatement, "=") 926 if len(parts) != 2 </span><span class="cov0" title="0">{ 927 return nil, fmt.Errorf("invalid argument syntax %v, expected <resource>=<value>", resourceStatement) 928 }</span> 929 <span class="cov8" title="1">resourceName := v1.ResourceName(parts[0]) 930 resourceQuantity, err := resource.ParseQuantity(parts[1]) 931 if err != nil </span><span class="cov0" title="0">{ 932 return nil, err 933 }</span> 934 <span class="cov8" title="1">result[resourceName] = resourceQuantity</span> 935 } 936 <span class="cov8" title="1">return result, nil</span> 937 } 938 939 func createJobCommand(config *rest.Config, ns, name string, action vcbus.Action) error <span class="cov8" title="1">{ 940 jobClient := versioned.NewForConfigOrDie(config) 941 job, err := jobClient.BatchV1alpha1().Jobs(ns).Get(context.TODO(), name, metav1.GetOptions{}) 942 if err != nil </span><span class="cov0" title="0">{ 943 return err 944 }</span> 945 946 <span class="cov8" title="1">ctrlRef := metav1.NewControllerRef(job, helpers.JobKind) 947 cmd := &vcbus.Command{ 948 ObjectMeta: metav1.ObjectMeta{ 949 GenerateName: fmt.Sprintf("%s-%s-", 950 job.Name, strings.ToLower(string(action))), 951 Namespace: job.Namespace, 952 OwnerReferences: []metav1.OwnerReference{ 953 *ctrlRef, 954 }, 955 }, 956 TargetObject: ctrlRef, 957 Action: string(action), 958 } 959 960 if _, err := jobClient.BusV1alpha1().Commands(ns).Create(context.TODO(), cmd, metav1.CreateOptions{}); err != nil </span><span 
class="cov0" title="0">{ 961 return err 962 }</span> 963 964 <span class="cov8" title="1">return nil</span> 965 } 966 967 func translateTimestampSince(timestamp metav1.Time) string <span class="cov8" title="1">{ 968 if timestamp.IsZero() </span><span class="cov8" title="1">{ 969 return "<unknown>" 970 }</span> 971 <span class="cov8" title="1">return HumanDuration(time.Since(timestamp.Time))</span> 972 } 973 974 // HumanDuration translate time.Duration to human readable time string. 975 func HumanDuration(d time.Duration) string <span class="cov8" title="1">{ 976 // Allow deviation no more than 2 seconds(excluded) to tolerate machine time 977 // inconsistence, it can be considered as almost now. 978 if seconds := int(d.Seconds()); seconds < -1 </span><span class="cov8" title="1">{ 979 return "<invalid>" 980 }</span> else<span class="cov8" title="1"> if seconds < 0 </span><span class="cov0" title="0">{ 981 return "0s" 982 }</span> else<span class="cov8" title="1"> if seconds < 60*2 </span><span class="cov8" title="1">{ 983 return fmt.Sprintf("%ds", seconds) 984 }</span> 985 <span class="cov8" title="1">minutes := int(d / time.Minute) 986 if minutes < 10 </span><span class="cov8" title="1">{ 987 s := int(d/time.Second) % 60 988 if s == 0 </span><span class="cov8" title="1">{ 989 return fmt.Sprintf("%dm", minutes) 990 }</span> 991 <span class="cov8" title="1">return fmt.Sprintf("%dm%ds", minutes, s)</span> 992 } else<span class="cov8" title="1"> if minutes < 60*3 </span><span class="cov8" title="1">{ 993 return fmt.Sprintf("%dm", minutes) 994 }</span> 995 <span class="cov8" title="1">hours := int(d / time.Hour) 996 if hours < 8 </span><span class="cov8" title="1">{ 997 m := int(d/time.Minute) % 60 998 if m == 0 </span><span class="cov8" title="1">{ 999 return fmt.Sprintf("%dh", hours) 1000 }</span> 1001 <span class="cov8" title="1">return fmt.Sprintf("%dh%dm", hours, m)</span> 1002 } else<span class="cov8" title="1"> if hours < 48 </span><span class="cov8" title="1">{ 
1003 return fmt.Sprintf("%dh", hours) 1004 }</span> else<span class="cov8" title="1"> if hours < 24*8 </span><span class="cov8" title="1">{ 1005 h := hours % 24 1006 if h == 0 </span><span class="cov8" title="1">{ 1007 return fmt.Sprintf("%dd", hours/24) 1008 }</span> 1009 <span class="cov8" title="1">return fmt.Sprintf("%dd%dh", hours/24, h)</span> 1010 } else<span class="cov8" title="1"> if hours < 24*365*2 </span><span class="cov8" title="1">{ 1011 return fmt.Sprintf("%dd", hours/24) 1012 }</span> else<span class="cov8" title="1"> if hours < 24*365*8 </span><span class="cov8" title="1">{ 1013 return fmt.Sprintf("%dy%dd", hours/24/365, (hours/24)%365) 1014 }</span> 1015 <span class="cov8" title="1">return fmt.Sprintf("%dy", hours/24/365)</span> 1016 } 1017 </pre> 1018 1019 <pre class="file" id="file7" style="display: none">/* 1020 Copyright 2019 The Volcano Authors. 1021 1022 Licensed under the Apache License, Version 2.0 (the "License"); 1023 you may not use this file except in compliance with the License. 1024 You may obtain a copy of the License at 1025 1026 http://www.apache.org/licenses/LICENSE-2.0 1027 1028 Unless required by applicable law or agreed to in writing, software 1029 distributed under the License is distributed on an "AS IS" BASIS, 1030 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 1031 See the License for the specific language governing permissions and 1032 limitations under the License. 
1033 */ 1034 1035 package job 1036 1037 import ( 1038 "context" 1039 "encoding/json" 1040 "fmt" 1041 "io" 1042 "os" 1043 "strings" 1044 1045 "github.com/spf13/cobra" 1046 1047 coreV1 "k8s.io/api/core/v1" 1048 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 1049 "k8s.io/client-go/kubernetes" 1050 "k8s.io/client-go/rest" 1051 1052 "volcano.sh/apis/pkg/apis/batch/v1alpha1" 1053 "volcano.sh/apis/pkg/client/clientset/versioned" 1054 "volcano.sh/volcano/pkg/cli/util" 1055 ) 1056 1057 type viewFlags struct { 1058 commonFlags 1059 1060 Namespace string 1061 JobName string 1062 } 1063 1064 // level of print indent. 1065 const ( 1066 Level0 = iota 1067 Level1 1068 Level2 1069 ) 1070 1071 var viewJobFlags = &viewFlags{} 1072 1073 // InitViewFlags init the view command flags. 1074 func InitViewFlags(cmd *cobra.Command) <span class="cov8" title="1">{ 1075 initFlags(cmd, &viewJobFlags.commonFlags) 1076 1077 cmd.Flags().StringVarP(&viewJobFlags.Namespace, "namespace", "n", "default", "the namespace of job") 1078 cmd.Flags().StringVarP(&viewJobFlags.JobName, "name", "N", "", "the name of job") 1079 }</span> 1080 1081 // ViewJob gives full details of the job. 
1082 func ViewJob() error <span class="cov8" title="1">{ 1083 config, err := util.BuildConfig(viewJobFlags.Master, viewJobFlags.Kubeconfig) 1084 if err != nil </span><span class="cov0" title="0">{ 1085 return err 1086 }</span> 1087 <span class="cov8" title="1">if viewJobFlags.JobName == "" </span><span class="cov0" title="0">{ 1088 err := fmt.Errorf("job name (specified by --name or -N) is mandatory to view a particular job") 1089 return err 1090 }</span> 1091 1092 <span class="cov8" title="1">jobClient := versioned.NewForConfigOrDie(config) 1093 job, err := jobClient.BatchV1alpha1().Jobs(viewJobFlags.Namespace).Get(context.TODO(), viewJobFlags.JobName, metav1.GetOptions{}) 1094 if err != nil </span><span class="cov0" title="0">{ 1095 return err 1096 }</span> 1097 <span class="cov8" title="1">if job == nil </span><span class="cov0" title="0">{ 1098 fmt.Printf("No resources found\n") 1099 return nil 1100 }</span> 1101 <span class="cov8" title="1">PrintJobInfo(job, os.Stdout) 1102 PrintEvents(GetEvents(config, job), os.Stdout) 1103 return nil</span> 1104 } 1105 1106 // PrintJobInfo print the job detailed info into writer. 
1107 func PrintJobInfo(job *v1alpha1.Job, writer io.Writer) <span class="cov8" title="1">{ 1108 WriteLine(writer, Level0, "Name: \t%s\n", job.Name) 1109 WriteLine(writer, Level0, "Namespace: \t%s\n", job.Namespace) 1110 if len(job.Labels) > 0 </span><span class="cov8" title="1">{ 1111 label, _ := json.Marshal(job.Labels) 1112 WriteLine(writer, Level0, "Labels: \t%s\n", string(label)) 1113 }</span> else<span class="cov0" title="0"> { 1114 WriteLine(writer, Level0, "Labels: \t<none>\n") 1115 }</span> 1116 <span class="cov8" title="1">if len(job.Annotations) > 0 </span><span class="cov8" title="1">{ 1117 annotation, _ := json.Marshal(job.Annotations) 1118 WriteLine(writer, Level0, "Annotations:\t%s\n", string(annotation)) 1119 }</span> else<span class="cov0" title="0"> { 1120 WriteLine(writer, Level0, "Annotations:\t<none>\n") 1121 }</span> 1122 <span class="cov8" title="1">WriteLine(writer, Level0, "API Version:\t%s\n", job.APIVersion) 1123 WriteLine(writer, Level0, "Kind: \t%s\n", job.Kind) 1124 1125 WriteLine(writer, Level0, "Metadata:\n") 1126 WriteLine(writer, Level1, "Creation Timestamp:\t%s\n", job.CreationTimestamp) 1127 WriteLine(writer, Level1, "Generate Name: \t%s\n", job.GenerateName) 1128 WriteLine(writer, Level1, "Generation: \t%d\n", job.Generation) 1129 WriteLine(writer, Level1, "Resource Version: \t%s\n", job.ResourceVersion) 1130 WriteLine(writer, Level1, "Self Link: \t%s\n", job.SelfLink) 1131 WriteLine(writer, Level1, "UID: \t%s\n", job.UID) 1132 1133 WriteLine(writer, Level0, "Spec:\n") 1134 WriteLine(writer, Level1, "Min Available: \t%d\n", job.Spec.MinAvailable) 1135 WriteLine(writer, Level1, "Plugins:\n") 1136 WriteLine(writer, Level2, "Env:\t%v\n", job.Spec.Plugins["env"]) 1137 WriteLine(writer, Level2, "Ssh:\t%v\n", job.Spec.Plugins["ssh"]) 1138 WriteLine(writer, Level1, "Scheduler Name: \t%s\n", job.Spec.SchedulerName) 1139 WriteLine(writer, Level1, "Tasks:\n") 1140 for i := 0; i < len(job.Spec.Tasks); i++ </span><span class="cov8" 
title="1">{ 1141 WriteLine(writer, Level2, "Name:\t%s\n", job.Spec.Tasks[i].Name) 1142 WriteLine(writer, Level2, "Replicas:\t%d\n", job.Spec.Tasks[i].Replicas) 1143 WriteLine(writer, Level2, "Template:\n") 1144 WriteLine(writer, Level2+1, "Metadata:\n") 1145 WriteLine(writer, Level2+2, "Annotations:\n") 1146 WriteLine(writer, Level2+3, "Cri . Cci . Io / Container - Type: \t%s\n", job.Spec.Tasks[i].Template.ObjectMeta.Annotations["cri.cci.io/container-type"]) 1147 WriteLine(writer, Level2+3, "Kubernetes . Io / Availablezone: \t%s\n", job.Spec.Tasks[i].Template.ObjectMeta.Annotations["kubernetes.io/availablezone"]) 1148 WriteLine(writer, Level2+3, "Network . Alpha . Kubernetes . Io / Network:\t%s\n", job.Spec.Tasks[i].Template.ObjectMeta.Annotations["network.alpha.kubernetes.io/network"]) 1149 WriteLine(writer, Level2+2, "Creation Timestamp:\t%s\n", job.Spec.Tasks[i].Template.ObjectMeta.CreationTimestamp) 1150 1151 WriteLine(writer, Level2+1, "Spec:\n") 1152 WriteLine(writer, Level2+2, "Containers:\n") 1153 for j := 0; j < len(job.Spec.Tasks[i].Template.Spec.Containers); j++ </span><span class="cov8" title="1">{ 1154 WriteLine(writer, Level2+3, "Command:\n") 1155 for k := 0; k < len(job.Spec.Tasks[i].Template.Spec.Containers[j].Command); k++ </span><span class="cov8" title="1">{ 1156 WriteLine(writer, Level2+4, "%s\n", job.Spec.Tasks[i].Template.Spec.Containers[j].Command[k]) 1157 }</span> 1158 <span class="cov8" title="1">WriteLine(writer, Level2+3, "Image:\t%s\n", job.Spec.Tasks[i].Template.Spec.Containers[j].Image) 1159 WriteLine(writer, Level2+3, "Name: \t%s\n", job.Spec.Tasks[i].Template.Spec.Containers[j].Name) 1160 WriteLine(writer, Level2+3, "Ports:\n") 1161 for k := 0; k < len(job.Spec.Tasks[i].Template.Spec.Containers[j].Ports); k++ </span><span class="cov8" title="1">{ 1162 WriteLine(writer, Level2+4, "Container Port:\t%d\n", job.Spec.Tasks[i].Template.Spec.Containers[j].Ports[k].ContainerPort) 1163 WriteLine(writer, Level2+4, "Name: \t%s\n", 
job.Spec.Tasks[i].Template.Spec.Containers[j].Ports[k].Name) 1164 }</span> 1165 <span class="cov8" title="1">WriteLine(writer, Level2+3, "Resources:\n") 1166 WriteLine(writer, Level2+4, "Limits:\n") 1167 WriteLine(writer, Level2+5, "Cpu: \t%s\n", job.Spec.Tasks[i].Template.Spec.Containers[j].Resources.Limits.Cpu()) 1168 WriteLine(writer, Level2+5, "Memory:\t%s\n", job.Spec.Tasks[i].Template.Spec.Containers[j].Resources.Limits.Memory()) 1169 WriteLine(writer, Level2+4, "Requests:\n") 1170 WriteLine(writer, Level2+5, "Cpu: \t%s\n", job.Spec.Tasks[i].Template.Spec.Containers[j].Resources.Requests.Cpu()) 1171 WriteLine(writer, Level2+5, "Memory:\t%s\n", job.Spec.Tasks[i].Template.Spec.Containers[j].Resources.Requests.Memory()) 1172 WriteLine(writer, Level2+4, "Working Dir:\t%s\n", job.Spec.Tasks[i].Template.Spec.Containers[j].WorkingDir)</span> 1173 } 1174 <span class="cov8" title="1">WriteLine(writer, Level2+2, "Image Pull Secrets:\n") 1175 for j := 0; j < len(job.Spec.Tasks[i].Template.Spec.ImagePullSecrets); j++ </span><span class="cov8" title="1">{ 1176 WriteLine(writer, Level2+3, "Name: \t%s\n", job.Spec.Tasks[i].Template.Spec.ImagePullSecrets[j].Name) 1177 }</span> 1178 <span class="cov8" title="1">WriteLine(writer, Level2+2, "Restart Policy: \t%s\n", job.Spec.Tasks[i].Template.Spec.RestartPolicy)</span> 1179 } 1180 1181 <span class="cov8" title="1">WriteLine(writer, Level0, "Status:\n") 1182 if job.Status.Succeeded > 0 </span><span class="cov8" title="1">{ 1183 WriteLine(writer, Level1, "Succeeded: \t%d\n", job.Status.Succeeded) 1184 }</span> 1185 <span class="cov8" title="1">if job.Status.Pending > 0 </span><span class="cov8" title="1">{ 1186 WriteLine(writer, Level1, "Pending: \t%d\n", job.Status.Pending) 1187 }</span> 1188 <span class="cov8" title="1">if job.Status.Running > 0 </span><span class="cov8" title="1">{ 1189 WriteLine(writer, Level1, "Running: \t%d\n", job.Status.Running) 1190 }</span> 1191 <span class="cov8" title="1">if job.Status.Failed > 0 
</span><span class="cov8" title="1">{ 1192 WriteLine(writer, Level1, "Failed: \t%d\n", job.Status.Failed) 1193 }</span> 1194 <span class="cov8" title="1">if job.Status.Terminating > 0 </span><span class="cov8" title="1">{ 1195 WriteLine(writer, Level1, "Terminating: \t%d\n", job.Status.Terminating) 1196 }</span> 1197 <span class="cov8" title="1">if job.Status.Unknown > 0 </span><span class="cov8" title="1">{ 1198 WriteLine(writer, Level1, "Unknown: \t%d\n", job.Status.Unknown) 1199 }</span> 1200 <span class="cov8" title="1">if job.Status.RetryCount > 0 </span><span class="cov8" title="1">{ 1201 WriteLine(writer, Level1, "RetryCount: \t%d\n", job.Status.RetryCount) 1202 }</span> 1203 <span class="cov8" title="1">if job.Status.MinAvailable > 0 </span><span class="cov8" title="1">{ 1204 WriteLine(writer, Level1, "Min Available:\t%d\n", job.Status.MinAvailable) 1205 }</span> 1206 <span class="cov8" title="1">if job.Status.Version > 0 </span><span class="cov8" title="1">{ 1207 WriteLine(writer, Level1, "Version: \t%d\n", job.Status.Version) 1208 }</span> 1209 1210 <span class="cov8" title="1">WriteLine(writer, Level1, "State:\n") 1211 WriteLine(writer, Level2, "Phase:\t%s\n", job.Status.State.Phase) 1212 if len(job.Status.ControlledResources) > 0 </span><span class="cov8" title="1">{ 1213 WriteLine(writer, Level1, "Controlled Resources:\n") 1214 for key, value := range job.Status.ControlledResources </span><span class="cov8" title="1">{ 1215 WriteLine(writer, Level2, "%s: \t%s\n", key, value) 1216 }</span> 1217 } 1218 <span class="cov8" title="1">if len(job.Status.Conditions) > 0 </span><span class="cov0" title="0">{ 1219 WriteLine(writer, Level1, "Conditions:\n Status\tTransitionTime\n") 1220 for _, c := range job.Status.Conditions </span><span class="cov0" title="0">{ 1221 WriteLine(writer, Level2, "%v \t%v \n", 1222 c.Status, 1223 c.LastTransitionTime) 1224 }</span> 1225 } 1226 } 1227 1228 // PrintEvents print event info to writer. 
1229 func PrintEvents(events []coreV1.Event, writer io.Writer) <span class="cov8" title="1">{ 1230 if len(events) > 0 </span><span class="cov8" title="1">{ 1231 WriteLine(writer, Level0, "%s:\n%-15s\t%-40s\t%-30s\t%-40s\t%s\n", "Events", "Type", "Reason", "Age", "Form", "Message") 1232 WriteLine(writer, Level0, "%-15s\t%-40s\t%-30s\t%-40s\t%s\n", "-------", "-------", "-------", "-------", "-------") 1233 for _, e := range events </span><span class="cov8" title="1">{ 1234 var interval string 1235 if e.Count > 1 </span><span class="cov8" title="1">{ 1236 interval = fmt.Sprintf("%s (x%d over %s)", translateTimestampSince(e.LastTimestamp), e.Count, translateTimestampSince(e.FirstTimestamp)) 1237 }</span> else<span class="cov8" title="1"> { 1238 interval = translateTimestampSince(e.FirstTimestamp) 1239 }</span> 1240 <span class="cov8" title="1">EventSourceString := []string{e.Source.Component} 1241 if len(e.Source.Host) > 0 </span><span class="cov0" title="0">{ 1242 EventSourceString = append(EventSourceString, e.Source.Host) 1243 }</span> 1244 <span class="cov8" title="1">WriteLine(writer, Level0, "%-15v\t%-40v\t%-30s\t%-40s\t%v\n", 1245 e.Type, 1246 e.Reason, 1247 interval, 1248 strings.Join(EventSourceString, ", "), 1249 strings.TrimSpace(e.Message), 1250 )</span> 1251 } 1252 } else<span class="cov0" title="0"> { 1253 WriteLine(writer, Level0, "Events: \t<none>\n") 1254 }</span> 1255 } 1256 1257 // GetEvents get the job event by config. 
1258 func GetEvents(config *rest.Config, job *v1alpha1.Job) []coreV1.Event <span class="cov8" title="1">{ 1259 kubernetes, err := kubernetes.NewForConfig(config) 1260 if err != nil </span><span class="cov0" title="0">{ 1261 fmt.Printf("%v\n", err) 1262 return nil 1263 }</span> 1264 <span class="cov8" title="1">events, _ := kubernetes.CoreV1().Events(viewJobFlags.Namespace).List(context.TODO(), metav1.ListOptions{}) 1265 var jobEvents []coreV1.Event 1266 for _, v := range events.Items </span><span class="cov8" title="1">{ 1267 if strings.HasPrefix(v.ObjectMeta.Name, job.Name+".") </span><span class="cov8" title="1">{ 1268 jobEvents = append(jobEvents, v) 1269 }</span> 1270 } 1271 <span class="cov8" title="1">return jobEvents</span> 1272 } 1273 1274 // WriteLine write lines with specified indent. 1275 func WriteLine(writer io.Writer, spaces int, content string, params ...interface{}) <span class="cov8" title="1">{ 1276 prefix := "" 1277 for i := 0; i < spaces; i++ </span><span class="cov8" title="1">{ 1278 prefix += " " 1279 }</span> 1280 <span class="cov8" title="1">fmt.Fprintf(writer, prefix+content, params...)</span> 1281 } 1282 </pre> 1283 1284 <pre class="file" id="file8" style="display: none">/* 1285 Copyright 2019 The Volcano Authors. 1286 1287 Licensed under the Apache License, Version 2.0 (the "License"); 1288 you may not use this file except in compliance with the License. 1289 You may obtain a copy of the License at 1290 1291 http://www.apache.org/licenses/LICENSE-2.0 1292 1293 Unless required by applicable law or agreed to in writing, software 1294 distributed under the License is distributed on an "AS IS" BASIS, 1295 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 1296 See the License for the specific language governing permissions and 1297 limitations under the License. 
1298 */ 1299 1300 package queue 1301 1302 import ( 1303 "os" 1304 "path/filepath" 1305 1306 "github.com/spf13/cobra" 1307 ) 1308 1309 type commonFlags struct { 1310 Master string 1311 Kubeconfig string 1312 SchedulerName string 1313 } 1314 1315 func initFlags(cmd *cobra.Command, cf *commonFlags) <span class="cov8" title="1">{ 1316 cmd.Flags().StringVarP(&cf.SchedulerName, "scheduler", "", "volcano", "the scheduler for this job") 1317 cmd.Flags().StringVarP(&cf.Master, "master", "s", "", "the address of apiserver") 1318 1319 kubeConfFile := os.Getenv("KUBECONFIG") 1320 if kubeConfFile == "" </span><span class="cov8" title="1">{ 1321 if home := homeDir(); home != "" </span><span class="cov8" title="1">{ 1322 kubeConfFile = filepath.Join(home, ".kube", "config") 1323 }</span> 1324 } 1325 <span class="cov8" title="1">cmd.Flags().StringVarP(&cf.Kubeconfig, "kubeconfig", "k", kubeConfFile, "(optional) absolute path to the kubeconfig file")</span> 1326 } 1327 </pre> 1328 1329 <pre class="file" id="file9" style="display: none">/* 1330 Copyright 2019 The Volcano Authors. 1331 1332 Licensed under the Apache License, Version 2.0 (the "License"); 1333 you may not use this file except in compliance with the License. 1334 You may obtain a copy of the License at 1335 1336 http://www.apache.org/licenses/LICENSE-2.0 1337 1338 Unless required by applicable law or agreed to in writing, software 1339 distributed under the License is distributed on an "AS IS" BASIS, 1340 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 1341 See the License for the specific language governing permissions and 1342 limitations under the License. 
1343 */ 1344 1345 package queue 1346 1347 import ( 1348 "context" 1349 1350 "github.com/spf13/cobra" 1351 1352 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 1353 1354 schedulingv1beta1 "volcano.sh/apis/pkg/apis/scheduling/v1beta1" 1355 "volcano.sh/apis/pkg/client/clientset/versioned" 1356 ) 1357 1358 type createFlags struct { 1359 commonFlags 1360 1361 Name string 1362 Weight int32 1363 // State is state of Queue 1364 State string 1365 } 1366 1367 var createQueueFlags = &createFlags{} 1368 1369 // InitCreateFlags is used to init all flags during queue creating. 1370 func InitCreateFlags(cmd *cobra.Command) <span class="cov8" title="1">{ 1371 initFlags(cmd, &createQueueFlags.commonFlags) 1372 1373 cmd.Flags().StringVarP(&createQueueFlags.Name, "name", "n", "test", "the name of queue") 1374 cmd.Flags().Int32VarP(&createQueueFlags.Weight, "weight", "w", 1, "the weight of the queue") 1375 1376 cmd.Flags().StringVarP(&createQueueFlags.State, "state", "S", "Open", "the state of queue") 1377 }</span> 1378 1379 // CreateQueue create queue. 
1380 func CreateQueue() error <span class="cov8" title="1">{ 1381 config, err := buildConfig(createQueueFlags.Master, createQueueFlags.Kubeconfig) 1382 if err != nil </span><span class="cov0" title="0">{ 1383 return err 1384 }</span> 1385 1386 <span class="cov8" title="1">queue := &schedulingv1beta1.Queue{ 1387 ObjectMeta: metav1.ObjectMeta{ 1388 Name: createQueueFlags.Name, 1389 }, 1390 Spec: schedulingv1beta1.QueueSpec{ 1391 Weight: createQueueFlags.Weight, 1392 }, 1393 Status: schedulingv1beta1.QueueStatus{ 1394 State: schedulingv1beta1.QueueState(createQueueFlags.State), 1395 }, 1396 } 1397 1398 queueClient := versioned.NewForConfigOrDie(config) 1399 if _, err := queueClient.SchedulingV1beta1().Queues().Create(context.TODO(), queue, metav1.CreateOptions{}); err != nil </span><span class="cov0" title="0">{ 1400 return err 1401 }</span> 1402 1403 <span class="cov8" title="1">return nil</span> 1404 } 1405 </pre> 1406 1407 <pre class="file" id="file10" style="display: none">/* 1408 Copyright 2017 The Kubernetes Authors. 1409 1410 Licensed under the Apache License, Version 2.0 (the "License"); 1411 you may not use this file except in compliance with the License. 1412 You may obtain a copy of the License at 1413 1414 http://www.apache.org/licenses/LICENSE-2.0 1415 1416 Unless required by applicable law or agreed to in writing, software 1417 distributed under the License is distributed on an "AS IS" BASIS, 1418 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 1419 See the License for the specific language governing permissions and 1420 limitations under the License. 
1421 */ 1422 1423 package queue 1424 1425 import ( 1426 "context" 1427 "fmt" 1428 1429 "volcano.sh/apis/pkg/client/clientset/versioned" 1430 1431 "github.com/spf13/cobra" 1432 1433 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 1434 ) 1435 1436 type deleteFlags struct { 1437 commonFlags 1438 1439 // Name is name of queue 1440 Name string 1441 } 1442 1443 var deleteQueueFlags = &deleteFlags{} 1444 1445 // InitDeleteFlags is used to init all flags during queue deleting. 1446 func InitDeleteFlags(cmd *cobra.Command) <span class="cov8" title="1">{ 1447 initFlags(cmd, &deleteQueueFlags.commonFlags) 1448 1449 cmd.Flags().StringVarP(&deleteQueueFlags.Name, "name", "n", "", "the name of queue") 1450 }</span> 1451 1452 // DeleteQueue delete queue. 1453 func DeleteQueue() error <span class="cov8" title="1">{ 1454 config, err := buildConfig(deleteQueueFlags.Master, deleteQueueFlags.Kubeconfig) 1455 if err != nil </span><span class="cov0" title="0">{ 1456 return err 1457 }</span> 1458 1459 <span class="cov8" title="1">if len(deleteQueueFlags.Name) == 0 </span><span class="cov8" title="1">{ 1460 return fmt.Errorf("queue name must be specified") 1461 }</span> 1462 1463 <span class="cov8" title="1">queueClient := versioned.NewForConfigOrDie(config) 1464 return queueClient.SchedulingV1beta1().Queues().Delete(context.TODO(), deleteQueueFlags.Name, metav1.DeleteOptions{})</span> 1465 } 1466 </pre> 1467 1468 <pre class="file" id="file11" style="display: none">/* 1469 Copyright 2019 The Volcano Authors. 1470 1471 Licensed under the Apache License, Version 2.0 (the "License"); 1472 you may not use this file except in compliance with the License. 1473 You may obtain a copy of the License at 1474 1475 http://www.apache.org/licenses/LICENSE-2.0 1476 1477 Unless required by applicable law or agreed to in writing, software 1478 distributed under the License is distributed on an "AS IS" BASIS, 1479 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
1480 See the License for the specific language governing permissions and 1481 limitations under the License. 1482 */ 1483 1484 package queue 1485 1486 import ( 1487 "context" 1488 "fmt" 1489 "io" 1490 "os" 1491 1492 "github.com/spf13/cobra" 1493 1494 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 1495 1496 "volcano.sh/apis/pkg/apis/scheduling/v1beta1" 1497 "volcano.sh/apis/pkg/client/clientset/versioned" 1498 ) 1499 1500 type getFlags struct { 1501 commonFlags 1502 1503 Name string 1504 } 1505 1506 var getQueueFlags = &getFlags{} 1507 1508 // InitGetFlags is used to init all flags. 1509 func InitGetFlags(cmd *cobra.Command) <span class="cov8" title="1">{ 1510 initFlags(cmd, &getQueueFlags.commonFlags) 1511 1512 cmd.Flags().StringVarP(&getQueueFlags.Name, "name", "n", "", "the name of queue") 1513 }</span> 1514 1515 // GetQueue gets a queue. 1516 func GetQueue() error <span class="cov8" title="1">{ 1517 config, err := buildConfig(getQueueFlags.Master, getQueueFlags.Kubeconfig) 1518 if err != nil </span><span class="cov0" title="0">{ 1519 return err 1520 }</span> 1521 1522 <span class="cov8" title="1">if getQueueFlags.Name == "" </span><span class="cov8" title="1">{ 1523 err := fmt.Errorf("name is mandatory to get the particular queue details") 1524 return err 1525 }</span> 1526 1527 <span class="cov8" title="1">queueClient := versioned.NewForConfigOrDie(config) 1528 queue, err := queueClient.SchedulingV1beta1().Queues().Get(context.TODO(), getQueueFlags.Name, metav1.GetOptions{}) 1529 if err != nil </span><span class="cov0" title="0">{ 1530 return err 1531 }</span> 1532 1533 <span class="cov8" title="1">PrintQueue(queue, os.Stdout) 1534 1535 return nil</span> 1536 } 1537 1538 // PrintQueue prints queue information. 
1539 func PrintQueue(queue *v1beta1.Queue, writer io.Writer) <span class="cov8" title="1">{ 1540 _, err := fmt.Fprintf(writer, "%-25s%-8s%-8s%-8s%-8s%-8s%-8s\n", 1541 Name, Weight, State, Inqueue, Pending, Running, Unknown) 1542 if err != nil </span><span class="cov0" title="0">{ 1543 fmt.Printf("Failed to print queue command result: %s.\n", err) 1544 }</span> 1545 <span class="cov8" title="1">_, err = fmt.Fprintf(writer, "%-25s%-8d%-8s%-8d%-8d%-8d%-8d\n", 1546 queue.Name, queue.Spec.Weight, queue.Status.State, queue.Status.Inqueue, 1547 queue.Status.Pending, queue.Status.Running, queue.Status.Unknown) 1548 if err != nil </span><span class="cov0" title="0">{ 1549 fmt.Printf("Failed to print queue command result: %s.\n", err) 1550 }</span> 1551 } 1552 </pre> 1553 1554 <pre class="file" id="file12" style="display: none">/* 1555 Copyright 2019 The Volcano Authors. 1556 1557 Licensed under the Apache License, Version 2.0 (the "License"); 1558 you may not use this file except in compliance with the License. 1559 You may obtain a copy of the License at 1560 1561 http://www.apache.org/licenses/LICENSE-2.0 1562 1563 Unless required by applicable law or agreed to in writing, software 1564 distributed under the License is distributed on an "AS IS" BASIS, 1565 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 1566 See the License for the specific language governing permissions and 1567 limitations under the License. 
1568 */ 1569 1570 package queue 1571 1572 import ( 1573 "context" 1574 "fmt" 1575 "io" 1576 "os" 1577 1578 "github.com/spf13/cobra" 1579 1580 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 1581 1582 "volcano.sh/apis/pkg/apis/scheduling/v1beta1" 1583 "volcano.sh/apis/pkg/client/clientset/versioned" 1584 ) 1585 1586 type listFlags struct { 1587 commonFlags 1588 } 1589 1590 const ( 1591 // Weight of the queue 1592 Weight string = "Weight" 1593 1594 // Name of queue 1595 Name string = "Name" 1596 1597 // Pending status of the queue 1598 Pending string = "Pending" 1599 1600 // Running status of the queue 1601 Running string = "Running" 1602 1603 // Unknown status of the queue 1604 Unknown string = "Unknown" 1605 1606 // Inqueue status of queue 1607 Inqueue string = "Inqueue" 1608 1609 // State is state of queue 1610 State string = "State" 1611 ) 1612 1613 var listQueueFlags = &listFlags{} 1614 1615 // InitListFlags inits all flags. 1616 func InitListFlags(cmd *cobra.Command) <span class="cov8" title="1">{ 1617 initFlags(cmd, &listQueueFlags.commonFlags) 1618 }</span> 1619 1620 // ListQueue lists all the queue. 1621 func ListQueue() error <span class="cov8" title="1">{ 1622 config, err := buildConfig(listQueueFlags.Master, listQueueFlags.Kubeconfig) 1623 if err != nil </span><span class="cov0" title="0">{ 1624 return err 1625 }</span> 1626 1627 <span class="cov8" title="1">jobClient := versioned.NewForConfigOrDie(config) 1628 queues, err := jobClient.SchedulingV1beta1().Queues().List(context.TODO(), metav1.ListOptions{}) 1629 if err != nil </span><span class="cov0" title="0">{ 1630 return err 1631 }</span> 1632 1633 <span class="cov8" title="1">if len(queues.Items) == 0 </span><span class="cov8" title="1">{ 1634 fmt.Printf("No resources found\n") 1635 return nil 1636 }</span> 1637 <span class="cov8" title="1">PrintQueues(queues, os.Stdout) 1638 1639 return nil</span> 1640 } 1641 1642 // PrintQueues prints queue information. 
1643 func PrintQueues(queues *v1beta1.QueueList, writer io.Writer) <span class="cov8" title="1">{ 1644 _, err := fmt.Fprintf(writer, "%-25s%-8s%-8s%-8s%-8s%-8s%-8s\n", 1645 Name, Weight, State, Inqueue, Pending, Running, Unknown) 1646 if err != nil </span><span class="cov0" title="0">{ 1647 fmt.Printf("Failed to print queue command result: %s.\n", err) 1648 }</span> 1649 <span class="cov8" title="1">for _, queue := range queues.Items </span><span class="cov8" title="1">{ 1650 _, err = fmt.Fprintf(writer, "%-25s%-8d%-8s%-8d%-8d%-8d%-8d\n", 1651 queue.Name, queue.Spec.Weight, queue.Status.State, queue.Status.Inqueue, 1652 queue.Status.Pending, queue.Status.Running, queue.Status.Unknown) 1653 if err != nil </span><span class="cov0" title="0">{ 1654 fmt.Printf("Failed to print queue command result: %s.\n", err) 1655 }</span> 1656 } 1657 } 1658 </pre> 1659 1660 <pre class="file" id="file13" style="display: none">/* 1661 Copyright 2017 The Kubernetes Authors. 1662 1663 Licensed under the Apache License, Version 2.0 (the "License"); 1664 you may not use this file except in compliance with the License. 1665 You may obtain a copy of the License at 1666 1667 http://www.apache.org/licenses/LICENSE-2.0 1668 1669 Unless required by applicable law or agreed to in writing, software 1670 distributed under the License is distributed on an "AS IS" BASIS, 1671 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 1672 See the License for the specific language governing permissions and 1673 limitations under the License. 
1674 */ 1675 1676 package queue 1677 1678 import ( 1679 "context" 1680 "fmt" 1681 1682 "github.com/spf13/cobra" 1683 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 1684 1685 "k8s.io/apimachinery/pkg/types" 1686 1687 "volcano.sh/apis/pkg/apis/bus/v1alpha1" 1688 "volcano.sh/apis/pkg/client/clientset/versioned" 1689 ) 1690 1691 const ( 1692 // ActionOpen is `open` action 1693 ActionOpen = "open" 1694 // ActionClose is `close` action 1695 ActionClose = "close" 1696 // ActionUpdate is `update` action 1697 ActionUpdate = "update" 1698 ) 1699 1700 type operateFlags struct { 1701 commonFlags 1702 1703 // Name is name of queue 1704 Name string 1705 // Weight is weight of queue 1706 Weight int32 1707 // Action is operation action of queue 1708 Action string 1709 } 1710 1711 var operateQueueFlags = &operateFlags{} 1712 1713 // InitOperateFlags is used to init all flags during queue operating 1714 func InitOperateFlags(cmd *cobra.Command) <span class="cov8" title="1">{ 1715 initFlags(cmd, &operateQueueFlags.commonFlags) 1716 1717 cmd.Flags().StringVarP(&operateQueueFlags.Name, "name", "n", "", "the name of queue") 1718 cmd.Flags().Int32VarP(&operateQueueFlags.Weight, "weight", "w", 0, "the weight of the queue") 1719 cmd.Flags().StringVarP(&operateQueueFlags.Action, "action", "a", "", 1720 "operate action to queue, valid actions are open, close, update") 1721 }</span> 1722 1723 // OperateQueue operates queue 1724 func OperateQueue() error <span class="cov8" title="1">{ 1725 config, err := buildConfig(operateQueueFlags.Master, operateQueueFlags.Kubeconfig) 1726 if err != nil </span><span class="cov0" title="0">{ 1727 return err 1728 }</span> 1729 1730 <span class="cov8" title="1">if len(operateQueueFlags.Name) == 0 </span><span class="cov8" title="1">{ 1731 return fmt.Errorf("queue name must be specified") 1732 }</span> 1733 1734 <span class="cov8" title="1">var action v1alpha1.Action 1735 1736 switch operateQueueFlags.Action </span>{ 1737 case ActionOpen:<span class="cov8" 
title="1"> 1738 action = v1alpha1.OpenQueueAction</span> 1739 case ActionClose:<span class="cov8" title="1"> 1740 action = v1alpha1.CloseQueueAction</span> 1741 case ActionUpdate:<span class="cov8" title="1"> 1742 if operateQueueFlags.Weight == 0 </span><span class="cov8" title="1">{ 1743 return fmt.Errorf("when %s queue %s, weight must be specified, "+ 1744 "the value must be greater than 0", ActionUpdate, operateQueueFlags.Name) 1745 }</span> 1746 1747 <span class="cov8" title="1">queueClient := versioned.NewForConfigOrDie(config) 1748 patchBytes := []byte(fmt.Sprintf(`{"spec":{"weight":%d}}`, operateQueueFlags.Weight)) 1749 _, err := queueClient.SchedulingV1beta1().Queues().Patch(context.TODO(), 1750 operateQueueFlags.Name, types.MergePatchType, patchBytes, metav1.PatchOptions{}) 1751 1752 return err</span> 1753 case "":<span class="cov8" title="1"> 1754 return fmt.Errorf("action can not be null")</span> 1755 default:<span class="cov8" title="1"> 1756 return fmt.Errorf("action %s invalid, valid actions are %s, %s and %s", 1757 operateQueueFlags.Action, ActionOpen, ActionClose, ActionUpdate)</span> 1758 } 1759 1760 <span class="cov8" title="1">return createQueueCommand(config, action)</span> 1761 } 1762 </pre> 1763 1764 <pre class="file" id="file14" style="display: none">/* 1765 Copyright 2019 The Volcano Authors. 1766 1767 Licensed under the Apache License, Version 2.0 (the "License"); 1768 you may not use this file except in compliance with the License. 1769 You may obtain a copy of the License at 1770 1771 http://www.apache.org/licenses/LICENSE-2.0 1772 1773 Unless required by applicable law or agreed to in writing, software 1774 distributed under the License is distributed on an "AS IS" BASIS, 1775 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 1776 See the License for the specific language governing permissions and 1777 limitations under the License. 
1778 */ 1779 1780 package queue 1781 1782 import ( 1783 "context" 1784 "fmt" 1785 "os" 1786 "strings" 1787 1788 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 1789 // Initialize client auth plugin. 1790 _ "k8s.io/client-go/plugin/pkg/client/auth/gcp" 1791 "k8s.io/client-go/rest" 1792 "k8s.io/client-go/tools/clientcmd" 1793 1794 busv1alpha1 "volcano.sh/apis/pkg/apis/bus/v1alpha1" 1795 "volcano.sh/apis/pkg/apis/helpers" 1796 "volcano.sh/apis/pkg/client/clientset/versioned" 1797 ) 1798 1799 func homeDir() string <span class="cov8" title="1">{ 1800 if h := os.Getenv("HOME"); h != "" </span><span class="cov8" title="1">{ 1801 return h 1802 }</span> 1803 <span class="cov0" title="0">return os.Getenv("USERPROFILE")</span> // windows 1804 } 1805 1806 func buildConfig(master, kubeconfig string) (*rest.Config, error) <span class="cov8" title="1">{ 1807 return clientcmd.BuildConfigFromFlags(master, kubeconfig) 1808 }</span> 1809 1810 func createQueueCommand(config *rest.Config, action busv1alpha1.Action) error <span class="cov8" title="1">{ 1811 queueClient := versioned.NewForConfigOrDie(config) 1812 queue, err := queueClient.SchedulingV1beta1().Queues().Get(context.TODO(), operateQueueFlags.Name, metav1.GetOptions{}) 1813 if err != nil </span><span class="cov0" title="0">{ 1814 return err 1815 }</span> 1816 1817 <span class="cov8" title="1">ctrlRef := metav1.NewControllerRef(queue, helpers.V1beta1QueueKind) 1818 cmd := &busv1alpha1.Command{ 1819 ObjectMeta: metav1.ObjectMeta{ 1820 GenerateName: fmt.Sprintf("%s-%s-", 1821 queue.Name, strings.ToLower(string(action))), 1822 OwnerReferences: []metav1.OwnerReference{ 1823 *ctrlRef, 1824 }, 1825 }, 1826 TargetObject: ctrlRef, 1827 Action: string(action), 1828 } 1829 1830 if _, err := queueClient.BusV1alpha1().Commands("default").Create(context.TODO(), cmd, metav1.CreateOptions{}); err != nil </span><span class="cov0" title="0">{ 1831 return err 1832 }</span> 1833 1834 <span class="cov8" title="1">return nil</span> 1835 } 1836 
</pre> 1837 1838 <pre class="file" id="file15" style="display: none">/* 1839 Copyright 2019 The Volcano Authors. 1840 1841 Licensed under the Apache License, Version 2.0 (the "License"); 1842 you may not use this file except in compliance with the License. 1843 You may obtain a copy of the License at 1844 1845 http://www.apache.org/licenses/LICENSE-2.0 1846 1847 Unless required by applicable law or agreed to in writing, software 1848 distributed under the License is distributed on an "AS IS" BASIS, 1849 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 1850 See the License for the specific language governing permissions and 1851 limitations under the License. 1852 */ 1853 1854 package util 1855 1856 import ( 1857 "context" 1858 "fmt" 1859 "os" 1860 "path/filepath" 1861 "strings" 1862 "time" 1863 1864 "github.com/spf13/cobra" 1865 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 1866 1867 v1 "k8s.io/api/core/v1" 1868 "k8s.io/apimachinery/pkg/api/resource" 1869 "k8s.io/client-go/rest" 1870 "k8s.io/client-go/tools/clientcmd" 1871 1872 vcbus "volcano.sh/apis/pkg/apis/bus/v1alpha1" 1873 "volcano.sh/apis/pkg/apis/helpers" 1874 "volcano.sh/apis/pkg/client/clientset/versioned" 1875 ) 1876 1877 // CommonFlags are the flags that most command lines have. 1878 type CommonFlags struct { 1879 Master string 1880 Kubeconfig string 1881 } 1882 1883 // InitFlags initializes the common flags for most command lines. 
1884 func InitFlags(cmd *cobra.Command, cf *CommonFlags) <span class="cov0" title="0">{ 1885 cmd.Flags().StringVarP(&cf.Master, "master", "s", "", "the address of apiserver") 1886 1887 kubeConfFile := os.Getenv("KUBECONFIG") 1888 if kubeConfFile == "" </span><span class="cov0" title="0">{ 1889 if home := HomeDir(); home != "" </span><span class="cov0" title="0">{ 1890 kubeConfFile = filepath.Join(home, ".kube", "config") 1891 }</span> 1892 } 1893 <span class="cov0" title="0">cmd.Flags().StringVarP(&cf.Kubeconfig, "kubeconfig", "k", kubeConfFile, "(optional) absolute path to the kubeconfig file")</span> 1894 } 1895 1896 // HomeDir gets the env $HOME. 1897 func HomeDir() string <span class="cov0" title="0">{ 1898 if h := os.Getenv("HOME"); h != "" </span><span class="cov0" title="0">{ 1899 return h 1900 }</span> 1901 <span class="cov0" title="0">return os.Getenv("USERPROFILE")</span> // windows 1902 } 1903 1904 // BuildConfig builds the configure file for command lines. 1905 func BuildConfig(master, kubeconfig string) (*rest.Config, error) <span class="cov0" title="0">{ 1906 return clientcmd.BuildConfigFromFlags(master, kubeconfig) 1907 }</span> 1908 1909 // PopulateResourceListV1 takes strings of form <resourceName1>=<value1>,<resourceName1>=<value2> and returns ResourceList. 
1910 func PopulateResourceListV1(spec string) (v1.ResourceList, error) <span class="cov0" title="0">{ 1911 // empty input gets a nil response to preserve generator test expected behaviors 1912 if spec == "" </span><span class="cov0" title="0">{ 1913 return nil, nil 1914 }</span> 1915 1916 <span class="cov0" title="0">result := v1.ResourceList{} 1917 resourceStatements := strings.Split(spec, ",") 1918 for _, resourceStatement := range resourceStatements </span><span class="cov0" title="0">{ 1919 parts := strings.Split(resourceStatement, "=") 1920 if len(parts) != 2 </span><span class="cov0" title="0">{ 1921 return nil, fmt.Errorf("invalid argument syntax %v, expected <resource>=<value>", resourceStatement) 1922 }</span> 1923 <span class="cov0" title="0">resourceName := v1.ResourceName(parts[0]) 1924 resourceQuantity, err := resource.ParseQuantity(parts[1]) 1925 if err != nil </span><span class="cov0" title="0">{ 1926 return nil, err 1927 }</span> 1928 <span class="cov0" title="0">result[resourceName] = resourceQuantity</span> 1929 } 1930 <span class="cov0" title="0">return result, nil</span> 1931 } 1932 1933 // CreateQueueCommand executes a command such as open/close 1934 func CreateQueueCommand(vcClient *versioned.Clientset, ns, name string, action vcbus.Action) error <span class="cov0" title="0">{ 1935 queue, err := vcClient.SchedulingV1beta1().Queues().Get(context.TODO(), name, metav1.GetOptions{}) 1936 if err != nil </span><span class="cov0" title="0">{ 1937 return err 1938 }</span> 1939 <span class="cov0" title="0">ctrlRef := metav1.NewControllerRef(queue, helpers.V1beta1QueueKind) 1940 cmd := &vcbus.Command{ 1941 ObjectMeta: metav1.ObjectMeta{ 1942 GenerateName: fmt.Sprintf("%s-%s-", 1943 queue.Name, strings.ToLower(string(action))), 1944 Namespace: queue.Namespace, 1945 OwnerReferences: []metav1.OwnerReference{ 1946 *ctrlRef, 1947 }, 1948 }, 1949 TargetObject: ctrlRef, 1950 Action: string(action), 1951 } 1952 1953 if _, err := 
vcClient.BusV1alpha1().Commands(ns).Create(context.TODO(), cmd, metav1.CreateOptions{}); err != nil </span><span class="cov0" title="0">{ 1954 return err 1955 }</span> 1956 1957 <span class="cov0" title="0">return nil</span> 1958 } 1959 1960 // CreateJobCommand executes a command such as resume/suspend. 1961 func CreateJobCommand(config *rest.Config, ns, name string, action vcbus.Action) error <span class="cov0" title="0">{ 1962 jobClient := versioned.NewForConfigOrDie(config) 1963 job, err := jobClient.BatchV1alpha1().Jobs(ns).Get(context.TODO(), name, metav1.GetOptions{}) 1964 if err != nil </span><span class="cov0" title="0">{ 1965 return err 1966 }</span> 1967 1968 <span class="cov0" title="0">ctrlRef := metav1.NewControllerRef(job, helpers.JobKind) 1969 cmd := &vcbus.Command{ 1970 ObjectMeta: metav1.ObjectMeta{ 1971 GenerateName: fmt.Sprintf("%s-%s-", 1972 job.Name, strings.ToLower(string(action))), 1973 Namespace: job.Namespace, 1974 OwnerReferences: []metav1.OwnerReference{ 1975 *ctrlRef, 1976 }, 1977 }, 1978 TargetObject: ctrlRef, 1979 Action: string(action), 1980 } 1981 1982 if _, err := jobClient.BusV1alpha1().Commands(ns).Create(context.TODO(), cmd, metav1.CreateOptions{}); err != nil </span><span class="cov0" title="0">{ 1983 return err 1984 }</span> 1985 1986 <span class="cov0" title="0">return nil</span> 1987 } 1988 1989 // TranslateTimestampSince translates the time stamp. 1990 func TranslateTimestampSince(timestamp metav1.Time) string <span class="cov0" title="0">{ 1991 if timestamp.IsZero() </span><span class="cov0" title="0">{ 1992 return "<unknown>" 1993 }</span> 1994 <span class="cov0" title="0">return HumanDuration(time.Since(timestamp.Time))</span> 1995 } 1996 1997 // HumanDuration translate time.Duration to human readable time string. 
1998 func HumanDuration(d time.Duration) string <span class="cov8" title="1">{ 1999 // Allow deviation no more than 2 seconds(excluded) to tolerate machine time 2000 // inconsistence, it can be considered as almost now. 2001 if seconds := int(d.Seconds()); seconds < -1 </span><span class="cov8" title="1">{ 2002 return "<invalid>" 2003 }</span> else<span class="cov8" title="1"> if seconds < 0 </span><span class="cov0" title="0">{ 2004 return "0s" 2005 }</span> else<span class="cov8" title="1"> if seconds < 60*2 </span><span class="cov8" title="1">{ 2006 return fmt.Sprintf("%ds", seconds) 2007 }</span> 2008 <span class="cov8" title="1">minutes := int(d / time.Minute) 2009 if minutes < 10 </span><span class="cov8" title="1">{ 2010 s := int(d/time.Second) % 60 2011 if s == 0 </span><span class="cov8" title="1">{ 2012 return fmt.Sprintf("%dm", minutes) 2013 }</span> 2014 <span class="cov8" title="1">return fmt.Sprintf("%dm%ds", minutes, s)</span> 2015 } else<span class="cov8" title="1"> if minutes < 60*3 </span><span class="cov8" title="1">{ 2016 return fmt.Sprintf("%dm", minutes) 2017 }</span> 2018 <span class="cov8" title="1">hours := int(d / time.Hour) 2019 if hours < 8 </span><span class="cov8" title="1">{ 2020 m := int(d/time.Minute) % 60 2021 if m == 0 </span><span class="cov8" title="1">{ 2022 return fmt.Sprintf("%dh", hours) 2023 }</span> 2024 <span class="cov8" title="1">return fmt.Sprintf("%dh%dm", hours, m)</span> 2025 } else<span class="cov8" title="1"> if hours < 48 </span><span class="cov8" title="1">{ 2026 return fmt.Sprintf("%dh", hours) 2027 }</span> else<span class="cov8" title="1"> if hours < 24*8 </span><span class="cov8" title="1">{ 2028 h := hours % 24 2029 if h == 0 </span><span class="cov8" title="1">{ 2030 return fmt.Sprintf("%dd", hours/24) 2031 }</span> 2032 <span class="cov8" title="1">return fmt.Sprintf("%dd%dh", hours/24, h)</span> 2033 } else<span class="cov8" title="1"> if hours < 24*365*2 </span><span class="cov8" title="1">{ 2034 return 
fmt.Sprintf("%dd", hours/24) 2035 }</span> else<span class="cov8" title="1"> if hours < 24*365*8 </span><span class="cov8" title="1">{ 2036 return fmt.Sprintf("%dy%dd", hours/24/365, (hours/24)%365) 2037 }</span> 2038 <span class="cov8" title="1">return fmt.Sprintf("%dy", hours/24/365)</span> 2039 } 2040 </pre> 2041 2042 <pre class="file" id="file16" style="display: none">/* 2043 Copyright 2019 The Volcano Authors. 2044 2045 Licensed under the Apache License, Version 2.0 (the "License"); 2046 you may not use this file except in compliance with the License. 2047 You may obtain a copy of the License at 2048 2049 http://www.apache.org/licenses/LICENSE-2.0 2050 2051 Unless required by applicable law or agreed to in writing, software 2052 distributed under the License is distributed on an "AS IS" BASIS, 2053 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 2054 See the License for the specific language governing permissions and 2055 limitations under the License. 2056 */ 2057 2058 package vcancel 2059 2060 import ( 2061 "context" 2062 "fmt" 2063 2064 "github.com/spf13/cobra" 2065 2066 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 2067 2068 "volcano.sh/apis/pkg/client/clientset/versioned" 2069 "volcano.sh/volcano/pkg/cli/util" 2070 ) 2071 2072 type cancelFlags struct { 2073 util.CommonFlags 2074 2075 Namespace string 2076 JobName string 2077 } 2078 2079 var cancelJobFlags = &cancelFlags{} 2080 2081 // InitCancelFlags init the cancel command flags. 2082 func InitCancelFlags(cmd *cobra.Command) <span class="cov8" title="1">{ 2083 util.InitFlags(cmd, &cancelJobFlags.CommonFlags) 2084 2085 cmd.Flags().StringVarP(&cancelJobFlags.Namespace, "namespace", "N", "default", "the namespace of job") 2086 cmd.Flags().StringVarP(&cancelJobFlags.JobName, "name", "n", "", "the name of job") 2087 }</span> 2088 2089 // CancelJob cancel the job. 
2090 func CancelJob() error <span class="cov8" title="1">{ 2091 config, err := util.BuildConfig(cancelJobFlags.Master, cancelJobFlags.Kubeconfig) 2092 if err != nil </span><span class="cov0" title="0">{ 2093 return err 2094 }</span> 2095 2096 <span class="cov8" title="1">if cancelJobFlags.JobName == "" </span><span class="cov0" title="0">{ 2097 err := fmt.Errorf("job name is mandatory to cancel a particular job") 2098 return err 2099 }</span> 2100 2101 <span class="cov8" title="1">jobClient := versioned.NewForConfigOrDie(config) 2102 err = jobClient.BatchV1alpha1().Jobs(cancelJobFlags.Namespace).Delete(context.TODO(), cancelJobFlags.JobName, metav1.DeleteOptions{}) 2103 if err != nil </span><span class="cov0" title="0">{ 2104 return err 2105 }</span> 2106 <span class="cov8" title="1">fmt.Printf("cancel job %v successfully\n", cancelJobFlags.JobName) 2107 return nil</span> 2108 } 2109 </pre> 2110 2111 <pre class="file" id="file17" style="display: none">/* 2112 Copyright 2019 The Volcano Authors. 2113 2114 Licensed under the Apache License, Version 2.0 (the "License"); 2115 you may not use this file except in compliance with the License. 2116 You may obtain a copy of the License at 2117 2118 http://www.apache.org/licenses/LICENSE-2.0 2119 2120 Unless required by applicable law or agreed to in writing, software 2121 distributed under the License is distributed on an "AS IS" BASIS, 2122 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 2123 See the License for the specific language governing permissions and 2124 limitations under the License. 2125 */ 2126 2127 package vresume 2128 2129 import ( 2130 "fmt" 2131 2132 "github.com/spf13/cobra" 2133 2134 "volcano.sh/apis/pkg/apis/bus/v1alpha1" 2135 "volcano.sh/volcano/pkg/cli/util" 2136 ) 2137 2138 type resumeFlags struct { 2139 util.CommonFlags 2140 2141 Namespace string 2142 JobName string 2143 } 2144 2145 var resumeJobFlags = &resumeFlags{} 2146 2147 // InitResumeFlags init resume command flags. 
2148 func InitResumeFlags(cmd *cobra.Command) <span class="cov8" title="1">{ 2149 util.InitFlags(cmd, &resumeJobFlags.CommonFlags) 2150 2151 cmd.Flags().StringVarP(&resumeJobFlags.Namespace, "namespace", "N", "default", "the namespace of job") 2152 cmd.Flags().StringVarP(&resumeJobFlags.JobName, "name", "n", "", "the name of job") 2153 }</span> 2154 2155 // ResumeJob resumes the job. 2156 func ResumeJob() error <span class="cov8" title="1">{ 2157 config, err := util.BuildConfig(resumeJobFlags.Master, resumeJobFlags.Kubeconfig) 2158 if err != nil </span><span class="cov0" title="0">{ 2159 return err 2160 }</span> 2161 <span class="cov8" title="1">if resumeJobFlags.JobName == "" </span><span class="cov0" title="0">{ 2162 err := fmt.Errorf("job name is mandatory to resume a particular job") 2163 return err 2164 }</span> 2165 2166 <span class="cov8" title="1">return util.CreateJobCommand(config, 2167 resumeJobFlags.Namespace, resumeJobFlags.JobName, 2168 v1alpha1.ResumeJobAction)</span> 2169 } 2170 </pre> 2171 2172 <pre class="file" id="file18" style="display: none">/* 2173 Copyright 2019 The Volcano Authors. 2174 2175 Licensed under the Apache License, Version 2.0 (the "License"); 2176 you may not use this file except in compliance with the License. 2177 You may obtain a copy of the License at 2178 2179 http://www.apache.org/licenses/LICENSE-2.0 2180 2181 Unless required by applicable law or agreed to in writing, software 2182 distributed under the License is distributed on an "AS IS" BASIS, 2183 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 2184 See the License for the specific language governing permissions and 2185 limitations under the License. 
2186 */ 2187 2188 package vsuspend 2189 2190 import ( 2191 "fmt" 2192 2193 "github.com/spf13/cobra" 2194 2195 "volcano.sh/apis/pkg/apis/bus/v1alpha1" 2196 "volcano.sh/volcano/pkg/cli/util" 2197 ) 2198 2199 type suspendFlags struct { 2200 util.CommonFlags 2201 2202 Namespace string 2203 JobName string 2204 } 2205 2206 var suspendJobFlags = &suspendFlags{} 2207 2208 // InitSuspendFlags init suspend related flags. 2209 func InitSuspendFlags(cmd *cobra.Command) <span class="cov8" title="1">{ 2210 util.InitFlags(cmd, &suspendJobFlags.CommonFlags) 2211 2212 cmd.Flags().StringVarP(&suspendJobFlags.Namespace, "namespace", "N", "default", "the namespace of job") 2213 cmd.Flags().StringVarP(&suspendJobFlags.JobName, "name", "n", "", "the name of job") 2214 }</span> 2215 2216 // SuspendJob suspends the job. 2217 func SuspendJob() error <span class="cov8" title="1">{ 2218 config, err := util.BuildConfig(suspendJobFlags.Master, suspendJobFlags.Kubeconfig) 2219 if err != nil </span><span class="cov0" title="0">{ 2220 return err 2221 }</span> 2222 2223 <span class="cov8" title="1">if suspendJobFlags.JobName == "" </span><span class="cov0" title="0">{ 2224 err := fmt.Errorf("job name is mandatory to suspend a particular job") 2225 return err 2226 }</span> 2227 2228 <span class="cov8" title="1">return util.CreateJobCommand(config, 2229 suspendJobFlags.Namespace, suspendJobFlags.JobName, 2230 v1alpha1.AbortJobAction)</span> 2231 } 2232 </pre> 2233 2234 <pre class="file" id="file19" style="display: none">/* 2235 Copyright 2019 The Volcano Authors. 2236 2237 Licensed under the Apache License, Version 2.0 (the "License"); 2238 you may not use this file except in compliance with the License. 
2239 You may obtain a copy of the License at 2240 2241 http://www.apache.org/licenses/LICENSE-2.0 2242 2243 Unless required by applicable law or agreed to in writing, software 2244 distributed under the License is distributed on an "AS IS" BASIS, 2245 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 2246 See the License for the specific language governing permissions and 2247 limitations under the License. 2248 */ 2249 2250 package apis 2251 2252 import ( 2253 "fmt" 2254 2255 v1 "k8s.io/api/core/v1" 2256 2257 batch "volcano.sh/apis/pkg/apis/batch/v1alpha1" 2258 ) 2259 2260 //JobInfo struct. 2261 type JobInfo struct { 2262 Namespace string 2263 Name string 2264 2265 Job *batch.Job 2266 Pods map[string]map[string]*v1.Pod 2267 } 2268 2269 //Clone function clones the k8s pod values to the JobInfo struct. 2270 func (ji *JobInfo) Clone() *JobInfo <span class="cov8" title="1">{ 2271 job := &JobInfo{ 2272 Namespace: ji.Namespace, 2273 Name: ji.Name, 2274 Job: ji.Job, 2275 2276 Pods: make(map[string]map[string]*v1.Pod), 2277 } 2278 2279 for key, pods := range ji.Pods </span><span class="cov0" title="0">{ 2280 job.Pods[key] = make(map[string]*v1.Pod) 2281 for pn, pod := range pods </span><span class="cov0" title="0">{ 2282 job.Pods[key][pn] = pod 2283 }</span> 2284 } 2285 2286 <span class="cov8" title="1">return job</span> 2287 } 2288 2289 //SetJob sets the volcano jobs values to the JobInfo struct. 2290 func (ji *JobInfo) SetJob(job *batch.Job) <span class="cov8" title="1">{ 2291 ji.Name = job.Name 2292 ji.Namespace = job.Namespace 2293 ji.Job = job 2294 }</span> 2295 2296 //AddPod adds the k8s pod object values to the Pods field 2297 //of JobStruct if it doesn't exist. Otherwise it throws error. 
2298 func (ji *JobInfo) AddPod(pod *v1.Pod) error <span class="cov8" title="1">{ 2299 taskName, found := pod.Annotations[batch.TaskSpecKey] 2300 if !found </span><span class="cov0" title="0">{ 2301 return fmt.Errorf("failed to find taskName of Pod <%s/%s>", 2302 pod.Namespace, pod.Name) 2303 }</span> 2304 2305 <span class="cov8" title="1">_, found = pod.Annotations[batch.JobVersion] 2306 if !found </span><span class="cov0" title="0">{ 2307 return fmt.Errorf("failed to find jobVersion of Pod <%s/%s>", 2308 pod.Namespace, pod.Name) 2309 }</span> 2310 2311 <span class="cov8" title="1">if _, found := ji.Pods[taskName]; !found </span><span class="cov8" title="1">{ 2312 ji.Pods[taskName] = make(map[string]*v1.Pod) 2313 }</span> 2314 <span class="cov8" title="1">if _, found := ji.Pods[taskName][pod.Name]; found </span><span class="cov8" title="1">{ 2315 return fmt.Errorf("duplicated pod") 2316 }</span> 2317 <span class="cov8" title="1">ji.Pods[taskName][pod.Name] = pod 2318 2319 return nil</span> 2320 } 2321 2322 //UpdatePod updates the k8s pod object values to the existing pod. 
2323 func (ji *JobInfo) UpdatePod(pod *v1.Pod) error <span class="cov8" title="1">{ 2324 taskName, found := pod.Annotations[batch.TaskSpecKey] 2325 if !found </span><span class="cov0" title="0">{ 2326 return fmt.Errorf("failed to find taskName of Pod <%s/%s>", 2327 pod.Namespace, pod.Name) 2328 }</span> 2329 <span class="cov8" title="1">_, found = pod.Annotations[batch.JobVersion] 2330 if !found </span><span class="cov0" title="0">{ 2331 return fmt.Errorf("failed to find jobVersion of Pod <%s/%s>", 2332 pod.Namespace, pod.Name) 2333 }</span> 2334 2335 <span class="cov8" title="1">if _, found := ji.Pods[taskName]; !found </span><span class="cov0" title="0">{ 2336 return fmt.Errorf("can not find task %s in cache", taskName) 2337 }</span> 2338 <span class="cov8" title="1">if _, found := ji.Pods[taskName][pod.Name]; !found </span><span class="cov0" title="0">{ 2339 return fmt.Errorf("can not find pod <%s/%s> in cache", 2340 pod.Namespace, pod.Name) 2341 }</span> 2342 <span class="cov8" title="1">ji.Pods[taskName][pod.Name] = pod 2343 2344 return nil</span> 2345 } 2346 2347 //DeletePod deletes the given k8s pod from the JobInfo struct. 
2348 func (ji *JobInfo) DeletePod(pod *v1.Pod) error <span class="cov8" title="1">{ 2349 taskName, found := pod.Annotations[batch.TaskSpecKey] 2350 if !found </span><span class="cov0" title="0">{ 2351 return fmt.Errorf("failed to find taskName of Pod <%s/%s>", 2352 pod.Namespace, pod.Name) 2353 }</span> 2354 <span class="cov8" title="1">_, found = pod.Annotations[batch.JobVersion] 2355 if !found </span><span class="cov0" title="0">{ 2356 return fmt.Errorf("failed to find jobVersion of Pod <%s/%s>", 2357 pod.Namespace, pod.Name) 2358 }</span> 2359 2360 <span class="cov8" title="1">if pods, found := ji.Pods[taskName]; found </span><span class="cov8" title="1">{ 2361 delete(pods, pod.Name) 2362 if len(pods) == 0 </span><span class="cov8" title="1">{ 2363 delete(ji.Pods, taskName) 2364 }</span> 2365 } 2366 2367 <span class="cov8" title="1">return nil</span> 2368 } 2369 </pre> 2370 2371 <pre class="file" id="file20" style="display: none">/* 2372 Copyright 2019 The Volcano Authors. 2373 2374 Licensed under the Apache License, Version 2.0 (the "License"); 2375 you may not use this file except in compliance with the License. 2376 You may obtain a copy of the License at 2377 2378 http://www.apache.org/licenses/LICENSE-2.0 2379 2380 Unless required by applicable law or agreed to in writing, software 2381 distributed under the License is distributed on an "AS IS" BASIS, 2382 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 2383 See the License for the specific language governing permissions and 2384 limitations under the License. 2385 */ 2386 2387 package apis 2388 2389 import ( 2390 "fmt" 2391 2392 "volcano.sh/apis/pkg/apis/bus/v1alpha1" 2393 ) 2394 2395 //Request struct. 
2396 type Request struct { 2397 Namespace string 2398 JobName string 2399 TaskName string 2400 QueueName string 2401 2402 Event v1alpha1.Event 2403 ExitCode int32 2404 Action v1alpha1.Action 2405 JobVersion int32 2406 } 2407 2408 // String function returns the request in string format. 2409 func (r Request) String() string <span class="cov8" title="1">{ 2410 return fmt.Sprintf( 2411 "Queue: %s, Job: %s/%s, Task:%s, Event:%s, ExitCode:%d, Action:%s, JobVersion: %d", 2412 r.QueueName, r.Namespace, r.JobName, r.TaskName, r.Event, r.ExitCode, r.Action, r.JobVersion) 2413 }</span> 2414 </pre> 2415 2416 <pre class="file" id="file21" style="display: none">/* 2417 Copyright 2019 The Volcano Authors. 2418 2419 Licensed under the Apache License, Version 2.0 (the "License"); 2420 you may not use this file except in compliance with the License. 2421 You may obtain a copy of the License at 2422 2423 http://www.apache.org/licenses/LICENSE-2.0 2424 2425 Unless required by applicable law or agreed to in writing, software 2426 distributed under the License is distributed on an "AS IS" BASIS, 2427 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 2428 See the License for the specific language governing permissions and 2429 limitations under the License. 2430 */ 2431 2432 package cache 2433 2434 import ( 2435 "fmt" 2436 "sync" 2437 "time" 2438 2439 "golang.org/x/time/rate" 2440 v1 "k8s.io/api/core/v1" 2441 "k8s.io/apimachinery/pkg/util/wait" 2442 "k8s.io/client-go/util/workqueue" 2443 "k8s.io/klog" 2444 2445 "volcano.sh/apis/pkg/apis/batch/v1alpha1" 2446 "volcano.sh/volcano/pkg/controllers/apis" 2447 ) 2448 2449 type jobCache struct { 2450 sync.Mutex 2451 2452 jobs map[string]*apis.JobInfo 2453 deletedJobs workqueue.RateLimitingInterface 2454 } 2455 2456 func keyFn(ns, name string) string <span class="cov8" title="1">{ 2457 return fmt.Sprintf("%s/%s", ns, name) 2458 }</span> 2459 2460 //JobKeyByName gets the key for the job name. 
2461 func JobKeyByName(namespace string, name string) string <span class="cov0" title="0">{ 2462 return keyFn(namespace, name) 2463 }</span> 2464 2465 //JobKeyByReq gets the key for the job request. 2466 func JobKeyByReq(req *apis.Request) string <span class="cov0" title="0">{ 2467 return keyFn(req.Namespace, req.JobName) 2468 }</span> 2469 2470 //JobKey gets the "ns"/"name" format of the given job. 2471 func JobKey(job *v1alpha1.Job) string <span class="cov8" title="1">{ 2472 return keyFn(job.Namespace, job.Name) 2473 }</span> 2474 2475 func jobTerminated(job *apis.JobInfo) bool <span class="cov0" title="0">{ 2476 return job.Job == nil && len(job.Pods) == 0 2477 }</span> 2478 2479 func jobKeyOfPod(pod *v1.Pod) (string, error) <span class="cov8" title="1">{ 2480 jobName, found := pod.Annotations[v1alpha1.JobNameKey] 2481 if !found </span><span class="cov8" title="1">{ 2482 return "", fmt.Errorf("failed to find job name of pod <%s/%s>", 2483 pod.Namespace, pod.Name) 2484 }</span> 2485 2486 <span class="cov8" title="1">return keyFn(pod.Namespace, jobName), nil</span> 2487 } 2488 2489 // New gets the job Cache. 2490 func New() Cache <span class="cov8" title="1">{ 2491 queue := workqueue.NewMaxOfRateLimiter( 2492 workqueue.NewItemExponentialFailureRateLimiter(5*time.Millisecond, 180*time.Second), 2493 // 10 qps, 100 bucket size. 
This is only for retry speed and its only the overall factor (not per item) 2494 &workqueue.BucketRateLimiter{Limiter: rate.NewLimiter(rate.Limit(10), 100)}, 2495 ) 2496 2497 return &jobCache{ 2498 jobs: map[string]*apis.JobInfo{}, 2499 deletedJobs: workqueue.NewRateLimitingQueue(queue), 2500 } 2501 }</span> 2502 2503 func (jc *jobCache) Get(key string) (*apis.JobInfo, error) <span class="cov8" title="1">{ 2504 jc.Lock() 2505 defer jc.Unlock() 2506 2507 job, found := jc.jobs[key] 2508 if !found </span><span class="cov8" title="1">{ 2509 return nil, fmt.Errorf("failed to find job <%s>", key) 2510 }</span> 2511 2512 <span class="cov8" title="1">if job.Job == nil </span><span class="cov8" title="1">{ 2513 return nil, fmt.Errorf("job <%s> is not ready", key) 2514 }</span> 2515 2516 <span class="cov8" title="1">return job.Clone(), nil</span> 2517 } 2518 2519 func (jc *jobCache) GetStatus(key string) (*v1alpha1.JobStatus, error) <span class="cov8" title="1">{ 2520 jc.Lock() 2521 defer jc.Unlock() 2522 2523 job, found := jc.jobs[key] 2524 if !found </span><span class="cov8" title="1">{ 2525 return nil, fmt.Errorf("failed to find job <%s>", key) 2526 }</span> 2527 2528 <span class="cov8" title="1">if job.Job == nil </span><span class="cov0" title="0">{ 2529 return nil, fmt.Errorf("job <%s> is not ready", key) 2530 }</span> 2531 2532 <span class="cov8" title="1">status := job.Job.Status 2533 2534 return &status, nil</span> 2535 } 2536 2537 func (jc *jobCache) Add(job *v1alpha1.Job) error <span class="cov8" title="1">{ 2538 jc.Lock() 2539 defer jc.Unlock() 2540 key := JobKey(job) 2541 if jobInfo, found := jc.jobs[key]; found </span><span class="cov8" title="1">{ 2542 if jobInfo.Job == nil </span><span class="cov0" title="0">{ 2543 jobInfo.SetJob(job) 2544 2545 return nil 2546 }</span> 2547 <span class="cov8" title="1">return fmt.Errorf("duplicated jobInfo <%v>", key)</span> 2548 } 2549 2550 <span class="cov8" title="1">jc.jobs[key] = &apis.JobInfo{ 2551 Name: job.Name, 2552 
Namespace: job.Namespace, 2553 2554 Job: job, 2555 Pods: make(map[string]map[string]*v1.Pod), 2556 } 2557 2558 return nil</span> 2559 } 2560 2561 func (jc *jobCache) Update(obj *v1alpha1.Job) error <span class="cov8" title="1">{ 2562 jc.Lock() 2563 defer jc.Unlock() 2564 2565 key := JobKey(obj) 2566 job, found := jc.jobs[key] 2567 if !found </span><span class="cov8" title="1">{ 2568 return fmt.Errorf("failed to find job <%v>", key) 2569 }</span> 2570 <span class="cov8" title="1">job.Job = obj 2571 2572 return nil</span> 2573 } 2574 2575 func (jc *jobCache) Delete(obj *v1alpha1.Job) error <span class="cov8" title="1">{ 2576 jc.Lock() 2577 defer jc.Unlock() 2578 2579 key := JobKey(obj) 2580 jobInfo, found := jc.jobs[key] 2581 if !found </span><span class="cov8" title="1">{ 2582 return fmt.Errorf("failed to find job <%v>", key) 2583 }</span> 2584 <span class="cov8" title="1">jobInfo.Job = nil 2585 jc.deleteJob(jobInfo) 2586 2587 return nil</span> 2588 } 2589 2590 func (jc *jobCache) AddPod(pod *v1.Pod) error <span class="cov8" title="1">{ 2591 jc.Lock() 2592 defer jc.Unlock() 2593 2594 key, err := jobKeyOfPod(pod) 2595 if err != nil </span><span class="cov8" title="1">{ 2596 return err 2597 }</span> 2598 2599 <span class="cov8" title="1">job, found := jc.jobs[key] 2600 if !found </span><span class="cov0" title="0">{ 2601 job = &apis.JobInfo{ 2602 Pods: make(map[string]map[string]*v1.Pod), 2603 } 2604 jc.jobs[key] = job 2605 }</span> 2606 2607 <span class="cov8" title="1">return job.AddPod(pod)</span> 2608 } 2609 2610 func (jc *jobCache) UpdatePod(pod *v1.Pod) error <span class="cov8" title="1">{ 2611 jc.Lock() 2612 defer jc.Unlock() 2613 2614 key, err := jobKeyOfPod(pod) 2615 if err != nil </span><span class="cov0" title="0">{ 2616 return err 2617 }</span> 2618 2619 <span class="cov8" title="1">job, found := jc.jobs[key] 2620 if !found </span><span class="cov0" title="0">{ 2621 job = &apis.JobInfo{ 2622 Pods: make(map[string]map[string]*v1.Pod), 2623 } 2624 
jc.jobs[key] = job 2625 }</span> 2626 2627 <span class="cov8" title="1">return job.UpdatePod(pod)</span> 2628 } 2629 2630 func (jc *jobCache) DeletePod(pod *v1.Pod) error <span class="cov8" title="1">{ 2631 jc.Lock() 2632 defer jc.Unlock() 2633 2634 key, err := jobKeyOfPod(pod) 2635 if err != nil </span><span class="cov0" title="0">{ 2636 return err 2637 }</span> 2638 2639 <span class="cov8" title="1">job, found := jc.jobs[key] 2640 if !found </span><span class="cov0" title="0">{ 2641 job = &apis.JobInfo{ 2642 Pods: make(map[string]map[string]*v1.Pod), 2643 } 2644 jc.jobs[key] = job 2645 }</span> 2646 2647 <span class="cov8" title="1">if err := job.DeletePod(pod); err != nil </span><span class="cov0" title="0">{ 2648 return err 2649 }</span> 2650 2651 <span class="cov8" title="1">if jc.jobs[key].Job == nil </span><span class="cov0" title="0">{ 2652 jc.deleteJob(job) 2653 }</span> 2654 2655 <span class="cov8" title="1">return nil</span> 2656 } 2657 2658 func (jc *jobCache) Run(stopCh <-chan struct{}) <span class="cov0" title="0">{ 2659 wait.Until(jc.worker, 0, stopCh) 2660 }</span> 2661 2662 func (jc *jobCache) TaskCompleted(jobKey, taskName string) bool <span class="cov8" title="1">{ 2663 jc.Lock() 2664 defer jc.Unlock() 2665 2666 var taskReplicas, completed int32 2667 2668 jobInfo, found := jc.jobs[jobKey] 2669 if !found </span><span class="cov0" title="0">{ 2670 return false 2671 }</span> 2672 2673 <span class="cov8" title="1">taskPods, found := jobInfo.Pods[taskName] 2674 2675 if !found </span><span class="cov0" title="0">{ 2676 return false 2677 }</span> 2678 2679 <span class="cov8" title="1">if jobInfo.Job == nil </span><span class="cov0" title="0">{ 2680 return false 2681 }</span> 2682 2683 <span class="cov8" title="1">for _, task := range jobInfo.Job.Spec.Tasks </span><span class="cov8" title="1">{ 2684 if task.Name == taskName </span><span class="cov8" title="1">{ 2685 taskReplicas = task.Replicas 2686 break</span> 2687 } 2688 } 2689 <span class="cov8" 
title="1">if taskReplicas <= 0 </span><span class="cov0" title="0">{ 2690 return false 2691 }</span> 2692 2693 <span class="cov8" title="1">for _, pod := range taskPods </span><span class="cov8" title="1">{ 2694 if pod.Status.Phase == v1.PodSucceeded </span><span class="cov8" title="1">{ 2695 completed++ 2696 }</span> 2697 } 2698 <span class="cov8" title="1">return completed >= taskReplicas</span> 2699 } 2700 2701 func (jc *jobCache) TaskFailed(jobKey, taskName string) bool <span class="cov0" title="0">{ 2702 jc.Lock() 2703 defer jc.Unlock() 2704 2705 var taskReplicas, retried, maxRetry int32 2706 2707 jobInfo, found := jc.jobs[jobKey] 2708 if !found </span><span class="cov0" title="0">{ 2709 return false 2710 }</span> 2711 2712 <span class="cov0" title="0">taskPods, found := jobInfo.Pods[taskName] 2713 2714 if !found || jobInfo.Job == nil </span><span class="cov0" title="0">{ 2715 return false 2716 }</span> 2717 2718 <span class="cov0" title="0">for _, task := range jobInfo.Job.Spec.Tasks </span><span class="cov0" title="0">{ 2719 if task.Name == taskName </span><span class="cov0" title="0">{ 2720 maxRetry = task.MaxRetry 2721 taskReplicas = task.Replicas 2722 break</span> 2723 } 2724 } 2725 2726 // maxRetry == -1 means no limit 2727 <span class="cov0" title="0">if taskReplicas == 0 || maxRetry == -1 </span><span class="cov0" title="0">{ 2728 return false 2729 }</span> 2730 2731 // Compatible with existing job 2732 <span class="cov0" title="0">if maxRetry == 0 </span><span class="cov0" title="0">{ 2733 maxRetry = 3 2734 }</span> 2735 2736 <span class="cov0" title="0">for _, pod := range taskPods </span><span class="cov0" title="0">{ 2737 if pod.Status.Phase == v1.PodRunning || pod.Status.Phase == v1.PodPending </span><span class="cov0" title="0">{ 2738 for j := range pod.Status.InitContainerStatuses </span><span class="cov0" title="0">{ 2739 stat := pod.Status.InitContainerStatuses[j] 2740 retried += stat.RestartCount 2741 }</span> 2742 <span class="cov0" 
title="0">for j := range pod.Status.ContainerStatuses </span><span class="cov0" title="0">{ 2743 stat := pod.Status.ContainerStatuses[j] 2744 retried += stat.RestartCount 2745 }</span> 2746 } 2747 } 2748 <span class="cov0" title="0">return retried > maxRetry</span> 2749 } 2750 2751 func (jc *jobCache) worker() <span class="cov0" title="0">{ 2752 for jc.processCleanupJob() </span>{<span class="cov0" title="0"> 2753 }</span> 2754 } 2755 2756 func (jc *jobCache) processCleanupJob() bool <span class="cov0" title="0">{ 2757 obj, shutdown := jc.deletedJobs.Get() 2758 if shutdown </span><span class="cov0" title="0">{ 2759 return false 2760 }</span> 2761 <span class="cov0" title="0">defer jc.deletedJobs.Done(obj) 2762 2763 job, ok := obj.(*apis.JobInfo) 2764 if !ok </span><span class="cov0" title="0">{ 2765 klog.Errorf("failed to convert %v to *apis.JobInfo", obj) 2766 return true 2767 }</span> 2768 2769 <span class="cov0" title="0">jc.Mutex.Lock() 2770 defer jc.Mutex.Unlock() 2771 2772 if jobTerminated(job) </span><span class="cov0" title="0">{ 2773 jc.deletedJobs.Forget(obj) 2774 key := keyFn(job.Namespace, job.Name) 2775 delete(jc.jobs, key) 2776 klog.V(3).Infof("Job <%s> was deleted.", key) 2777 }</span> else<span class="cov0" title="0"> { 2778 // Retry 2779 jc.deleteJob(job) 2780 }</span> 2781 <span class="cov0" title="0">return true</span> 2782 } 2783 2784 func (jc *jobCache) deleteJob(job *apis.JobInfo) <span class="cov8" title="1">{ 2785 klog.V(3).Infof("Try to delete Job <%v/%v>", 2786 job.Namespace, job.Name) 2787 2788 jc.deletedJobs.AddRateLimited(job) 2789 }</span> 2790 </pre> 2791 2792 <pre class="file" id="file22" style="display: none">/* 2793 Copyright 2019 The Volcano Authors. 2794 2795 Licensed under the Apache License, Version 2.0 (the "License"); 2796 you may not use this file except in compliance with the License. 
2797 You may obtain a copy of the License at 2798 2799 http://www.apache.org/licenses/LICENSE-2.0 2800 2801 Unless required by applicable law or agreed to in writing, software 2802 distributed under the License is distributed on an "AS IS" BASIS, 2803 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 2804 See the License for the specific language governing permissions and 2805 limitations under the License. 2806 */ 2807 2808 package garbagecollector 2809 2810 import ( 2811 "context" 2812 "fmt" 2813 "time" 2814 2815 "k8s.io/apimachinery/pkg/api/errors" 2816 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 2817 "k8s.io/apimachinery/pkg/util/wait" 2818 "k8s.io/client-go/tools/cache" 2819 "k8s.io/client-go/util/workqueue" 2820 "k8s.io/klog" 2821 2822 "volcano.sh/apis/pkg/apis/batch/v1alpha1" 2823 vcclientset "volcano.sh/apis/pkg/client/clientset/versioned" 2824 informerfactory "volcano.sh/apis/pkg/client/informers/externalversions" 2825 batchinformers "volcano.sh/apis/pkg/client/informers/externalversions/batch/v1alpha1" 2826 batchlisters "volcano.sh/apis/pkg/client/listers/batch/v1alpha1" 2827 "volcano.sh/volcano/pkg/controllers/framework" 2828 ) 2829 2830 func init() <span class="cov8" title="1">{ 2831 framework.RegisterController(&gccontroller{}) 2832 }</span> 2833 2834 // gccontroller runs reflectors to watch for changes of managed API 2835 // objects. Currently it only watches Jobs. Triggered by Job creation 2836 // and updates, it enqueues Jobs that have non-nil `.spec.ttlSecondsAfterFinished` 2837 // to the `queue`. The gccontroller has workers who consume `queue`, check whether 2838 // the Job TTL has expired or not; if the Job TTL hasn't expired, it will add the 2839 // Job to the queue after the TTL is expected to expire; if the TTL has expired, the 2840 // worker will send requests to the API server to delete the Jobs accordingly. 
2841 // This is implemented outside of Job controller for separation of concerns, and 2842 // because it will be extended to handle other finishable resource types. 2843 type gccontroller struct { 2844 vcClient vcclientset.Interface 2845 2846 jobInformer batchinformers.JobInformer 2847 2848 // A store of jobs 2849 jobLister batchlisters.JobLister 2850 jobSynced func() bool 2851 2852 // queues that need to be updated. 2853 queue workqueue.RateLimitingInterface 2854 } 2855 2856 func (gc *gccontroller) Name() string <span class="cov8" title="1">{ 2857 return "gc-controller" 2858 }</span> 2859 2860 // Initialize creates an instance of gccontroller. 2861 func (gc *gccontroller) Initialize(opt *framework.ControllerOption) error <span class="cov8" title="1">{ 2862 gc.vcClient = opt.VolcanoClient 2863 jobInformer := informerfactory.NewSharedInformerFactory(gc.vcClient, 0).Batch().V1alpha1().Jobs() 2864 2865 gc.jobInformer = jobInformer 2866 gc.jobLister = jobInformer.Lister() 2867 gc.jobSynced = jobInformer.Informer().HasSynced 2868 gc.queue = workqueue.NewRateLimitingQueue(workqueue.DefaultControllerRateLimiter()) 2869 2870 jobInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ 2871 AddFunc: gc.addJob, 2872 UpdateFunc: gc.updateJob, 2873 }) 2874 2875 return nil 2876 }</span> 2877 2878 // Run starts the worker to clean up Jobs. 
2879 func (gc *gccontroller) Run(stopCh <-chan struct{}) <span class="cov0" title="0">{ 2880 defer gc.queue.ShutDown() 2881 2882 klog.Infof("Starting garbage collector") 2883 defer klog.Infof("Shutting down garbage collector") 2884 2885 go gc.jobInformer.Informer().Run(stopCh) 2886 if !cache.WaitForCacheSync(stopCh, gc.jobSynced) </span><span class="cov0" title="0">{ 2887 return 2888 }</span> 2889 2890 <span class="cov0" title="0">go wait.Until(gc.worker, time.Second, stopCh) 2891 2892 <-stopCh</span> 2893 } 2894 2895 func (gc *gccontroller) addJob(obj interface{}) <span class="cov0" title="0">{ 2896 job := obj.(*v1alpha1.Job) 2897 klog.V(4).Infof("Adding job %s/%s", job.Namespace, job.Name) 2898 2899 if job.DeletionTimestamp == nil && needsCleanup(job) </span><span class="cov0" title="0">{ 2900 gc.enqueue(job) 2901 }</span> 2902 } 2903 2904 func (gc *gccontroller) updateJob(old, cur interface{}) <span class="cov0" title="0">{ 2905 job := cur.(*v1alpha1.Job) 2906 klog.V(4).Infof("Updating job %s/%s", job.Namespace, job.Name) 2907 2908 if job.DeletionTimestamp == nil && needsCleanup(job) </span><span class="cov0" title="0">{ 2909 gc.enqueue(job) 2910 }</span> 2911 } 2912 2913 func (gc *gccontroller) enqueue(job *v1alpha1.Job) <span class="cov0" title="0">{ 2914 klog.V(4).Infof("Add job %s/%s to cleanup", job.Namespace, job.Name) 2915 key, err := cache.MetaNamespaceKeyFunc(job) 2916 if err != nil </span><span class="cov0" title="0">{ 2917 klog.Errorf("couldn't get key for object %#v: %v", job, err) 2918 return 2919 }</span> 2920 2921 <span class="cov0" title="0">gc.queue.Add(key)</span> 2922 } 2923 2924 func (gc *gccontroller) enqueueAfter(job *v1alpha1.Job, after time.Duration) <span class="cov8" title="1">{ 2925 key, err := cache.MetaNamespaceKeyFunc(job) 2926 if err != nil </span><span class="cov0" title="0">{ 2927 klog.Errorf("couldn't get key for object %#v: %v", job, err) 2928 return 2929 }</span> 2930 2931 <span class="cov8" title="1">gc.queue.AddAfter(key, 
after)</span> 2932 } 2933 2934 func (gc *gccontroller) worker() <span class="cov0" title="0">{ 2935 for gc.processNextWorkItem() </span>{<span class="cov0" title="0"> 2936 }</span> 2937 } 2938 2939 func (gc *gccontroller) processNextWorkItem() bool <span class="cov0" title="0">{ 2940 key, quit := gc.queue.Get() 2941 if quit </span><span class="cov0" title="0">{ 2942 return false 2943 }</span> 2944 <span class="cov0" title="0">defer gc.queue.Done(key) 2945 2946 err := gc.processJob(key.(string)) 2947 gc.handleErr(err, key) 2948 2949 return true</span> 2950 } 2951 2952 func (gc *gccontroller) handleErr(err error, key interface{}) <span class="cov0" title="0">{ 2953 if err == nil </span><span class="cov0" title="0">{ 2954 gc.queue.Forget(key) 2955 return 2956 }</span> 2957 2958 <span class="cov0" title="0">klog.Errorf("error cleaning up Job %v, will retry: %v", key, err) 2959 gc.queue.AddRateLimited(key)</span> 2960 } 2961 2962 // processJob will check the Job's state and TTL and delete the Job when it 2963 // finishes and its TTL after finished has expired. If the Job hasn't finished or 2964 // its TTL hasn't expired, it will be added to the queue after the TTL is expected 2965 // to expire. 2966 // This function is not meant to be invoked concurrently with the same key. 2967 func (gc *gccontroller) processJob(key string) error <span class="cov0" title="0">{ 2968 namespace, name, err := cache.SplitMetaNamespaceKey(key) 2969 if err != nil </span><span class="cov0" title="0">{ 2970 return err 2971 }</span> 2972 2973 <span class="cov0" title="0">klog.V(4).Infof("Checking if Job %s/%s is ready for cleanup", namespace, name) 2974 // Ignore the Jobs that are already deleted or being deleted, or the ones that don't need clean up. 
2975 job, err := gc.jobLister.Jobs(namespace).Get(name) 2976 if errors.IsNotFound(err) </span><span class="cov0" title="0">{ 2977 return nil 2978 }</span> 2979 <span class="cov0" title="0">if err != nil </span><span class="cov0" title="0">{ 2980 return err 2981 }</span> 2982 2983 <span class="cov0" title="0">if expired, err := gc.processTTL(job); err != nil </span><span class="cov0" title="0">{ 2984 return err 2985 }</span> else<span class="cov0" title="0"> if !expired </span><span class="cov0" title="0">{ 2986 return nil 2987 }</span> 2988 2989 // The Job's TTL is assumed to have expired, but the Job TTL might be stale. 2990 // Before deleting the Job, do a final sanity check. 2991 // If TTL is modified before we do this check, we cannot be sure if the TTL truly expires. 2992 // The latest Job may have a different UID, but it's fine because the checks will be run again. 2993 <span class="cov0" title="0">fresh, err := gc.vcClient.BatchV1alpha1().Jobs(namespace).Get(context.TODO(), name, metav1.GetOptions{}) 2994 if errors.IsNotFound(err) </span><span class="cov0" title="0">{ 2995 return nil 2996 }</span> 2997 <span class="cov0" title="0">if err != nil </span><span class="cov0" title="0">{ 2998 return err 2999 }</span> 3000 // Use the latest Job TTL to see if the TTL truly expires. 3001 <span class="cov0" title="0">if expired, err := gc.processTTL(fresh); err != nil </span><span class="cov0" title="0">{ 3002 return err 3003 }</span> else<span class="cov0" title="0"> if !expired </span><span class="cov0" title="0">{ 3004 return nil 3005 }</span> 3006 // Cascade deletes the Jobs if TTL truly expires. 
3007 <span class="cov0" title="0">policy := metav1.DeletePropagationForeground 3008 options := metav1.DeleteOptions{ 3009 PropagationPolicy: &policy, 3010 Preconditions: &metav1.Preconditions{UID: &fresh.UID}, 3011 } 3012 klog.V(4).Infof("Cleaning up Job %s/%s", namespace, name) 3013 return gc.vcClient.BatchV1alpha1().Jobs(fresh.Namespace).Delete(context.TODO(), fresh.Name, options)</span> 3014 } 3015 3016 // processTTL checks whether a given Job's TTL has expired, and add it to the queue after the TTL is expected to expire 3017 // if the TTL will expire later. 3018 func (gc *gccontroller) processTTL(job *v1alpha1.Job) (expired bool, err error) <span class="cov8" title="1">{ 3019 // We don't care about the Jobs that are going to be deleted, or the ones that don't need clean up. 3020 if job.DeletionTimestamp != nil || !needsCleanup(job) </span><span class="cov0" title="0">{ 3021 return false, nil 3022 }</span> 3023 3024 <span class="cov8" title="1">now := time.Now() 3025 t, err := timeLeft(job, &now) 3026 if err != nil </span><span class="cov0" title="0">{ 3027 return false, err 3028 }</span> 3029 3030 // TTL has expired 3031 <span class="cov8" title="1">if *t <= 0 </span><span class="cov8" title="1">{ 3032 return true, nil 3033 }</span> 3034 3035 <span class="cov8" title="1">gc.enqueueAfter(job, *t) 3036 return false, nil</span> 3037 } 3038 3039 // needsCleanup checks whether a Job has finished and has a TTL set. 
3040 func needsCleanup(j *v1alpha1.Job) bool <span class="cov8" title="1">{ 3041 return j.Spec.TTLSecondsAfterFinished != nil && isJobFinished(j) 3042 }</span> 3043 3044 func isJobFinished(job *v1alpha1.Job) bool <span class="cov8" title="1">{ 3045 return job.Status.State.Phase == v1alpha1.Completed || 3046 job.Status.State.Phase == v1alpha1.Failed || 3047 job.Status.State.Phase == v1alpha1.Terminated 3048 }</span> 3049 3050 func getFinishAndExpireTime(j *v1alpha1.Job) (*time.Time, *time.Time, error) <span class="cov8" title="1">{ 3051 if !needsCleanup(j) </span><span class="cov8" title="1">{ 3052 return nil, nil, fmt.Errorf("job %s/%s should not be cleaned up", j.Namespace, j.Name) 3053 }</span> 3054 <span class="cov8" title="1">finishAt, err := jobFinishTime(j) 3055 if err != nil </span><span class="cov0" title="0">{ 3056 return nil, nil, err 3057 }</span> 3058 <span class="cov8" title="1">finishAtUTC := finishAt.UTC() 3059 expireAtUTC := finishAtUTC.Add(time.Duration(*j.Spec.TTLSecondsAfterFinished) * time.Second) 3060 return &finishAtUTC, &expireAtUTC, nil</span> 3061 } 3062 3063 func timeLeft(j *v1alpha1.Job, since *time.Time) (*time.Duration, error) <span class="cov8" title="1">{ 3064 finishAt, expireAt, err := getFinishAndExpireTime(j) 3065 if err != nil </span><span class="cov8" title="1">{ 3066 return nil, err 3067 }</span> 3068 <span class="cov8" title="1">if finishAt.UTC().After(since.UTC()) </span><span class="cov0" title="0">{ 3069 klog.Warningf("Warning: Found Job %s/%s finished in the future. This is likely due to time skew in the cluster. 
Job cleanup will be deferred.", j.Namespace, j.Name) 3070 }</span> 3071 <span class="cov8" title="1">remaining := expireAt.UTC().Sub(since.UTC()) 3072 klog.V(4).Infof("Found Job %s/%s finished at %v, remaining TTL %v since %v, TTL will expire at %v", j.Namespace, j.Name, finishAt.UTC(), remaining, since.UTC(), expireAt.UTC()) 3073 return &remaining, nil</span> 3074 } 3075 3076 // jobFinishTime takes an already finished Job and returns the time it finishes. 3077 func jobFinishTime(finishedJob *v1alpha1.Job) (metav1.Time, error) <span class="cov8" title="1">{ 3078 if finishedJob.Status.State.LastTransitionTime.IsZero() </span><span class="cov8" title="1">{ 3079 return metav1.Time{}, fmt.Errorf("unable to find the time when the Job %s/%s finished", finishedJob.Namespace, finishedJob.Name) 3080 }</span> 3081 <span class="cov8" title="1">return finishedJob.Status.State.LastTransitionTime, nil</span> 3082 } 3083 </pre> 3084 3085 <pre class="file" id="file23" style="display: none">/* 3086 Copyright 2019 The Volcano Authors. 3087 3088 Licensed under the Apache License, Version 2.0 (the "License"); 3089 you may not use this file except in compliance with the License. 3090 You may obtain a copy of the License at 3091 3092 http://www.apache.org/licenses/LICENSE-2.0 3093 3094 Unless required by applicable law or agreed to in writing, software 3095 distributed under the License is distributed on an "AS IS" BASIS, 3096 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 3097 See the License for the specific language governing permissions and 3098 limitations under the License. 
3099 */ 3100 3101 package helpers 3102 3103 import ( 3104 "fmt" 3105 "math/rand" 3106 "strconv" 3107 "strings" 3108 "time" 3109 3110 v1 "k8s.io/api/core/v1" 3111 3112 batch "volcano.sh/apis/pkg/apis/batch/v1alpha1" 3113 "volcano.sh/volcano/pkg/controllers/apis" 3114 "volcano.sh/volcano/pkg/scheduler/api" 3115 ) 3116 3117 const ( 3118 // PodNameFmt pod name format 3119 PodNameFmt = "%s-%s-%d" 3120 // persistentVolumeClaimFmt represents persistent volume claim name format 3121 persistentVolumeClaimFmt = "%s-pvc-%s" 3122 ) 3123 3124 // GetPodIndexUnderTask returns task Index. 3125 func GetPodIndexUnderTask(pod *v1.Pod) string <span class="cov8" title="1">{ 3126 num := strings.Split(pod.Name, "-") 3127 if len(num) >= 3 </span><span class="cov8" title="1">{ 3128 return num[len(num)-1] 3129 }</span> 3130 3131 <span class="cov8" title="1">return ""</span> 3132 } 3133 3134 // ComparePodByIndex by pod index 3135 func CompareTask(lv, rv *api.TaskInfo) bool <span class="cov8" title="1">{ 3136 lStr := GetPodIndexUnderTask(lv.Pod) 3137 rStr := GetPodIndexUnderTask(rv.Pod) 3138 lIndex, lErr := strconv.Atoi(lStr) 3139 rIndex, rErr := strconv.Atoi(rStr) 3140 if lErr != nil || rErr != nil || lIndex == rIndex </span><span class="cov8" title="1">{ 3141 return lv.Pod.CreationTimestamp.Before(&rv.Pod.CreationTimestamp) 3142 }</span> 3143 <span class="cov8" title="1">if lIndex > rIndex </span><span class="cov8" title="1">{ 3144 return false 3145 }</span> 3146 <span class="cov8" title="1">return true</span> 3147 } 3148 3149 // GetTaskKey returns task key/name 3150 func GetTaskKey(pod *v1.Pod) string <span class="cov0" title="0">{ 3151 if pod.Annotations == nil || pod.Annotations[batch.TaskSpecKey] == "" </span><span class="cov0" title="0">{ 3152 return batch.DefaultTaskSpec 3153 }</span> 3154 <span class="cov0" title="0">return pod.Annotations[batch.TaskSpecKey]</span> 3155 } 3156 3157 // GetTaskSpec returns task spec 3158 func GetTaskSpec(job *batch.Job, taskName string) 
(batch.TaskSpec, bool) <span class="cov0" title="0">{ 3159 for _, ts := range job.Spec.Tasks </span><span class="cov0" title="0">{ 3160 if ts.Name == taskName </span><span class="cov0" title="0">{ 3161 return ts, true 3162 }</span> 3163 } 3164 <span class="cov0" title="0">return batch.TaskSpec{}, false</span> 3165 } 3166 3167 // MakeDomainName creates task domain name 3168 func MakeDomainName(ts batch.TaskSpec, job *batch.Job, index int) string <span class="cov0" title="0">{ 3169 hostName := ts.Template.Spec.Hostname 3170 subdomain := ts.Template.Spec.Subdomain 3171 if len(hostName) == 0 </span><span class="cov0" title="0">{ 3172 hostName = MakePodName(job.Name, ts.Name, index) 3173 }</span> 3174 <span class="cov0" title="0">if len(subdomain) == 0 </span><span class="cov0" title="0">{ 3175 subdomain = job.Name 3176 }</span> 3177 <span class="cov0" title="0">return hostName + "." + subdomain</span> 3178 } 3179 3180 // MakePodName creates pod name. 3181 func MakePodName(jobName string, taskName string, index int) string <span class="cov8" title="1">{ 3182 return fmt.Sprintf(PodNameFmt, jobName, taskName, index) 3183 }</span> 3184 3185 // GenRandomStr generate random str with specified length l. 3186 func GenRandomStr(l int) string <span class="cov0" title="0">{ 3187 str := "0123456789abcdefghijklmnopqrstuvwxyz" 3188 bytes := []byte(str) 3189 var result []byte 3190 r := rand.New(rand.NewSource(time.Now().UnixNano())) 3191 for i := 0; i < l; i++ </span><span class="cov0" title="0">{ 3192 result = append(result, bytes[r.Intn(len(bytes))]) 3193 }</span> 3194 <span class="cov0" title="0">return string(result)</span> 3195 } 3196 3197 // GenPVCName generates pvc name with job name. 3198 func GenPVCName(jobName string) string <span class="cov0" title="0">{ 3199 return fmt.Sprintf(persistentVolumeClaimFmt, jobName, GenRandomStr(12)) 3200 }</span> 3201 3202 // GetJobKeyByReq gets the key for the job request. 
3203 func GetJobKeyByReq(req *apis.Request) string <span class="cov0" title="0">{ 3204 return fmt.Sprintf("%s/%s", req.Namespace, req.JobName) 3205 }</span> 3206 3207 // GetTasklndexUnderJob return index of the task in the job. 3208 func GetTasklndexUnderJob(taskName string, job *batch.Job) int <span class="cov8" title="1">{ 3209 for index, task := range job.Spec.Tasks </span><span class="cov8" title="1">{ 3210 if task.Name == taskName </span><span class="cov8" title="1">{ 3211 return index 3212 }</span> 3213 } 3214 <span class="cov0" title="0">return -1</span> 3215 } 3216 3217 // GetPodsNameUnderTask return names of all pods in the task. 3218 func GetPodsNameUnderTask(taskName string, job *batch.Job) []string <span class="cov8" title="1">{ 3219 var res []string 3220 for _, task := range job.Spec.Tasks </span><span class="cov8" title="1">{ 3221 if task.Name == taskName </span><span class="cov8" title="1">{ 3222 for index := 0; index < int(task.Replicas); index++ </span><span class="cov8" title="1">{ 3223 res = append(res, MakePodName(job.Name, taskName, index)) 3224 }</span> 3225 <span class="cov8" title="1">break</span> 3226 } 3227 } 3228 <span class="cov8" title="1">return res</span> 3229 } 3230 </pre> 3231 3232 <pre class="file" id="file24" style="display: none">/* 3233 Copyright 2017 The Volcano Authors. 3234 3235 Licensed under the Apache License, Version 2.0 (the "License"); 3236 you may not use this file except in compliance with the License. 3237 You may obtain a copy of the License at 3238 3239 http://www.apache.org/licenses/LICENSE-2.0 3240 3241 Unless required by applicable law or agreed to in writing, software 3242 distributed under the License is distributed on an "AS IS" BASIS, 3243 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 3244 See the License for the specific language governing permissions and 3245 limitations under the License. 
3246 */ 3247 3248 package job 3249 3250 import ( 3251 "fmt" 3252 "hash" 3253 "hash/fnv" 3254 "time" 3255 3256 v1 "k8s.io/api/core/v1" 3257 "k8s.io/apimachinery/pkg/util/wait" 3258 coreinformers "k8s.io/client-go/informers/core/v1" 3259 kubeschedulinginformers "k8s.io/client-go/informers/scheduling/v1" 3260 "k8s.io/client-go/kubernetes" 3261 corev1 "k8s.io/client-go/kubernetes/typed/core/v1" 3262 corelisters "k8s.io/client-go/listers/core/v1" 3263 kubeschedulinglisters "k8s.io/client-go/listers/scheduling/v1" 3264 "k8s.io/client-go/tools/cache" 3265 "k8s.io/client-go/tools/record" 3266 "k8s.io/client-go/util/workqueue" 3267 "k8s.io/klog" 3268 3269 batchv1alpha1 "volcano.sh/apis/pkg/apis/batch/v1alpha1" 3270 busv1alpha1 "volcano.sh/apis/pkg/apis/bus/v1alpha1" 3271 vcclientset "volcano.sh/apis/pkg/client/clientset/versioned" 3272 vcscheme "volcano.sh/apis/pkg/client/clientset/versioned/scheme" 3273 informerfactory "volcano.sh/apis/pkg/client/informers/externalversions" 3274 batchinformer "volcano.sh/apis/pkg/client/informers/externalversions/batch/v1alpha1" 3275 businformer "volcano.sh/apis/pkg/client/informers/externalversions/bus/v1alpha1" 3276 schedulinginformers "volcano.sh/apis/pkg/client/informers/externalversions/scheduling/v1beta1" 3277 batchlister "volcano.sh/apis/pkg/client/listers/batch/v1alpha1" 3278 buslister "volcano.sh/apis/pkg/client/listers/bus/v1alpha1" 3279 schedulinglisters "volcano.sh/apis/pkg/client/listers/scheduling/v1beta1" 3280 "volcano.sh/volcano/pkg/controllers/apis" 3281 jobcache "volcano.sh/volcano/pkg/controllers/cache" 3282 "volcano.sh/volcano/pkg/controllers/framework" 3283 "volcano.sh/volcano/pkg/controllers/job/state" 3284 ) 3285 3286 func init() <span class="cov8" title="1">{ 3287 framework.RegisterController(&jobcontroller{}) 3288 }</span> 3289 3290 // jobcontroller the Job jobcontroller type. 
3291 type jobcontroller struct { 3292 kubeClient kubernetes.Interface 3293 vcClient vcclientset.Interface 3294 3295 jobInformer batchinformer.JobInformer 3296 podInformer coreinformers.PodInformer 3297 pvcInformer coreinformers.PersistentVolumeClaimInformer 3298 pgInformer schedulinginformers.PodGroupInformer 3299 svcInformer coreinformers.ServiceInformer 3300 cmdInformer businformer.CommandInformer 3301 pcInformer kubeschedulinginformers.PriorityClassInformer 3302 queueInformer schedulinginformers.QueueInformer 3303 3304 // A store of jobs 3305 jobLister batchlister.JobLister 3306 jobSynced func() bool 3307 3308 // A store of pods 3309 podLister corelisters.PodLister 3310 podSynced func() bool 3311 3312 pvcLister corelisters.PersistentVolumeClaimLister 3313 pvcSynced func() bool 3314 3315 // A store of podgroups 3316 pgLister schedulinglisters.PodGroupLister 3317 pgSynced func() bool 3318 3319 // A store of service 3320 svcLister corelisters.ServiceLister 3321 svcSynced func() bool 3322 3323 cmdLister buslister.CommandLister 3324 cmdSynced func() bool 3325 3326 pcLister kubeschedulinglisters.PriorityClassLister 3327 pcSynced func() bool 3328 3329 queueLister schedulinglisters.QueueLister 3330 queueSynced func() bool 3331 3332 // queue that need to sync up 3333 queueList []workqueue.RateLimitingInterface 3334 commandQueue workqueue.RateLimitingInterface 3335 cache jobcache.Cache 3336 // Job Event recorder 3337 recorder record.EventRecorder 3338 3339 errTasks workqueue.RateLimitingInterface 3340 workers uint32 3341 maxRequeueNum int 3342 } 3343 3344 func (cc *jobcontroller) Name() string <span class="cov8" title="1">{ 3345 return "job-controller" 3346 }</span> 3347 3348 // Initialize creates the new Job job controller. 
3349 func (cc *jobcontroller) Initialize(opt *framework.ControllerOption) error <span class="cov8" title="1">{ 3350 cc.kubeClient = opt.KubeClient 3351 cc.vcClient = opt.VolcanoClient 3352 3353 sharedInformers := opt.SharedInformerFactory 3354 workers := opt.WorkerNum 3355 // Initialize event client 3356 eventBroadcaster := record.NewBroadcaster() 3357 eventBroadcaster.StartLogging(klog.Infof) 3358 eventBroadcaster.StartRecordingToSink(&corev1.EventSinkImpl{Interface: cc.kubeClient.CoreV1().Events("")}) 3359 recorder := eventBroadcaster.NewRecorder(vcscheme.Scheme, v1.EventSource{Component: "vc-controller-manager"}) 3360 3361 cc.queueList = make([]workqueue.RateLimitingInterface, workers) 3362 cc.commandQueue = workqueue.NewRateLimitingQueue(workqueue.DefaultControllerRateLimiter()) 3363 cc.cache = jobcache.New() 3364 cc.errTasks = newRateLimitingQueue() 3365 cc.recorder = recorder 3366 cc.workers = workers 3367 cc.maxRequeueNum = opt.MaxRequeueNum 3368 if cc.maxRequeueNum < 0 </span><span class="cov0" title="0">{ 3369 cc.maxRequeueNum = -1 3370 }</span> 3371 3372 <span class="cov8" title="1">var i uint32 3373 for i = 0; i < workers; i++ </span><span class="cov8" title="1">{ 3374 cc.queueList[i] = workqueue.NewRateLimitingQueue(workqueue.DefaultControllerRateLimiter()) 3375 }</span> 3376 3377 <span class="cov8" title="1">cc.jobInformer = informerfactory.NewSharedInformerFactory(cc.vcClient, 0).Batch().V1alpha1().Jobs() 3378 cc.jobInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ 3379 AddFunc: cc.addJob, 3380 UpdateFunc: cc.updateJob, 3381 DeleteFunc: cc.deleteJob, 3382 }) 3383 cc.jobLister = cc.jobInformer.Lister() 3384 cc.jobSynced = cc.jobInformer.Informer().HasSynced 3385 3386 cc.cmdInformer = informerfactory.NewSharedInformerFactory(cc.vcClient, 0).Bus().V1alpha1().Commands() 3387 cc.cmdInformer.Informer().AddEventHandler( 3388 cache.FilteringResourceEventHandler{ 3389 FilterFunc: func(obj interface{}) bool </span><span class="cov0" 
title="0">{ 3390 switch v := obj.(type) </span>{ 3391 case *busv1alpha1.Command:<span class="cov0" title="0"> 3392 if v.TargetObject != nil && 3393 v.TargetObject.APIVersion == batchv1alpha1.SchemeGroupVersion.String() && 3394 v.TargetObject.Kind == "Job" </span><span class="cov0" title="0">{ 3395 return true 3396 }</span> 3397 3398 <span class="cov0" title="0">return false</span> 3399 default:<span class="cov0" title="0"> 3400 return false</span> 3401 } 3402 }, 3403 Handler: cache.ResourceEventHandlerFuncs{ 3404 AddFunc: cc.addCommand, 3405 }, 3406 }, 3407 ) 3408 <span class="cov8" title="1">cc.cmdLister = cc.cmdInformer.Lister() 3409 cc.cmdSynced = cc.cmdInformer.Informer().HasSynced 3410 3411 cc.podInformer = sharedInformers.Core().V1().Pods() 3412 cc.podInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ 3413 AddFunc: cc.addPod, 3414 UpdateFunc: cc.updatePod, 3415 DeleteFunc: cc.deletePod, 3416 }) 3417 3418 cc.podLister = cc.podInformer.Lister() 3419 cc.podSynced = cc.podInformer.Informer().HasSynced 3420 3421 cc.pvcInformer = sharedInformers.Core().V1().PersistentVolumeClaims() 3422 cc.pvcLister = cc.pvcInformer.Lister() 3423 cc.pvcSynced = cc.pvcInformer.Informer().HasSynced 3424 3425 cc.svcInformer = sharedInformers.Core().V1().Services() 3426 cc.svcLister = cc.svcInformer.Lister() 3427 cc.svcSynced = cc.svcInformer.Informer().HasSynced 3428 3429 cc.pgInformer = informerfactory.NewSharedInformerFactory(cc.vcClient, 0).Scheduling().V1beta1().PodGroups() 3430 cc.pgInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ 3431 UpdateFunc: cc.updatePodGroup, 3432 }) 3433 cc.pgLister = cc.pgInformer.Lister() 3434 cc.pgSynced = cc.pgInformer.Informer().HasSynced 3435 3436 cc.pcInformer = sharedInformers.Scheduling().V1().PriorityClasses() 3437 cc.pcLister = cc.pcInformer.Lister() 3438 cc.pcSynced = cc.pcInformer.Informer().HasSynced 3439 3440 cc.queueInformer = informerfactory.NewSharedInformerFactory(cc.vcClient, 
0).Scheduling().V1beta1().Queues() 3441 cc.queueLister = cc.queueInformer.Lister() 3442 cc.queueSynced = cc.queueInformer.Informer().HasSynced 3443 3444 // Register actions 3445 state.SyncJob = cc.syncJob 3446 state.KillJob = cc.killJob 3447 3448 return nil</span> 3449 } 3450 3451 // Run start JobController. 3452 func (cc *jobcontroller) Run(stopCh <-chan struct{}) <span class="cov0" title="0">{ 3453 go cc.jobInformer.Informer().Run(stopCh) 3454 go cc.podInformer.Informer().Run(stopCh) 3455 go cc.pvcInformer.Informer().Run(stopCh) 3456 go cc.pgInformer.Informer().Run(stopCh) 3457 go cc.svcInformer.Informer().Run(stopCh) 3458 go cc.cmdInformer.Informer().Run(stopCh) 3459 go cc.pcInformer.Informer().Run(stopCh) 3460 go cc.queueInformer.Informer().Run(stopCh) 3461 3462 cache.WaitForCacheSync(stopCh, cc.jobSynced, cc.podSynced, cc.pgSynced, 3463 cc.svcSynced, cc.cmdSynced, cc.pvcSynced, cc.pcSynced, cc.queueSynced) 3464 3465 go wait.Until(cc.handleCommands, 0, stopCh) 3466 var i uint32 3467 for i = 0; i < cc.workers; i++ </span><span class="cov0" title="0">{ 3468 go func(num uint32) </span><span class="cov0" title="0">{ 3469 wait.Until( 3470 func() </span><span class="cov0" title="0">{ 3471 cc.worker(num) 3472 }</span>, 3473 time.Second, 3474 stopCh) 3475 }(i) 3476 } 3477 3478 <span class="cov0" title="0">go cc.cache.Run(stopCh) 3479 3480 // Re-sync error tasks. 3481 go wait.Until(cc.processResyncTask, 0, stopCh) 3482 3483 klog.Infof("JobController is running ...... ")</span> 3484 } 3485 3486 func (cc *jobcontroller) worker(i uint32) <span class="cov0" title="0">{ 3487 klog.Infof("worker %d start ...... 
", i) 3488 3489 for cc.processNextReq(i) </span>{<span class="cov0" title="0"> 3490 }</span> 3491 } 3492 3493 func (cc *jobcontroller) belongsToThisRoutine(key string, count uint32) bool <span class="cov0" title="0">{ 3494 var hashVal hash.Hash32 3495 var val uint32 3496 3497 hashVal = fnv.New32() 3498 hashVal.Write([]byte(key)) 3499 3500 val = hashVal.Sum32() 3501 3502 return val%cc.workers == count 3503 }</span> 3504 3505 func (cc *jobcontroller) getWorkerQueue(key string) workqueue.RateLimitingInterface <span class="cov8" title="1">{ 3506 var hashVal hash.Hash32 3507 var val uint32 3508 3509 hashVal = fnv.New32() 3510 hashVal.Write([]byte(key)) 3511 3512 val = hashVal.Sum32() 3513 3514 queue := cc.queueList[val%cc.workers] 3515 3516 return queue 3517 }</span> 3518 3519 func (cc *jobcontroller) processNextReq(count uint32) bool <span class="cov0" title="0">{ 3520 queue := cc.queueList[count] 3521 obj, shutdown := queue.Get() 3522 if shutdown </span><span class="cov0" title="0">{ 3523 klog.Errorf("Fail to pop item from queue") 3524 return false 3525 }</span> 3526 3527 <span class="cov0" title="0">req := obj.(apis.Request) 3528 defer queue.Done(req) 3529 3530 key := jobcache.JobKeyByReq(&req) 3531 if !cc.belongsToThisRoutine(key, count) </span><span class="cov0" title="0">{ 3532 klog.Errorf("should not occur The job does not belongs to this routine key:%s, worker:%d...... ", key, count) 3533 queueLocal := cc.getWorkerQueue(key) 3534 queueLocal.Add(req) 3535 return true 3536 }</span> 3537 3538 <span class="cov0" title="0">klog.V(3).Infof("Try to handle request <%v>", req) 3539 3540 jobInfo, err := cc.cache.Get(key) 3541 if err != nil </span><span class="cov0" title="0">{ 3542 // TODO(k82cn): ignore not-ready error. 
3543 klog.Errorf("Failed to get job by <%v> from cache: %v", req, err) 3544 return true 3545 }</span> 3546 3547 <span class="cov0" title="0">st := state.NewState(jobInfo) 3548 if st == nil </span><span class="cov0" title="0">{ 3549 klog.Errorf("Invalid state <%s> of Job <%v/%v>", 3550 jobInfo.Job.Status.State, jobInfo.Job.Namespace, jobInfo.Job.Name) 3551 return true 3552 }</span> 3553 3554 <span class="cov0" title="0">action := applyPolicies(jobInfo.Job, &req) 3555 klog.V(3).Infof("Execute <%v> on Job <%s/%s> in <%s> by <%T>.", 3556 action, req.Namespace, req.JobName, jobInfo.Job.Status.State.Phase, st) 3557 3558 if action != busv1alpha1.SyncJobAction </span><span class="cov0" title="0">{ 3559 cc.recordJobEvent(jobInfo.Job.Namespace, jobInfo.Job.Name, batchv1alpha1.ExecuteAction, fmt.Sprintf( 3560 "Start to execute action %s ", action)) 3561 }</span> 3562 3563 <span class="cov0" title="0">if err := st.Execute(action); err != nil </span><span class="cov0" title="0">{ 3564 if cc.maxRequeueNum == -1 || queue.NumRequeues(req) < cc.maxRequeueNum </span><span class="cov0" title="0">{ 3565 klog.V(2).Infof("Failed to handle Job <%s/%s>: %v", 3566 jobInfo.Job.Namespace, jobInfo.Job.Name, err) 3567 // If any error, requeue it. 
3568 queue.AddRateLimited(req) 3569 return true 3570 }</span> 3571 <span class="cov0" title="0">cc.recordJobEvent(jobInfo.Job.Namespace, jobInfo.Job.Name, batchv1alpha1.ExecuteAction, fmt.Sprintf( 3572 "Job failed on action %s for retry limit reached", action)) 3573 klog.Warningf("Terminating Job <%s/%s> and releasing resources", jobInfo.Job.Namespace, jobInfo.Job.Name) 3574 if err = st.Execute(busv1alpha1.TerminateJobAction); err != nil </span><span class="cov0" title="0">{ 3575 klog.Errorf("Failed to terminate Job<%s/%s>: %v", jobInfo.Job.Namespace, jobInfo.Job.Name, err) 3576 }</span> 3577 <span class="cov0" title="0">klog.Warningf("Dropping job<%s/%s> out of the queue: %v because max retries has reached", jobInfo.Job.Namespace, jobInfo.Job.Name, err)</span> 3578 } 3579 3580 // If no error, forget it. 3581 <span class="cov0" title="0">queue.Forget(req) 3582 3583 return true</span> 3584 } 3585 </pre> 3586 3587 <pre class="file" id="file25" style="display: none">/* 3588 Copyright 2019 The Volcano Authors. 3589 3590 Licensed under the Apache License, Version 2.0 (the "License"); 3591 you may not use this file except in compliance with the License. 3592 You may obtain a copy of the License at 3593 3594 http://www.apache.org/licenses/LICENSE-2.0 3595 3596 Unless required by applicable law or agreed to in writing, software 3597 distributed under the License is distributed on an "AS IS" BASIS, 3598 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 3599 See the License for the specific language governing permissions and 3600 limitations under the License. 
3601 */ 3602 3603 package job 3604 3605 import ( 3606 "context" 3607 "fmt" 3608 "reflect" 3609 "sort" 3610 "sync" 3611 "sync/atomic" 3612 "time" 3613 3614 v1 "k8s.io/api/core/v1" 3615 apierrors "k8s.io/apimachinery/pkg/api/errors" 3616 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 3617 "k8s.io/apimachinery/pkg/util/wait" 3618 "k8s.io/klog" 3619 3620 batch "volcano.sh/apis/pkg/apis/batch/v1alpha1" 3621 "volcano.sh/apis/pkg/apis/helpers" 3622 scheduling "volcano.sh/apis/pkg/apis/scheduling/v1beta1" 3623 "volcano.sh/volcano/pkg/controllers/apis" 3624 jobhelpers "volcano.sh/volcano/pkg/controllers/job/helpers" 3625 "volcano.sh/volcano/pkg/controllers/job/state" 3626 ) 3627 3628 var calMutex sync.Mutex 3629 3630 func (cc *jobcontroller) killJob(jobInfo *apis.JobInfo, podRetainPhase state.PhaseMap, updateStatus state.UpdateStatusFn) error <span class="cov8" title="1">{ 3631 job := jobInfo.Job 3632 klog.V(3).Infof("Killing Job <%s/%s>, current version %d", job.Namespace, job.Name, job.Status.Version) 3633 defer klog.V(3).Infof("Finished Job <%s/%s> killing, current version %d", job.Namespace, job.Name, job.Status.Version) 3634 3635 if job.DeletionTimestamp != nil </span><span class="cov0" title="0">{ 3636 klog.Infof("Job <%s/%s> is terminating, skip management process.", 3637 job.Namespace, job.Name) 3638 return nil 3639 }</span> 3640 3641 <span class="cov8" title="1">var pending, running, terminating, succeeded, failed, unknown int32 3642 taskStatusCount := make(map[string]batch.TaskState) 3643 3644 var errs []error 3645 var total int 3646 3647 for _, pods := range jobInfo.Pods </span><span class="cov8" title="1">{ 3648 for _, pod := range pods </span><span class="cov8" title="1">{ 3649 total++ 3650 3651 if pod.DeletionTimestamp != nil </span><span class="cov0" title="0">{ 3652 klog.Infof("Pod <%s/%s> is terminating", pod.Namespace, pod.Name) 3653 terminating++ 3654 continue</span> 3655 } 3656 3657 <span class="cov8" title="1">maxRetry := job.Spec.MaxRetry 3658 lastRetry 
:= false 3659 if job.Status.RetryCount >= maxRetry-1 </span><span class="cov8" title="1">{ 3660 lastRetry = true 3661 }</span> 3662 3663 // Only retain the Failed and Succeeded pods at the last retry. 3664 // If it is not the last retry, kill pod as defined in `podRetainPhase`. 3665 <span class="cov8" title="1">retainPhase := podRetainPhase 3666 if lastRetry </span><span class="cov8" title="1">{ 3667 retainPhase = state.PodRetainPhaseSoft 3668 }</span> 3669 <span class="cov8" title="1">_, retain := retainPhase[pod.Status.Phase] 3670 3671 if !retain </span><span class="cov8" title="1">{ 3672 err := cc.deleteJobPod(job.Name, pod) 3673 if err == nil </span><span class="cov8" title="1">{ 3674 terminating++ 3675 continue</span> 3676 } 3677 // record the err, and then collect the pod info like retained pod 3678 <span class="cov0" title="0">errs = append(errs, err) 3679 cc.resyncTask(pod)</span> 3680 } 3681 3682 <span class="cov0" title="0">classifyAndAddUpPodBaseOnPhase(pod, &pending, &running, &succeeded, &failed, &unknown) 3683 calcPodStatus(pod, taskStatusCount)</span> 3684 } 3685 } 3686 3687 <span class="cov8" title="1">if len(errs) != 0 </span><span class="cov0" title="0">{ 3688 klog.Errorf("failed to kill pods for job %s/%s, with err %+v", job.Namespace, job.Name, errs) 3689 cc.recorder.Event(job, v1.EventTypeWarning, FailedDeletePodReason, 3690 fmt.Sprintf("Error deleting pods: %+v", errs)) 3691 return fmt.Errorf("failed to kill %d pods of %d", len(errs), total) 3692 }</span> 3693 3694 <span class="cov8" title="1">job = job.DeepCopy() 3695 // Job version is bumped only when job is killed 3696 job.Status.Version++ 3697 job.Status.Pending = pending 3698 job.Status.Running = running 3699 job.Status.Succeeded = succeeded 3700 job.Status.Failed = failed 3701 job.Status.Terminating = terminating 3702 job.Status.Unknown = unknown 3703 job.Status.TaskStatusCount = taskStatusCount 3704 3705 // Update running duration 3706 klog.V(3).Infof("Running duration is %s", 
metav1.Duration{Duration: time.Since(jobInfo.Job.CreationTimestamp.Time)}.ToUnstructured()) 3707 job.Status.RunningDuration = &metav1.Duration{Duration: time.Since(jobInfo.Job.CreationTimestamp.Time)} 3708 3709 if updateStatus != nil </span><span class="cov8" title="1">{ 3710 if updateStatus(&job.Status) </span><span class="cov8" title="1">{ 3711 job.Status.State.LastTransitionTime = metav1.Now() 3712 jobCondition := newCondition(job.Status.State.Phase, &job.Status.State.LastTransitionTime) 3713 job.Status.Conditions = append(job.Status.Conditions, jobCondition) 3714 }</span> 3715 } 3716 3717 // must be called before update job status 3718 <span class="cov8" title="1">if err := cc.pluginOnJobDelete(job); err != nil </span><span class="cov0" title="0">{ 3719 return err 3720 }</span> 3721 3722 // Update Job status 3723 <span class="cov8" title="1">newJob, err := cc.vcClient.BatchV1alpha1().Jobs(job.Namespace).UpdateStatus(context.TODO(), job, metav1.UpdateOptions{}) 3724 if err != nil </span><span class="cov0" title="0">{ 3725 klog.Errorf("Failed to update status of Job %v/%v: %v", 3726 job.Namespace, job.Name, err) 3727 return err 3728 }</span> 3729 <span class="cov8" title="1">if e := cc.cache.Update(newJob); e != nil </span><span class="cov0" title="0">{ 3730 klog.Errorf("KillJob - Failed to update Job %v/%v in cache: %v", 3731 newJob.Namespace, newJob.Name, e) 3732 return e 3733 }</span> 3734 3735 // Delete PodGroup 3736 <span class="cov8" title="1">if err := cc.vcClient.SchedulingV1beta1().PodGroups(job.Namespace).Delete(context.TODO(), job.Name, metav1.DeleteOptions{}); err != nil </span><span class="cov8" title="1">{ 3737 if !apierrors.IsNotFound(err) </span><span class="cov0" title="0">{ 3738 klog.Errorf("Failed to delete PodGroup of Job %v/%v: %v", 3739 job.Namespace, job.Name, err) 3740 return err 3741 }</span> 3742 } 3743 3744 // NOTE(k82cn): DO NOT delete input/output until job is deleted. 
3745 3746 <span class="cov8" title="1">return nil</span> 3747 } 3748 3749 func (cc *jobcontroller) initiateJob(job *batch.Job) (*batch.Job, error) <span class="cov8" title="1">{ 3750 klog.V(3).Infof("Starting to initiate Job <%s/%s>", job.Namespace, job.Name) 3751 jobInstance, err := cc.initJobStatus(job) 3752 if err != nil </span><span class="cov0" title="0">{ 3753 cc.recorder.Event(job, v1.EventTypeWarning, string(batch.JobStatusError), 3754 fmt.Sprintf("Failed to initialize job status, err: %v", err)) 3755 return nil, err 3756 }</span> 3757 3758 <span class="cov8" title="1">if err := cc.pluginOnJobAdd(jobInstance); err != nil </span><span class="cov0" title="0">{ 3759 cc.recorder.Event(job, v1.EventTypeWarning, string(batch.PluginError), 3760 fmt.Sprintf("Execute plugin when job add failed, err: %v", err)) 3761 return nil, err 3762 }</span> 3763 3764 <span class="cov8" title="1">newJob, err := cc.createJobIOIfNotExist(jobInstance) 3765 if err != nil </span><span class="cov0" title="0">{ 3766 cc.recorder.Event(job, v1.EventTypeWarning, string(batch.PVCError), 3767 fmt.Sprintf("Failed to create PVC, err: %v", err)) 3768 return nil, err 3769 }</span> 3770 3771 <span class="cov8" title="1">if err := cc.createOrUpdatePodGroup(newJob); err != nil </span><span class="cov0" title="0">{ 3772 cc.recorder.Event(job, v1.EventTypeWarning, string(batch.PodGroupError), 3773 fmt.Sprintf("Failed to create PodGroup, err: %v", err)) 3774 return nil, err 3775 }</span> 3776 3777 <span class="cov8" title="1">return newJob, nil</span> 3778 } 3779 3780 func (cc *jobcontroller) initOnJobUpdate(job *batch.Job) error <span class="cov8" title="1">{ 3781 klog.V(3).Infof("Starting to initiate Job <%s/%s> on update", job.Namespace, job.Name) 3782 3783 if err := cc.pluginOnJobUpdate(job); err != nil </span><span class="cov0" title="0">{ 3784 cc.recorder.Event(job, v1.EventTypeWarning, string(batch.PluginError), 3785 fmt.Sprintf("Execute plugin when job add failed, err: %v", err)) 3786 return 
err 3787 }</span> 3788 3789 <span class="cov8" title="1">if err := cc.createOrUpdatePodGroup(job); err != nil </span><span class="cov0" title="0">{ 3790 cc.recorder.Event(job, v1.EventTypeWarning, string(batch.PodGroupError), 3791 fmt.Sprintf("Failed to create PodGroup, err: %v", err)) 3792 return err 3793 }</span> 3794 3795 <span class="cov8" title="1">return nil</span> 3796 } 3797 3798 func (cc *jobcontroller) GetQueueInfo(queue string) (*scheduling.Queue, error) <span class="cov0" title="0">{ 3799 queueInfo, err := cc.queueLister.Get(queue) 3800 if err != nil </span><span class="cov0" title="0">{ 3801 klog.Errorf("Failed to get queue from queueLister, error: %s", err.Error()) 3802 }</span> 3803 3804 <span class="cov0" title="0">return queueInfo, err</span> 3805 } 3806 3807 func (cc *jobcontroller) syncJob(jobInfo *apis.JobInfo, updateStatus state.UpdateStatusFn) error <span class="cov8" title="1">{ 3808 job := jobInfo.Job 3809 klog.V(3).Infof("Starting to sync up Job <%s/%s>, current version %d", job.Namespace, job.Name, job.Status.Version) 3810 defer klog.V(3).Infof("Finished Job <%s/%s> sync up, current version %d", job.Namespace, job.Name, job.Status.Version) 3811 3812 if jobInfo.Job.DeletionTimestamp != nil </span><span class="cov0" title="0">{ 3813 klog.Infof("Job <%s/%s> is terminating, skip management process.", 3814 jobInfo.Job.Namespace, jobInfo.Job.Name) 3815 return nil 3816 }</span> 3817 3818 // deep copy job to prevent mutate it 3819 <span class="cov8" title="1">job = job.DeepCopy() 3820 3821 // Find queue that job belongs to, and check if the queue has forwarding metadata 3822 queueInfo, err := cc.GetQueueInfo(job.Spec.Queue) 3823 if err != nil </span><span class="cov0" title="0">{ 3824 return err 3825 }</span> 3826 3827 <span class="cov8" title="1">var jobForwarding bool 3828 if len(queueInfo.Spec.ExtendClusters) != 0 </span><span class="cov0" title="0">{ 3829 jobForwarding = true 3830 if len(job.Annotations) == 0 </span><span class="cov0" 
title="0">{ 3831 job.Annotations = make(map[string]string) 3832 }</span> 3833 <span class="cov0" title="0">job.Annotations[batch.JobForwardingKey] = "true" 3834 job, err = cc.vcClient.BatchV1alpha1().Jobs(job.Namespace).Update(context.TODO(), job, metav1.UpdateOptions{}) 3835 if err != nil </span><span class="cov0" title="0">{ 3836 klog.Errorf("failed to update job: %s/%s, error: %s", job.Namespace, job.Name, err.Error()) 3837 return err 3838 }</span> 3839 } 3840 3841 // Skip job initiation if job is already initiated 3842 <span class="cov8" title="1">if !isInitiated(job) </span><span class="cov8" title="1">{ 3843 if job, err = cc.initiateJob(job); err != nil </span><span class="cov0" title="0">{ 3844 return err 3845 }</span> 3846 } else<span class="cov8" title="1"> { 3847 // TODO: optimize this call it only when scale up/down 3848 if err = cc.initOnJobUpdate(job); err != nil </span><span class="cov0" title="0">{ 3849 return err 3850 }</span> 3851 } 3852 3853 <span class="cov8" title="1">if len(queueInfo.Spec.ExtendClusters) != 0 </span><span class="cov0" title="0">{ 3854 jobForwarding = true 3855 job.Annotations[batch.JobForwardingKey] = "true" 3856 _, err := cc.vcClient.BatchV1alpha1().Jobs(job.Namespace).Update(context.TODO(), job, metav1.UpdateOptions{}) 3857 if err != nil </span><span class="cov0" title="0">{ 3858 klog.Errorf("failed to update job: %s/%s, error: %s", job.Namespace, job.Name, err.Error()) 3859 return err 3860 }</span> 3861 } 3862 3863 <span class="cov8" title="1">var syncTask bool 3864 if pg, _ := cc.pgLister.PodGroups(job.Namespace).Get(job.Name); pg != nil </span><span class="cov8" title="1">{ 3865 if pg.Status.Phase != "" && pg.Status.Phase != scheduling.PodGroupPending </span><span class="cov8" title="1">{ 3866 syncTask = true 3867 }</span> 3868 3869 <span class="cov8" title="1">for _, condition := range pg.Status.Conditions </span><span class="cov0" title="0">{ 3870 if condition.Type == scheduling.PodGroupUnschedulableType </span><span 
class="cov0" title="0">{ 3871 cc.recorder.Eventf(job, v1.EventTypeWarning, string(batch.PodGroupPending), 3872 fmt.Sprintf("PodGroup %s:%s unschedule,reason: %s", job.Namespace, job.Name, condition.Message)) 3873 }</span> 3874 } 3875 } 3876 3877 <span class="cov8" title="1">var jobCondition batch.JobCondition 3878 if !syncTask </span><span class="cov8" title="1">{ 3879 if updateStatus != nil </span><span class="cov8" title="1">{ 3880 if updateStatus(&job.Status) </span><span class="cov8" title="1">{ 3881 job.Status.State.LastTransitionTime = metav1.Now() 3882 jobCondition = newCondition(job.Status.State.Phase, &job.Status.State.LastTransitionTime) 3883 job.Status.Conditions = append(job.Status.Conditions, jobCondition) 3884 }</span> 3885 } 3886 <span class="cov8" title="1">newJob, err := cc.vcClient.BatchV1alpha1().Jobs(job.Namespace).UpdateStatus(context.TODO(), job, metav1.UpdateOptions{}) 3887 if err != nil </span><span class="cov0" title="0">{ 3888 klog.Errorf("Failed to update status of Job %v/%v: %v", 3889 job.Namespace, job.Name, err) 3890 return err 3891 }</span> 3892 <span class="cov8" title="1">if e := cc.cache.Update(newJob); e != nil </span><span class="cov0" title="0">{ 3893 klog.Errorf("SyncJob - Failed to update Job %v/%v in cache: %v", 3894 newJob.Namespace, newJob.Name, e) 3895 return e 3896 }</span> 3897 <span class="cov8" title="1">return nil</span> 3898 } 3899 3900 <span class="cov8" title="1">var running, pending, terminating, succeeded, failed, unknown int32 3901 taskStatusCount := make(map[string]batch.TaskState) 3902 3903 podToCreate := make(map[string][]*v1.Pod) 3904 var podToDelete []*v1.Pod 3905 var creationErrs []error 3906 var deletionErrs []error 3907 appendMutex := sync.Mutex{} 3908 3909 appendError := func(container *[]error, err error) </span><span class="cov0" title="0">{ 3910 appendMutex.Lock() 3911 defer appendMutex.Unlock() 3912 *container = append(*container, err) 3913 }</span> 3914 3915 <span class="cov8" 
title="1">waitCreationGroup := sync.WaitGroup{} 3916 3917 for _, ts := range job.Spec.Tasks </span><span class="cov8" title="1">{ 3918 ts.Template.Name = ts.Name 3919 tc := ts.Template.DeepCopy() 3920 name := ts.Template.Name 3921 3922 pods, found := jobInfo.Pods[name] 3923 if !found </span><span class="cov0" title="0">{ 3924 pods = map[string]*v1.Pod{} 3925 }</span> 3926 3927 <span class="cov8" title="1">var podToCreateEachTask []*v1.Pod 3928 for i := 0; i < int(ts.Replicas); i++ </span><span class="cov8" title="1">{ 3929 podName := fmt.Sprintf(jobhelpers.PodNameFmt, job.Name, name, i) 3930 if pod, found := pods[podName]; !found </span><span class="cov8" title="1">{ 3931 newPod := createJobPod(job, tc, ts.TopologyPolicy, i, jobForwarding) 3932 if err := cc.pluginOnPodCreate(job, newPod); err != nil </span><span class="cov0" title="0">{ 3933 return err 3934 }</span> 3935 <span class="cov8" title="1">podToCreateEachTask = append(podToCreateEachTask, newPod) 3936 waitCreationGroup.Add(1)</span> 3937 } else<span class="cov8" title="1"> { 3938 delete(pods, podName) 3939 if pod.DeletionTimestamp != nil </span><span class="cov0" title="0">{ 3940 klog.Infof("Pod <%s/%s> is terminating", pod.Namespace, pod.Name) 3941 atomic.AddInt32(&terminating, 1) 3942 continue</span> 3943 } 3944 3945 <span class="cov8" title="1">classifyAndAddUpPodBaseOnPhase(pod, &pending, &running, &succeeded, &failed, &unknown) 3946 calcPodStatus(pod, taskStatusCount)</span> 3947 } 3948 } 3949 <span class="cov8" title="1">podToCreate[ts.Name] = podToCreateEachTask 3950 for _, pod := range pods </span><span class="cov0" title="0">{ 3951 podToDelete = append(podToDelete, pod) 3952 }</span> 3953 } 3954 3955 <span class="cov8" title="1">for taskName, podToCreateEachTask := range podToCreate </span><span class="cov8" title="1">{ 3956 if len(podToCreateEachTask) == 0 </span><span class="cov0" title="0">{ 3957 continue</span> 3958 } 3959 <span class="cov8" title="1">go func(taskName string, 
podToCreateEachTask []*v1.Pod) </span><span class="cov8" title="1">{ 3960 taskIndex := jobhelpers.GetTasklndexUnderJob(taskName, job) 3961 if job.Spec.Tasks[taskIndex].DependsOn != nil </span><span class="cov0" title="0">{ 3962 cc.waitDependsOnTaskMeetCondition(taskName, taskIndex, podToCreateEachTask, job) 3963 }</span> 3964 3965 <span class="cov8" title="1">for _, pod := range podToCreateEachTask </span><span class="cov8" title="1">{ 3966 go func(pod *v1.Pod) </span><span class="cov8" title="1">{ 3967 defer waitCreationGroup.Done() 3968 newPod, err := cc.kubeClient.CoreV1().Pods(pod.Namespace).Create(context.TODO(), pod, metav1.CreateOptions{}) 3969 if err != nil && !apierrors.IsAlreadyExists(err) </span><span class="cov0" title="0">{ 3970 // Failed to create Pod, waitCreationGroup a moment and then create it again 3971 // This is to ensure all podsMap under the same Job created 3972 // So gang-scheduling could schedule the Job successfully 3973 klog.Errorf("Failed to create pod %s for Job %s, err %#v", 3974 pod.Name, job.Name, err) 3975 appendError(&creationErrs, fmt.Errorf("failed to create pod %s, err: %#v", pod.Name, err)) 3976 }</span> else<span class="cov8" title="1"> { 3977 classifyAndAddUpPodBaseOnPhase(newPod, &pending, &running, &succeeded, &failed, &unknown) 3978 calcPodStatus(pod, taskStatusCount) 3979 klog.V(5).Infof("Created Task <%s> of Job <%s/%s>", 3980 pod.Name, job.Namespace, job.Name) 3981 }</span> 3982 }(pod) 3983 } 3984 }(taskName, podToCreateEachTask) 3985 } 3986 3987 <span class="cov8" title="1">waitCreationGroup.Wait() 3988 3989 if len(creationErrs) != 0 </span><span class="cov0" title="0">{ 3990 cc.recorder.Event(job, v1.EventTypeWarning, FailedCreatePodReason, 3991 fmt.Sprintf("Error creating pods: %+v", creationErrs)) 3992 return fmt.Errorf("failed to create %d pods of %d", len(creationErrs), len(podToCreate)) 3993 }</span> 3994 3995 // Delete pods when scale down. 
3996 <span class="cov8" title="1">waitDeletionGroup := sync.WaitGroup{} 3997 waitDeletionGroup.Add(len(podToDelete)) 3998 for _, pod := range podToDelete </span><span class="cov0" title="0">{ 3999 go func(pod *v1.Pod) </span><span class="cov0" title="0">{ 4000 defer waitDeletionGroup.Done() 4001 err := cc.deleteJobPod(job.Name, pod) 4002 if err != nil </span><span class="cov0" title="0">{ 4003 // Failed to delete Pod, waitCreationGroup a moment and then create it again 4004 // This is to ensure all podsMap under the same Job created 4005 // So gang-scheduling could schedule the Job successfully 4006 klog.Errorf("Failed to delete pod %s for Job %s, err %#v", 4007 pod.Name, job.Name, err) 4008 appendError(&deletionErrs, err) 4009 cc.resyncTask(pod) 4010 }</span> else<span class="cov0" title="0"> { 4011 klog.V(3).Infof("Deleted Task <%s> of Job <%s/%s>", 4012 pod.Name, job.Namespace, job.Name) 4013 atomic.AddInt32(&terminating, 1) 4014 }</span> 4015 }(pod) 4016 } 4017 <span class="cov8" title="1">waitDeletionGroup.Wait() 4018 4019 if len(deletionErrs) != 0 </span><span class="cov0" title="0">{ 4020 cc.recorder.Event(job, v1.EventTypeWarning, FailedDeletePodReason, 4021 fmt.Sprintf("Error deleting pods: %+v", deletionErrs)) 4022 return fmt.Errorf("failed to delete %d pods of %d", len(deletionErrs), len(podToDelete)) 4023 }</span> 4024 <span class="cov8" title="1">job.Status = batch.JobStatus{ 4025 State: job.Status.State, 4026 4027 Pending: pending, 4028 Running: running, 4029 Succeeded: succeeded, 4030 Failed: failed, 4031 Terminating: terminating, 4032 Unknown: unknown, 4033 Version: job.Status.Version, 4034 MinAvailable: job.Spec.MinAvailable, 4035 TaskStatusCount: taskStatusCount, 4036 ControlledResources: job.Status.ControlledResources, 4037 Conditions: job.Status.Conditions, 4038 RetryCount: job.Status.RetryCount, 4039 } 4040 4041 if updateStatus != nil </span><span class="cov0" title="0">{ 4042 if updateStatus(&job.Status) </span><span class="cov0" title="0">{ 
4043 job.Status.State.LastTransitionTime = metav1.Now() 4044 jobCondition = newCondition(job.Status.State.Phase, &job.Status.State.LastTransitionTime) 4045 job.Status.Conditions = append(job.Status.Conditions, jobCondition) 4046 }</span> 4047 } 4048 <span class="cov8" title="1">newJob, err := cc.vcClient.BatchV1alpha1().Jobs(job.Namespace).UpdateStatus(context.TODO(), job, metav1.UpdateOptions{}) 4049 if err != nil </span><span class="cov0" title="0">{ 4050 klog.Errorf("Failed to update status of Job %v/%v: %v", 4051 job.Namespace, job.Name, err) 4052 return err 4053 }</span> 4054 <span class="cov8" title="1">if e := cc.cache.Update(newJob); e != nil </span><span class="cov0" title="0">{ 4055 klog.Errorf("SyncJob - Failed to update Job %v/%v in cache: %v", 4056 newJob.Namespace, newJob.Name, e) 4057 return e 4058 }</span> 4059 4060 <span class="cov8" title="1">return nil</span> 4061 } 4062 4063 func (cc *jobcontroller) waitDependsOnTaskMeetCondition(taskName string, taskIndex int, podToCreateEachTask []*v1.Pod, job *batch.Job) <span class="cov0" title="0">{ 4064 if job.Spec.Tasks[taskIndex].DependsOn != nil </span><span class="cov0" title="0">{ 4065 dependsOn := *job.Spec.Tasks[taskIndex].DependsOn 4066 if len(dependsOn.Name) > 1 && dependsOn.Iteration == batch.IterationAny </span><span class="cov0" title="0">{ 4067 wait.PollInfinite(detectionPeriodOfDependsOntask, func() (bool, error) </span><span class="cov0" title="0">{ 4068 for _, task := range dependsOn.Name </span><span class="cov0" title="0">{ 4069 if cc.isDependsOnPodsReady(task, job) </span><span class="cov0" title="0">{ 4070 return true, nil 4071 }</span> 4072 } 4073 <span class="cov0" title="0">return false, nil</span> 4074 }) 4075 } else<span class="cov0" title="0"> { 4076 for _, dependsOnTask := range dependsOn.Name </span><span class="cov0" title="0">{ 4077 wait.PollInfinite(detectionPeriodOfDependsOntask, func() (bool, error) </span><span class="cov0" title="0">{ 4078 if 
cc.isDependsOnPodsReady(dependsOnTask, job) </span><span class="cov0" title="0">{ 4079 return true, nil 4080 }</span> 4081 <span class="cov0" title="0">return false, nil</span> 4082 }) 4083 } 4084 } 4085 } 4086 } 4087 4088 func (cc *jobcontroller) isDependsOnPodsReady(task string, job *batch.Job) bool <span class="cov0" title="0">{ 4089 dependsOnPods := jobhelpers.GetPodsNameUnderTask(task, job) 4090 dependsOnTaskIndex := jobhelpers.GetTasklndexUnderJob(task, job) 4091 runningPodCount := 0 4092 for _, podName := range dependsOnPods </span><span class="cov0" title="0">{ 4093 pod, err := cc.podLister.Pods(job.Namespace).Get(podName) 4094 if err != nil </span><span class="cov0" title="0">{ 4095 klog.Errorf("Failed to get pod %v/%v %v", job.Namespace, podName, err) 4096 continue</span> 4097 } 4098 4099 <span class="cov0" title="0">if pod.Status.Phase != v1.PodRunning && pod.Status.Phase != v1.PodSucceeded </span><span class="cov0" title="0">{ 4100 klog.V(5).Infof("Sequential state, pod %v/%v of depends on tasks is not running", pod.Namespace, pod.Name) 4101 continue</span> 4102 } 4103 4104 <span class="cov0" title="0">allContainerReady := true 4105 for _, containerStatus := range pod.Status.ContainerStatuses </span><span class="cov0" title="0">{ 4106 if !containerStatus.Ready </span><span class="cov0" title="0">{ 4107 allContainerReady = false 4108 break</span> 4109 } 4110 } 4111 <span class="cov0" title="0">if allContainerReady </span><span class="cov0" title="0">{ 4112 runningPodCount++ 4113 }</span> 4114 } 4115 <span class="cov0" title="0">dependsOnTaskMinReplicas := job.Spec.Tasks[dependsOnTaskIndex].MinAvailable 4116 if dependsOnTaskMinReplicas != nil </span><span class="cov0" title="0">{ 4117 if runningPodCount < int(*dependsOnTaskMinReplicas) </span><span class="cov0" title="0">{ 4118 klog.V(5).Infof("In a depends on startup state, there are already %d pods running, which is less than the minimum number of runs", runningPodCount) 4119 return false 4120 }</span> 
4121 } 4122 <span class="cov0" title="0">return true</span> 4123 } 4124 4125 func (cc *jobcontroller) createJobIOIfNotExist(job *batch.Job) (*batch.Job, error) <span class="cov8" title="1">{ 4126 // If PVC does not exist, create them for Job. 4127 var needUpdate bool 4128 if job.Status.ControlledResources == nil </span><span class="cov8" title="1">{ 4129 job.Status.ControlledResources = make(map[string]string) 4130 }</span> 4131 <span class="cov8" title="1">for index, volume := range job.Spec.Volumes </span><span class="cov8" title="1">{ 4132 vcName := volume.VolumeClaimName 4133 if len(vcName) == 0 </span><span class="cov0" title="0">{ 4134 // NOTE(k82cn): Ensure never have duplicated generated names. 4135 for </span><span class="cov0" title="0">{ 4136 vcName = jobhelpers.GenPVCName(job.Name) 4137 exist, err := cc.checkPVCExist(job, vcName) 4138 if err != nil </span><span class="cov0" title="0">{ 4139 return job, err 4140 }</span> 4141 <span class="cov0" title="0">if exist </span><span class="cov0" title="0">{ 4142 continue</span> 4143 } 4144 <span class="cov0" title="0">job.Spec.Volumes[index].VolumeClaimName = vcName 4145 needUpdate = true 4146 break</span> 4147 } 4148 // TODO: check VolumeClaim must be set if VolumeClaimName is empty 4149 <span class="cov0" title="0">if volume.VolumeClaim != nil </span><span class="cov0" title="0">{ 4150 if err := cc.createPVC(job, vcName, volume.VolumeClaim); err != nil </span><span class="cov0" title="0">{ 4151 return job, err 4152 }</span> 4153 } 4154 } else<span class="cov8" title="1"> { 4155 exist, err := cc.checkPVCExist(job, vcName) 4156 if err != nil </span><span class="cov0" title="0">{ 4157 return job, err 4158 }</span> 4159 <span class="cov8" title="1">if !exist </span><span class="cov8" title="1">{ 4160 return job, fmt.Errorf("pvc %s is not found, the job will be in the Pending state until the PVC is created", vcName) 4161 }</span> 4162 } 4163 <span class="cov0" 
title="0">job.Status.ControlledResources["volume-pvc-"+vcName] = vcName</span> 4164 } 4165 <span class="cov8" title="1">if needUpdate </span><span class="cov0" title="0">{ 4166 newJob, err := cc.vcClient.BatchV1alpha1().Jobs(job.Namespace).Update(context.TODO(), job, metav1.UpdateOptions{}) 4167 if err != nil </span><span class="cov0" title="0">{ 4168 klog.Errorf("Failed to update Job %v/%v for volume claim name: %v ", 4169 job.Namespace, job.Name, err) 4170 return job, err 4171 }</span> 4172 4173 <span class="cov0" title="0">newJob.Status = job.Status 4174 return newJob, err</span> 4175 } 4176 <span class="cov8" title="1">return job, nil</span> 4177 } 4178 4179 func (cc *jobcontroller) checkPVCExist(job *batch.Job, pvc string) (bool, error) <span class="cov8" title="1">{ 4180 if _, err := cc.pvcLister.PersistentVolumeClaims(job.Namespace).Get(pvc); err != nil </span><span class="cov8" title="1">{ 4181 if apierrors.IsNotFound(err) </span><span class="cov8" title="1">{ 4182 return false, nil 4183 }</span> 4184 <span class="cov0" title="0">klog.V(3).Infof("Failed to get PVC %s for job <%s/%s>: %v", 4185 pvc, job.Namespace, job.Name, err) 4186 return false, err</span> 4187 } 4188 <span class="cov0" title="0">return true, nil</span> 4189 } 4190 4191 func (cc *jobcontroller) createPVC(job *batch.Job, vcName string, volumeClaim *v1.PersistentVolumeClaimSpec) error <span class="cov8" title="1">{ 4192 pvc := &v1.PersistentVolumeClaim{ 4193 ObjectMeta: metav1.ObjectMeta{ 4194 Namespace: job.Namespace, 4195 Name: vcName, 4196 OwnerReferences: []metav1.OwnerReference{ 4197 *metav1.NewControllerRef(job, helpers.JobKind), 4198 }, 4199 }, 4200 Spec: *volumeClaim, 4201 } 4202 4203 klog.V(3).Infof("Try to create PVC: %v", pvc) 4204 4205 if _, e := cc.kubeClient.CoreV1().PersistentVolumeClaims(job.Namespace).Create(context.TODO(), pvc, metav1.CreateOptions{}); e != nil </span><span class="cov0" title="0">{ 4206 klog.V(3).Infof("Failed to create PVC for Job <%s/%s>: %v", 4207 
job.Namespace, job.Name, e) 4208 return e 4209 }</span> 4210 <span class="cov8" title="1">return nil</span> 4211 } 4212 4213 func (cc *jobcontroller) createOrUpdatePodGroup(job *batch.Job) error <span class="cov8" title="1">{ 4214 // If PodGroup does not exist, create one for Job. 4215 pg, err := cc.pgLister.PodGroups(job.Namespace).Get(job.Name) 4216 if err != nil </span><span class="cov8" title="1">{ 4217 if !apierrors.IsNotFound(err) </span><span class="cov0" title="0">{ 4218 klog.Errorf("Failed to get PodGroup for Job <%s/%s>: %v", 4219 job.Namespace, job.Name, err) 4220 return err 4221 }</span> 4222 4223 <span class="cov8" title="1">minTaskMember := map[string]int32{} 4224 for _, task := range job.Spec.Tasks </span><span class="cov8" title="1">{ 4225 if task.MinAvailable != nil </span><span class="cov0" title="0">{ 4226 minTaskMember[task.Name] = *task.MinAvailable 4227 }</span> else<span class="cov8" title="1"> { 4228 minTaskMember[task.Name] = task.Replicas 4229 }</span> 4230 } 4231 4232 <span class="cov8" title="1">pg := &scheduling.PodGroup{ 4233 ObjectMeta: metav1.ObjectMeta{ 4234 Namespace: job.Namespace, 4235 Name: job.Name, 4236 Annotations: job.Annotations, 4237 Labels: job.Labels, 4238 OwnerReferences: []metav1.OwnerReference{ 4239 *metav1.NewControllerRef(job, helpers.JobKind), 4240 }, 4241 }, 4242 Spec: scheduling.PodGroupSpec{ 4243 MinMember: job.Spec.MinAvailable, 4244 MinTaskMember: minTaskMember, 4245 Queue: job.Spec.Queue, 4246 MinResources: cc.calcPGMinResources(job), 4247 PriorityClassName: job.Spec.PriorityClassName, 4248 }, 4249 } 4250 4251 if _, err = cc.vcClient.SchedulingV1beta1().PodGroups(job.Namespace).Create(context.TODO(), pg, metav1.CreateOptions{}); err != nil </span><span class="cov0" title="0">{ 4252 if !apierrors.IsAlreadyExists(err) </span><span class="cov0" title="0">{ 4253 klog.Errorf("Failed to create PodGroup for Job <%s/%s>: %v", 4254 job.Namespace, job.Name, err) 4255 return err 4256 }</span> 4257 } 4258 <span 
class="cov8" title="1">return nil</span> 4259 } 4260 4261 <span class="cov8" title="1">pgShouldUpdate := false 4262 if pg.Spec.PriorityClassName != job.Spec.PriorityClassName </span><span class="cov8" title="1">{ 4263 pg.Spec.PriorityClassName = job.Spec.PriorityClassName 4264 pgShouldUpdate = true 4265 }</span> 4266 4267 <span class="cov8" title="1">minResources := cc.calcPGMinResources(job) 4268 if pg.Spec.MinMember != job.Spec.MinAvailable || !reflect.DeepEqual(pg.Spec.MinResources, minResources) </span><span class="cov0" title="0">{ 4269 pg.Spec.MinMember = job.Spec.MinAvailable 4270 pg.Spec.MinResources = minResources 4271 pgShouldUpdate = true 4272 }</span> 4273 4274 <span class="cov8" title="1">if pg.Spec.MinTaskMember == nil </span><span class="cov8" title="1">{ 4275 pgShouldUpdate = true 4276 pg.Spec.MinTaskMember = make(map[string]int32) 4277 }</span> 4278 4279 <span class="cov8" title="1">for _, task := range job.Spec.Tasks </span><span class="cov8" title="1">{ 4280 if task.MinAvailable == nil </span><span class="cov8" title="1">{ 4281 continue</span> 4282 } 4283 4284 <span class="cov0" title="0">if taskMember, ok := pg.Spec.MinTaskMember[task.Name]; !ok </span><span class="cov0" title="0">{ 4285 pgShouldUpdate = true 4286 pg.Spec.MinTaskMember[task.Name] = *task.MinAvailable 4287 }</span> else<span class="cov0" title="0"> { 4288 if taskMember == *task.MinAvailable </span><span class="cov0" title="0">{ 4289 continue</span> 4290 } 4291 4292 <span class="cov0" title="0">pgShouldUpdate = true 4293 pg.Spec.MinTaskMember[task.Name] = *task.MinAvailable</span> 4294 } 4295 } 4296 4297 <span class="cov8" title="1">if !pgShouldUpdate </span><span class="cov8" title="1">{ 4298 return nil 4299 }</span> 4300 4301 <span class="cov8" title="1">_, err = cc.vcClient.SchedulingV1beta1().PodGroups(job.Namespace).Update(context.TODO(), pg, metav1.UpdateOptions{}) 4302 if err != nil </span><span class="cov0" title="0">{ 4303 klog.V(3).Infof("Failed to update PodGroup for 
Job <%s/%s>: %v", 4304 job.Namespace, job.Name, err) 4305 }</span> 4306 <span class="cov8" title="1">return err</span> 4307 } 4308 4309 func (cc *jobcontroller) deleteJobPod(jobName string, pod *v1.Pod) error <span class="cov8" title="1">{ 4310 err := cc.kubeClient.CoreV1().Pods(pod.Namespace).Delete(context.TODO(), pod.Name, metav1.DeleteOptions{}) 4311 if err != nil && !apierrors.IsNotFound(err) </span><span class="cov0" title="0">{ 4312 klog.Errorf("Failed to delete pod %s/%s for Job %s, err %#v", 4313 pod.Namespace, pod.Name, jobName, err) 4314 4315 return fmt.Errorf("failed to delete pod %s, err %#v", pod.Name, err) 4316 }</span> 4317 4318 <span class="cov8" title="1">return nil</span> 4319 } 4320 4321 func (cc *jobcontroller) calcPGMinResources(job *batch.Job) *v1.ResourceList <span class="cov8" title="1">{ 4322 // sort task by priorityClasses 4323 var tasksPriority TasksPriority 4324 for _, task := range job.Spec.Tasks </span><span class="cov8" title="1">{ 4325 tp := TaskPriority{0, task} 4326 pc := task.Template.Spec.PriorityClassName 4327 4328 priorityClass, err := cc.pcLister.Get(pc) 4329 if err != nil || priorityClass == nil </span><span class="cov8" title="1">{ 4330 klog.Warningf("Ignore task %s priority class %s: %v", task.Name, pc, err) 4331 }</span> else<span class="cov0" title="0"> { 4332 tp.priority = priorityClass.Value 4333 }</span> 4334 4335 <span class="cov8" title="1">tasksPriority = append(tasksPriority, tp)</span> 4336 } 4337 4338 <span class="cov8" title="1">sort.Sort(tasksPriority) 4339 4340 minAvailableTasksRes := v1.ResourceList{} 4341 podCnt := int32(0) 4342 for _, task := range tasksPriority </span><span class="cov8" title="1">{ 4343 for i := int32(0); i < task.Replicas; i++ </span><span class="cov8" title="1">{ 4344 if podCnt >= job.Spec.MinAvailable </span><span class="cov8" title="1">{ 4345 break</span> 4346 } 4347 <span class="cov0" title="0">podCnt++ 4348 for _, c := range task.Template.Spec.Containers </span><span class="cov0" 
title="0">{ 4349 addResourceList(minAvailableTasksRes, c.Resources.Requests, c.Resources.Limits) 4350 }</span> 4351 } 4352 } 4353 4354 <span class="cov8" title="1">return &minAvailableTasksRes</span> 4355 } 4356 4357 func (cc *jobcontroller) initJobStatus(job *batch.Job) (*batch.Job, error) <span class="cov8" title="1">{ 4358 if job.Status.State.Phase != "" </span><span class="cov8" title="1">{ 4359 return job, nil 4360 }</span> 4361 4362 <span class="cov0" title="0">job.Status.State.Phase = batch.Pending 4363 job.Status.State.LastTransitionTime = metav1.Now() 4364 job.Status.MinAvailable = job.Spec.MinAvailable 4365 jobCondition := newCondition(job.Status.State.Phase, &job.Status.State.LastTransitionTime) 4366 job.Status.Conditions = append(job.Status.Conditions, jobCondition) 4367 newJob, err := cc.vcClient.BatchV1alpha1().Jobs(job.Namespace).UpdateStatus(context.TODO(), job, metav1.UpdateOptions{}) 4368 if err != nil </span><span class="cov0" title="0">{ 4369 klog.Errorf("Failed to update status of Job %v/%v: %v", 4370 job.Namespace, job.Name, err) 4371 return nil, err 4372 }</span> 4373 <span class="cov0" title="0">if err := cc.cache.Update(newJob); err != nil </span><span class="cov0" title="0">{ 4374 klog.Errorf("CreateJob - Failed to update Job %v/%v in cache: %v", 4375 newJob.Namespace, newJob.Name, err) 4376 return nil, err 4377 }</span> 4378 4379 <span class="cov0" title="0">return newJob, nil</span> 4380 } 4381 4382 func classifyAndAddUpPodBaseOnPhase(pod *v1.Pod, pending, running, succeeded, failed, unknown *int32) <span class="cov8" title="1">{ 4383 switch pod.Status.Phase </span>{ 4384 case v1.PodPending:<span class="cov0" title="0"> 4385 atomic.AddInt32(pending, 1)</span> 4386 case v1.PodRunning:<span class="cov8" title="1"> 4387 atomic.AddInt32(running, 1)</span> 4388 case v1.PodSucceeded:<span class="cov0" title="0"> 4389 atomic.AddInt32(succeeded, 1)</span> 4390 case v1.PodFailed:<span class="cov0" title="0"> 4391 atomic.AddInt32(failed, 1)</span> 
4392 default:<span class="cov8" title="1"> 4393 atomic.AddInt32(unknown, 1)</span> 4394 } 4395 } 4396 4397 func calcPodStatus(pod *v1.Pod, taskStatusCount map[string]batch.TaskState) <span class="cov8" title="1">{ 4398 taskName, found := pod.Annotations[batch.TaskSpecKey] 4399 if !found </span><span class="cov8" title="1">{ 4400 return 4401 }</span> 4402 4403 <span class="cov8" title="1">calMutex.Lock() 4404 defer calMutex.Unlock() 4405 if _, ok := taskStatusCount[taskName]; !ok </span><span class="cov8" title="1">{ 4406 taskStatusCount[taskName] = batch.TaskState{ 4407 Phase: make(map[v1.PodPhase]int32), 4408 } 4409 }</span> 4410 4411 <span class="cov8" title="1">switch pod.Status.Phase </span>{ 4412 case v1.PodPending:<span class="cov0" title="0"> 4413 taskStatusCount[taskName].Phase[v1.PodPending]++</span> 4414 case v1.PodRunning:<span class="cov0" title="0"> 4415 taskStatusCount[taskName].Phase[v1.PodRunning]++</span> 4416 case v1.PodSucceeded:<span class="cov0" title="0"> 4417 taskStatusCount[taskName].Phase[v1.PodSucceeded]++</span> 4418 case v1.PodFailed:<span class="cov0" title="0"> 4419 taskStatusCount[taskName].Phase[v1.PodFailed]++</span> 4420 default:<span class="cov8" title="1"> 4421 taskStatusCount[taskName].Phase[v1.PodUnknown]++</span> 4422 } 4423 } 4424 4425 func isInitiated(job *batch.Job) bool <span class="cov8" title="1">{ 4426 if job.Status.State.Phase == "" || job.Status.State.Phase == batch.Pending </span><span class="cov8" title="1">{ 4427 return false 4428 }</span> 4429 4430 <span class="cov8" title="1">return true</span> 4431 } 4432 4433 func newCondition(status batch.JobPhase, lastTransitionTime *metav1.Time) batch.JobCondition <span class="cov8" title="1">{ 4434 return batch.JobCondition{ 4435 Status: status, 4436 LastTransitionTime: lastTransitionTime, 4437 } 4438 }</span> 4439 </pre> 4440 4441 <pre class="file" id="file26" style="display: none">/* 4442 Copyright 2017 The Volcano Authors. 
4443 4444 Licensed under the Apache License, Version 2.0 (the "License"); 4445 you may not use this file except in compliance with the License. 4446 You may obtain a copy of the License at 4447 4448 http://www.apache.org/licenses/LICENSE-2.0 4449 4450 Unless required by applicable law or agreed to in writing, software 4451 distributed under the License is distributed on an "AS IS" BASIS, 4452 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 4453 See the License for the specific language governing permissions and 4454 limitations under the License. 4455 */ 4456 4457 package job 4458 4459 import ( 4460 "context" 4461 "fmt" 4462 "reflect" 4463 "strconv" 4464 4465 v1 "k8s.io/api/core/v1" 4466 apierrors "k8s.io/apimachinery/pkg/api/errors" 4467 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 4468 "k8s.io/client-go/tools/cache" 4469 "k8s.io/klog" 4470 4471 batch "volcano.sh/apis/pkg/apis/batch/v1alpha1" 4472 bus "volcano.sh/apis/pkg/apis/bus/v1alpha1" 4473 "volcano.sh/apis/pkg/apis/helpers" 4474 scheduling "volcano.sh/apis/pkg/apis/scheduling/v1beta1" 4475 "volcano.sh/volcano/pkg/controllers/apis" 4476 jobcache "volcano.sh/volcano/pkg/controllers/cache" 4477 jobhelpers "volcano.sh/volcano/pkg/controllers/job/helpers" 4478 ) 4479 4480 func (cc *jobcontroller) addCommand(obj interface{}) <span class="cov8" title="1">{ 4481 cmd, ok := obj.(*bus.Command) 4482 if !ok </span><span class="cov8" title="1">{ 4483 klog.Errorf("obj is not Command") 4484 return 4485 }</span> 4486 4487 <span class="cov8" title="1">cc.commandQueue.Add(cmd)</span> 4488 } 4489 4490 func (cc *jobcontroller) addJob(obj interface{}) <span class="cov8" title="1">{ 4491 job, ok := obj.(*batch.Job) 4492 if !ok </span><span class="cov0" title="0">{ 4493 klog.Errorf("obj is not Job") 4494 return 4495 }</span> 4496 4497 <span class="cov8" title="1">req := apis.Request{ 4498 Namespace: job.Namespace, 4499 JobName: job.Name, 4500 4501 Event: bus.OutOfSyncEvent, 4502 } 4503 4504 // 
TODO(k82cn): if failed to add job, the cache should be refresh 4505 if err := cc.cache.Add(job); err != nil </span><span class="cov0" title="0">{ 4506 klog.Errorf("Failed to add job <%s/%s>: %v in cache", 4507 job.Namespace, job.Name, err) 4508 }</span> 4509 <span class="cov8" title="1">key := jobhelpers.GetJobKeyByReq(&req) 4510 queue := cc.getWorkerQueue(key) 4511 queue.Add(req)</span> 4512 } 4513 4514 func (cc *jobcontroller) updateJob(oldObj, newObj interface{}) <span class="cov8" title="1">{ 4515 newJob, ok := newObj.(*batch.Job) 4516 if !ok </span><span class="cov0" title="0">{ 4517 klog.Errorf("newObj is not Job") 4518 return 4519 }</span> 4520 4521 <span class="cov8" title="1">oldJob, ok := oldObj.(*batch.Job) 4522 if !ok </span><span class="cov0" title="0">{ 4523 klog.Errorf("oldJob is not Job") 4524 return 4525 }</span> 4526 4527 // No need to update if ResourceVersion is not changed 4528 <span class="cov8" title="1">if newJob.ResourceVersion == oldJob.ResourceVersion </span><span class="cov8" title="1">{ 4529 klog.V(6).Infof("No need to update because job is not modified.") 4530 return 4531 }</span> 4532 4533 <span class="cov8" title="1">if err := cc.cache.Update(newJob); err != nil </span><span class="cov0" title="0">{ 4534 klog.Errorf("UpdateJob - Failed to update job <%s/%s>: %v in cache", 4535 newJob.Namespace, newJob.Name, err) 4536 }</span> 4537 4538 // NOTE: Since we only reconcile job based on Spec, we will ignore other attributes 4539 // For Job status, it's used internally and always been updated via our controller. 
4540 <span class="cov8" title="1">if reflect.DeepEqual(newJob.Spec, oldJob.Spec) && newJob.Status.State.Phase == oldJob.Status.State.Phase </span><span class="cov0" title="0">{ 4541 klog.V(6).Infof("Job update event is ignored since no update in 'Spec'.") 4542 return 4543 }</span> 4544 4545 <span class="cov8" title="1">req := apis.Request{ 4546 Namespace: newJob.Namespace, 4547 JobName: newJob.Name, 4548 Event: bus.OutOfSyncEvent, 4549 } 4550 key := jobhelpers.GetJobKeyByReq(&req) 4551 queue := cc.getWorkerQueue(key) 4552 queue.Add(req)</span> 4553 } 4554 4555 func (cc *jobcontroller) deleteJob(obj interface{}) <span class="cov0" title="0">{ 4556 job, ok := obj.(*batch.Job) 4557 if !ok </span><span class="cov0" title="0">{ 4558 // If we reached here it means the Job was deleted but its final state is unrecorded. 4559 tombstone, ok := obj.(cache.DeletedFinalStateUnknown) 4560 if !ok </span><span class="cov0" title="0">{ 4561 klog.Errorf("Couldn't get object from tombstone %#v", obj) 4562 return 4563 }</span> 4564 <span class="cov0" title="0">job, ok = tombstone.Obj.(*batch.Job) 4565 if !ok </span><span class="cov0" title="0">{ 4566 klog.Errorf("Tombstone contained object that is not a volcano Job: %#v", obj) 4567 return 4568 }</span> 4569 } 4570 4571 <span class="cov0" title="0">if err := cc.cache.Delete(job); err != nil </span><span class="cov0" title="0">{ 4572 klog.Errorf("Failed to delete job <%s/%s>: %v in cache", 4573 job.Namespace, job.Name, err) 4574 }</span> 4575 } 4576 4577 func (cc *jobcontroller) addPod(obj interface{}) <span class="cov8" title="1">{ 4578 pod, ok := obj.(*v1.Pod) 4579 if !ok </span><span class="cov0" title="0">{ 4580 klog.Errorf("Failed to convert %v to v1.Pod", obj) 4581 return 4582 }</span> 4583 // Filter out pods that are not created from volcano job 4584 <span class="cov8" title="1">if !isControlledBy(pod, helpers.JobKind) </span><span class="cov0" title="0">{ 4585 return 4586 }</span> 4587 4588 <span class="cov8" title="1">jobName, 
found := pod.Annotations[batch.JobNameKey] 4589 if !found </span><span class="cov0" title="0">{ 4590 klog.Infof("Failed to find jobName of Pod <%s/%s>, skipping", 4591 pod.Namespace, pod.Name) 4592 return 4593 }</span> 4594 4595 <span class="cov8" title="1">version, found := pod.Annotations[batch.JobVersion] 4596 if !found </span><span class="cov0" title="0">{ 4597 klog.Infof("Failed to find jobVersion of Pod <%s/%s>, skipping", 4598 pod.Namespace, pod.Name) 4599 return 4600 }</span> 4601 4602 <span class="cov8" title="1">dVersion, err := strconv.Atoi(version) 4603 if err != nil </span><span class="cov0" title="0">{ 4604 klog.Infof("Failed to convert jobVersion of Pod <%s/%s> into number, skipping", 4605 pod.Namespace, pod.Name) 4606 return 4607 }</span> 4608 4609 <span class="cov8" title="1">if pod.DeletionTimestamp != nil </span><span class="cov0" title="0">{ 4610 cc.deletePod(pod) 4611 return 4612 }</span> 4613 4614 <span class="cov8" title="1">req := apis.Request{ 4615 Namespace: pod.Namespace, 4616 JobName: jobName, 4617 4618 Event: bus.OutOfSyncEvent, 4619 JobVersion: int32(dVersion), 4620 } 4621 4622 if err := cc.cache.AddPod(pod); err != nil </span><span class="cov8" title="1">{ 4623 klog.Errorf("Failed to add Pod <%s/%s>: %v to cache", 4624 pod.Namespace, pod.Name, err) 4625 }</span> 4626 <span class="cov8" title="1">key := jobhelpers.GetJobKeyByReq(&req) 4627 queue := cc.getWorkerQueue(key) 4628 queue.Add(req)</span> 4629 } 4630 4631 func (cc *jobcontroller) updatePod(oldObj, newObj interface{}) <span class="cov8" title="1">{ 4632 oldPod, ok := oldObj.(*v1.Pod) 4633 if !ok </span><span class="cov0" title="0">{ 4634 klog.Errorf("Failed to convert %v to v1.Pod", oldObj) 4635 return 4636 }</span> 4637 4638 <span class="cov8" title="1">newPod, ok := newObj.(*v1.Pod) 4639 if !ok </span><span class="cov0" title="0">{ 4640 klog.Errorf("Failed to convert %v to v1.Pod", newObj) 4641 return 4642 }</span> 4643 4644 // Filter out pods that are not created from 
volcano job 4645 <span class="cov8" title="1">if !isControlledBy(newPod, helpers.JobKind) </span><span class="cov0" title="0">{ 4646 return 4647 }</span> 4648 4649 <span class="cov8" title="1">if newPod.ResourceVersion == oldPod.ResourceVersion </span><span class="cov0" title="0">{ 4650 return 4651 }</span> 4652 4653 <span class="cov8" title="1">if newPod.DeletionTimestamp != nil </span><span class="cov0" title="0">{ 4654 cc.deletePod(newObj) 4655 return 4656 }</span> 4657 4658 <span class="cov8" title="1">taskName, found := newPod.Annotations[batch.TaskSpecKey] 4659 if !found </span><span class="cov0" title="0">{ 4660 klog.Infof("Failed to find taskName of Pod <%s/%s>, skipping", 4661 newPod.Namespace, newPod.Name) 4662 return 4663 }</span> 4664 4665 <span class="cov8" title="1">jobName, found := newPod.Annotations[batch.JobNameKey] 4666 if !found </span><span class="cov0" title="0">{ 4667 klog.Infof("Failed to find jobName of Pod <%s/%s>, skipping", 4668 newPod.Namespace, newPod.Name) 4669 return 4670 }</span> 4671 4672 <span class="cov8" title="1">version, found := newPod.Annotations[batch.JobVersion] 4673 if !found </span><span class="cov0" title="0">{ 4674 klog.Infof("Failed to find jobVersion of Pod <%s/%s>, skipping", 4675 newPod.Namespace, newPod.Name) 4676 return 4677 }</span> 4678 4679 <span class="cov8" title="1">dVersion, err := strconv.Atoi(version) 4680 if err != nil </span><span class="cov0" title="0">{ 4681 klog.Infof("Failed to convert jobVersion of Pod into number <%s/%s>, skipping", 4682 newPod.Namespace, newPod.Name) 4683 return 4684 }</span> 4685 4686 <span class="cov8" title="1">if err := cc.cache.UpdatePod(newPod); err != nil </span><span class="cov0" title="0">{ 4687 klog.Errorf("Failed to update Pod <%s/%s>: %v in cache", 4688 newPod.Namespace, newPod.Name, err) 4689 }</span> 4690 4691 <span class="cov8" title="1">event := bus.OutOfSyncEvent 4692 var exitCode int32 4693 4694 switch newPod.Status.Phase </span>{ 4695 case v1.PodFailed:<span 
class="cov8" title="1"> 4696 if oldPod.Status.Phase != v1.PodFailed </span><span class="cov8" title="1">{ 4697 event = bus.PodFailedEvent 4698 // TODO: currently only one container pod is supported by volcano 4699 // Once multi containers pod is supported, update accordingly. 4700 if len(newPod.Status.ContainerStatuses) > 0 && newPod.Status.ContainerStatuses[0].State.Terminated != nil </span><span class="cov0" title="0">{ 4701 exitCode = newPod.Status.ContainerStatuses[0].State.Terminated.ExitCode 4702 }</span> 4703 } 4704 case v1.PodSucceeded:<span class="cov0" title="0"> 4705 if oldPod.Status.Phase != v1.PodSucceeded && 4706 cc.cache.TaskCompleted(jobcache.JobKeyByName(newPod.Namespace, jobName), taskName) </span><span class="cov0" title="0">{ 4707 event = bus.TaskCompletedEvent 4708 }</span> 4709 case v1.PodPending, v1.PodRunning:<span class="cov8" title="1"> 4710 if cc.cache.TaskFailed(jobcache.JobKeyByName(newPod.Namespace, jobName), taskName) </span><span class="cov0" title="0">{ 4711 event = bus.TaskFailedEvent 4712 }</span> 4713 } 4714 4715 <span class="cov8" title="1">req := apis.Request{ 4716 Namespace: newPod.Namespace, 4717 JobName: jobName, 4718 TaskName: taskName, 4719 4720 Event: event, 4721 ExitCode: exitCode, 4722 JobVersion: int32(dVersion), 4723 } 4724 4725 key := jobhelpers.GetJobKeyByReq(&req) 4726 queue := cc.getWorkerQueue(key) 4727 queue.Add(req)</span> 4728 } 4729 4730 func (cc *jobcontroller) deletePod(obj interface{}) <span class="cov8" title="1">{ 4731 pod, ok := obj.(*v1.Pod) 4732 if !ok </span><span class="cov0" title="0">{ 4733 // If we reached here it means the pod was deleted but its final state is unrecorded. 
4734 tombstone, ok := obj.(cache.DeletedFinalStateUnknown) 4735 if !ok </span><span class="cov0" title="0">{ 4736 klog.Errorf("Couldn't get object from tombstone %#v", obj) 4737 return 4738 }</span> 4739 <span class="cov0" title="0">pod, ok = tombstone.Obj.(*v1.Pod) 4740 if !ok </span><span class="cov0" title="0">{ 4741 klog.Errorf("Tombstone contained object that is not a Pod: %#v", obj) 4742 return 4743 }</span> 4744 } 4745 4746 // Filter out pods that are not created from volcano job 4747 <span class="cov8" title="1">if !isControlledBy(pod, helpers.JobKind) </span><span class="cov0" title="0">{ 4748 return 4749 }</span> 4750 4751 <span class="cov8" title="1">taskName, found := pod.Annotations[batch.TaskSpecKey] 4752 if !found </span><span class="cov0" title="0">{ 4753 klog.Infof("Failed to find taskName of Pod <%s/%s>, skipping", 4754 pod.Namespace, pod.Name) 4755 return 4756 }</span> 4757 4758 <span class="cov8" title="1">jobName, found := pod.Annotations[batch.JobNameKey] 4759 if !found </span><span class="cov0" title="0">{ 4760 klog.Infof("Failed to find jobName of Pod <%s/%s>, skipping", 4761 pod.Namespace, pod.Name) 4762 return 4763 }</span> 4764 4765 <span class="cov8" title="1">version, found := pod.Annotations[batch.JobVersion] 4766 if !found </span><span class="cov0" title="0">{ 4767 klog.Infof("Failed to find jobVersion of Pod <%s/%s>, skipping", 4768 pod.Namespace, pod.Name) 4769 return 4770 }</span> 4771 4772 <span class="cov8" title="1">dVersion, err := strconv.Atoi(version) 4773 if err != nil </span><span class="cov0" title="0">{ 4774 klog.Infof("Failed to convert jobVersion of Pod <%s/%s> into number, skipping", 4775 pod.Namespace, pod.Name) 4776 return 4777 }</span> 4778 4779 <span class="cov8" title="1">req := apis.Request{ 4780 Namespace: pod.Namespace, 4781 JobName: jobName, 4782 TaskName: taskName, 4783 4784 Event: bus.PodEvictedEvent, 4785 JobVersion: int32(dVersion), 4786 } 4787 4788 if err := cc.cache.DeletePod(pod); err != nil 
</span><span class="cov0" title="0">{ 4789 klog.Errorf("Failed to delete Pod <%s/%s>: %v in cache", 4790 pod.Namespace, pod.Name, err) 4791 }</span> 4792 4793 <span class="cov8" title="1">key := jobhelpers.GetJobKeyByReq(&req) 4794 queue := cc.getWorkerQueue(key) 4795 queue.Add(req)</span> 4796 } 4797 4798 func (cc *jobcontroller) recordJobEvent(namespace, name string, event batch.JobEvent, message string) <span class="cov0" title="0">{ 4799 job, err := cc.cache.Get(jobcache.JobKeyByName(namespace, name)) 4800 if err != nil </span><span class="cov0" title="0">{ 4801 klog.Warningf("Failed to find job in cache when reporting job event <%s/%s>: %v", 4802 namespace, name, err) 4803 return 4804 }</span> 4805 <span class="cov0" title="0">cc.recorder.Event(job.Job, v1.EventTypeNormal, string(event), message)</span> 4806 } 4807 4808 func (cc *jobcontroller) handleCommands() <span class="cov0" title="0">{ 4809 for cc.processNextCommand() </span>{<span class="cov0" title="0"> 4810 }</span> 4811 } 4812 4813 func (cc *jobcontroller) processNextCommand() bool <span class="cov0" title="0">{ 4814 obj, shutdown := cc.commandQueue.Get() 4815 if shutdown </span><span class="cov0" title="0">{ 4816 return false 4817 }</span> 4818 <span class="cov0" title="0">cmd := obj.(*bus.Command) 4819 defer cc.commandQueue.Done(cmd) 4820 4821 if err := cc.vcClient.BusV1alpha1().Commands(cmd.Namespace).Delete(context.TODO(), cmd.Name, metav1.DeleteOptions{}); err != nil </span><span class="cov0" title="0">{ 4822 if !apierrors.IsNotFound(err) </span><span class="cov0" title="0">{ 4823 klog.Errorf("Failed to delete Command <%s/%s>.", cmd.Namespace, cmd.Name) 4824 cc.commandQueue.AddRateLimited(cmd) 4825 }</span> 4826 <span class="cov0" title="0">return true</span> 4827 } 4828 <span class="cov0" title="0">cc.recordJobEvent(cmd.Namespace, cmd.TargetObject.Name, 4829 batch.CommandIssued, 4830 fmt.Sprintf( 4831 "Start to execute command %s, and clean it up to make sure executed not more than once.", 
cmd.Action)) 4832 req := apis.Request{ 4833 Namespace: cmd.Namespace, 4834 JobName: cmd.TargetObject.Name, 4835 Event: bus.CommandIssuedEvent, 4836 Action: bus.Action(cmd.Action), 4837 } 4838 4839 key := jobhelpers.GetJobKeyByReq(&req) 4840 queue := cc.getWorkerQueue(key) 4841 queue.Add(req) 4842 4843 return true</span> 4844 } 4845 4846 func (cc *jobcontroller) updatePodGroup(oldObj, newObj interface{}) <span class="cov8" title="1">{ 4847 oldPG, ok := oldObj.(*scheduling.PodGroup) 4848 if !ok </span><span class="cov0" title="0">{ 4849 klog.Errorf("Failed to convert %v to PodGroup", newObj) 4850 return 4851 }</span> 4852 4853 <span class="cov8" title="1">newPG, ok := newObj.(*scheduling.PodGroup) 4854 if !ok </span><span class="cov0" title="0">{ 4855 klog.Errorf("Failed to convert %v to PodGroup", newObj) 4856 return 4857 }</span> 4858 4859 <span class="cov8" title="1">_, err := cc.cache.Get(jobcache.JobKeyByName(newPG.Namespace, newPG.Name)) 4860 if err != nil && newPG.Annotations != nil </span><span class="cov0" title="0">{ 4861 klog.Warningf( 4862 "Failed to find job in cache by PodGroup, this may not be a PodGroup for volcano job.") 4863 }</span> 4864 4865 <span class="cov8" title="1">if newPG.Status.Phase != oldPG.Status.Phase </span><span class="cov8" title="1">{ 4866 req := apis.Request{ 4867 Namespace: newPG.Namespace, 4868 JobName: newPG.Name, 4869 } 4870 switch newPG.Status.Phase </span>{ 4871 case scheduling.PodGroupUnknown:<span class="cov0" title="0"> 4872 req.Event = bus.JobUnknownEvent</span> 4873 } 4874 <span class="cov8" title="1">key := jobhelpers.GetJobKeyByReq(&req) 4875 queue := cc.getWorkerQueue(key) 4876 queue.Add(req)</span> 4877 } 4878 } 4879 4880 // TODO(k82cn): add handler for PodGroup unschedulable event. 4881 </pre> 4882 4883 <pre class="file" id="file27" style="display: none">/* 4884 Copyright 2019 The Volcano Authors. 
4885 4886 Licensed under the Apache License, Version 2.0 (the "License"); 4887 you may not use this file except in compliance with the License. 4888 You may obtain a copy of the License at 4889 4890 http://www.apache.org/licenses/LICENSE-2.0 4891 4892 Unless required by applicable law or agreed to in writing, software 4893 distributed under the License is distributed on an "AS IS" BASIS, 4894 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 4895 See the License for the specific language governing permissions and 4896 limitations under the License. 4897 */ 4898 4899 package job 4900 4901 import ( 4902 "fmt" 4903 4904 v1 "k8s.io/api/core/v1" 4905 "k8s.io/klog" 4906 4907 batch "volcano.sh/apis/pkg/apis/batch/v1alpha1" 4908 "volcano.sh/volcano/pkg/controllers/job/plugins" 4909 pluginsinterface "volcano.sh/volcano/pkg/controllers/job/plugins/interface" 4910 ) 4911 4912 func (cc *jobcontroller) pluginOnPodCreate(job *batch.Job, pod *v1.Pod) error <span class="cov8" title="1">{ 4913 client := pluginsinterface.PluginClientset{KubeClients: cc.kubeClient} 4914 for name, args := range job.Spec.Plugins </span><span class="cov8" title="1">{ 4915 pb, found := plugins.GetPluginBuilder(name) 4916 if !found </span><span class="cov8" title="1">{ 4917 err := fmt.Errorf("failed to get plugin %s", name) 4918 klog.Error(err) 4919 return err 4920 }</span> 4921 <span class="cov8" title="1">klog.Infof("Starting to execute plugin at <pluginOnPodCreate>: %s on job: <%s/%s>", name, job.Namespace, job.Name) 4922 if err := pb(client, args).OnPodCreate(pod, job); err != nil </span><span class="cov0" title="0">{ 4923 klog.Errorf("Failed to process on pod create plugin %s, err %v.", name, err) 4924 return err 4925 }</span> 4926 } 4927 <span class="cov8" title="1">return nil</span> 4928 } 4929 4930 func (cc *jobcontroller) pluginOnJobAdd(job *batch.Job) error <span class="cov8" title="1">{ 4931 client := pluginsinterface.PluginClientset{KubeClients: cc.kubeClient} 4932 if 
job.Status.ControlledResources == nil </span><span class="cov8" title="1">{ 4933 job.Status.ControlledResources = make(map[string]string) 4934 }</span> 4935 <span class="cov8" title="1">for name, args := range job.Spec.Plugins </span><span class="cov8" title="1">{ 4936 pb, found := plugins.GetPluginBuilder(name) 4937 if !found </span><span class="cov8" title="1">{ 4938 err := fmt.Errorf("failed to get plugin %s", name) 4939 klog.Error(err) 4940 return err 4941 }</span> 4942 <span class="cov8" title="1">klog.Infof("Starting to execute plugin at <pluginOnJobAdd>: %s on job: <%s/%s>", name, job.Namespace, job.Name) 4943 if err := pb(client, args).OnJobAdd(job); err != nil </span><span class="cov0" title="0">{ 4944 klog.Errorf("Failed to process on job add plugin %s, err %v.", name, err) 4945 return err 4946 }</span> 4947 } 4948 4949 <span class="cov8" title="1">return nil</span> 4950 } 4951 4952 func (cc *jobcontroller) pluginOnJobDelete(job *batch.Job) error <span class="cov8" title="1">{ 4953 if job.Status.ControlledResources == nil </span><span class="cov8" title="1">{ 4954 job.Status.ControlledResources = make(map[string]string) 4955 }</span> 4956 <span class="cov8" title="1">client := pluginsinterface.PluginClientset{KubeClients: cc.kubeClient} 4957 for name, args := range job.Spec.Plugins </span><span class="cov8" title="1">{ 4958 pb, found := plugins.GetPluginBuilder(name) 4959 if !found </span><span class="cov8" title="1">{ 4960 err := fmt.Errorf("failed to get plugin %s", name) 4961 klog.Error(err) 4962 return err 4963 }</span> 4964 <span class="cov8" title="1">klog.Infof("Starting to execute plugin at <pluginOnJobDelete>: %s on job: <%s/%s>", name, job.Namespace, job.Name) 4965 if err := pb(client, args).OnJobDelete(job); err != nil </span><span class="cov0" title="0">{ 4966 klog.Errorf("failed to process on job delete plugin %s, err %v.", name, err) 4967 return err 4968 }</span> 4969 } 4970 4971 <span class="cov8" title="1">return nil</span> 4972 } 4973 
4974 func (cc *jobcontroller) pluginOnJobUpdate(job *batch.Job) error <span class="cov8" title="1">{ 4975 client := pluginsinterface.PluginClientset{KubeClients: cc.kubeClient} 4976 if job.Status.ControlledResources == nil </span><span class="cov8" title="1">{ 4977 job.Status.ControlledResources = make(map[string]string) 4978 }</span> 4979 <span class="cov8" title="1">for name, args := range job.Spec.Plugins </span><span class="cov0" title="0">{ 4980 pb, found := plugins.GetPluginBuilder(name) 4981 if !found </span><span class="cov0" title="0">{ 4982 err := fmt.Errorf("failed to get plugin %s", name) 4983 klog.Error(err) 4984 return err 4985 }</span> 4986 <span class="cov0" title="0">klog.Infof("Starting to execute plugin at <pluginOnJobUpdate>: %s on job: <%s/%s>", name, job.Namespace, job.Name) 4987 if err := pb(client, args).OnJobUpdate(job); err != nil </span><span class="cov0" title="0">{ 4988 klog.Errorf("Failed to process on job update plugin %s, err %v.", name, err) 4989 return err 4990 }</span> 4991 } 4992 4993 <span class="cov8" title="1">return nil</span> 4994 } 4995 </pre> 4996 4997 <pre class="file" id="file28" style="display: none">/* 4998 Copyright 2019 The Volcano Authors. 4999 5000 Licensed under the Apache License, Version 2.0 (the "License"); 5001 you may not use this file except in compliance with the License. 5002 You may obtain a copy of the License at 5003 5004 http://www.apache.org/licenses/LICENSE-2.0 5005 5006 Unless required by applicable law or agreed to in writing, software 5007 distributed under the License is distributed on an "AS IS" BASIS, 5008 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 5009 See the License for the specific language governing permissions and 5010 limitations under the License. 
5011 */ 5012 5013 package job 5014 5015 import ( 5016 "context" 5017 "fmt" 5018 "time" 5019 5020 "golang.org/x/time/rate" 5021 v1 "k8s.io/api/core/v1" 5022 "k8s.io/apimachinery/pkg/api/errors" 5023 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 5024 "k8s.io/client-go/util/workqueue" 5025 "k8s.io/klog" 5026 ) 5027 5028 func newRateLimitingQueue() workqueue.RateLimitingInterface <span class="cov8" title="1">{ 5029 return workqueue.NewRateLimitingQueue(workqueue.NewMaxOfRateLimiter( 5030 workqueue.NewItemExponentialFailureRateLimiter(5*time.Millisecond, 180*time.Second), 5031 // 10 qps, 100 bucket size. This is only for retry speed and its only the overall factor (not per item) 5032 &workqueue.BucketRateLimiter{Limiter: rate.NewLimiter(rate.Limit(10), 100)}, 5033 )) 5034 }</span> 5035 5036 func (cc *jobcontroller) processResyncTask() <span class="cov0" title="0">{ 5037 obj, shutdown := cc.errTasks.Get() 5038 if shutdown </span><span class="cov0" title="0">{ 5039 return 5040 }</span> 5041 5042 // one task only resync 10 times 5043 <span class="cov0" title="0">if cc.errTasks.NumRequeues(obj) > 10 </span><span class="cov0" title="0">{ 5044 cc.errTasks.Forget(obj) 5045 return 5046 }</span> 5047 5048 <span class="cov0" title="0">defer cc.errTasks.Done(obj) 5049 5050 task, ok := obj.(*v1.Pod) 5051 if !ok </span><span class="cov0" title="0">{ 5052 klog.Errorf("failed to convert %v to *v1.Pod", obj) 5053 return 5054 }</span> 5055 5056 <span class="cov0" title="0">if err := cc.syncTask(task); err != nil </span><span class="cov0" title="0">{ 5057 klog.Errorf("Failed to sync pod <%v/%v>, retry it, err %v", task.Namespace, task.Name, err) 5058 cc.resyncTask(task) 5059 }</span> 5060 } 5061 5062 func (cc *jobcontroller) syncTask(oldTask *v1.Pod) error <span class="cov0" title="0">{ 5063 newPod, err := cc.kubeClient.CoreV1().Pods(oldTask.Namespace).Get(context.TODO(), oldTask.Name, metav1.GetOptions{}) 5064 if err != nil </span><span class="cov0" title="0">{ 5065 if 
errors.IsNotFound(err) </span><span class="cov0" title="0">{ 5066 if err := cc.cache.DeletePod(oldTask); err != nil </span><span class="cov0" title="0">{ 5067 klog.Errorf("failed to delete cache pod <%v/%v>, err %v.", oldTask.Namespace, oldTask.Name, err) 5068 return err 5069 }</span> 5070 <span class="cov0" title="0">klog.V(3).Infof("Pod <%v/%v> was deleted, removed from cache.", oldTask.Namespace, oldTask.Name) 5071 5072 return nil</span> 5073 } 5074 <span class="cov0" title="0">return fmt.Errorf("failed to get Pod <%v/%v>: err %v", oldTask.Namespace, oldTask.Name, err)</span> 5075 } 5076 5077 <span class="cov0" title="0">return cc.cache.UpdatePod(newPod)</span> 5078 } 5079 5080 func (cc *jobcontroller) resyncTask(task *v1.Pod) <span class="cov0" title="0">{ 5081 cc.errTasks.AddRateLimited(task) 5082 }</span> 5083 </pre> 5084 5085 <pre class="file" id="file29" style="display: none">/* 5086 Copyright 2017 The Volcano Authors. 5087 5088 Licensed under the Apache License, Version 2.0 (the "License"); 5089 you may not use this file except in compliance with the License. 5090 You may obtain a copy of the License at 5091 5092 http://www.apache.org/licenses/LICENSE-2.0 5093 5094 Unless required by applicable law or agreed to in writing, software 5095 distributed under the License is distributed on an "AS IS" BASIS, 5096 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 5097 See the License for the specific language governing permissions and 5098 limitations under the License. 
5099 */ 5100 5101 package job 5102 5103 import ( 5104 "fmt" 5105 "time" 5106 5107 v1 "k8s.io/api/core/v1" 5108 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 5109 "k8s.io/apimachinery/pkg/runtime/schema" 5110 "k8s.io/klog" 5111 5112 batch "volcano.sh/apis/pkg/apis/batch/v1alpha1" 5113 "volcano.sh/apis/pkg/apis/bus/v1alpha1" 5114 "volcano.sh/apis/pkg/apis/helpers" 5115 schedulingv2 "volcano.sh/apis/pkg/apis/scheduling/v1beta1" 5116 "volcano.sh/volcano/pkg/controllers/apis" 5117 jobhelpers "volcano.sh/volcano/pkg/controllers/job/helpers" 5118 ) 5119 5120 var detectionPeriodOfDependsOntask time.Duration 5121 5122 // MakePodName append podname,jobname,taskName and index and returns the string. 5123 func MakePodName(jobName string, taskName string, index int) string <span class="cov8" title="1">{ 5124 return fmt.Sprintf(jobhelpers.PodNameFmt, jobName, taskName, index) 5125 }</span> 5126 5127 func createJobPod(job *batch.Job, template *v1.PodTemplateSpec, topologyPolicy batch.NumaPolicy, ix int, jobForwarding bool) *v1.Pod <span class="cov8" title="1">{ 5128 templateCopy := template.DeepCopy() 5129 5130 pod := &v1.Pod{ 5131 ObjectMeta: metav1.ObjectMeta{ 5132 Name: jobhelpers.MakePodName(job.Name, template.Name, ix), 5133 Namespace: job.Namespace, 5134 OwnerReferences: []metav1.OwnerReference{ 5135 *metav1.NewControllerRef(job, helpers.JobKind), 5136 }, 5137 Labels: templateCopy.Labels, 5138 Annotations: templateCopy.Annotations, 5139 }, 5140 Spec: templateCopy.Spec, 5141 } 5142 5143 // If no scheduler name in Pod, use scheduler name from Job. 
5144 if len(pod.Spec.SchedulerName) == 0 </span><span class="cov8" title="1">{ 5145 pod.Spec.SchedulerName = job.Spec.SchedulerName 5146 }</span> 5147 5148 <span class="cov8" title="1">volumeMap := make(map[string]string) 5149 for _, volume := range job.Spec.Volumes </span><span class="cov8" title="1">{ 5150 vcName := volume.VolumeClaimName 5151 name := fmt.Sprintf("%s-%s", job.Name, jobhelpers.GenRandomStr(12)) 5152 if _, ok := volumeMap[vcName]; !ok </span><span class="cov8" title="1">{ 5153 volume := v1.Volume{ 5154 Name: name, 5155 VolumeSource: v1.VolumeSource{ 5156 PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{ 5157 ClaimName: vcName, 5158 }, 5159 }, 5160 } 5161 pod.Spec.Volumes = append(pod.Spec.Volumes, volume) 5162 volumeMap[vcName] = name 5163 }</span> else<span class="cov0" title="0"> { 5164 // duplicate volumes, should be prevented 5165 continue</span> 5166 } 5167 5168 <span class="cov8" title="1">for i, c := range pod.Spec.Containers </span><span class="cov8" title="1">{ 5169 vm := v1.VolumeMount{ 5170 MountPath: volume.MountPath, 5171 Name: name, 5172 } 5173 pod.Spec.Containers[i].VolumeMounts = append(c.VolumeMounts, vm) 5174 }</span> 5175 } 5176 5177 <span class="cov8" title="1">tsKey := templateCopy.Name 5178 if len(tsKey) == 0 </span><span class="cov8" title="1">{ 5179 tsKey = batch.DefaultTaskSpec 5180 }</span> 5181 5182 <span class="cov8" title="1">if len(pod.Annotations) == 0 </span><span class="cov8" title="1">{ 5183 pod.Annotations = make(map[string]string) 5184 }</span> 5185 5186 <span class="cov8" title="1">pod.Annotations[batch.TaskSpecKey] = tsKey 5187 pod.Annotations[schedulingv2.KubeGroupNameAnnotationKey] = job.Name 5188 pod.Annotations[batch.JobNameKey] = job.Name 5189 pod.Annotations[batch.QueueNameKey] = job.Spec.Queue 5190 pod.Annotations[batch.JobVersion] = fmt.Sprintf("%d", job.Status.Version) 5191 pod.Annotations[batch.PodTemplateKey] = fmt.Sprintf("%s-%s", job.Name, template.Name) 5192 5193 if topologyPolicy != 
"" </span><span class="cov0" title="0">{ 5194 pod.Annotations[schedulingv2.NumaPolicyKey] = string(topologyPolicy) 5195 }</span> 5196 5197 <span class="cov8" title="1">if len(job.Annotations) > 0 </span><span class="cov0" title="0">{ 5198 if value, found := job.Annotations[schedulingv2.PodPreemptable]; found </span><span class="cov0" title="0">{ 5199 pod.Annotations[schedulingv2.PodPreemptable] = value 5200 }</span> 5201 <span class="cov0" title="0">if value, found := job.Annotations[schedulingv2.RevocableZone]; found </span><span class="cov0" title="0">{ 5202 pod.Annotations[schedulingv2.RevocableZone] = value 5203 }</span> 5204 5205 <span class="cov0" title="0">if value, found := job.Annotations[schedulingv2.JDBMinAvailable]; found </span><span class="cov0" title="0">{ 5206 pod.Annotations[schedulingv2.JDBMinAvailable] = value 5207 }</span> else<span class="cov0" title="0"> if value, found := job.Annotations[schedulingv2.JDBMaxUnavailable]; found </span><span class="cov0" title="0">{ 5208 pod.Annotations[schedulingv2.JDBMaxUnavailable] = value 5209 }</span> 5210 } 5211 5212 <span class="cov8" title="1">if len(pod.Labels) == 0 </span><span class="cov8" title="1">{ 5213 pod.Labels = make(map[string]string) 5214 }</span> 5215 5216 // Set pod labels for Service. 
5217 <span class="cov8" title="1">pod.Labels[batch.JobNameKey] = job.Name 5218 pod.Labels[batch.TaskSpecKey] = tsKey 5219 pod.Labels[batch.JobNamespaceKey] = job.Namespace 5220 pod.Labels[batch.QueueNameKey] = job.Spec.Queue 5221 if len(job.Labels) > 0 </span><span class="cov0" title="0">{ 5222 if value, found := job.Labels[schedulingv2.PodPreemptable]; found </span><span class="cov0" title="0">{ 5223 pod.Labels[schedulingv2.PodPreemptable] = value 5224 }</span> 5225 } 5226 5227 <span class="cov8" title="1">if jobForwarding </span><span class="cov0" title="0">{ 5228 pod.Annotations[batch.JobForwardingKey] = "true" 5229 pod.Labels[batch.JobForwardingKey] = "true" 5230 }</span> 5231 5232 <span class="cov8" title="1">return pod</span> 5233 } 5234 5235 func applyPolicies(job *batch.Job, req *apis.Request) v1alpha1.Action <span class="cov8" title="1">{ 5236 if len(req.Action) != 0 </span><span class="cov8" title="1">{ 5237 return req.Action 5238 }</span> 5239 5240 <span class="cov8" title="1">if req.Event == v1alpha1.OutOfSyncEvent </span><span class="cov8" title="1">{ 5241 return v1alpha1.SyncJobAction 5242 }</span> 5243 5244 // For all the requests triggered from discarded job resources will perform sync action instead 5245 <span class="cov8" title="1">if req.JobVersion < job.Status.Version </span><span class="cov0" title="0">{ 5246 klog.Infof("Request %s is outdated, will perform sync instead.", req) 5247 return v1alpha1.SyncJobAction 5248 }</span> 5249 5250 // Overwrite Job level policies 5251 <span class="cov8" title="1">if len(req.TaskName) != 0 </span><span class="cov8" title="1">{ 5252 // Parse task level policies 5253 for _, task := range job.Spec.Tasks </span><span class="cov8" title="1">{ 5254 if task.Name == req.TaskName </span><span class="cov8" title="1">{ 5255 for _, policy := range task.Policies </span><span class="cov8" title="1">{ 5256 policyEvents := getEventlist(policy) 5257 5258 if len(policyEvents) > 0 && len(req.Event) > 0 </span><span 
class="cov8" title="1">{ 5259 if checkEventExist(policyEvents, req.Event) || checkEventExist(policyEvents, v1alpha1.AnyEvent) </span><span class="cov8" title="1">{ 5260 return policy.Action 5261 }</span> 5262 } 5263 5264 // 0 is not an error code, is prevented in validation admission controller 5265 <span class="cov8" title="1">if policy.ExitCode != nil && *policy.ExitCode == req.ExitCode </span><span class="cov8" title="1">{ 5266 return policy.Action 5267 }</span> 5268 } 5269 <span class="cov8" title="1">break</span> 5270 } 5271 } 5272 } 5273 5274 // Parse Job level policies 5275 <span class="cov8" title="1">for _, policy := range job.Spec.Policies </span><span class="cov8" title="1">{ 5276 policyEvents := getEventlist(policy) 5277 5278 if len(policyEvents) > 0 && len(req.Event) > 0 </span><span class="cov8" title="1">{ 5279 if checkEventExist(policyEvents, req.Event) || checkEventExist(policyEvents, v1alpha1.AnyEvent) </span><span class="cov8" title="1">{ 5280 return policy.Action 5281 }</span> 5282 } 5283 5284 // 0 is not an error code, is prevented in validation admission controller 5285 <span class="cov8" title="1">if policy.ExitCode != nil && *policy.ExitCode == req.ExitCode </span><span class="cov8" title="1">{ 5286 return policy.Action 5287 }</span> 5288 } 5289 5290 <span class="cov8" title="1">return v1alpha1.SyncJobAction</span> 5291 } 5292 5293 func getEventlist(policy batch.LifecyclePolicy) []v1alpha1.Event <span class="cov8" title="1">{ 5294 policyEventsList := policy.Events 5295 if len(policy.Event) > 0 </span><span class="cov8" title="1">{ 5296 policyEventsList = append(policyEventsList, policy.Event) 5297 }</span> 5298 <span class="cov8" title="1">return policyEventsList</span> 5299 } 5300 5301 func checkEventExist(policyEvents []v1alpha1.Event, reqEvent v1alpha1.Event) bool <span class="cov8" title="1">{ 5302 for _, event := range policyEvents </span><span class="cov8" title="1">{ 5303 if event == reqEvent </span><span class="cov8" title="1">{ 5304 
return true 5305 }</span> 5306 } 5307 <span class="cov0" title="0">return false</span> 5308 } 5309 5310 func addResourceList(list, req, limit v1.ResourceList) <span class="cov8" title="1">{ 5311 for name, quantity := range req </span><span class="cov8" title="1">{ 5312 if value, ok := list[name]; !ok </span><span class="cov8" title="1">{ 5313 list[name] = quantity.DeepCopy() 5314 }</span> else<span class="cov8" title="1"> { 5315 value.Add(quantity) 5316 list[name] = value 5317 }</span> 5318 } 5319 5320 <span class="cov8" title="1">if req != nil </span><span class="cov8" title="1">{ 5321 return 5322 }</span> 5323 5324 // If Requests is omitted for a container, 5325 // it defaults to Limits if that is explicitly specified. 5326 <span class="cov8" title="1">for name, quantity := range limit </span><span class="cov8" title="1">{ 5327 if value, ok := list[name]; !ok </span><span class="cov8" title="1">{ 5328 list[name] = quantity.DeepCopy() 5329 }</span> else<span class="cov8" title="1"> { 5330 value.Add(quantity) 5331 list[name] = value 5332 }</span> 5333 } 5334 } 5335 5336 // TaskPriority structure. 5337 type TaskPriority struct { 5338 priority int32 5339 5340 batch.TaskSpec 5341 } 5342 5343 // TasksPriority is a slice of TaskPriority. 
5344 type TasksPriority []TaskPriority 5345 5346 func (p TasksPriority) Len() int <span class="cov8" title="1">{ return len(p) }</span> 5347 5348 func (p TasksPriority) Less(i, j int) bool <span class="cov8" title="1">{ 5349 return p[i].priority > p[j].priority 5350 }</span> 5351 5352 func (p TasksPriority) Swap(i, j int) <span class="cov8" title="1">{ p[i], p[j] = p[j], p[i] }</span> 5353 5354 func isControlledBy(obj metav1.Object, gvk schema.GroupVersionKind) bool <span class="cov8" title="1">{ 5355 controllerRef := metav1.GetControllerOf(obj) 5356 if controllerRef == nil </span><span class="cov0" title="0">{ 5357 return false 5358 }</span> 5359 <span class="cov8" title="1">if controllerRef.APIVersion == gvk.GroupVersion().String() && controllerRef.Kind == gvk.Kind </span><span class="cov8" title="1">{ 5360 return true 5361 }</span> 5362 <span class="cov0" title="0">return false</span> 5363 } 5364 5365 func SetDetectionPeriodOfDependsOntask(period time.Duration) <span class="cov0" title="0">{ 5366 detectionPeriodOfDependsOntask = period 5367 }</span> 5368 </pre> 5369 5370 <pre class="file" id="file30" style="display: none">/* 5371 Copyright 2021 The Volcano Authors. 5372 5373 Licensed under the Apache License, Version 2.0 (the "License"); 5374 you may not use this file except in compliance with the License. 5375 You may obtain a copy of the License at 5376 5377 http://www.apache.org/licenses/LICENSE-2.0 5378 5379 Unless required by applicable law or agreed to in writing, software 5380 distributed under the License is distributed on an "AS IS" BASIS, 5381 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 5382 See the License for the specific language governing permissions and 5383 limitations under the License. 
5384 */ 5385 5386 package tensorflow 5387 5388 import ( 5389 "encoding/json" 5390 "flag" 5391 "fmt" 5392 "strconv" 5393 5394 v1 "k8s.io/api/core/v1" 5395 "k8s.io/klog" 5396 5397 batch "volcano.sh/apis/pkg/apis/batch/v1alpha1" 5398 jobhelpers "volcano.sh/volcano/pkg/controllers/job/helpers" 5399 pluginsinterface "volcano.sh/volcano/pkg/controllers/job/plugins/interface" 5400 ) 5401 5402 const ( 5403 DefaultPort = 2222 5404 TFConfig = "TF_CONFIG" 5405 ) 5406 5407 type tensorflowPlugin struct { 5408 tfArguments []string 5409 Clientset pluginsinterface.PluginClientset 5410 psName string 5411 workerName string 5412 chiefName string 5413 evaluatorName string 5414 port int 5415 } 5416 5417 // New creates tensorflow plugin. 5418 func New(client pluginsinterface.PluginClientset, arguments []string) pluginsinterface.PluginInterface <span class="cov8" title="1">{ 5419 tp := tensorflowPlugin{tfArguments: arguments, Clientset: client} 5420 tp.addFlags() 5421 return &tp 5422 }</span> 5423 5424 func (tp *tensorflowPlugin) addFlags() <span class="cov8" title="1">{ 5425 flagSet := flag.NewFlagSet(tp.Name(), flag.ContinueOnError) 5426 flagSet.StringVar(&tp.psName, "ps", "ps", "name of ps role task") 5427 flagSet.StringVar(&tp.workerName, "worker", "worker", "name of ps role task") 5428 flagSet.StringVar(&tp.chiefName, "chief", "chief", "name of chief role task") 5429 flagSet.StringVar(&tp.evaluatorName, "evaluator", "evaluator", "name of evaluator role task") 5430 flagSet.IntVar(&tp.port, "port", DefaultPort, "service port") 5431 if err := flagSet.Parse(tp.tfArguments); err != nil </span><span class="cov0" title="0">{ 5432 klog.Errorf("plugin %s flagset parse failed, err: %v", tp.Name(), err) 5433 }</span> 5434 } 5435 5436 func (tp *tensorflowPlugin) Name() string <span class="cov8" title="1">{ 5437 return "tensorflow" 5438 }</span> 5439 5440 func (tp *tensorflowPlugin) OnPodCreate(pod *v1.Pod, job *batch.Job) error <span class="cov8" title="1">{ 5441 // No need to generate 
TF_CONFIG for stand-alone tensorflow job 5442 if len(job.Spec.Tasks) == 1 && job.Spec.Tasks[0].Replicas == 1 </span><span class="cov0" title="0">{ 5443 return nil 5444 }</span> 5445 // Generate TF_CONFIG value 5446 <span class="cov8" title="1">spec, err := tp.generateTFClusterSpec(pod, job) 5447 if err != nil </span><span class="cov0" title="0">{ 5448 return err 5449 }</span> 5450 <span class="cov8" title="1">raw, err := json.Marshal(spec) 5451 if err != nil </span><span class="cov0" title="0">{ 5452 return err 5453 }</span> 5454 5455 // Add TF_CONFIG enviroment variables 5456 <span class="cov8" title="1">for i := range pod.Spec.Containers </span><span class="cov8" title="1">{ 5457 pod.Spec.Containers[i].Env = append(pod.Spec.Containers[i].Env, v1.EnvVar{ 5458 Name: TFConfig, 5459 Value: string(raw), 5460 }) 5461 }</span> 5462 <span class="cov8" title="1">return nil</span> 5463 } 5464 5465 func (tp *tensorflowPlugin) OnJobAdd(job *batch.Job) error <span class="cov0" title="0">{ 5466 if job.Status.ControlledResources["plugin-"+tp.Name()] == tp.Name() </span><span class="cov0" title="0">{ 5467 return nil 5468 }</span> 5469 5470 <span class="cov0" title="0">job.Status.ControlledResources["plugin-"+tp.Name()] = tp.Name() 5471 5472 return nil</span> 5473 } 5474 5475 func (tp *tensorflowPlugin) OnJobDelete(job *batch.Job) error <span class="cov0" title="0">{ 5476 if job.Status.ControlledResources["plugin-"+tp.Name()] != tp.Name() </span><span class="cov0" title="0">{ 5477 return nil 5478 }</span> 5479 <span class="cov0" title="0">delete(job.Status.ControlledResources, "plugin-"+tp.Name()) 5480 return nil</span> 5481 } 5482 5483 func (tp *tensorflowPlugin) OnJobUpdate(job *batch.Job) error <span class="cov0" title="0">{ 5484 return nil 5485 }</span> 5486 5487 func (tp *tensorflowPlugin) generateTFClusterSpec(pod *v1.Pod, job *batch.Job) (tfClusterSpec, error) <span class="cov8" title="1">{ 5488 index, err := strconv.Atoi(jobhelpers.GetPodIndexUnderTask(pod)) 5489 if err 
!= nil </span><span class="cov0" title="0">{ 5490 return tfClusterSpec{}, err 5491 }</span> 5492 5493 // Generate tensorflow task info 5494 <span class="cov8" title="1">c := tfClusterSpec{ 5495 Task: taskInfo{ 5496 Type: tp.getTaskType(jobhelpers.GetTaskKey(pod)), 5497 Index: index, 5498 }, 5499 } 5500 5501 // Generate tensorflow cluster info 5502 for _, ts := range job.Spec.Tasks </span><span class="cov8" title="1">{ 5503 hosts := []string{} 5504 for i := 0; i < int(ts.Replicas); i++ </span><span class="cov8" title="1">{ 5505 hosts = append(hosts, fmt.Sprintf("%s:%d", jobhelpers.MakeDomainName(ts, job, i), tp.port)) 5506 }</span> 5507 <span class="cov8" title="1">switch ts.Name </span>{ 5508 case tp.psName:<span class="cov8" title="1"> 5509 c.Cluster.PS = hosts</span> 5510 case tp.workerName:<span class="cov8" title="1"> 5511 c.Cluster.Worker = hosts</span> 5512 case tp.chiefName:<span class="cov8" title="1"> 5513 c.Cluster.Chief = hosts</span> 5514 case tp.evaluatorName:<span class="cov0" title="0"> 5515 c.Cluster.Evaluator = hosts</span> 5516 } 5517 } 5518 <span class="cov8" title="1">return c, nil</span> 5519 } 5520 5521 func (tp *tensorflowPlugin) getTaskType(taskKey string) tfTaskType <span class="cov8" title="1">{ 5522 switch taskKey </span>{ 5523 case tp.chiefName:<span class="cov8" title="1"> 5524 return tfChief</span> 5525 case tp.workerName:<span class="cov8" title="1"> 5526 return tfWorker</span> 5527 case tp.psName:<span class="cov8" title="1"> 5528 return tfPS</span> 5529 case tp.evaluatorName:<span class="cov0" title="0"> 5530 return tfEvaluator</span> 5531 } 5532 <span class="cov0" title="0">return tfTaskType(taskKey)</span> 5533 } 5534 5535 // TfClusterSpec is the spec of a tensorflow cluster 5536 // It will be injected into container's environment variables, and be used by tensorflow framework. 5537 // e.g. 
5538 // { 5539 // "cluster": { 5540 // "worker": ["worker-0:2222", "worker-1:2222"], 5541 // "ps": ["ps-0:2222"] 5542 // }, 5543 // "task": { 5544 // "type": "worker", 5545 // "index": 0 5546 // } 5547 // } 5548 type tfClusterSpec struct { 5549 Cluster clusterInfo `json:"cluster"` 5550 Task taskInfo `json:"task"` 5551 } 5552 5553 type clusterInfo struct { 5554 PS []string `json:"ps,omitempty"` 5555 Worker []string `json:"worker,omitempty"` 5556 Chief []string `json:"chief,omitempty"` 5557 Evaluator []string `json:"evaluator,omitempty"` 5558 } 5559 5560 type tfTaskType string 5561 5562 const ( 5563 tfWorker tfTaskType = "worker" 5564 tfChief tfTaskType = "chief" 5565 tfPS tfTaskType = "ps" 5566 tfEvaluator tfTaskType = "evaluator" 5567 ) 5568 5569 type taskInfo struct { 5570 Type tfTaskType `json:"type"` 5571 Index int `json:"index"` 5572 } 5573 </pre> 5574 5575 <pre class="file" id="file31" style="display: none">/* 5576 Copyright 2019 The Volcano Authors. 5577 5578 Licensed under the Apache License, Version 2.0 (the "License"); 5579 you may not use this file except in compliance with the License. 5580 You may obtain a copy of the License at 5581 5582 http://www.apache.org/licenses/LICENSE-2.0 5583 5584 Unless required by applicable law or agreed to in writing, software 5585 distributed under the License is distributed on an "AS IS" BASIS, 5586 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 5587 See the License for the specific language governing permissions and 5588 limitations under the License. 
5589 */ 5590 5591 package ssh 5592 5593 import ( 5594 "crypto/rand" 5595 "crypto/rsa" 5596 "crypto/x509" 5597 "encoding/pem" 5598 "flag" 5599 "fmt" 5600 5601 "golang.org/x/crypto/ssh" 5602 v1 "k8s.io/api/core/v1" 5603 "k8s.io/klog" 5604 5605 batch "volcano.sh/apis/pkg/apis/batch/v1alpha1" 5606 "volcano.sh/apis/pkg/apis/helpers" 5607 jobhelpers "volcano.sh/volcano/pkg/controllers/job/helpers" 5608 pluginsinterface "volcano.sh/volcano/pkg/controllers/job/plugins/interface" 5609 ) 5610 5611 type sshPlugin struct { 5612 // Arguments given for the plugin 5613 pluginArguments []string 5614 5615 client pluginsinterface.PluginClientset 5616 5617 // flag parse args 5618 sshKeyFilePath string 5619 5620 // private key string 5621 sshPrivateKey string 5622 5623 // public key string 5624 sshPublicKey string 5625 } 5626 5627 // New creates ssh plugin 5628 func New(client pluginsinterface.PluginClientset, arguments []string) pluginsinterface.PluginInterface <span class="cov8" title="1">{ 5629 p := sshPlugin{ 5630 pluginArguments: arguments, 5631 client: client, 5632 sshKeyFilePath: SSHAbsolutePath, 5633 } 5634 5635 p.addFlags() 5636 5637 return &p 5638 }</span> 5639 5640 func (sp *sshPlugin) Name() string <span class="cov8" title="1">{ 5641 return "ssh" 5642 }</span> 5643 5644 func (sp *sshPlugin) OnPodCreate(pod *v1.Pod, job *batch.Job) error <span class="cov0" title="0">{ 5645 sp.mountRsaKey(pod, job) 5646 5647 return nil 5648 }</span> 5649 5650 func (sp *sshPlugin) OnJobAdd(job *batch.Job) error <span class="cov0" title="0">{ 5651 if job.Status.ControlledResources["plugin-"+sp.Name()] == sp.Name() </span><span class="cov0" title="0">{ 5652 return nil 5653 }</span> 5654 5655 <span class="cov0" title="0">var data map[string][]byte 5656 var err error 5657 if len(sp.sshPrivateKey) > 0 </span><span class="cov0" title="0">{ 5658 data, err = withUserProvidedRsaKey(job, sp.sshPrivateKey, sp.sshPublicKey) 5659 }</span> else<span class="cov0" title="0"> { 5660 data, err = 
generateRsaKey(job) 5661 }</span> 5662 <span class="cov0" title="0">if err != nil </span><span class="cov0" title="0">{ 5663 return err 5664 }</span> 5665 5666 <span class="cov0" title="0">if err := helpers.CreateOrUpdateSecret(job, sp.client.KubeClients, data, sp.secretName(job)); err != nil </span><span class="cov0" title="0">{ 5667 return fmt.Errorf("create secret for job <%s/%s> with ssh plugin failed for %v", 5668 job.Namespace, job.Name, err) 5669 }</span> 5670 5671 <span class="cov0" title="0">job.Status.ControlledResources["plugin-"+sp.Name()] = sp.Name() 5672 5673 return nil</span> 5674 } 5675 5676 func (sp *sshPlugin) OnJobDelete(job *batch.Job) error <span class="cov0" title="0">{ 5677 if job.Status.ControlledResources["plugin-"+sp.Name()] != sp.Name() </span><span class="cov0" title="0">{ 5678 return nil 5679 }</span> 5680 <span class="cov0" title="0">if err := helpers.DeleteSecret(job, sp.client.KubeClients, sp.secretName(job)); err != nil </span><span class="cov0" title="0">{ 5681 return err 5682 }</span> 5683 <span class="cov0" title="0">delete(job.Status.ControlledResources, "plugin-"+sp.Name()) 5684 5685 return nil</span> 5686 } 5687 5688 // TODO: currently a container using a Secret as a subPath volume mount will not receive Secret updates. 5689 // we may not update the job secret due to the above reason now. 
5690 // related issue: https://github.com/volcano-sh/volcano/issues/1420 5691 func (sp *sshPlugin) OnJobUpdate(job *batch.Job) error <span class="cov0" title="0">{ 5692 //data, err := generateRsaKey(job) 5693 //if err != nil { 5694 // return err 5695 //} 5696 // 5697 //if err := helpers.CreateOrUpdateSecret(job, sp.client.KubeClients, data, sp.secretName(job)); err != nil { 5698 // return fmt.Errorf("update secret for job <%s/%s> with ssh plugin failed for %v", 5699 // job.Namespace, job.Name, err) 5700 //} 5701 5702 return nil 5703 }</span> 5704 5705 func (sp *sshPlugin) mountRsaKey(pod *v1.Pod, job *batch.Job) <span class="cov0" title="0">{ 5706 secretName := sp.secretName(job) 5707 5708 sshVolume := v1.Volume{ 5709 Name: secretName, 5710 } 5711 5712 var mode int32 = 0600 5713 sshVolume.Secret = &v1.SecretVolumeSource{ 5714 SecretName: secretName, 5715 Items: []v1.KeyToPath{ 5716 { 5717 Key: SSHPrivateKey, 5718 Path: SSHRelativePath + "/" + SSHPrivateKey, 5719 }, 5720 { 5721 Key: SSHPublicKey, 5722 Path: SSHRelativePath + "/" + SSHPublicKey, 5723 }, 5724 { 5725 Key: SSHAuthorizedKeys, 5726 Path: SSHRelativePath + "/" + SSHAuthorizedKeys, 5727 }, 5728 { 5729 Key: SSHConfig, 5730 Path: SSHRelativePath + "/" + SSHConfig, 5731 }, 5732 }, 5733 DefaultMode: &mode, 5734 } 5735 5736 if sp.sshKeyFilePath != SSHAbsolutePath </span><span class="cov0" title="0">{ 5737 var noRootMode int32 = 0600 5738 sshVolume.Secret.DefaultMode = &noRootMode 5739 }</span> 5740 5741 <span class="cov0" title="0">pod.Spec.Volumes = append(pod.Spec.Volumes, sshVolume) 5742 5743 for i, c := range pod.Spec.Containers </span><span class="cov0" title="0">{ 5744 vm := v1.VolumeMount{ 5745 MountPath: sp.sshKeyFilePath, 5746 SubPath: SSHRelativePath, 5747 Name: secretName, 5748 } 5749 5750 pod.Spec.Containers[i].VolumeMounts = append(c.VolumeMounts, vm) 5751 }</span> 5752 <span class="cov0" title="0">for i, c := range pod.Spec.InitContainers </span><span class="cov0" title="0">{ 5753 vm := 
v1.VolumeMount{ 5754 MountPath: sp.sshKeyFilePath, 5755 SubPath: SSHRelativePath, 5756 Name: secretName, 5757 } 5758 5759 pod.Spec.InitContainers[i].VolumeMounts = append(c.VolumeMounts, vm) 5760 }</span> 5761 } 5762 5763 func generateRsaKey(job *batch.Job) (map[string][]byte, error) <span class="cov0" title="0">{ 5764 bitSize := 2048 5765 5766 privateKey, err := rsa.GenerateKey(rand.Reader, bitSize) 5767 if err != nil </span><span class="cov0" title="0">{ 5768 klog.Errorf("rsa generateKey err: %v", err) 5769 return nil, err 5770 }</span> 5771 5772 // id_rsa 5773 <span class="cov0" title="0">privBlock := pem.Block{ 5774 Type: "RSA PRIVATE KEY", 5775 Bytes: x509.MarshalPKCS1PrivateKey(privateKey), 5776 } 5777 privateKeyBytes := pem.EncodeToMemory(&privBlock) 5778 5779 // id_rsa.pub 5780 publicRsaKey, err := ssh.NewPublicKey(&privateKey.PublicKey) 5781 if err != nil </span><span class="cov0" title="0">{ 5782 klog.Errorf("ssh newPublicKey err: %v", err) 5783 return nil, err 5784 }</span> 5785 <span class="cov0" title="0">publicKeyBytes := ssh.MarshalAuthorizedKey(publicRsaKey) 5786 5787 data := make(map[string][]byte) 5788 data[SSHPrivateKey] = privateKeyBytes 5789 data[SSHPublicKey] = publicKeyBytes 5790 data[SSHAuthorizedKeys] = publicKeyBytes 5791 data[SSHConfig] = []byte(generateSSHConfig(job)) 5792 5793 return data, nil</span> 5794 } 5795 5796 func withUserProvidedRsaKey(job *batch.Job, sshPrivateKey string, sshPublicKey string) (map[string][]byte, error) <span class="cov0" title="0">{ 5797 data := make(map[string][]byte) 5798 data[SSHPrivateKey] = []byte(sshPrivateKey) 5799 data[SSHPublicKey] = []byte(sshPublicKey) 5800 data[SSHAuthorizedKeys] = []byte(sshPublicKey) 5801 data[SSHConfig] = []byte(generateSSHConfig(job)) 5802 5803 return data, nil 5804 }</span> 5805 5806 func (sp *sshPlugin) secretName(job *batch.Job) string <span class="cov0" title="0">{ 5807 return fmt.Sprintf("%s-%s", job.Name, sp.Name()) 5808 }</span> 5809 5810 func (sp *sshPlugin) addFlags() 
<span class="cov8" title="1">{ 5811 flagSet := flag.NewFlagSet(sp.Name(), flag.ContinueOnError) 5812 flagSet.StringVar(&sp.sshKeyFilePath, "ssh-key-file-path", sp.sshKeyFilePath, "The path used to store "+ 5813 "ssh private and public keys, it is `/root/.ssh` by default.") 5814 flagSet.StringVar(&sp.sshPrivateKey, "ssh-private-key", sp.sshPrivateKey, "The input string of the private key") 5815 flagSet.StringVar(&sp.sshPublicKey, "ssh-public-key", sp.sshPublicKey, "The input string of the public key") 5816 5817 if err := flagSet.Parse(sp.pluginArguments); err != nil </span><span class="cov0" title="0">{ 5818 klog.Errorf("plugin %s flagset parse failed, err: %v", sp.Name(), err) 5819 }</span> 5820 } 5821 5822 func generateSSHConfig(job *batch.Job) string <span class="cov0" title="0">{ 5823 config := "StrictHostKeyChecking no\nUserKnownHostsFile /dev/null\n" 5824 5825 for _, ts := range job.Spec.Tasks </span><span class="cov0" title="0">{ 5826 for i := 0; i < int(ts.Replicas); i++ </span><span class="cov0" title="0">{ 5827 hostName := ts.Template.Spec.Hostname 5828 subdomain := ts.Template.Spec.Subdomain 5829 if len(hostName) == 0 </span><span class="cov0" title="0">{ 5830 hostName = jobhelpers.MakePodName(job.Name, ts.Name, i) 5831 }</span> 5832 <span class="cov0" title="0">if len(subdomain) == 0 </span><span class="cov0" title="0">{ 5833 subdomain = job.Name 5834 }</span> 5835 5836 <span class="cov0" title="0">config += "Host " + hostName + "\n" 5837 config += " HostName " + hostName + "." + subdomain + "\n" 5838 if len(ts.Template.Spec.Hostname) != 0 </span><span class="cov0" title="0">{ 5839 break</span> 5840 } 5841 } 5842 } 5843 5844 <span class="cov0" title="0">return config</span> 5845 } 5846 </pre> 5847 5848 <pre class="file" id="file32" style="display: none">/* 5849 Copyright 2019 The Volcano Authors. 5850 5851 Licensed under the Apache License, Version 2.0 (the "License"); 5852 you may not use this file except in compliance with the License. 
5853 You may obtain a copy of the License at 5854 5855 http://www.apache.org/licenses/LICENSE-2.0 5856 5857 Unless required by applicable law or agreed to in writing, software 5858 distributed under the License is distributed on an "AS IS" BASIS, 5859 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 5860 See the License for the specific language governing permissions and 5861 limitations under the License. 5862 */ 5863 5864 package podgroup 5865 5866 import ( 5867 "k8s.io/apimachinery/pkg/util/wait" 5868 coreinformers "k8s.io/client-go/informers/core/v1" 5869 "k8s.io/client-go/kubernetes" 5870 corelisters "k8s.io/client-go/listers/core/v1" 5871 "k8s.io/client-go/tools/cache" 5872 "k8s.io/client-go/util/workqueue" 5873 "k8s.io/klog" 5874 5875 scheduling "volcano.sh/apis/pkg/apis/scheduling/v1beta1" 5876 vcclientset "volcano.sh/apis/pkg/client/clientset/versioned" 5877 informerfactory "volcano.sh/apis/pkg/client/informers/externalversions" 5878 schedulinginformer "volcano.sh/apis/pkg/client/informers/externalversions/scheduling/v1beta1" 5879 schedulinglister "volcano.sh/apis/pkg/client/listers/scheduling/v1beta1" 5880 "volcano.sh/volcano/pkg/controllers/framework" 5881 ) 5882 5883 func init() <span class="cov8" title="1">{ 5884 framework.RegisterController(&pgcontroller{}) 5885 }</span> 5886 5887 // pgcontroller the Podgroup pgcontroller type. 
5888 type pgcontroller struct { 5889 kubeClient kubernetes.Interface 5890 vcClient vcclientset.Interface 5891 5892 podInformer coreinformers.PodInformer 5893 pgInformer schedulinginformer.PodGroupInformer 5894 5895 // A store of pods 5896 podLister corelisters.PodLister 5897 podSynced func() bool 5898 5899 // A store of podgroups 5900 pgLister schedulinglister.PodGroupLister 5901 pgSynced func() bool 5902 5903 queue workqueue.RateLimitingInterface 5904 5905 schedulerNames []string 5906 } 5907 5908 func (pg *pgcontroller) Name() string <span class="cov8" title="1">{ 5909 return "pg-controller" 5910 }</span> 5911 5912 // Initialize create new Podgroup Controller. 5913 func (pg *pgcontroller) Initialize(opt *framework.ControllerOption) error <span class="cov8" title="1">{ 5914 pg.kubeClient = opt.KubeClient 5915 pg.vcClient = opt.VolcanoClient 5916 5917 pg.queue = workqueue.NewRateLimitingQueue(workqueue.DefaultControllerRateLimiter()) 5918 5919 pg.schedulerNames = make([]string, len(opt.SchedulerNames)) 5920 copy(pg.schedulerNames, opt.SchedulerNames) 5921 5922 pg.podInformer = opt.SharedInformerFactory.Core().V1().Pods() 5923 pg.podLister = pg.podInformer.Lister() 5924 pg.podSynced = pg.podInformer.Informer().HasSynced 5925 pg.podInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ 5926 AddFunc: pg.addPod, 5927 }) 5928 5929 pg.pgInformer = informerfactory.NewSharedInformerFactory(pg.vcClient, 0).Scheduling().V1beta1().PodGroups() 5930 pg.pgLister = pg.pgInformer.Lister() 5931 pg.pgSynced = pg.pgInformer.Informer().HasSynced 5932 5933 return nil 5934 }</span> 5935 5936 // Run start NewPodgroupController. 5937 func (pg *pgcontroller) Run(stopCh <-chan struct{}) <span class="cov0" title="0">{ 5938 go pg.podInformer.Informer().Run(stopCh) 5939 go pg.pgInformer.Informer().Run(stopCh) 5940 5941 cache.WaitForCacheSync(stopCh, pg.podSynced, pg.pgSynced) 5942 5943 go wait.Until(pg.worker, 0, stopCh) 5944 5945 klog.Infof("PodgroupController is running ...... 
") 5946 }</span> 5947 5948 func (pg *pgcontroller) worker() <span class="cov0" title="0">{ 5949 for pg.processNextReq() </span>{<span class="cov0" title="0"> 5950 }</span> 5951 } 5952 5953 func (pg *pgcontroller) processNextReq() bool <span class="cov0" title="0">{ 5954 obj, shutdown := pg.queue.Get() 5955 if shutdown </span><span class="cov0" title="0">{ 5956 klog.Errorf("Fail to pop item from queue") 5957 return false 5958 }</span> 5959 5960 <span class="cov0" title="0">req := obj.(podRequest) 5961 defer pg.queue.Done(req) 5962 5963 pod, err := pg.podLister.Pods(req.podNamespace).Get(req.podName) 5964 if err != nil </span><span class="cov0" title="0">{ 5965 klog.Errorf("Failed to get pod by <%v> from cache: %v", req, err) 5966 return true 5967 }</span> 5968 5969 <span class="cov0" title="0">if !contains(pg.schedulerNames, pod.Spec.SchedulerName) </span><span class="cov0" title="0">{ 5970 klog.V(5).Infof("pod %v/%v field SchedulerName is not matched", pod.Namespace, pod.Name) 5971 return true 5972 }</span> 5973 5974 <span class="cov0" title="0">if pod.Annotations != nil && pod.Annotations[scheduling.KubeGroupNameAnnotationKey] != "" </span><span class="cov0" title="0">{ 5975 klog.V(5).Infof("pod %v/%v has created podgroup", pod.Namespace, pod.Name) 5976 return true 5977 }</span> 5978 5979 // normal pod use volcano 5980 <span class="cov0" title="0">if err := pg.createNormalPodPGIfNotExist(pod); err != nil </span><span class="cov0" title="0">{ 5981 klog.Errorf("Failed to handle Pod <%s/%s>: %v", pod.Namespace, pod.Name, err) 5982 pg.queue.AddRateLimited(req) 5983 return true 5984 }</span> 5985 5986 // If no error, forget it. 
5987 <span class="cov0" title="0">pg.queue.Forget(req) 5988 5989 return true</span> 5990 } 5991 5992 func contains(slice []string, element string) bool <span class="cov0" title="0">{ 5993 for _, item := range slice </span><span class="cov0" title="0">{ 5994 if item == element </span><span class="cov0" title="0">{ 5995 return true 5996 }</span> 5997 } 5998 <span class="cov0" title="0">return false</span> 5999 } 6000 </pre> 6001 6002 <pre class="file" id="file33" style="display: none">/* 6003 Copyright 2019 The Volcano Authors. 6004 6005 Licensed under the Apache License, Version 2.0 (the "License"); 6006 you may not use this file except in compliance with the License. 6007 You may obtain a copy of the License at 6008 6009 http://www.apache.org/licenses/LICENSE-2.0 6010 6011 Unless required by applicable law or agreed to in writing, software 6012 distributed under the License is distributed on an "AS IS" BASIS, 6013 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 6014 See the License for the specific language governing permissions and 6015 limitations under the License. 
6016 */ 6017 6018 package podgroup 6019 6020 import ( 6021 "context" 6022 6023 v1 "k8s.io/api/core/v1" 6024 apierrors "k8s.io/apimachinery/pkg/api/errors" 6025 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 6026 "k8s.io/apimachinery/pkg/runtime/schema" 6027 "k8s.io/klog" 6028 6029 "volcano.sh/apis/pkg/apis/helpers" 6030 scheduling "volcano.sh/apis/pkg/apis/scheduling/v1beta1" 6031 ) 6032 6033 type podRequest struct { 6034 podName string 6035 podNamespace string 6036 } 6037 6038 func (pg *pgcontroller) addPod(obj interface{}) <span class="cov8" title="1">{ 6039 pod, ok := obj.(*v1.Pod) 6040 if !ok </span><span class="cov0" title="0">{ 6041 klog.Errorf("Failed to convert %v to v1.Pod", obj) 6042 return 6043 }</span> 6044 6045 <span class="cov8" title="1">req := podRequest{ 6046 podName: pod.Name, 6047 podNamespace: pod.Namespace, 6048 } 6049 6050 pg.queue.Add(req)</span> 6051 } 6052 6053 func (pg *pgcontroller) updatePodAnnotations(pod *v1.Pod, pgName string) error <span class="cov8" title="1">{ 6054 if pod.Annotations == nil </span><span class="cov8" title="1">{ 6055 pod.Annotations = make(map[string]string) 6056 }</span> 6057 <span class="cov8" title="1">if pod.Annotations[scheduling.KubeGroupNameAnnotationKey] == "" </span><span class="cov8" title="1">{ 6058 pod.Annotations[scheduling.KubeGroupNameAnnotationKey] = pgName 6059 }</span> else<span class="cov0" title="0"> { 6060 if pod.Annotations[scheduling.KubeGroupNameAnnotationKey] != pgName </span><span class="cov0" title="0">{ 6061 klog.Errorf("normal pod %s/%s annotations %s value is not %s, but %s", pod.Namespace, pod.Name, 6062 scheduling.KubeGroupNameAnnotationKey, pgName, pod.Annotations[scheduling.KubeGroupNameAnnotationKey]) 6063 }</span> 6064 <span class="cov0" title="0">return nil</span> 6065 } 6066 6067 <span class="cov8" title="1">if _, err := pg.kubeClient.CoreV1().Pods(pod.Namespace).Update(context.TODO(), pod, metav1.UpdateOptions{}); err != nil </span><span class="cov0" title="0">{ 6068 
klog.Errorf("Failed to update pod <%s/%s>: %v", pod.Namespace, pod.Name, err) 6069 return err 6070 }</span> 6071 6072 <span class="cov8" title="1">return nil</span> 6073 } 6074 6075 func (pg *pgcontroller) createNormalPodPGIfNotExist(pod *v1.Pod) error <span class="cov8" title="1">{ 6076 pgName := helpers.GeneratePodgroupName(pod) 6077 6078 if _, err := pg.pgLister.PodGroups(pod.Namespace).Get(pgName); err != nil </span><span class="cov8" title="1">{ 6079 if !apierrors.IsNotFound(err) </span><span class="cov0" title="0">{ 6080 klog.Errorf("Failed to get normal PodGroup for Pod <%s/%s>: %v", 6081 pod.Namespace, pod.Name, err) 6082 return err 6083 }</span> 6084 6085 <span class="cov8" title="1">obj := &scheduling.PodGroup{ 6086 ObjectMeta: metav1.ObjectMeta{ 6087 Namespace: pod.Namespace, 6088 Name: pgName, 6089 OwnerReferences: newPGOwnerReferences(pod), 6090 Annotations: map[string]string{}, 6091 Labels: map[string]string{}, 6092 }, 6093 Spec: scheduling.PodGroupSpec{ 6094 MinMember: 1, 6095 PriorityClassName: pod.Spec.PriorityClassName, 6096 MinResources: calcPGMinResources(pod), 6097 }, 6098 } 6099 if queueName, ok := pod.Annotations[scheduling.QueueNameAnnotationKey]; ok </span><span class="cov0" title="0">{ 6100 obj.Spec.Queue = queueName 6101 }</span> 6102 6103 <span class="cov8" title="1">if value, ok := pod.Annotations[scheduling.PodPreemptable]; ok </span><span class="cov0" title="0">{ 6104 obj.Annotations[scheduling.PodPreemptable] = value 6105 }</span> 6106 <span class="cov8" title="1">if value, ok := pod.Annotations[scheduling.RevocableZone]; ok </span><span class="cov0" title="0">{ 6107 obj.Annotations[scheduling.RevocableZone] = value 6108 }</span> 6109 <span class="cov8" title="1">if value, ok := pod.Labels[scheduling.PodPreemptable]; ok </span><span class="cov0" title="0">{ 6110 obj.Labels[scheduling.PodPreemptable] = value 6111 }</span> 6112 6113 <span class="cov8" title="1">if value, found := pod.Annotations[scheduling.JDBMinAvailable]; found 
</span><span class="cov0" title="0">{ 6114 obj.Annotations[scheduling.JDBMinAvailable] = value 6115 }</span> else<span class="cov8" title="1"> if value, found := pod.Annotations[scheduling.JDBMaxUnavailable]; found </span><span class="cov0" title="0">{ 6116 obj.Annotations[scheduling.JDBMaxUnavailable] = value 6117 }</span> 6118 6119 <span class="cov8" title="1">if _, err := pg.vcClient.SchedulingV1beta1().PodGroups(pod.Namespace).Create(context.TODO(), obj, metav1.CreateOptions{}); err != nil </span><span class="cov0" title="0">{ 6120 klog.Errorf("Failed to create normal PodGroup for Pod <%s/%s>: %v", 6121 pod.Namespace, pod.Name, err) 6122 return err 6123 }</span> 6124 } 6125 6126 <span class="cov8" title="1">return pg.updatePodAnnotations(pod, pgName)</span> 6127 } 6128 6129 func newPGOwnerReferences(pod *v1.Pod) []metav1.OwnerReference <span class="cov8" title="1">{ 6130 if len(pod.OwnerReferences) != 0 </span><span class="cov8" title="1">{ 6131 for _, ownerReference := range pod.OwnerReferences </span><span class="cov8" title="1">{ 6132 if ownerReference.Controller != nil && *ownerReference.Controller </span><span class="cov8" title="1">{ 6133 return pod.OwnerReferences 6134 }</span> 6135 } 6136 } 6137 6138 <span class="cov8" title="1">gvk := schema.GroupVersionKind{ 6139 Group: v1.SchemeGroupVersion.Group, 6140 Version: v1.SchemeGroupVersion.Version, 6141 Kind: "Pod", 6142 } 6143 ref := metav1.NewControllerRef(pod, gvk) 6144 return []metav1.OwnerReference{*ref}</span> 6145 } 6146 6147 // addResourceList add list resource quantity 6148 func addResourceList(list, req, limit v1.ResourceList) <span class="cov0" title="0">{ 6149 for name, quantity := range req </span><span class="cov0" title="0">{ 6150 if value, ok := list[name]; !ok </span><span class="cov0" title="0">{ 6151 list[name] = quantity.DeepCopy() 6152 }</span> else<span class="cov0" title="0"> { 6153 value.Add(quantity) 6154 list[name] = value 6155 }</span> 6156 } 6157 6158 <span class="cov0" 
title="0">if req != nil </span><span class="cov0" title="0">{ 6159 return 6160 }</span> 6161 6162 // If Requests is omitted for a container, 6163 // it defaults to Limits if that is explicitly specified. 6164 <span class="cov0" title="0">for name, quantity := range limit </span><span class="cov0" title="0">{ 6165 if value, ok := list[name]; !ok </span><span class="cov0" title="0">{ 6166 list[name] = quantity.DeepCopy() 6167 }</span> else<span class="cov0" title="0"> { 6168 value.Add(quantity) 6169 list[name] = value 6170 }</span> 6171 } 6172 } 6173 6174 // calcPGMinResources calculate podgroup minimum resource 6175 func calcPGMinResources(pod *v1.Pod) *v1.ResourceList <span class="cov8" title="1">{ 6176 pgMinRes := v1.ResourceList{} 6177 6178 for _, c := range pod.Spec.Containers </span><span class="cov0" title="0">{ 6179 addResourceList(pgMinRes, c.Resources.Requests, c.Resources.Limits) 6180 }</span> 6181 6182 <span class="cov8" title="1">return &pgMinRes</span> 6183 } 6184 </pre> 6185 6186 <pre class="file" id="file34" style="display: none">/* 6187 Copyright 2019 The Volcano Authors. 6188 6189 Licensed under the Apache License, Version 2.0 (the "License"); 6190 you may not use this file except in compliance with the License. 6191 You may obtain a copy of the License at 6192 6193 http://www.apache.org/licenses/LICENSE-2.0 6194 6195 Unless required by applicable law or agreed to in writing, software 6196 distributed under the License is distributed on an "AS IS" BASIS, 6197 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 6198 See the License for the specific language governing permissions and 6199 limitations under the License. 
6200 */ 6201 6202 package queue 6203 6204 import ( 6205 "context" 6206 "fmt" 6207 "sync" 6208 "time" 6209 6210 v1 "k8s.io/api/core/v1" 6211 apierrors "k8s.io/apimachinery/pkg/api/errors" 6212 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 6213 utilruntime "k8s.io/apimachinery/pkg/util/runtime" 6214 "k8s.io/apimachinery/pkg/util/wait" 6215 "k8s.io/client-go/kubernetes" 6216 corev1 "k8s.io/client-go/kubernetes/typed/core/v1" 6217 "k8s.io/client-go/tools/cache" 6218 "k8s.io/client-go/tools/record" 6219 "k8s.io/client-go/util/workqueue" 6220 "k8s.io/klog" 6221 6222 busv1alpha1 "volcano.sh/apis/pkg/apis/bus/v1alpha1" 6223 vcclientset "volcano.sh/apis/pkg/client/clientset/versioned" 6224 versionedscheme "volcano.sh/apis/pkg/client/clientset/versioned/scheme" 6225 informerfactory "volcano.sh/apis/pkg/client/informers/externalversions" 6226 busv1alpha1informer "volcano.sh/apis/pkg/client/informers/externalversions/bus/v1alpha1" 6227 schedulinginformer "volcano.sh/apis/pkg/client/informers/externalversions/scheduling/v1beta1" 6228 busv1alpha1lister "volcano.sh/apis/pkg/client/listers/bus/v1alpha1" 6229 schedulinglister "volcano.sh/apis/pkg/client/listers/scheduling/v1beta1" 6230 "volcano.sh/volcano/pkg/controllers/apis" 6231 "volcano.sh/volcano/pkg/controllers/framework" 6232 queuestate "volcano.sh/volcano/pkg/controllers/queue/state" 6233 ) 6234 6235 func init() <span class="cov8" title="1">{ 6236 framework.RegisterController(&queuecontroller{}) 6237 }</span> 6238 6239 // queuecontroller manages queue status. 
6240 type queuecontroller struct { 6241 kubeClient kubernetes.Interface 6242 vcClient vcclientset.Interface 6243 6244 // informer 6245 queueInformer schedulinginformer.QueueInformer 6246 pgInformer schedulinginformer.PodGroupInformer 6247 6248 // queueLister 6249 queueLister schedulinglister.QueueLister 6250 queueSynced cache.InformerSynced 6251 6252 // podGroup lister 6253 pgLister schedulinglister.PodGroupLister 6254 pgSynced cache.InformerSynced 6255 6256 cmdInformer busv1alpha1informer.CommandInformer 6257 cmdLister busv1alpha1lister.CommandLister 6258 cmdSynced cache.InformerSynced 6259 6260 // queues that need to be updated. 6261 queue workqueue.RateLimitingInterface 6262 commandQueue workqueue.RateLimitingInterface 6263 6264 pgMutex sync.RWMutex 6265 // queue name -> podgroup namespace/name 6266 podGroups map[string]map[string]struct{} 6267 6268 syncHandler func(req *apis.Request) error 6269 syncCommandHandler func(cmd *busv1alpha1.Command) error 6270 6271 enqueueQueue func(req *apis.Request) 6272 6273 recorder record.EventRecorder 6274 maxRequeueNum int 6275 } 6276 6277 func (c *queuecontroller) Name() string <span class="cov8" title="1">{ 6278 return "queue-controller" 6279 }</span> 6280 6281 // NewQueueController creates a QueueController. 
6282 func (c *queuecontroller) Initialize(opt *framework.ControllerOption) error <span class="cov8" title="1">{ 6283 c.vcClient = opt.VolcanoClient 6284 c.kubeClient = opt.KubeClient 6285 6286 factory := informerfactory.NewSharedInformerFactory(c.vcClient, 0) 6287 queueInformer := factory.Scheduling().V1beta1().Queues() 6288 pgInformer := factory.Scheduling().V1beta1().PodGroups() 6289 6290 eventBroadcaster := record.NewBroadcaster() 6291 eventBroadcaster.StartLogging(klog.Infof) 6292 eventBroadcaster.StartRecordingToSink(&corev1.EventSinkImpl{Interface: c.kubeClient.CoreV1().Events("")}) 6293 6294 c.queueInformer = queueInformer 6295 c.pgInformer = pgInformer 6296 c.queueLister = queueInformer.Lister() 6297 c.queueSynced = queueInformer.Informer().HasSynced 6298 c.pgLister = pgInformer.Lister() 6299 c.pgSynced = pgInformer.Informer().HasSynced 6300 c.queue = workqueue.NewRateLimitingQueue(workqueue.DefaultControllerRateLimiter()) 6301 c.commandQueue = workqueue.NewRateLimitingQueue(workqueue.DefaultControllerRateLimiter()) 6302 c.podGroups = make(map[string]map[string]struct{}) 6303 c.recorder = eventBroadcaster.NewRecorder(versionedscheme.Scheme, v1.EventSource{Component: "vc-controller-manager"}) 6304 c.maxRequeueNum = opt.MaxRequeueNum 6305 if c.maxRequeueNum < 0 </span><span class="cov0" title="0">{ 6306 c.maxRequeueNum = -1 6307 }</span> 6308 6309 <span class="cov8" title="1">queueInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ 6310 AddFunc: c.addQueue, 6311 UpdateFunc: c.updateQueue, 6312 DeleteFunc: c.deleteQueue, 6313 }) 6314 6315 pgInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ 6316 AddFunc: c.addPodGroup, 6317 UpdateFunc: c.updatePodGroup, 6318 DeleteFunc: c.deletePodGroup, 6319 }) 6320 6321 c.cmdInformer = informerfactory.NewSharedInformerFactory(c.vcClient, 0).Bus().V1alpha1().Commands() 6322 c.cmdInformer.Informer().AddEventHandler(cache.FilteringResourceEventHandler{ 6323 FilterFunc: func(obj interface{}) 
bool </span><span class="cov0" title="0">{ 6324 switch v := obj.(type) </span>{ 6325 case *busv1alpha1.Command:<span class="cov0" title="0"> 6326 return IsQueueReference(v.TargetObject)</span> 6327 default:<span class="cov0" title="0"> 6328 return false</span> 6329 } 6330 }, 6331 Handler: cache.ResourceEventHandlerFuncs{ 6332 AddFunc: c.addCommand, 6333 }, 6334 }) 6335 <span class="cov8" title="1">c.cmdLister = c.cmdInformer.Lister() 6336 c.cmdSynced = c.cmdInformer.Informer().HasSynced 6337 6338 queuestate.SyncQueue = c.syncQueue 6339 queuestate.OpenQueue = c.openQueue 6340 queuestate.CloseQueue = c.closeQueue 6341 6342 c.syncHandler = c.handleQueue 6343 c.syncCommandHandler = c.handleCommand 6344 6345 c.enqueueQueue = c.enqueue 6346 6347 return nil</span> 6348 } 6349 6350 // Run starts QueueController. 6351 func (c *queuecontroller) Run(stopCh <-chan struct{}) <span class="cov0" title="0">{ 6352 defer utilruntime.HandleCrash() 6353 defer c.queue.ShutDown() 6354 defer c.commandQueue.ShutDown() 6355 6356 klog.Infof("Starting queue controller.") 6357 defer klog.Infof("Shutting down queue controller.") 6358 6359 go c.queueInformer.Informer().Run(stopCh) 6360 go c.pgInformer.Informer().Run(stopCh) 6361 go c.cmdInformer.Informer().Run(stopCh) 6362 6363 if !cache.WaitForCacheSync(stopCh, c.queueSynced, c.pgSynced, c.cmdSynced) </span><span class="cov0" title="0">{ 6364 klog.Errorf("unable to sync caches for queue controller.") 6365 return 6366 }</span> 6367 6368 <span class="cov0" title="0">go wait.Until(c.worker, 0, stopCh) 6369 go wait.Until(c.commandWorker, 0, stopCh) 6370 6371 <-stopCh</span> 6372 } 6373 6374 // worker runs a worker thread that just dequeues items, processes them, and 6375 // marks them done. You may run as many of these in parallel as you wish; the 6376 // workqueue guarantees that they will not end up processing the same `queue` 6377 // at the same time. 
6378 func (c *queuecontroller) worker() <span class="cov0" title="0">{ 6379 for c.processNextWorkItem() </span>{<span class="cov0" title="0"> 6380 }</span> 6381 } 6382 6383 func (c *queuecontroller) processNextWorkItem() bool <span class="cov8" title="1">{ 6384 obj, shutdown := c.queue.Get() 6385 if shutdown </span><span class="cov0" title="0">{ 6386 return false 6387 }</span> 6388 <span class="cov8" title="1">defer c.queue.Done(obj) 6389 6390 req, ok := obj.(*apis.Request) 6391 if !ok </span><span class="cov8" title="1">{ 6392 klog.Errorf("%v is not a valid queue request struct.", obj) 6393 return true 6394 }</span> 6395 6396 <span class="cov0" title="0">err := c.syncHandler(req) 6397 c.handleQueueErr(err, obj) 6398 6399 return true</span> 6400 } 6401 6402 func (c *queuecontroller) handleQueue(req *apis.Request) error <span class="cov0" title="0">{ 6403 startTime := time.Now() 6404 defer func() </span><span class="cov0" title="0">{ 6405 klog.V(4).Infof("Finished syncing queue %s (%v).", req.QueueName, time.Since(startTime)) 6406 }</span>() 6407 6408 <span class="cov0" title="0">queue, err := c.queueLister.Get(req.QueueName) 6409 if err != nil </span><span class="cov0" title="0">{ 6410 if apierrors.IsNotFound(err) </span><span class="cov0" title="0">{ 6411 klog.V(4).Infof("Queue %s has been deleted.", req.QueueName) 6412 return nil 6413 }</span> 6414 6415 <span class="cov0" title="0">return fmt.Errorf("get queue %s failed for %v", req.QueueName, err)</span> 6416 } 6417 6418 <span class="cov0" title="0">queueState := queuestate.NewState(queue) 6419 if queueState == nil </span><span class="cov0" title="0">{ 6420 return fmt.Errorf("queue %s state %s is invalid", queue.Name, queue.Status.State) 6421 }</span> 6422 6423 <span class="cov0" title="0">klog.V(4).Infof("Begin execute %s action for queue %s, current status %s", req.Action, req.QueueName, queue.Status.State) 6424 if err := queueState.Execute(req.Action); err != nil </span><span class="cov0" title="0">{ 6425 
return fmt.Errorf("sync queue %s failed for %v, event is %v, action is %s", 6426 req.QueueName, err, req.Event, req.Action) 6427 }</span> 6428 6429 <span class="cov0" title="0">return nil</span> 6430 } 6431 6432 func (c *queuecontroller) handleQueueErr(err error, obj interface{}) <span class="cov0" title="0">{ 6433 if err == nil </span><span class="cov0" title="0">{ 6434 c.queue.Forget(obj) 6435 return 6436 }</span> 6437 6438 <span class="cov0" title="0">if c.maxRequeueNum == -1 || c.queue.NumRequeues(obj) < c.maxRequeueNum </span><span class="cov0" title="0">{ 6439 klog.V(4).Infof("Error syncing queue request %v for %v.", obj, err) 6440 c.queue.AddRateLimited(obj) 6441 return 6442 }</span> 6443 6444 <span class="cov0" title="0">req, _ := obj.(*apis.Request) 6445 c.recordEventsForQueue(req.QueueName, v1.EventTypeWarning, string(req.Action), 6446 fmt.Sprintf("%v queue failed for %v", req.Action, err)) 6447 klog.V(2).Infof("Dropping queue request %v out of the queue for %v.", obj, err) 6448 c.queue.Forget(obj)</span> 6449 } 6450 6451 func (c *queuecontroller) commandWorker() <span class="cov0" title="0">{ 6452 for c.processNextCommand() </span>{<span class="cov0" title="0"> 6453 }</span> 6454 } 6455 6456 func (c *queuecontroller) processNextCommand() bool <span class="cov0" title="0">{ 6457 obj, shutdown := c.commandQueue.Get() 6458 if shutdown </span><span class="cov0" title="0">{ 6459 return false 6460 }</span> 6461 <span class="cov0" title="0">defer c.commandQueue.Done(obj) 6462 6463 cmd, ok := obj.(*busv1alpha1.Command) 6464 if !ok </span><span class="cov0" title="0">{ 6465 klog.Errorf("%v is not a valid Command struct.", obj) 6466 return true 6467 }</span> 6468 6469 <span class="cov0" title="0">err := c.syncCommandHandler(cmd) 6470 c.handleCommandErr(err, obj) 6471 6472 return true</span> 6473 } 6474 6475 func (c *queuecontroller) handleCommand(cmd *busv1alpha1.Command) error <span class="cov0" title="0">{ 6476 startTime := time.Now() 6477 defer func() 
</span><span class="cov0" title="0">{ 6478 klog.V(4).Infof("Finished syncing command %s/%s (%v).", cmd.Namespace, cmd.Name, time.Since(startTime)) 6479 }</span>() 6480 6481 <span class="cov0" title="0">err := c.vcClient.BusV1alpha1().Commands(cmd.Namespace).Delete(context.TODO(), cmd.Name, metav1.DeleteOptions{}) 6482 if err != nil </span><span class="cov0" title="0">{ 6483 if apierrors.IsNotFound(err) </span><span class="cov0" title="0">{ 6484 return nil 6485 }</span> 6486 6487 <span class="cov0" title="0">return fmt.Errorf("failed to delete command <%s/%s> for %v", cmd.Namespace, cmd.Name, err)</span> 6488 } 6489 6490 <span class="cov0" title="0">req := &apis.Request{ 6491 QueueName: cmd.TargetObject.Name, 6492 Event: busv1alpha1.CommandIssuedEvent, 6493 Action: busv1alpha1.Action(cmd.Action), 6494 } 6495 6496 c.enqueueQueue(req) 6497 6498 return nil</span> 6499 } 6500 6501 func (c *queuecontroller) handleCommandErr(err error, obj interface{}) <span class="cov0" title="0">{ 6502 if err == nil </span><span class="cov0" title="0">{ 6503 c.commandQueue.Forget(obj) 6504 return 6505 }</span> 6506 6507 <span class="cov0" title="0">if c.maxRequeueNum == -1 || c.commandQueue.NumRequeues(obj) < c.maxRequeueNum </span><span class="cov0" title="0">{ 6508 klog.V(4).Infof("Error syncing command %v for %v.", obj, err) 6509 c.commandQueue.AddRateLimited(obj) 6510 return 6511 }</span> 6512 6513 <span class="cov0" title="0">klog.V(2).Infof("Dropping command %v out of the queue for %v.", obj, err) 6514 c.commandQueue.Forget(obj)</span> 6515 } 6516 </pre> 6517 6518 <pre class="file" id="file35" style="display: none">/* 6519 Copyright 2019 The Volcano Authors. 6520 6521 Licensed under the Apache License, Version 2.0 (the "License"); 6522 you may not use this file except in compliance with the License. 
6523 You may obtain a copy of the License at 6524 6525 http://www.apache.org/licenses/LICENSE-2.0 6526 6527 Unless required by applicable law or agreed to in writing, software 6528 distributed under the License is distributed on an "AS IS" BASIS, 6529 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 6530 See the License for the specific language governing permissions and 6531 limitations under the License. 6532 */ 6533 6534 package queue 6535 6536 import ( 6537 "context" 6538 "fmt" 6539 "reflect" 6540 6541 v1 "k8s.io/api/core/v1" 6542 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 6543 "k8s.io/client-go/tools/cache" 6544 "k8s.io/klog" 6545 6546 "volcano.sh/apis/pkg/apis/bus/v1alpha1" 6547 schedulingv1beta1 "volcano.sh/apis/pkg/apis/scheduling/v1beta1" 6548 "volcano.sh/volcano/pkg/controllers/queue/state" 6549 ) 6550 6551 func (c *queuecontroller) syncQueue(queue *schedulingv1beta1.Queue, updateStateFn state.UpdateQueueStatusFn) error <span class="cov8" title="1">{ 6552 klog.V(4).Infof("Begin to sync queue %s.", queue.Name) 6553 defer klog.V(4).Infof("End sync queue %s.", queue.Name) 6554 6555 podGroups := c.getPodGroups(queue.Name) 6556 queueStatus := schedulingv1beta1.QueueStatus{} 6557 6558 for _, pgKey := range podGroups </span><span class="cov8" title="1">{ 6559 // Ignore error here, tt can not occur. 6560 ns, name, _ := cache.SplitMetaNamespaceKey(pgKey) 6561 6562 // TODO: check NotFound error and sync local cache. 
6563 pg, err := c.pgLister.PodGroups(ns).Get(name) 6564 if err != nil </span><span class="cov0" title="0">{ 6565 return err 6566 }</span> 6567 6568 <span class="cov8" title="1">switch pg.Status.Phase </span>{ 6569 case schedulingv1beta1.PodGroupPending:<span class="cov8" title="1"> 6570 queueStatus.Pending++</span> 6571 case schedulingv1beta1.PodGroupRunning:<span class="cov0" title="0"> 6572 queueStatus.Running++</span> 6573 case schedulingv1beta1.PodGroupUnknown:<span class="cov0" title="0"> 6574 queueStatus.Unknown++</span> 6575 case schedulingv1beta1.PodGroupInqueue:<span class="cov0" title="0"> 6576 queueStatus.Inqueue++</span> 6577 } 6578 } 6579 6580 <span class="cov8" title="1">if updateStateFn != nil </span><span class="cov0" title="0">{ 6581 updateStateFn(&queueStatus, podGroups) 6582 }</span> else<span class="cov8" title="1"> { 6583 queueStatus.State = queue.Status.State 6584 }</span> 6585 6586 // ignore update when status does not change 6587 <span class="cov8" title="1">if reflect.DeepEqual(queueStatus, queue.Status) </span><span class="cov0" title="0">{ 6588 return nil 6589 }</span> 6590 6591 <span class="cov8" title="1">newQueue := queue.DeepCopy() 6592 newQueue.Status = queueStatus 6593 if _, err := c.vcClient.SchedulingV1beta1().Queues().UpdateStatus(context.TODO(), newQueue, metav1.UpdateOptions{}); err != nil </span><span class="cov0" title="0">{ 6594 klog.Errorf("Failed to update status of Queue %s: %v.", newQueue.Name, err) 6595 return err 6596 }</span> 6597 6598 <span class="cov8" title="1">return nil</span> 6599 } 6600 6601 func (c *queuecontroller) openQueue(queue *schedulingv1beta1.Queue, updateStateFn state.UpdateQueueStatusFn) error <span class="cov0" title="0">{ 6602 klog.V(4).Infof("Begin to open queue %s.", queue.Name) 6603 6604 newQueue := queue.DeepCopy() 6605 newQueue.Status.State = schedulingv1beta1.QueueStateOpen 6606 6607 if queue.Status.State != newQueue.Status.State </span><span class="cov0" title="0">{ 6608 if _, err := 
c.vcClient.SchedulingV1beta1().Queues().Update(context.TODO(), newQueue, metav1.UpdateOptions{}); err != nil </span><span class="cov0" title="0">{ 6609 c.recorder.Event(newQueue, v1.EventTypeWarning, string(v1alpha1.OpenQueueAction), 6610 fmt.Sprintf("Open queue failed for %v", err)) 6611 return err 6612 }</span> 6613 6614 <span class="cov0" title="0">c.recorder.Event(newQueue, v1.EventTypeNormal, string(v1alpha1.OpenQueueAction), "Open queue succeed")</span> 6615 } else<span class="cov0" title="0"> { 6616 return nil 6617 }</span> 6618 6619 <span class="cov0" title="0">q, err := c.vcClient.SchedulingV1beta1().Queues().Get(context.TODO(), newQueue.Name, metav1.GetOptions{}) 6620 if err != nil </span><span class="cov0" title="0">{ 6621 return err 6622 }</span> 6623 6624 <span class="cov0" title="0">newQueue = q.DeepCopy() 6625 if updateStateFn != nil </span><span class="cov0" title="0">{ 6626 updateStateFn(&newQueue.Status, nil) 6627 }</span> else<span class="cov0" title="0"> { 6628 return fmt.Errorf("internal error, update state function should be provided") 6629 }</span> 6630 6631 <span class="cov0" title="0">if queue.Status.State != newQueue.Status.State </span><span class="cov0" title="0">{ 6632 if _, err := c.vcClient.SchedulingV1beta1().Queues().UpdateStatus(context.TODO(), newQueue, metav1.UpdateOptions{}); err != nil </span><span class="cov0" title="0">{ 6633 c.recorder.Event(newQueue, v1.EventTypeWarning, string(v1alpha1.OpenQueueAction), 6634 fmt.Sprintf("Update queue status from %s to %s failed for %v", 6635 queue.Status.State, newQueue.Status.State, err)) 6636 return err 6637 }</span> 6638 } 6639 6640 <span class="cov0" title="0">return nil</span> 6641 } 6642 6643 func (c *queuecontroller) closeQueue(queue *schedulingv1beta1.Queue, updateStateFn state.UpdateQueueStatusFn) error <span class="cov0" title="0">{ 6644 klog.V(4).Infof("Begin to close queue %s.", queue.Name) 6645 6646 newQueue := queue.DeepCopy() 6647 newQueue.Status.State = 
schedulingv1beta1.QueueStateClosed 6648 6649 if queue.Status.State != newQueue.Status.State </span><span class="cov0" title="0">{ 6650 if _, err := c.vcClient.SchedulingV1beta1().Queues().Update(context.TODO(), newQueue, metav1.UpdateOptions{}); err != nil </span><span class="cov0" title="0">{ 6651 c.recorder.Event(newQueue, v1.EventTypeWarning, string(v1alpha1.CloseQueueAction), 6652 fmt.Sprintf("Close queue failed for %v", err)) 6653 return err 6654 }</span> 6655 6656 <span class="cov0" title="0">c.recorder.Event(newQueue, v1.EventTypeNormal, string(v1alpha1.CloseQueueAction), "Close queue succeed")</span> 6657 } else<span class="cov0" title="0"> { 6658 return nil 6659 }</span> 6660 6661 <span class="cov0" title="0">q, err := c.vcClient.SchedulingV1beta1().Queues().Get(context.TODO(), newQueue.Name, metav1.GetOptions{}) 6662 if err != nil </span><span class="cov0" title="0">{ 6663 return err 6664 }</span> 6665 6666 <span class="cov0" title="0">newQueue = q.DeepCopy() 6667 podGroups := c.getPodGroups(newQueue.Name) 6668 if updateStateFn != nil </span><span class="cov0" title="0">{ 6669 updateStateFn(&newQueue.Status, podGroups) 6670 }</span> else<span class="cov0" title="0"> { 6671 return fmt.Errorf("internal error, update state function should be provided") 6672 }</span> 6673 6674 <span class="cov0" title="0">if queue.Status.State != newQueue.Status.State </span><span class="cov0" title="0">{ 6675 if _, err := c.vcClient.SchedulingV1beta1().Queues().UpdateStatus(context.TODO(), newQueue, metav1.UpdateOptions{}); err != nil </span><span class="cov0" title="0">{ 6676 c.recorder.Event(newQueue, v1.EventTypeWarning, string(v1alpha1.CloseQueueAction), 6677 fmt.Sprintf("Update queue status from %s to %s failed for %v", 6678 queue.Status.State, newQueue.Status.State, err)) 6679 return err 6680 }</span> 6681 } 6682 6683 <span class="cov0" title="0">return nil</span> 6684 } 6685 </pre> 6686 6687 <pre class="file" id="file36" style="display: none">/* 6688 Copyright 2019 
The Volcano Authors. 6689 6690 Licensed under the Apache License, Version 2.0 (the "License"); 6691 you may not use this file except in compliance with the License. 6692 You may obtain a copy of the License at 6693 6694 http://www.apache.org/licenses/LICENSE-2.0 6695 6696 Unless required by applicable law or agreed to in writing, software 6697 distributed under the License is distributed on an "AS IS" BASIS, 6698 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 6699 See the License for the specific language governing permissions and 6700 limitations under the License. 6701 */ 6702 6703 package queue 6704 6705 import ( 6706 "k8s.io/client-go/tools/cache" 6707 "k8s.io/klog" 6708 6709 busv1alpha1 "volcano.sh/apis/pkg/apis/bus/v1alpha1" 6710 schedulingv1beta1 "volcano.sh/apis/pkg/apis/scheduling/v1beta1" 6711 "volcano.sh/volcano/pkg/controllers/apis" 6712 ) 6713 6714 func (c *queuecontroller) enqueue(req *apis.Request) <span class="cov8" title="1">{ 6715 c.queue.Add(req) 6716 }</span> 6717 6718 func (c *queuecontroller) addQueue(obj interface{}) <span class="cov8" title="1">{ 6719 queue := obj.(*schedulingv1beta1.Queue) 6720 6721 req := &apis.Request{ 6722 QueueName: queue.Name, 6723 6724 Event: busv1alpha1.OutOfSyncEvent, 6725 Action: busv1alpha1.SyncQueueAction, 6726 } 6727 6728 c.enqueue(req) 6729 }</span> 6730 6731 func (c *queuecontroller) deleteQueue(obj interface{}) <span class="cov8" title="1">{ 6732 queue, ok := obj.(*schedulingv1beta1.Queue) 6733 if !ok </span><span class="cov0" title="0">{ 6734 tombstone, ok := obj.(cache.DeletedFinalStateUnknown) 6735 if !ok </span><span class="cov0" title="0">{ 6736 klog.Errorf("Couldn't get object from tombstone %#v.", obj) 6737 return 6738 }</span> 6739 <span class="cov0" title="0">queue, ok = tombstone.Obj.(*schedulingv1beta1.Queue) 6740 if !ok </span><span class="cov0" title="0">{ 6741 klog.Errorf("Tombstone contained object that is not a Queue: %#v.", obj) 6742 return 6743 }</span> 6744 } 6745 
6746 <span class="cov8" title="1">c.pgMutex.Lock() 6747 defer c.pgMutex.Unlock() 6748 delete(c.podGroups, queue.Name)</span> 6749 } 6750 6751 func (c *queuecontroller) updateQueue(_, _ interface{}) {<span class="cov0" title="0"> 6752 // currently do not care about queue update 6753 }</span> 6754 6755 func (c *queuecontroller) addPodGroup(obj interface{}) <span class="cov8" title="1">{ 6756 pg := obj.(*schedulingv1beta1.PodGroup) 6757 key, _ := cache.MetaNamespaceKeyFunc(obj) 6758 6759 c.pgMutex.Lock() 6760 defer c.pgMutex.Unlock() 6761 6762 if c.podGroups[pg.Spec.Queue] == nil </span><span class="cov8" title="1">{ 6763 c.podGroups[pg.Spec.Queue] = make(map[string]struct{}) 6764 }</span> 6765 <span class="cov8" title="1">c.podGroups[pg.Spec.Queue][key] = struct{}{} 6766 6767 req := &apis.Request{ 6768 QueueName: pg.Spec.Queue, 6769 6770 Event: busv1alpha1.OutOfSyncEvent, 6771 Action: busv1alpha1.SyncQueueAction, 6772 } 6773 6774 c.enqueue(req)</span> 6775 } 6776 6777 func (c *queuecontroller) updatePodGroup(old, new interface{}) <span class="cov8" title="1">{ 6778 oldPG := old.(*schedulingv1beta1.PodGroup) 6779 newPG := new.(*schedulingv1beta1.PodGroup) 6780 6781 // Note: we have no use case update PodGroup.Spec.Queue 6782 // So do not consider it here. 
6783 if oldPG.Status.Phase != newPG.Status.Phase </span><span class="cov8" title="1">{ 6784 c.addPodGroup(newPG) 6785 }</span> 6786 } 6787 6788 func (c *queuecontroller) deletePodGroup(obj interface{}) <span class="cov8" title="1">{ 6789 pg, ok := obj.(*schedulingv1beta1.PodGroup) 6790 if !ok </span><span class="cov0" title="0">{ 6791 tombstone, ok := obj.(cache.DeletedFinalStateUnknown) 6792 if !ok </span><span class="cov0" title="0">{ 6793 klog.Errorf("Couldn't get object from tombstone %#v.", obj) 6794 return 6795 }</span> 6796 <span class="cov0" title="0">pg, ok = tombstone.Obj.(*schedulingv1beta1.PodGroup) 6797 if !ok </span><span class="cov0" title="0">{ 6798 klog.Errorf("Tombstone contained object that is not a PodGroup: %#v.", obj) 6799 return 6800 }</span> 6801 } 6802 6803 <span class="cov8" title="1">key, _ := cache.MetaNamespaceKeyFunc(obj) 6804 6805 c.pgMutex.Lock() 6806 defer c.pgMutex.Unlock() 6807 6808 delete(c.podGroups[pg.Spec.Queue], key) 6809 6810 req := &apis.Request{ 6811 QueueName: pg.Spec.Queue, 6812 6813 Event: busv1alpha1.OutOfSyncEvent, 6814 Action: busv1alpha1.SyncQueueAction, 6815 } 6816 6817 c.enqueue(req)</span> 6818 } 6819 6820 func (c *queuecontroller) addCommand(obj interface{}) <span class="cov0" title="0">{ 6821 cmd, ok := obj.(*busv1alpha1.Command) 6822 if !ok </span><span class="cov0" title="0">{ 6823 klog.Errorf("Obj %v is not command.", obj) 6824 return 6825 }</span> 6826 6827 <span class="cov0" title="0">c.commandQueue.Add(cmd)</span> 6828 } 6829 6830 func (c *queuecontroller) getPodGroups(key string) []string <span class="cov8" title="1">{ 6831 c.pgMutex.RLock() 6832 defer c.pgMutex.RUnlock() 6833 6834 if c.podGroups[key] == nil </span><span class="cov0" title="0">{ 6835 return nil 6836 }</span> 6837 <span class="cov8" title="1">podGroups := make([]string, 0, len(c.podGroups[key])) 6838 for pgKey := range c.podGroups[key] </span><span class="cov8" title="1">{ 6839 podGroups = append(podGroups, pgKey) 6840 }</span> 6841 6842 
<span class="cov8" title="1">return podGroups</span> 6843 } 6844 6845 func (c *queuecontroller) recordEventsForQueue(name, eventType, reason, message string) <span class="cov0" title="0">{ 6846 queue, err := c.queueLister.Get(name) 6847 if err != nil </span><span class="cov0" title="0">{ 6848 klog.Errorf("Get queue %s failed for %v.", name, err) 6849 return 6850 }</span> 6851 6852 <span class="cov0" title="0">c.recorder.Event(queue, eventType, reason, message)</span> 6853 } 6854 </pre> 6855 6856 <pre class="file" id="file37" style="display: none">/* 6857 Copyright 2019 The Volcano Authors. 6858 6859 Licensed under the Apache License, Version 2.0 (the "License"); 6860 you may not use this file except in compliance with the License. 6861 You may obtain a copy of the License at 6862 6863 http://www.apache.org/licenses/LICENSE-2.0 6864 6865 Unless required by applicable law or agreed to in writing, software 6866 distributed under the License is distributed on an "AS IS" BASIS, 6867 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 6868 See the License for the specific language governing permissions and 6869 limitations under the License. 6870 */ 6871 6872 package queue 6873 6874 import ( 6875 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 6876 6877 schedulingv1beta1 "volcano.sh/apis/pkg/apis/scheduling/v1beta1" 6878 ) 6879 6880 // IsQueueReference return if ownerReference is Queue Kind. 
6881 func IsQueueReference(ref *metav1.OwnerReference) bool <span class="cov0" title="0">{ 6882 if ref == nil </span><span class="cov0" title="0">{ 6883 return false 6884 }</span> 6885 6886 <span class="cov0" title="0">if ref.APIVersion != schedulingv1beta1.SchemeGroupVersion.String() </span><span class="cov0" title="0">{ 6887 return false 6888 }</span> 6889 6890 <span class="cov0" title="0">if ref.Kind != "Queue" </span><span class="cov0" title="0">{ 6891 return false 6892 }</span> 6893 6894 <span class="cov0" title="0">return true</span> 6895 } 6896 </pre> 6897 6898 <pre class="file" id="file38" style="display: none">/* 6899 Copyright 2021 The Volcano Authors. 6900 6901 Licensed under the Apache License, Version 2.0 (the "License"); 6902 you may not use this file except in compliance with the License. 6903 You may obtain a copy of the License at 6904 6905 http://www.apache.org/licenses/LICENSE-2.0 6906 6907 Unless required by applicable law or agreed to in writing, software 6908 distributed under the License is distributed on an "AS IS" BASIS, 6909 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 6910 See the License for the specific language governing permissions and 6911 limitations under the License. 
6912 */ 6913 6914 package allocate 6915 6916 import ( 6917 "k8s.io/klog" 6918 6919 "volcano.sh/volcano/pkg/scheduler/api" 6920 "volcano.sh/volcano/pkg/scheduler/framework" 6921 "volcano.sh/volcano/pkg/scheduler/metrics" 6922 "volcano.sh/volcano/pkg/scheduler/util" 6923 ) 6924 6925 var targetJob = util.Reservation.TargetJob 6926 6927 type Action struct{} 6928 6929 func New() *Action <span class="cov8" title="1">{ 6930 return &Action{} 6931 }</span> 6932 6933 func (alloc *Action) Name() string <span class="cov0" title="0">{ 6934 return "allocate" 6935 }</span> 6936 6937 func (alloc *Action) Initialize() {<span class="cov0" title="0">}</span> 6938 6939 func (alloc *Action) Execute(ssn *framework.Session) <span class="cov8" title="1">{ 6940 klog.V(3).Infof("Enter Allocate ...") 6941 defer klog.V(3).Infof("Leaving Allocate ...") 6942 6943 // the allocation for pod may have many stages 6944 // 1. pick a namespace named N (using ssn.NamespaceOrderFn) 6945 // 2. pick a queue named Q from N (using ssn.QueueOrderFn) 6946 // 3. pick a job named J from Q (using ssn.JobOrderFn) 6947 // 4. pick a task T from J (using ssn.TaskOrderFn) 6948 // 5. use predicateFn to filter out node that T can not be allocated on. 6949 // 6. 
use ssn.NodeOrderFn to judge the best node and assign it to T 6950 6951 namespaces := util.NewPriorityQueue(ssn.NamespaceOrderFn) 6952 6953 // jobsMap is map[api.NamespaceName]map[api.QueueID]PriorityQueue(*api.JobInfo) 6954 // used to find job with highest priority in given queue and namespace 6955 jobsMap := map[api.NamespaceName]map[api.QueueID]*util.PriorityQueue{} 6956 6957 for _, job := range ssn.Jobs </span><span class="cov8" title="1">{ 6958 if job.IsPending() </span><span class="cov0" title="0">{ 6959 klog.V(4).Infof("Job <%s/%s> Queue <%s> skip allocate, reason: job status is pending.", 6960 job.Namespace, job.Name, job.Queue) 6961 continue</span> 6962 } 6963 <span class="cov8" title="1">if vr := ssn.JobValid(job); vr != nil && !vr.Pass </span><span class="cov0" title="0">{ 6964 klog.V(4).Infof("Job <%s/%s> Queue <%s> skip allocate, reason: %v, message %v", job.Namespace, job.Name, job.Queue, vr.Reason, vr.Message) 6965 continue</span> 6966 } 6967 6968 <span class="cov8" title="1">if _, found := ssn.Queues[job.Queue]; !found </span><span class="cov0" title="0">{ 6969 klog.Warningf("Skip adding Job <%s/%s> because its queue %s is not found", 6970 job.Namespace, job.Name, job.Queue) 6971 continue</span> 6972 } 6973 6974 <span class="cov8" title="1">namespace := api.NamespaceName(job.Namespace) 6975 queueMap, found := jobsMap[namespace] 6976 if !found </span><span class="cov8" title="1">{ 6977 namespaces.Push(namespace) 6978 6979 queueMap = make(map[api.QueueID]*util.PriorityQueue) 6980 jobsMap[namespace] = queueMap 6981 }</span> 6982 6983 <span class="cov8" title="1">jobs, found := queueMap[job.Queue] 6984 if !found </span><span class="cov8" title="1">{ 6985 jobs = util.NewPriorityQueue(ssn.JobOrderFn) 6986 queueMap[job.Queue] = jobs 6987 }</span> 6988 6989 <span class="cov8" title="1">klog.V(4).Infof("Added Job <%s/%s> into Queue <%s>", job.Namespace, job.Name, job.Queue) 6990 jobs.Push(job)</span> 6991 } 6992 6993 <span class="cov8" 
title="1">klog.V(3).Infof("Try to allocate resource to %d Namespaces", len(jobsMap)) 6994 6995 pendingTasks := map[api.JobID]*util.PriorityQueue{} 6996 6997 allNodes := ssn.NodeList 6998 unlockedNodes := allNodes 6999 if targetJob != nil && len(util.Reservation.LockedNodes) != 0 </span><span class="cov0" title="0">{ 7000 unlockedNodes = unlockedNodes[0:0] 7001 for _, node := range allNodes </span><span class="cov0" title="0">{ 7002 if _, exist := util.Reservation.LockedNodes[node.Name]; !exist </span><span class="cov0" title="0">{ 7003 unlockedNodes = append(unlockedNodes, node) 7004 }</span> 7005 } 7006 } 7007 <span class="cov8" title="1">for _, unlockedNode := range unlockedNodes </span><span class="cov8" title="1">{ 7008 klog.V(4).Infof("unlockedNode ID: %s, Name: %s", unlockedNode.Node.UID, unlockedNode.Node.Name) 7009 }</span> 7010 <span class="cov8" title="1">predicateFn := func(task *api.TaskInfo, node *api.NodeInfo) error </span><span class="cov8" title="1">{ 7011 // Check for Resource Predicate 7012 if !task.InitResreq.LessEqual(node.FutureIdle(), api.Zero) </span><span class="cov8" title="1">{ 7013 return api.NewFitError(task, node, api.NodeResourceFitFailed) 7014 }</span> 7015 7016 <span class="cov8" title="1">return ssn.PredicateFn(task, node)</span> 7017 } 7018 7019 // To pick <namespace, queue> tuple for job, we choose to pick namespace firstly. 7020 // Because we believe that number of queues would less than namespaces in most case. 7021 // And, this action would make the resource usage among namespace balanced. 
7022 <span class="cov8" title="1">for </span><span class="cov8" title="1">{ 7023 if namespaces.Empty() </span><span class="cov8" title="1">{ 7024 break</span> 7025 } 7026 7027 // pick namespace from namespaces PriorityQueue 7028 <span class="cov8" title="1">namespace := namespaces.Pop().(api.NamespaceName) 7029 7030 queueInNamespace := jobsMap[namespace] 7031 7032 // pick queue for given namespace 7033 // 7034 // This block use an algorithm with time complex O(n). 7035 // But at least PriorityQueue could not be used here, 7036 // because the allocation of job would change the priority of queue among all namespaces, 7037 // and the PriorityQueue have no ability to update priority for a special queue. 7038 var queue *api.QueueInfo 7039 for queueID := range queueInNamespace </span><span class="cov8" title="1">{ 7040 currentQueue := ssn.Queues[queueID] 7041 if ssn.Overused(currentQueue) </span><span class="cov8" title="1">{ 7042 klog.V(3).Infof("Namespace <%s> Queue <%s> is overused, ignore it.", namespace, currentQueue.Name) 7043 delete(queueInNamespace, queueID) 7044 continue</span> 7045 } 7046 <span class="cov8" title="1">if jobs, found := queueInNamespace[currentQueue.UID]; found && jobs.Empty() </span><span class="cov8" title="1">{ 7047 continue</span> 7048 } 7049 7050 <span class="cov8" title="1">if queue == nil || ssn.QueueOrderFn(currentQueue, queue) </span><span class="cov8" title="1">{ 7051 queue = currentQueue 7052 }</span> 7053 } 7054 7055 <span class="cov8" title="1">if queue == nil </span><span class="cov8" title="1">{ 7056 klog.V(3).Infof("Namespace <%s> have no queue, skip it", namespace) 7057 continue</span> 7058 } 7059 7060 <span class="cov8" title="1">klog.V(3).Infof("Try to allocate resource to Jobs in Namespace <%s> Queue <%v>", namespace, queue.Name) 7061 7062 jobs, found := queueInNamespace[queue.UID] 7063 if !found || jobs.Empty() </span><span class="cov0" title="0">{ 7064 delete(queueInNamespace, queue.UID) 7065 namespaces.Push(namespace) 7066 
klog.V(4).Infof("Can not find jobs for queue %s.", queue.Name) 7067 continue</span> 7068 } 7069 7070 <span class="cov8" title="1">job := jobs.Pop().(*api.JobInfo) 7071 var nodes []*api.NodeInfo 7072 if targetJob != nil && job.UID == targetJob.UID </span><span class="cov0" title="0">{ 7073 klog.V(4).Infof("Try to allocate resource to target job: %s", job.Name) 7074 nodes = allNodes 7075 }</span> else<span class="cov8" title="1"> { 7076 nodes = unlockedNodes 7077 }</span> 7078 <span class="cov8" title="1">if _, found = pendingTasks[job.UID]; !found </span><span class="cov8" title="1">{ 7079 tasks := util.NewPriorityQueue(ssn.TaskOrderFn) 7080 for _, task := range job.TaskStatusIndex[api.Pending] </span><span class="cov8" title="1">{ 7081 // Skip BestEffort task in 'allocate' action. 7082 if task.Resreq.IsEmpty() </span><span class="cov0" title="0">{ 7083 klog.V(4).Infof("Task <%v/%v> is BestEffort task, skip it.", 7084 task.Namespace, task.Name) 7085 continue</span> 7086 } 7087 7088 <span class="cov8" title="1">tasks.Push(task)</span> 7089 } 7090 <span class="cov8" title="1">pendingTasks[job.UID] = tasks</span> 7091 } 7092 <span class="cov8" title="1">tasks := pendingTasks[job.UID] 7093 7094 klog.V(3).Infof("Try to allocate resource to %d tasks of Job <%v/%v>", 7095 tasks.Len(), job.Namespace, job.Name) 7096 7097 stmt := framework.NewStatement(ssn) 7098 ph := util.NewPredicateHelper() 7099 for !tasks.Empty() </span><span class="cov8" title="1">{ 7100 task := tasks.Pop().(*api.TaskInfo) 7101 7102 // Check whether the queue is overused on dimension that the task requested 7103 taskRequest := task.Resreq.ResourceNames() 7104 if underusedResources := ssn.UnderusedResources(queue); underusedResources != nil && !underusedResources.Contains(taskRequest) </span><span class="cov8" title="1">{ 7105 klog.V(3).Infof("Queue <%s> is overused when considering task <%s>, ignore it.", queue.Name, task.Name) 7106 continue</span> 7107 } 7108 7109 <span class="cov8" 
title="1">klog.V(3).Infof("There are <%d> nodes for Job <%v/%v>", len(nodes), job.Namespace, job.Name) 7110 7111 predicateNodes, fitErrors := ph.PredicateNodes(task, nodes, predicateFn) 7112 if len(predicateNodes) == 0 </span><span class="cov8" title="1">{ 7113 job.NodesFitErrors[task.UID] = fitErrors 7114 break</span> 7115 } 7116 7117 <span class="cov8" title="1">var candidateNodes []*api.NodeInfo 7118 for _, n := range predicateNodes </span><span class="cov8" title="1">{ 7119 if task.InitResreq.LessEqual(n.Idle, api.Zero) || task.InitResreq.LessEqual(n.FutureIdle(), api.Zero) </span><span class="cov8" title="1">{ 7120 candidateNodes = append(candidateNodes, n) 7121 }</span> 7122 } 7123 7124 // If not candidate nodes for this task, skip it. 7125 <span class="cov8" title="1">if len(candidateNodes) == 0 </span><span class="cov0" title="0">{ 7126 continue</span> 7127 } 7128 7129 <span class="cov8" title="1">nodeScores := util.PrioritizeNodes(task, candidateNodes, ssn.BatchNodeOrderFn, ssn.NodeOrderMapFn, ssn.NodeOrderReduceFn) 7130 7131 node := ssn.BestNodeFn(task, nodeScores) 7132 if node == nil </span><span class="cov8" title="1">{ 7133 node = util.SelectBestNode(nodeScores) 7134 }</span> 7135 7136 // Allocate idle resource to the task. 
7137 <span class="cov8" title="1">if task.InitResreq.LessEqual(node.Idle, api.Zero) </span><span class="cov8" title="1">{ 7138 klog.V(3).Infof("Binding Task <%v/%v> to node <%v>", 7139 task.Namespace, task.Name, node.Name) 7140 if err := stmt.Allocate(task, node); err != nil </span><span class="cov0" title="0">{ 7141 klog.Errorf("Failed to bind Task %v on %v in Session %v, err: %v", 7142 task.UID, node.Name, ssn.UID, err) 7143 }</span> else<span class="cov8" title="1"> { 7144 metrics.UpdateE2eSchedulingDurationByJob(job.Name, string(job.Queue), job.Namespace, metrics.Duration(job.CreationTimestamp.Time)) 7145 }</span> 7146 } else<span class="cov0" title="0"> { 7147 klog.V(3).Infof("Predicates failed for task <%s/%s> on node <%s> with limited resources", 7148 task.Namespace, task.Name, node.Name) 7149 7150 // Allocate releasing resource to the task if any. 7151 if task.InitResreq.LessEqual(node.FutureIdle(), api.Zero) </span><span class="cov0" title="0">{ 7152 klog.V(3).Infof("Pipelining Task <%v/%v> to node <%v> for <%v> on <%v>", 7153 task.Namespace, task.Name, node.Name, task.InitResreq, node.Releasing) 7154 if err := stmt.Pipeline(task, node.Name); err != nil </span><span class="cov0" title="0">{ 7155 klog.Errorf("Failed to pipeline Task %v on %v in Session %v for %v.", 7156 task.UID, node.Name, ssn.UID, err) 7157 }</span> else<span class="cov0" title="0"> { 7158 metrics.UpdateE2eSchedulingDurationByJob(job.Name, string(job.Queue), job.Namespace, metrics.Duration(job.CreationTimestamp.Time)) 7159 }</span> 7160 } 7161 } 7162 7163 <span class="cov8" title="1">if ssn.JobReady(job) && !tasks.Empty() </span><span class="cov8" title="1">{ 7164 jobs.Push(job) 7165 break</span> 7166 } 7167 } 7168 7169 <span class="cov8" title="1">if ssn.JobReady(job) </span><span class="cov8" title="1">{ 7170 stmt.Commit() 7171 }</span> else<span class="cov0" title="0"> { 7172 if !ssn.JobPipelined(job) </span><span class="cov0" title="0">{ 7173 stmt.Discard() 7174 }</span> 7175 } 7176 
7177 // Added Namespace back until no job in Namespace. 7178 <span class="cov8" title="1">namespaces.Push(namespace)</span> 7179 } 7180 } 7181 7182 func (alloc *Action) UnInitialize() {<span class="cov0" title="0">}</span> 7183 </pre> 7184 7185 <pre class="file" id="file39" style="display: none">// Package elect is used to find the target job and reserve resource for it 7186 package elect 7187 7188 import ( 7189 "k8s.io/klog" 7190 7191 "volcano.sh/apis/pkg/apis/scheduling" 7192 "volcano.sh/volcano/pkg/scheduler/api" 7193 "volcano.sh/volcano/pkg/scheduler/framework" 7194 "volcano.sh/volcano/pkg/scheduler/util" 7195 ) 7196 7197 // Action defines the action 7198 type Action struct{} 7199 7200 // New returns the action instance 7201 func New() *Action <span class="cov8" title="1">{ 7202 return &Action{} 7203 }</span> 7204 7205 // Name returns the action name 7206 func (alloc *Action) Name() string <span class="cov0" title="0">{ 7207 return "elect" 7208 }</span> 7209 7210 // Initialize inits the action 7211 func (alloc *Action) Initialize() {<span class="cov0" title="0">}</span> 7212 7213 // Execute selects the target job which is of the highest priority and waits for the longest time. 
7214 func (alloc *Action) Execute(ssn *framework.Session) <span class="cov8" title="1">{ 7215 klog.V(3).Infof("Enter Elect ...") 7216 defer klog.V(3).Infof("Leaving Elect ...") 7217 7218 if util.Reservation.TargetJob == nil </span><span class="cov8" title="1">{ 7219 klog.V(4).Infof("Start select Target Job") 7220 var pendingJobs []*api.JobInfo 7221 for _, job := range ssn.Jobs </span><span class="cov0" title="0">{ 7222 if job.PodGroup.Status.Phase == scheduling.PodGroupPending </span><span class="cov0" title="0">{ 7223 pendingJobs = append(pendingJobs, job) 7224 }</span> 7225 } 7226 <span class="cov8" title="1">util.Reservation.TargetJob = ssn.TargetJob(pendingJobs) 7227 if util.Reservation.TargetJob != nil </span><span class="cov0" title="0">{ 7228 klog.V(3).Infof("Target Job name: %s", util.Reservation.TargetJob.Name) 7229 }</span> else<span class="cov8" title="1"> { 7230 klog.V(3).Infof("Target Job name: nil") 7231 }</span> 7232 } 7233 } 7234 7235 // UnInitialize releases resource which are not useful. 7236 func (alloc *Action) UnInitialize() {<span class="cov0" title="0">}</span> 7237 </pre> 7238 7239 <pre class="file" id="file40" style="display: none">/* 7240 Copyright 2018 The Kubernetes Authors. 7241 7242 Licensed under the Apache License, Version 2.0 (the "License"); 7243 you may not use this file except in compliance with the License. 7244 You may obtain a copy of the License at 7245 7246 http://www.apache.org/licenses/LICENSE-2.0 7247 7248 Unless required by applicable law or agreed to in writing, software 7249 distributed under the License is distributed on an "AS IS" BASIS, 7250 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 7251 See the License for the specific language governing permissions and 7252 limitations under the License. 
7253 */ 7254 7255 package preempt 7256 7257 import ( 7258 "k8s.io/klog" 7259 7260 "volcano.sh/volcano/pkg/scheduler/api" 7261 "volcano.sh/volcano/pkg/scheduler/framework" 7262 "volcano.sh/volcano/pkg/scheduler/metrics" 7263 "volcano.sh/volcano/pkg/scheduler/util" 7264 ) 7265 7266 type Action struct{} 7267 7268 func New() *Action <span class="cov8" title="1">{ 7269 return &Action{} 7270 }</span> 7271 7272 func (alloc *Action) Name() string <span class="cov0" title="0">{ 7273 return "preempt" 7274 }</span> 7275 7276 func (alloc *Action) Initialize() {<span class="cov0" title="0">}</span> 7277 7278 func (alloc *Action) Execute(ssn *framework.Session) <span class="cov8" title="1">{ 7279 klog.V(3).Infof("Enter Preempt ...") 7280 defer klog.V(3).Infof("Leaving Preempt ...") 7281 7282 preemptorsMap := map[api.QueueID]*util.PriorityQueue{} 7283 preemptorTasks := map[api.JobID]*util.PriorityQueue{} 7284 7285 var underRequest []*api.JobInfo 7286 queues := map[api.QueueID]*api.QueueInfo{} 7287 7288 for _, job := range ssn.Jobs </span><span class="cov8" title="1">{ 7289 if job.IsPending() </span><span class="cov0" title="0">{ 7290 continue</span> 7291 } 7292 7293 <span class="cov8" title="1">if vr := ssn.JobValid(job); vr != nil && !vr.Pass </span><span class="cov0" title="0">{ 7294 klog.V(4).Infof("Job <%s/%s> Queue <%s> skip preemption, reason: %v, message %v", job.Namespace, job.Name, job.Queue, vr.Reason, vr.Message) 7295 continue</span> 7296 } 7297 7298 <span class="cov8" title="1">if queue, found := ssn.Queues[job.Queue]; !found </span><span class="cov0" title="0">{ 7299 continue</span> 7300 } else<span class="cov8" title="1"> if _, existed := queues[queue.UID]; !existed </span><span class="cov8" title="1">{ 7301 klog.V(3).Infof("Added Queue <%s> for Job <%s/%s>", 7302 queue.Name, job.Namespace, job.Name) 7303 queues[queue.UID] = queue 7304 }</span> 7305 7306 // check job if starting for more resources. 
7307 <span class="cov8" title="1">if ssn.JobStarving(job) </span><span class="cov8" title="1">{ 7308 if _, found := preemptorsMap[job.Queue]; !found </span><span class="cov8" title="1">{ 7309 preemptorsMap[job.Queue] = util.NewPriorityQueue(ssn.JobOrderFn) 7310 }</span> 7311 <span class="cov8" title="1">preemptorsMap[job.Queue].Push(job) 7312 underRequest = append(underRequest, job) 7313 preemptorTasks[job.UID] = util.NewPriorityQueue(ssn.TaskOrderFn) 7314 for _, task := range job.TaskStatusIndex[api.Pending] </span><span class="cov8" title="1">{ 7315 preemptorTasks[job.UID].Push(task) 7316 }</span> 7317 } 7318 } 7319 7320 <span class="cov8" title="1">ph := util.NewPredicateHelper() 7321 // Preemption between Jobs within Queue. 7322 for _, queue := range queues </span><span class="cov8" title="1">{ 7323 for </span><span class="cov8" title="1">{ 7324 preemptors := preemptorsMap[queue.UID] 7325 7326 // If no preemptors, no preemption. 7327 if preemptors == nil || preemptors.Empty() </span><span class="cov8" title="1">{ 7328 klog.V(4).Infof("No preemptors in Queue <%s>, break.", queue.Name) 7329 break</span> 7330 } 7331 7332 <span class="cov8" title="1">preemptorJob := preemptors.Pop().(*api.JobInfo) 7333 7334 stmt := framework.NewStatement(ssn) 7335 assigned := false 7336 for </span><span class="cov8" title="1">{ 7337 // If job is not request more resource, then stop preempting. 7338 if !ssn.JobStarving(preemptorJob) </span><span class="cov8" title="1">{ 7339 break</span> 7340 } 7341 7342 // If not preemptor tasks, next job. 
7343 <span class="cov8" title="1">if preemptorTasks[preemptorJob.UID].Empty() </span><span class="cov8" title="1">{ 7344 klog.V(3).Infof("No preemptor task in job <%s/%s>.", 7345 preemptorJob.Namespace, preemptorJob.Name) 7346 break</span> 7347 } 7348 7349 <span class="cov8" title="1">preemptor := preemptorTasks[preemptorJob.UID].Pop().(*api.TaskInfo) 7350 7351 if preempted, _ := preempt(ssn, stmt, preemptor, func(task *api.TaskInfo) bool </span><span class="cov8" title="1">{ 7352 // Ignore non running task. 7353 if task.Status != api.Running </span><span class="cov0" title="0">{ 7354 return false 7355 }</span> 7356 // Ignore task with empty resource request. 7357 <span class="cov8" title="1">if task.Resreq.IsEmpty() </span><span class="cov0" title="0">{ 7358 return false 7359 }</span> 7360 <span class="cov8" title="1">job, found := ssn.Jobs[task.Job] 7361 if !found </span><span class="cov0" title="0">{ 7362 return false 7363 }</span> 7364 // Preempt other jobs within queue 7365 <span class="cov8" title="1">return job.Queue == preemptorJob.Queue && preemptor.Job != task.Job</span> 7366 }, ph); preempted <span class="cov8" title="1">{ 7367 assigned = true 7368 }</span> 7369 } 7370 7371 // Commit changes only if job is pipelined, otherwise try next job. 7372 <span class="cov8" title="1">if ssn.JobPipelined(preemptorJob) </span><span class="cov8" title="1">{ 7373 stmt.Commit() 7374 }</span> else<span class="cov8" title="1"> { 7375 stmt.Discard() 7376 continue</span> 7377 } 7378 7379 <span class="cov8" title="1">if assigned </span><span class="cov8" title="1">{ 7380 preemptors.Push(preemptorJob) 7381 }</span> 7382 } 7383 7384 // Preemption between Task within Job. 
7385 <span class="cov8" title="1">for _, job := range underRequest </span><span class="cov8" title="1">{ 7386 // Fix: preemptor numbers lose when in same job 7387 preemptorTasks[job.UID] = util.NewPriorityQueue(ssn.TaskOrderFn) 7388 for _, task := range job.TaskStatusIndex[api.Pending] </span><span class="cov8" title="1">{ 7389 preemptorTasks[job.UID].Push(task) 7390 }</span> 7391 <span class="cov8" title="1">for </span><span class="cov8" title="1">{ 7392 if _, found := preemptorTasks[job.UID]; !found </span><span class="cov0" title="0">{ 7393 break</span> 7394 } 7395 7396 <span class="cov8" title="1">if preemptorTasks[job.UID].Empty() </span><span class="cov8" title="1">{ 7397 break</span> 7398 } 7399 7400 <span class="cov8" title="1">preemptor := preemptorTasks[job.UID].Pop().(*api.TaskInfo) 7401 7402 stmt := framework.NewStatement(ssn) 7403 assigned, _ := preempt(ssn, stmt, preemptor, func(task *api.TaskInfo) bool </span><span class="cov8" title="1">{ 7404 // Ignore non running task. 7405 if task.Status != api.Running </span><span class="cov8" title="1">{ 7406 return false 7407 }</span> 7408 // Ignore task with empty resource request. 7409 <span class="cov8" title="1">if task.Resreq.IsEmpty() </span><span class="cov0" title="0">{ 7410 return false 7411 }</span> 7412 // Preempt tasks within job. 7413 <span class="cov8" title="1">return preemptor.Job == task.Job</span> 7414 }, ph) 7415 <span class="cov8" title="1">stmt.Commit() 7416 7417 // If no preemption, next job. 
7418 if !assigned </span><span class="cov8" title="1">{ 7419 break</span> 7420 } 7421 } 7422 } 7423 } 7424 7425 // call victimTasksFn to evict tasks 7426 <span class="cov8" title="1">victimTasks(ssn)</span> 7427 } 7428 7429 func (alloc *Action) UnInitialize() {<span class="cov0" title="0">}</span> 7430 7431 func preempt( 7432 ssn *framework.Session, 7433 stmt *framework.Statement, 7434 preemptor *api.TaskInfo, 7435 filter func(*api.TaskInfo) bool, 7436 predicateHelper util.PredicateHelper, 7437 ) (bool, error) <span class="cov8" title="1">{ 7438 assigned := false 7439 7440 allNodes := ssn.NodeList 7441 7442 predicateNodes, _ := predicateHelper.PredicateNodes(preemptor, allNodes, ssn.PredicateFn) 7443 7444 nodeScores := util.PrioritizeNodes(preemptor, predicateNodes, ssn.BatchNodeOrderFn, ssn.NodeOrderMapFn, ssn.NodeOrderReduceFn) 7445 7446 selectedNodes := util.SortNodes(nodeScores) 7447 for _, node := range selectedNodes </span><span class="cov8" title="1">{ 7448 klog.V(3).Infof("Considering Task <%s/%s> on Node <%s>.", 7449 preemptor.Namespace, preemptor.Name, node.Name) 7450 7451 var preemptees []*api.TaskInfo 7452 for _, task := range node.Tasks </span><span class="cov8" title="1">{ 7453 if filter == nil </span><span class="cov0" title="0">{ 7454 preemptees = append(preemptees, task.Clone()) 7455 }</span> else<span class="cov8" title="1"> if filter(task) </span><span class="cov8" title="1">{ 7456 preemptees = append(preemptees, task.Clone()) 7457 }</span> 7458 } 7459 <span class="cov8" title="1">victims := ssn.Preemptable(preemptor, preemptees) 7460 metrics.UpdatePreemptionVictimsCount(len(victims)) 7461 7462 if err := util.ValidateVictims(preemptor, node, victims); err != nil </span><span class="cov8" title="1">{ 7463 klog.V(3).Infof("No validated victims on Node <%s>: %v", node.Name, err) 7464 continue</span> 7465 } 7466 7467 <span class="cov8" title="1">victimsQueue := util.NewPriorityQueue(func(l, r interface{}) bool </span><span class="cov8" title="1">{ 
7468 return !ssn.TaskOrderFn(l, r) 7469 }</span>) 7470 <span class="cov8" title="1">for _, victim := range victims </span><span class="cov8" title="1">{ 7471 victimsQueue.Push(victim) 7472 }</span> 7473 // Preempt victims for tasks, pick lowest priority task first. 7474 <span class="cov8" title="1">preempted := api.EmptyResource() 7475 7476 for !victimsQueue.Empty() </span><span class="cov8" title="1">{ 7477 // If reclaimed enough resources, break loop to avoid Sub panic. 7478 if preemptor.InitResreq.LessEqual(node.FutureIdle(), api.Zero) </span><span class="cov0" title="0">{ 7479 break</span> 7480 } 7481 <span class="cov8" title="1">preemptee := victimsQueue.Pop().(*api.TaskInfo) 7482 klog.V(3).Infof("Try to preempt Task <%s/%s> for Task <%s/%s>", 7483 preemptee.Namespace, preemptee.Name, preemptor.Namespace, preemptor.Name) 7484 if err := stmt.Evict(preemptee, "preempt"); err != nil </span><span class="cov0" title="0">{ 7485 klog.Errorf("Failed to preempt Task <%s/%s> for Task <%s/%s>: %v", 7486 preemptee.Namespace, preemptee.Name, preemptor.Namespace, preemptor.Name, err) 7487 continue</span> 7488 } 7489 <span class="cov8" title="1">preempted.Add(preemptee.Resreq)</span> 7490 } 7491 7492 <span class="cov8" title="1">metrics.RegisterPreemptionAttempts() 7493 klog.V(3).Infof("Preempted <%v> for Task <%s/%s> requested <%v>.", 7494 preempted, preemptor.Namespace, preemptor.Name, preemptor.InitResreq) 7495 7496 if preemptor.InitResreq.LessEqual(node.FutureIdle(), api.Zero) </span><span class="cov8" title="1">{ 7497 if err := stmt.Pipeline(preemptor, node.Name); err != nil </span><span class="cov0" title="0">{ 7498 klog.Errorf("Failed to pipeline Task <%s/%s> on Node <%s>", 7499 preemptor.Namespace, preemptor.Name, node.Name) 7500 }</span> 7501 7502 // Ignore pipeline error, will be corrected in next scheduling loop. 
7503 <span class="cov8" title="1">assigned = true 7504 7505 break</span> 7506 } 7507 } 7508 7509 <span class="cov8" title="1">return assigned, nil</span> 7510 } 7511 7512 func victimTasks(ssn *framework.Session) <span class="cov8" title="1">{ 7513 stmt := framework.NewStatement(ssn) 7514 victimTasks := ssn.VictimTasks() 7515 for _, victim := range victimTasks </span><span class="cov0" title="0">{ 7516 if err := stmt.Evict(victim.Clone(), "evict"); err != nil </span><span class="cov0" title="0">{ 7517 klog.Errorf("Failed to evict Task <%s/%s>: %v", 7518 victim.Namespace, victim.Name, err) 7519 continue</span> 7520 } 7521 } 7522 <span class="cov8" title="1">stmt.Commit()</span> 7523 } 7524 </pre> 7525 7526 <pre class="file" id="file41" style="display: none">/* 7527 Copyright 2018 The Kubernetes Authors. 7528 7529 Licensed under the Apache License, Version 2.0 (the "License"); 7530 you may not use this file except in compliance with the License. 7531 You may obtain a copy of the License at 7532 7533 http://www.apache.org/licenses/LICENSE-2.0 7534 7535 Unless required by applicable law or agreed to in writing, software 7536 distributed under the License is distributed on an "AS IS" BASIS, 7537 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 7538 See the License for the specific language governing permissions and 7539 limitations under the License. 
7540 */ 7541 7542 package reclaim 7543 7544 import ( 7545 "k8s.io/klog" 7546 7547 "volcano.sh/volcano/pkg/scheduler/api" 7548 "volcano.sh/volcano/pkg/scheduler/framework" 7549 "volcano.sh/volcano/pkg/scheduler/util" 7550 ) 7551 7552 type Action struct{} 7553 7554 func New() *Action <span class="cov8" title="1">{ 7555 return &Action{} 7556 }</span> 7557 7558 func (ra *Action) Name() string <span class="cov0" title="0">{ 7559 return "reclaim" 7560 }</span> 7561 7562 func (ra *Action) Initialize() {<span class="cov0" title="0">}</span> 7563 7564 func (ra *Action) Execute(ssn *framework.Session) <span class="cov8" title="1">{ 7565 klog.V(3).Infof("Enter Reclaim ...") 7566 defer klog.V(3).Infof("Leaving Reclaim ...") 7567 7568 queues := util.NewPriorityQueue(ssn.QueueOrderFn) 7569 queueMap := map[api.QueueID]*api.QueueInfo{} 7570 7571 preemptorsMap := map[api.QueueID]*util.PriorityQueue{} 7572 preemptorTasks := map[api.JobID]*util.PriorityQueue{} 7573 7574 klog.V(3).Infof("There are <%d> Jobs and <%d> Queues in total for scheduling.", 7575 len(ssn.Jobs), len(ssn.Queues)) 7576 7577 for _, job := range ssn.Jobs </span><span class="cov8" title="1">{ 7578 if job.IsPending() </span><span class="cov0" title="0">{ 7579 continue</span> 7580 } 7581 7582 <span class="cov8" title="1">if vr := ssn.JobValid(job); vr != nil && !vr.Pass </span><span class="cov0" title="0">{ 7583 klog.V(4).Infof("Job <%s/%s> Queue <%s> skip reclaim, reason: %v, message %v", job.Namespace, job.Name, job.Queue, vr.Reason, vr.Message) 7584 continue</span> 7585 } 7586 7587 <span class="cov8" title="1">if queue, found := ssn.Queues[job.Queue]; !found </span><span class="cov0" title="0">{ 7588 klog.Errorf("Failed to find Queue <%s> for Job <%s/%s>", 7589 job.Queue, job.Namespace, job.Name) 7590 continue</span> 7591 } else<span class="cov8" title="1"> if _, existed := queueMap[queue.UID]; !existed </span><span class="cov8" title="1">{ 7592 klog.V(4).Infof("Added Queue <%s> for Job <%s/%s>", queue.Name, 
job.Namespace, job.Name) 7593 queueMap[queue.UID] = queue 7594 queues.Push(queue) 7595 }</span> 7596 7597 <span class="cov8" title="1">if len(job.TaskStatusIndex[api.Pending]) != 0 </span><span class="cov8" title="1">{ 7598 if _, found := preemptorsMap[job.Queue]; !found </span><span class="cov8" title="1">{ 7599 preemptorsMap[job.Queue] = util.NewPriorityQueue(ssn.JobOrderFn) 7600 }</span> 7601 <span class="cov8" title="1">preemptorsMap[job.Queue].Push(job) 7602 preemptorTasks[job.UID] = util.NewPriorityQueue(ssn.TaskOrderFn) 7603 for _, task := range job.TaskStatusIndex[api.Pending] </span><span class="cov8" title="1">{ 7604 preemptorTasks[job.UID].Push(task) 7605 }</span> 7606 } 7607 } 7608 7609 <span class="cov8" title="1">for </span><span class="cov8" title="1">{ 7610 // If no queues, break 7611 if queues.Empty() </span><span class="cov8" title="1">{ 7612 break</span> 7613 } 7614 7615 <span class="cov8" title="1">var job *api.JobInfo 7616 var task *api.TaskInfo 7617 7618 queue := queues.Pop().(*api.QueueInfo) 7619 if ssn.Overused(queue) </span><span class="cov8" title="1">{ 7620 klog.V(3).Infof("Queue <%s> is overused, ignore it.", queue.Name) 7621 continue</span> 7622 } 7623 7624 // Found "high" priority job 7625 <span class="cov8" title="1">jobs, found := preemptorsMap[queue.UID] 7626 if !found || jobs.Empty() </span><span class="cov0" title="0">{ 7627 continue</span> 7628 } else<span class="cov8" title="1"> { 7629 job = jobs.Pop().(*api.JobInfo) 7630 }</span> 7631 7632 // Found "high" priority task to reclaim others 7633 <span class="cov8" title="1">if tasks, found := preemptorTasks[job.UID]; !found || tasks.Empty() </span><span class="cov0" title="0">{ 7634 continue</span> 7635 } else<span class="cov8" title="1"> { 7636 task = tasks.Pop().(*api.TaskInfo) 7637 }</span> 7638 7639 // Check whether the queue is overused on dimension that the task requested 7640 <span class="cov8" title="1">taskRequest := task.Resreq.ResourceNames() 7641 if underusedResources 
:= ssn.UnderusedResources(queue); underusedResources != nil && !underusedResources.Contains(taskRequest) </span><span class="cov0" title="0">{ 7642 klog.V(3).Infof("Queue <%s> is overused when considering task <%s>, ignore it.", queue.Name, task.Name) 7643 continue</span> 7644 } 7645 7646 <span class="cov8" title="1">assigned := false 7647 for _, n := range ssn.Nodes </span><span class="cov8" title="1">{ 7648 // If predicates failed, next node. 7649 if err := ssn.PredicateFn(task, n); err != nil </span><span class="cov0" title="0">{ 7650 continue</span> 7651 } 7652 7653 <span class="cov8" title="1">klog.V(3).Infof("Considering Task <%s/%s> on Node <%s>.", 7654 task.Namespace, task.Name, n.Name) 7655 7656 var reclaimees []*api.TaskInfo 7657 for _, task := range n.Tasks </span><span class="cov8" title="1">{ 7658 // Ignore non running task. 7659 if task.Status != api.Running </span><span class="cov0" title="0">{ 7660 continue</span> 7661 } 7662 7663 <span class="cov8" title="1">if j, found := ssn.Jobs[task.Job]; !found </span><span class="cov0" title="0">{ 7664 continue</span> 7665 } else<span class="cov8" title="1"> if j.Queue != job.Queue </span><span class="cov8" title="1">{ 7666 q := ssn.Queues[j.Queue] 7667 if !q.Reclaimable() </span><span class="cov0" title="0">{ 7668 continue</span> 7669 } 7670 // Clone task to avoid modify Task's status on node. 7671 <span class="cov8" title="1">reclaimees = append(reclaimees, task.Clone())</span> 7672 } 7673 } 7674 <span class="cov8" title="1">victims := ssn.Reclaimable(task, reclaimees) 7675 7676 if err := util.ValidateVictims(task, n, victims); err != nil </span><span class="cov0" title="0">{ 7677 klog.V(3).Infof("No validated victims on Node <%s>: %v", n.Name, err) 7678 continue</span> 7679 } 7680 7681 <span class="cov8" title="1">resreq := task.InitResreq.Clone() 7682 reclaimed := api.EmptyResource() 7683 7684 // Reclaim victims for tasks. 
7685 for _, reclaimee := range victims </span><span class="cov8" title="1">{ 7686 klog.Errorf("Try to reclaim Task <%s/%s> for Tasks <%s/%s>", 7687 reclaimee.Namespace, reclaimee.Name, task.Namespace, task.Name) 7688 if err := ssn.Evict(reclaimee, "reclaim"); err != nil </span><span class="cov0" title="0">{ 7689 klog.Errorf("Failed to reclaim Task <%s/%s> for Tasks <%s/%s>: %v", 7690 reclaimee.Namespace, reclaimee.Name, task.Namespace, task.Name, err) 7691 continue</span> 7692 } 7693 <span class="cov8" title="1">reclaimed.Add(reclaimee.Resreq) 7694 // If reclaimed enough resources, break loop to avoid Sub panic. 7695 if resreq.LessEqual(reclaimed, api.Zero) </span><span class="cov8" title="1">{ 7696 break</span> 7697 } 7698 } 7699 7700 <span class="cov8" title="1">klog.V(3).Infof("Reclaimed <%v> for task <%s/%s> requested <%v>.", 7701 reclaimed, task.Namespace, task.Name, task.InitResreq) 7702 7703 if task.InitResreq.LessEqual(reclaimed, api.Zero) </span><span class="cov8" title="1">{ 7704 if err := ssn.Pipeline(task, n.Name); err != nil </span><span class="cov0" title="0">{ 7705 klog.Errorf("Failed to pipeline Task <%s/%s> on Node <%s>", 7706 task.Namespace, task.Name, n.Name) 7707 }</span> 7708 7709 // Ignore error of pipeline, will be corrected in next scheduling loop. 7710 <span class="cov8" title="1">assigned = true 7711 7712 break</span> 7713 } 7714 } 7715 7716 <span class="cov8" title="1">if assigned </span><span class="cov8" title="1">{ 7717 jobs.Push(job) 7718 }</span> 7719 <span class="cov8" title="1">queues.Push(queue)</span> 7720 } 7721 } 7722 7723 func (ra *Action) UnInitialize() {<span class="cov0" title="0"> 7724 }</span> 7725 </pre> 7726 7727 <pre class="file" id="file42" style="display: none">/* 7728 Copyright 2017 The Kubernetes Authors. 7729 7730 Licensed under the Apache License, Version 2.0 (the "License"); 7731 you may not use this file except in compliance with the License. 
7732 You may obtain a copy of the License at 7733 7734 http://www.apache.org/licenses/LICENSE-2.0 7735 7736 Unless required by applicable law or agreed to in writing, software 7737 distributed under the License is distributed on an "AS IS" BASIS, 7738 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 7739 See the License for the specific language governing permissions and 7740 limitations under the License. 7741 */ 7742 7743 package api 7744 7745 import ( 7746 "fmt" 7747 ) 7748 7749 // ClusterInfo is a snapshot of cluster by cache. 7750 type ClusterInfo struct { 7751 Jobs map[JobID]*JobInfo 7752 Nodes map[string]*NodeInfo 7753 Queues map[QueueID]*QueueInfo 7754 NamespaceInfo map[NamespaceName]*NamespaceInfo 7755 RevocableNodes map[string]*NodeInfo 7756 NodeList []string 7757 } 7758 7759 func (ci ClusterInfo) String() string <span class="cov0" title="0">{ 7760 str := "Cache:\n" 7761 7762 if len(ci.Nodes) != 0 </span><span class="cov0" title="0">{ 7763 str += "Nodes:\n" 7764 for _, n := range ci.Nodes </span><span class="cov0" title="0">{ 7765 str += fmt.Sprintf("\t %s: idle(%v) used(%v) allocatable(%v) pods(%d)\n", 7766 n.Name, n.Idle, n.Used, n.Allocatable, len(n.Tasks)) 7767 7768 i := 0 7769 for _, p := range n.Tasks </span><span class="cov0" title="0">{ 7770 str += fmt.Sprintf("\t\t %d: %v\n", i, p) 7771 i++ 7772 }</span> 7773 } 7774 } 7775 7776 <span class="cov0" title="0">if len(ci.Jobs) != 0 </span><span class="cov0" title="0">{ 7777 str += "Jobs:\n" 7778 for _, job := range ci.Jobs </span><span class="cov0" title="0">{ 7779 str += fmt.Sprintf("\t Job(%s) name(%s) minAvailable(%v)\n", 7780 job.UID, job.Name, job.MinAvailable) 7781 7782 i := 0 7783 for _, task := range job.Tasks </span><span class="cov0" title="0">{ 7784 str += fmt.Sprintf("\t\t %d: %v\n", i, task) 7785 i++ 7786 }</span> 7787 } 7788 } 7789 7790 <span class="cov0" title="0">if len(ci.NamespaceInfo) != 0 </span><span class="cov0" title="0">{ 7791 str += "Namespaces:\n" 7792 
for _, ns := range ci.NamespaceInfo </span><span class="cov0" title="0">{ 7793 str += fmt.Sprintf("\t Namespace(%s) Weight(%v)\n", 7794 ns.Name, ns.Weight) 7795 }</span> 7796 } 7797 7798 <span class="cov0" title="0">if len(ci.NodeList) != 0 </span><span class="cov0" title="0">{ 7799 str += fmt.Sprintf("NodeList: %v\n", ci.NodeList) 7800 }</span> 7801 7802 <span class="cov0" title="0">return str</span> 7803 } 7804 </pre> 7805 7806 <pre class="file" id="file43" style="display: none">/* 7807 Copyright 2020 The Volcano Authors. 7808 7809 Licensed under the Apache License, Version 2.0 (the "License"); 7810 you may not use this file except in compliance with the License. 7811 You may obtain a copy of the License at 7812 7813 http://www.apache.org/licenses/LICENSE-2.0 7814 7815 Unless required by applicable law or agreed to in writing, software 7816 distributed under the License is distributed on an "AS IS" BASIS, 7817 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 7818 See the License for the specific language governing permissions and 7819 limitations under the License. 7820 */ 7821 7822 package api 7823 7824 import ( 7825 v1 "k8s.io/api/core/v1" 7826 ) 7827 7828 // GPUDevice include gpu id, memory and the pods that are sharing it. 7829 type GPUDevice struct { 7830 // GPU ID 7831 ID int 7832 // The pods that are sharing this GPU 7833 PodMap map[string]*v1.Pod 7834 // memory per card 7835 Memory uint 7836 } 7837 7838 // NewGPUDevice creates a device 7839 func NewGPUDevice(id int, mem uint) *GPUDevice <span class="cov0" title="0">{ 7840 return &GPUDevice{ 7841 ID: id, 7842 Memory: mem, 7843 PodMap: map[string]*v1.Pod{}, 7844 } 7845 }</span> 7846 7847 // getUsedGPUMemory calculates the used memory of the device. 
7848 func (g *GPUDevice) getUsedGPUMemory() uint <span class="cov0" title="0">{ 7849 res := uint(0) 7850 for _, pod := range g.PodMap </span><span class="cov0" title="0">{ 7851 if pod.Status.Phase == v1.PodSucceeded || pod.Status.Phase == v1.PodFailed </span><span class="cov0" title="0">{ 7852 continue</span> 7853 } else<span class="cov0" title="0"> { 7854 gpuRequest := GetGPUResourceOfPod(pod) 7855 res += gpuRequest 7856 }</span> 7857 } 7858 <span class="cov0" title="0">return res</span> 7859 } 7860 7861 // GetGPUResourceOfPod returns the GPU resource required by the pod. 7862 func GetGPUResourceOfPod(pod *v1.Pod) uint <span class="cov8" title="1">{ 7863 var mem uint 7864 for _, container := range pod.Spec.Containers </span><span class="cov8" title="1">{ 7865 mem += getGPUResourceOfContainer(&container) 7866 }</span> 7867 <span class="cov8" title="1">return mem</span> 7868 } 7869 7870 // getGPUResourceOfPod returns the GPU resource required by the container. 7871 func getGPUResourceOfContainer(container *v1.Container) uint <span class="cov8" title="1">{ 7872 var mem uint 7873 if val, ok := container.Resources.Limits[VolcanoGPUResource]; ok </span><span class="cov0" title="0">{ 7874 mem = uint(val.Value()) 7875 }</span> 7876 <span class="cov8" title="1">return mem</span> 7877 } 7878 </pre> 7879 7880 <pre class="file" id="file44" style="display: none">/* 7881 Copyright 2017 The Kubernetes Authors. 7882 7883 Licensed under the Apache License, Version 2.0 (the "License"); 7884 you may not use this file except in compliance with the License. 7885 You may obtain a copy of the License at 7886 7887 http://www.apache.org/licenses/LICENSE-2.0 7888 7889 Unless required by applicable law or agreed to in writing, software 7890 distributed under the License is distributed on an "AS IS" BASIS, 7891 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 7892 See the License for the specific language governing permissions and 7893 limitations under the License. 
7894 */ 7895 7896 package api 7897 7898 import ( 7899 "fmt" 7900 7901 v1 "k8s.io/api/core/v1" 7902 clientcache "k8s.io/client-go/tools/cache" 7903 ) 7904 7905 // PodKey returns the string key of a pod. 7906 func PodKey(pod *v1.Pod) TaskID <span class="cov8" title="1">{ 7907 key, err := clientcache.MetaNamespaceKeyFunc(pod) 7908 if err != nil </span><span class="cov0" title="0">{ 7909 return TaskID(fmt.Sprintf("%v/%v", pod.Namespace, pod.Name)) 7910 }</span> 7911 <span class="cov8" title="1">return TaskID(key)</span> 7912 } 7913 7914 func getTaskStatus(pod *v1.Pod) TaskStatus <span class="cov8" title="1">{ 7915 switch pod.Status.Phase </span>{ 7916 case v1.PodRunning:<span class="cov8" title="1"> 7917 if pod.DeletionTimestamp != nil </span><span class="cov0" title="0">{ 7918 return Releasing 7919 }</span> 7920 7921 <span class="cov8" title="1">return Running</span> 7922 case v1.PodPending:<span class="cov8" title="1"> 7923 if pod.DeletionTimestamp != nil </span><span class="cov0" title="0">{ 7924 return Releasing 7925 }</span> 7926 7927 <span class="cov8" title="1">if len(pod.Spec.NodeName) == 0 </span><span class="cov8" title="1">{ 7928 return Pending 7929 }</span> 7930 <span class="cov8" title="1">return Bound</span> 7931 case v1.PodUnknown:<span class="cov8" title="1"> 7932 return Unknown</span> 7933 case v1.PodSucceeded:<span class="cov0" title="0"> 7934 return Succeeded</span> 7935 case v1.PodFailed:<span class="cov0" title="0"> 7936 return Failed</span> 7937 } 7938 7939 <span class="cov0" title="0">return Unknown</span> 7940 } 7941 7942 // AllocatedStatus checks whether the tasks has AllocatedStatus 7943 func AllocatedStatus(status TaskStatus) bool <span class="cov8" title="1">{ 7944 switch status </span>{ 7945 case Bound, Binding, Running, Allocated:<span class="cov8" title="1"> 7946 return true</span> 7947 default:<span class="cov8" title="1"> 7948 return false</span> 7949 } 7950 } 7951 7952 // MergeErrors is used to merge multiple errors into single error 
7953 func MergeErrors(errs ...error) error <span class="cov0" title="0">{ 7954 msg := "errors: " 7955 7956 foundErr := false 7957 i := 1 7958 7959 for _, e := range errs </span><span class="cov0" title="0">{ 7960 if e != nil </span><span class="cov0" title="0">{ 7961 if foundErr </span><span class="cov0" title="0">{ 7962 msg = fmt.Sprintf("%s, %d: ", msg, i) 7963 }</span> else<span class="cov0" title="0"> { 7964 msg = fmt.Sprintf("%s %d: ", msg, i) 7965 }</span> 7966 7967 <span class="cov0" title="0">msg = fmt.Sprintf("%s%v", msg, e) 7968 foundErr = true 7969 i++</span> 7970 } 7971 } 7972 7973 <span class="cov0" title="0">if foundErr </span><span class="cov0" title="0">{ 7974 return fmt.Errorf("%s", msg) 7975 }</span> 7976 7977 <span class="cov0" title="0">return nil</span> 7978 } 7979 7980 // JobTerminated checks whether job was terminated. 7981 func JobTerminated(job *JobInfo) bool <span class="cov0" title="0">{ 7982 return job.PodGroup == nil && len(job.Tasks) == 0 7983 }</span> 7984 </pre> 7985 7986 <pre class="file" id="file45" style="display: none">/* 7987 Copyright 2018 The Kubernetes Authors. 7988 7989 Licensed under the Apache License, Version 2.0 (the "License"); 7990 you may not use this file except in compliance with the License. 7991 You may obtain a copy of the License at 7992 7993 http://www.apache.org/licenses/LICENSE-2.0 7994 7995 Unless required by applicable law or agreed to in writing, software 7996 distributed under the License is distributed on an "AS IS" BASIS, 7997 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 7998 See the License for the specific language governing permissions and 7999 limitations under the License. 
8000 */ 8001 8002 package helpers 8003 8004 import ( 8005 "math" 8006 8007 v1 "k8s.io/api/core/v1" 8008 8009 "volcano.sh/volcano/pkg/scheduler/api" 8010 ) 8011 8012 // Min is used to find the min of two resource types 8013 func Min(l, r *api.Resource) *api.Resource <span class="cov0" title="0">{ 8014 res := &api.Resource{} 8015 8016 res.MilliCPU = math.Min(l.MilliCPU, r.MilliCPU) 8017 res.Memory = math.Min(l.Memory, r.Memory) 8018 8019 if l.ScalarResources == nil || r.ScalarResources == nil </span><span class="cov0" title="0">{ 8020 return res 8021 }</span> 8022 8023 <span class="cov0" title="0">res.ScalarResources = map[v1.ResourceName]float64{} 8024 for lName, lQuant := range l.ScalarResources </span><span class="cov0" title="0">{ 8025 res.ScalarResources[lName] = math.Min(lQuant, r.ScalarResources[lName]) 8026 }</span> 8027 8028 <span class="cov0" title="0">return res</span> 8029 } 8030 8031 // Max returns the resource object with larger value in each dimension. 8032 func Max(l, r *api.Resource) *api.Resource <span class="cov8" title="1">{ 8033 res := &api.Resource{} 8034 8035 res.MilliCPU = math.Max(l.MilliCPU, r.MilliCPU) 8036 res.Memory = math.Max(l.Memory, r.Memory) 8037 8038 if l.ScalarResources == nil && r.ScalarResources == nil </span><span class="cov0" title="0">{ 8039 return res 8040 }</span> 8041 <span class="cov8" title="1">res.ScalarResources = map[v1.ResourceName]float64{} 8042 if l.ScalarResources != nil </span><span class="cov8" title="1">{ 8043 for lName, lQuant := range l.ScalarResources </span><span class="cov8" title="1">{ 8044 if lQuant > 0 </span><span class="cov8" title="1">{ 8045 res.ScalarResources[lName] = lQuant 8046 }</span> 8047 } 8048 } 8049 <span class="cov8" title="1">if r.ScalarResources != nil </span><span class="cov8" title="1">{ 8050 for rName, rQuant := range r.ScalarResources </span><span class="cov8" title="1">{ 8051 if rQuant > 0 </span><span class="cov8" title="1">{ 8052 maxQuant := math.Max(rQuant, 
res.ScalarResources[rName]) 8053 res.ScalarResources[rName] = maxQuant 8054 }</span> 8055 } 8056 } 8057 <span class="cov8" title="1">return res</span> 8058 } 8059 8060 // Share is used to determine the share 8061 func Share(l, r float64) float64 <span class="cov0" title="0">{ 8062 var share float64 8063 if r == 0 </span><span class="cov0" title="0">{ 8064 if l == 0 </span><span class="cov0" title="0">{ 8065 share = 0 8066 }</span> else<span class="cov0" title="0"> { 8067 share = 1 8068 }</span> 8069 } else<span class="cov0" title="0"> { 8070 share = l / r 8071 }</span> 8072 8073 <span class="cov0" title="0">return share</span> 8074 } 8075 </pre> 8076 8077 <pre class="file" id="file46" style="display: none">/* 8078 Copyright 2017 The Kubernetes Authors. 8079 8080 Licensed under the Apache License, Version 2.0 (the "License"); 8081 you may not use this file except in compliance with the License. 8082 You may obtain a copy of the License at 8083 8084 http://www.apache.org/licenses/LICENSE-2.0 8085 8086 Unless required by applicable law or agreed to in writing, software 8087 distributed under the License is distributed on an "AS IS" BASIS, 8088 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 8089 See the License for the specific language governing permissions and 8090 limitations under the License. 
8091 */ 8092 8093 package api 8094 8095 import ( 8096 "encoding/json" 8097 "errors" 8098 "fmt" 8099 "sort" 8100 "strconv" 8101 "strings" 8102 "time" 8103 8104 v1 "k8s.io/api/core/v1" 8105 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 8106 "k8s.io/apimachinery/pkg/types" 8107 "k8s.io/klog" 8108 volumescheduling "k8s.io/kubernetes/pkg/controller/volume/scheduling" 8109 8110 batch "volcano.sh/apis/pkg/apis/batch/v1alpha1" 8111 "volcano.sh/apis/pkg/apis/scheduling" 8112 "volcano.sh/apis/pkg/apis/scheduling/v1beta1" 8113 ) 8114 8115 // DisruptionBudget define job min pod available and max pod unvailable value 8116 type DisruptionBudget struct { 8117 MinAvailable string 8118 MaxUnavilable string 8119 } 8120 8121 // NewDisruptionBudget create disruption budget for job 8122 func NewDisruptionBudget(minAvailable, maxUnavilable string) *DisruptionBudget <span class="cov8" title="1">{ 8123 disruptionBudget := &DisruptionBudget{ 8124 MinAvailable: minAvailable, 8125 MaxUnavilable: maxUnavilable, 8126 } 8127 return disruptionBudget 8128 }</span> 8129 8130 // Clone return a clone of DisruptionBudget 8131 func (db *DisruptionBudget) Clone() *DisruptionBudget <span class="cov0" title="0">{ 8132 return &DisruptionBudget{ 8133 MinAvailable: db.MinAvailable, 8134 MaxUnavilable: db.MaxUnavilable, 8135 } 8136 }</span> 8137 8138 // JobWaitingTime is maximum waiting time that a job could stay Pending in service level agreement 8139 // when job waits longer than waiting time, it should be inqueue at once, and cluster should reserve resources for it 8140 const JobWaitingTime = "sla-waiting-time" 8141 8142 // TaskID is UID type for Task 8143 type TaskID types.UID 8144 8145 // TransactionContext holds all the fields that needed by scheduling transaction 8146 type TransactionContext struct { 8147 NodeName string 8148 Status TaskStatus 8149 } 8150 8151 // Clone return a clone of TransactionContext 8152 func (ctx *TransactionContext) Clone() *TransactionContext <span class="cov8" title="1">{ 
8153 if ctx == nil </span><span class="cov8" title="1">{ 8154 return nil 8155 }</span> 8156 <span class="cov0" title="0">clone := *ctx 8157 return &clone</span> 8158 } 8159 8160 type TopologyInfo struct { 8161 Policy string 8162 ResMap map[int]v1.ResourceList // key: numa ID 8163 } 8164 8165 func (info *TopologyInfo) Clone() *TopologyInfo <span class="cov8" title="1">{ 8166 copyInfo := &TopologyInfo{ 8167 Policy: info.Policy, 8168 ResMap: make(map[int]v1.ResourceList), 8169 } 8170 8171 for numaId, resList := range info.ResMap </span><span class="cov0" title="0">{ 8172 copyInfo.ResMap[numaId] = resList.DeepCopy() 8173 }</span> 8174 8175 <span class="cov8" title="1">return copyInfo</span> 8176 } 8177 8178 // TaskInfo will have all infos about the task 8179 type TaskInfo struct { 8180 UID TaskID 8181 Job JobID 8182 8183 Name string 8184 Namespace string 8185 8186 // Resreq is the resource that used when task running. 8187 Resreq *Resource 8188 // InitResreq is the resource that used to launch a task. 8189 InitResreq *Resource 8190 8191 TransactionContext 8192 // LastTransaction holds the context of last scheduling transaction 8193 LastTransaction *TransactionContext 8194 8195 Priority int32 8196 VolumeReady bool 8197 Preemptable bool 8198 BestEffort bool 8199 8200 // RevocableZone support set volcano.sh/revocable-zone annotaion or label for pod/podgroup 8201 // we only support empty value or * value for this version and we will support specify revocable zone name for futrue release 8202 // empty value means workload can not use revocable node 8203 // * value means workload can use all the revocable node for during node active revocable time. 
8204 RevocableZone string 8205 8206 NumaInfo *TopologyInfo 8207 PodVolumes *volumescheduling.PodVolumes 8208 Pod *v1.Pod 8209 } 8210 8211 func getJobID(pod *v1.Pod) JobID <span class="cov8" title="1">{ 8212 if gn, found := pod.Annotations[v1beta1.KubeGroupNameAnnotationKey]; found && len(gn) != 0 </span><span class="cov8" title="1">{ 8213 // Make sure Pod and PodGroup belong to the same namespace. 8214 jobID := fmt.Sprintf("%s/%s", pod.Namespace, gn) 8215 return JobID(jobID) 8216 }</span> 8217 8218 <span class="cov8" title="1">return ""</span> 8219 } 8220 8221 func getTaskID(pod *v1.Pod) TaskID <span class="cov0" title="0">{ 8222 if ts, found := pod.Annotations[batch.TaskSpecKey]; found && len(ts) != 0 </span><span class="cov0" title="0">{ 8223 return TaskID(ts) 8224 }</span> 8225 8226 <span class="cov0" title="0">return ""</span> 8227 } 8228 8229 const TaskPriorityAnnotation = "volcano.sh/task-priority" 8230 8231 // NewTaskInfo creates new taskInfo object for a Pod 8232 func NewTaskInfo(pod *v1.Pod) *TaskInfo <span class="cov8" title="1">{ 8233 initResReq := GetPodResourceRequest(pod) 8234 resReq := initResReq 8235 bestEffort := initResReq.IsEmpty() 8236 preemptable := GetPodPreemptable(pod) 8237 revocableZone := GetPodRevocableZone(pod) 8238 topologyInfo := GetPodTopologyInfo(pod) 8239 8240 jobID := getJobID(pod) 8241 8242 ti := &TaskInfo{ 8243 UID: TaskID(pod.UID), 8244 Job: jobID, 8245 Name: pod.Name, 8246 Namespace: pod.Namespace, 8247 Priority: 1, 8248 Pod: pod, 8249 Resreq: resReq, 8250 InitResreq: initResReq, 8251 Preemptable: preemptable, 8252 BestEffort: bestEffort, 8253 RevocableZone: revocableZone, 8254 NumaInfo: topologyInfo, 8255 TransactionContext: TransactionContext{ 8256 NodeName: pod.Spec.NodeName, 8257 Status: getTaskStatus(pod), 8258 }, 8259 } 8260 8261 if pod.Spec.Priority != nil </span><span class="cov0" title="0">{ 8262 ti.Priority = *pod.Spec.Priority 8263 }</span> 8264 8265 <span class="cov8" title="1">if taskPriority, ok := 
pod.Annotations[TaskPriorityAnnotation]; ok </span><span class="cov0" title="0">{ 8266 if priority, err := strconv.ParseInt(taskPriority, 10, 32); err == nil </span><span class="cov0" title="0">{ 8267 ti.Priority = int32(priority) 8268 }</span> 8269 } 8270 8271 <span class="cov8" title="1">return ti</span> 8272 } 8273 8274 // GetTransactionContext get transaction context of a task 8275 func (ti *TaskInfo) GetTransactionContext() TransactionContext <span class="cov8" title="1">{ 8276 return ti.TransactionContext 8277 }</span> 8278 8279 // GenerateLastTxContext generate and set context of last transaction for a task 8280 func (ti *TaskInfo) GenerateLastTxContext() <span class="cov0" title="0">{ 8281 ctx := ti.GetTransactionContext() 8282 ti.LastTransaction = &ctx 8283 }</span> 8284 8285 // ClearLastTxContext clear context of last transaction for a task 8286 func (ti *TaskInfo) ClearLastTxContext() <span class="cov0" title="0">{ 8287 ti.LastTransaction = nil 8288 }</span> 8289 8290 func (ti *TaskInfo) SetPodResourceDecision() error <span class="cov0" title="0">{ 8291 if ti.NumaInfo == nil || len(ti.NumaInfo.ResMap) == 0 </span><span class="cov0" title="0">{ 8292 return nil 8293 }</span> 8294 8295 <span class="cov0" title="0">klog.V(4).Infof("%v/%v resource decision: %v", ti.Namespace, ti.Name, ti.NumaInfo.ResMap) 8296 decision := PodResourceDecision{ 8297 NUMAResources: ti.NumaInfo.ResMap, 8298 } 8299 8300 layout, err := json.Marshal(&decision) 8301 if err != nil </span><span class="cov0" title="0">{ 8302 return err 8303 }</span> 8304 8305 <span class="cov0" title="0">metav1.SetMetaDataAnnotation(&ti.Pod.ObjectMeta, topologyDecisionAnnotation, string(layout[:])) 8306 return nil</span> 8307 } 8308 8309 func (ti *TaskInfo) UnsetPodResourceDecision() <span class="cov0" title="0">{ 8310 delete(ti.Pod.Annotations, topologyDecisionAnnotation) 8311 }</span> 8312 8313 // Clone is used for cloning a task 8314 func (ti *TaskInfo) Clone() *TaskInfo <span class="cov8" title="1">{ 
8315 return &TaskInfo{ 8316 UID: ti.UID, 8317 Job: ti.Job, 8318 Name: ti.Name, 8319 Namespace: ti.Namespace, 8320 Priority: ti.Priority, 8321 PodVolumes: ti.PodVolumes, 8322 Pod: ti.Pod, 8323 Resreq: ti.Resreq.Clone(), 8324 InitResreq: ti.InitResreq.Clone(), 8325 VolumeReady: ti.VolumeReady, 8326 Preemptable: ti.Preemptable, 8327 BestEffort: ti.BestEffort, 8328 RevocableZone: ti.RevocableZone, 8329 NumaInfo: ti.NumaInfo.Clone(), 8330 TransactionContext: TransactionContext{ 8331 NodeName: ti.NodeName, 8332 Status: ti.Status, 8333 }, 8334 LastTransaction: ti.LastTransaction.Clone(), 8335 } 8336 }</span> 8337 8338 func (ti *TaskInfo) GetTaskSpecKey() TaskID <span class="cov0" title="0">{ 8339 if ti.Pod == nil </span><span class="cov0" title="0">{ 8340 return "" 8341 }</span> 8342 <span class="cov0" title="0">return getTaskID(ti.Pod)</span> 8343 } 8344 8345 // String returns the taskInfo details in a string 8346 func (ti TaskInfo) String() string <span class="cov0" title="0">{ 8347 if ti.NumaInfo == nil </span><span class="cov0" title="0">{ 8348 return fmt.Sprintf("Task (%v:%v/%v): job %v, status %v, pri %v"+ 8349 "resreq %v, preemptable %v, revocableZone %v", 8350 ti.UID, ti.Namespace, ti.Name, ti.Job, ti.Status, ti.Priority, 8351 ti.Resreq, ti.Preemptable, ti.RevocableZone) 8352 }</span> 8353 8354 <span class="cov0" title="0">return fmt.Sprintf("Task (%v:%v/%v): job %v, status %v, pri %v"+ 8355 "resreq %v, preemptable %v, revocableZone %v, numaInfo %v", 8356 ti.UID, ti.Namespace, ti.Name, ti.Job, ti.Status, ti.Priority, 8357 ti.Resreq, ti.Preemptable, ti.RevocableZone, *ti.NumaInfo)</span> 8358 } 8359 8360 // JobID is the type of JobInfo's ID. 
8361 type JobID types.UID 8362 8363 type tasksMap map[TaskID]*TaskInfo 8364 8365 // NodeResourceMap stores resource in a node 8366 type NodeResourceMap map[string]*Resource 8367 8368 // JobInfo will have all info of a Job 8369 type JobInfo struct { 8370 UID JobID 8371 8372 Name string 8373 Namespace string 8374 8375 Queue QueueID 8376 8377 Priority int32 8378 8379 MinAvailable int32 8380 8381 WaitingTime *time.Duration 8382 8383 JobFitErrors string 8384 NodesFitErrors map[TaskID]*FitErrors 8385 8386 // All tasks of the Job. 8387 TaskStatusIndex map[TaskStatus]tasksMap 8388 Tasks tasksMap 8389 TaskMinAvailable map[TaskID]int32 8390 TaskMinAvailableTotal int32 8391 8392 Allocated *Resource 8393 TotalRequest *Resource 8394 8395 CreationTimestamp metav1.Time 8396 PodGroup *PodGroup 8397 8398 ScheduleStartTimestamp metav1.Time 8399 8400 Preemptable bool 8401 8402 // RevocableZone support set volcano.sh/revocable-zone annotaion or label for pod/podgroup 8403 // we only support empty value or * value for this version and we will support specify revocable zone name for futrue release 8404 // empty value means workload can not use revocable node 8405 // * value means workload can use all the revocable node for during node active revocable time. 
8406 RevocableZone string 8407 Budget *DisruptionBudget 8408 } 8409 8410 // NewJobInfo creates a new jobInfo for set of tasks 8411 func NewJobInfo(uid JobID, tasks ...*TaskInfo) *JobInfo <span class="cov8" title="1">{ 8412 job := &JobInfo{ 8413 UID: uid, 8414 MinAvailable: 0, 8415 NodesFitErrors: make(map[TaskID]*FitErrors), 8416 Allocated: EmptyResource(), 8417 TotalRequest: EmptyResource(), 8418 TaskStatusIndex: map[TaskStatus]tasksMap{}, 8419 Tasks: tasksMap{}, 8420 TaskMinAvailable: map[TaskID]int32{}, 8421 } 8422 8423 for _, task := range tasks </span><span class="cov0" title="0">{ 8424 job.AddTaskInfo(task) 8425 }</span> 8426 8427 <span class="cov8" title="1">return job</span> 8428 } 8429 8430 // UnsetPodGroup removes podGroup details from a job 8431 func (ji *JobInfo) UnsetPodGroup() <span class="cov0" title="0">{ 8432 ji.PodGroup = nil 8433 }</span> 8434 8435 // SetPodGroup sets podGroup details to a job 8436 func (ji *JobInfo) SetPodGroup(pg *PodGroup) <span class="cov8" title="1">{ 8437 ji.Name = pg.Name 8438 ji.Namespace = pg.Namespace 8439 ji.MinAvailable = pg.Spec.MinMember 8440 ji.Queue = QueueID(pg.Spec.Queue) 8441 ji.CreationTimestamp = pg.GetCreationTimestamp() 8442 8443 var err error 8444 ji.WaitingTime, err = ji.extractWaitingTime(pg) 8445 if err != nil </span><span class="cov0" title="0">{ 8446 klog.Warningf("Error occurs in parsing waiting time for job <%s/%s>, err: %s.", 8447 pg.Namespace, pg.Name, err.Error()) 8448 ji.WaitingTime = nil 8449 }</span> 8450 8451 <span class="cov8" title="1">ji.Preemptable = ji.extractPreemptable(pg) 8452 ji.RevocableZone = ji.extractRevocableZone(pg) 8453 ji.Budget = ji.extractBudget(pg) 8454 8455 taskMinAvailableTotal := int32(0) 8456 for task, member := range pg.Spec.MinTaskMember </span><span class="cov0" title="0">{ 8457 ji.TaskMinAvailable[TaskID(task)] = member 8458 taskMinAvailableTotal += member 8459 }</span> 8460 <span class="cov8" title="1">ji.TaskMinAvailableTotal = taskMinAvailableTotal 8461 8462 
ji.PodGroup = pg</span> 8463 } 8464 8465 // extractWaitingTime reads sla waiting time for job from podgroup annotations 8466 // TODO: should also read from given field in volcano job spec 8467 func (ji *JobInfo) extractWaitingTime(pg *PodGroup) (*time.Duration, error) <span class="cov8" title="1">{ 8468 if _, exist := pg.Annotations[JobWaitingTime]; !exist </span><span class="cov8" title="1">{ 8469 return nil, nil 8470 }</span> 8471 8472 <span class="cov0" title="0">jobWaitingTime, err := time.ParseDuration(pg.Annotations[JobWaitingTime]) 8473 if err != nil </span><span class="cov0" title="0">{ 8474 return nil, err 8475 }</span> 8476 8477 <span class="cov0" title="0">if jobWaitingTime <= 0 </span><span class="cov0" title="0">{ 8478 return nil, errors.New("invalid sla waiting time") 8479 }</span> 8480 8481 <span class="cov0" title="0">return &jobWaitingTime, nil</span> 8482 } 8483 8484 // extractPreemptable return volcano.sh/preemptable value for job 8485 func (ji *JobInfo) extractPreemptable(pg *PodGroup) bool <span class="cov8" title="1">{ 8486 // check annotaion first 8487 if len(pg.Annotations) > 0 </span><span class="cov0" title="0">{ 8488 if value, found := pg.Annotations[v1beta1.PodPreemptable]; found </span><span class="cov0" title="0">{ 8489 b, err := strconv.ParseBool(value) 8490 if err != nil </span><span class="cov0" title="0">{ 8491 klog.Warningf("invalid %s=%s", v1beta1.PodPreemptable, value) 8492 return false 8493 }</span> 8494 <span class="cov0" title="0">return b</span> 8495 } 8496 } 8497 8498 // it annotation does not exit, check label 8499 <span class="cov8" title="1">if len(pg.Labels) > 0 </span><span class="cov0" title="0">{ 8500 if value, found := pg.Labels[v1beta1.PodPreemptable]; found </span><span class="cov0" title="0">{ 8501 b, err := strconv.ParseBool(value) 8502 if err != nil </span><span class="cov0" title="0">{ 8503 klog.Warningf("invalid %s=%s", v1beta1.PodPreemptable, value) 8504 return false 8505 }</span> 8506 <span class="cov0" 
title="0">return b</span> 8507 } 8508 } 8509 8510 <span class="cov8" title="1">return false</span> 8511 } 8512 8513 // extractRevocableZone return volcano.sh/revocable-zone value for pod/podgroup 8514 func (ji *JobInfo) extractRevocableZone(pg *PodGroup) string <span class="cov8" title="1">{ 8515 // check annotation first 8516 if len(pg.Annotations) > 0 </span><span class="cov0" title="0">{ 8517 if value, found := pg.Annotations[v1beta1.RevocableZone]; found </span><span class="cov0" title="0">{ 8518 if value != "*" </span><span class="cov0" title="0">{ 8519 return "" 8520 }</span> 8521 <span class="cov0" title="0">return value</span> 8522 } 8523 8524 <span class="cov0" title="0">if value, found := pg.Annotations[v1beta1.PodPreemptable]; found </span><span class="cov0" title="0">{ 8525 if b, err := strconv.ParseBool(value); err == nil && b </span><span class="cov0" title="0">{ 8526 return "*" 8527 }</span> 8528 } 8529 } 8530 8531 <span class="cov8" title="1">return ""</span> 8532 } 8533 8534 // extractBudget return budget value for job 8535 func (ji *JobInfo) extractBudget(pg *PodGroup) *DisruptionBudget <span class="cov8" title="1">{ 8536 if len(pg.Annotations) > 0 </span><span class="cov0" title="0">{ 8537 if value, found := pg.Annotations[v1beta1.JDBMinAvailable]; found </span><span class="cov0" title="0">{ 8538 return NewDisruptionBudget(value, "") 8539 }</span> else<span class="cov0" title="0"> if value, found := pg.Annotations[v1beta1.JDBMaxUnavailable]; found </span><span class="cov0" title="0">{ 8540 return NewDisruptionBudget("", value) 8541 }</span> 8542 } 8543 8544 <span class="cov8" title="1">return NewDisruptionBudget("", "")</span> 8545 } 8546 8547 // GetMinResources return the min resources of podgroup. 
8548 func (ji *JobInfo) GetMinResources() *Resource <span class="cov0" title="0">{ 8549 if ji.PodGroup.Spec.MinResources == nil </span><span class="cov0" title="0">{ 8550 return EmptyResource() 8551 }</span> 8552 8553 <span class="cov0" title="0">return NewResource(*ji.PodGroup.Spec.MinResources)</span> 8554 } 8555 8556 func (ji *JobInfo) addTaskIndex(ti *TaskInfo) <span class="cov8" title="1">{ 8557 if _, found := ji.TaskStatusIndex[ti.Status]; !found </span><span class="cov8" title="1">{ 8558 ji.TaskStatusIndex[ti.Status] = tasksMap{} 8559 }</span> 8560 <span class="cov8" title="1">ji.TaskStatusIndex[ti.Status][ti.UID] = ti</span> 8561 } 8562 8563 // AddTaskInfo is used to add a task to a job 8564 func (ji *JobInfo) AddTaskInfo(ti *TaskInfo) <span class="cov8" title="1">{ 8565 ji.Tasks[ti.UID] = ti 8566 ji.addTaskIndex(ti) 8567 ji.TotalRequest.Add(ti.Resreq) 8568 if AllocatedStatus(ti.Status) </span><span class="cov8" title="1">{ 8569 ji.Allocated.Add(ti.Resreq) 8570 }</span> 8571 } 8572 8573 // UpdateTaskStatus is used to update task's status in a job. 8574 // If error occurs both task and job are guaranteed to be in the original state. 8575 func (ji *JobInfo) UpdateTaskStatus(task *TaskInfo, status TaskStatus) error <span class="cov0" title="0">{ 8576 if err := validateStatusUpdate(task.Status, status); err != nil </span><span class="cov0" title="0">{ 8577 return err 8578 }</span> 8579 8580 // First remove the task (if exist) from the task list. 8581 <span class="cov0" title="0">if _, found := ji.Tasks[task.UID]; found </span><span class="cov0" title="0">{ 8582 if err := ji.DeleteTaskInfo(task); err != nil </span><span class="cov0" title="0">{ 8583 return err 8584 }</span> 8585 } 8586 8587 // Update task's status to the target status once task addition is guaranteed to succeed. 
8588 <span class="cov0" title="0">task.Status = status 8589 ji.AddTaskInfo(task) 8590 8591 return nil</span> 8592 } 8593 8594 func (ji *JobInfo) deleteTaskIndex(ti *TaskInfo) <span class="cov8" title="1">{ 8595 if tasks, found := ji.TaskStatusIndex[ti.Status]; found </span><span class="cov8" title="1">{ 8596 delete(tasks, ti.UID) 8597 8598 if len(tasks) == 0 </span><span class="cov8" title="1">{ 8599 delete(ji.TaskStatusIndex, ti.Status) 8600 }</span> 8601 } 8602 } 8603 8604 // DeleteTaskInfo is used to delete a task from a job 8605 func (ji *JobInfo) DeleteTaskInfo(ti *TaskInfo) error <span class="cov8" title="1">{ 8606 if task, found := ji.Tasks[ti.UID]; found </span><span class="cov8" title="1">{ 8607 ji.TotalRequest.Sub(task.Resreq) 8608 if AllocatedStatus(task.Status) </span><span class="cov8" title="1">{ 8609 ji.Allocated.Sub(task.Resreq) 8610 }</span> 8611 <span class="cov8" title="1">delete(ji.Tasks, task.UID) 8612 ji.deleteTaskIndex(task) 8613 return nil</span> 8614 } 8615 8616 <span class="cov0" title="0">return fmt.Errorf("failed to find task <%v/%v> in job <%v/%v>", 8617 ti.Namespace, ti.Name, ji.Namespace, ji.Name)</span> 8618 } 8619 8620 // Clone is used to clone a jobInfo object 8621 func (ji *JobInfo) Clone() *JobInfo <span class="cov0" title="0">{ 8622 info := &JobInfo{ 8623 UID: ji.UID, 8624 Name: ji.Name, 8625 Namespace: ji.Namespace, 8626 Queue: ji.Queue, 8627 Priority: ji.Priority, 8628 8629 MinAvailable: ji.MinAvailable, 8630 WaitingTime: ji.WaitingTime, 8631 JobFitErrors: ji.JobFitErrors, 8632 NodesFitErrors: make(map[TaskID]*FitErrors), 8633 Allocated: EmptyResource(), 8634 TotalRequest: EmptyResource(), 8635 8636 PodGroup: ji.PodGroup.Clone(), 8637 8638 TaskStatusIndex: map[TaskStatus]tasksMap{}, 8639 TaskMinAvailable: ji.TaskMinAvailable, 8640 TaskMinAvailableTotal: ji.TaskMinAvailableTotal, 8641 Tasks: tasksMap{}, 8642 Preemptable: ji.Preemptable, 8643 RevocableZone: ji.RevocableZone, 8644 Budget: ji.Budget.Clone(), 8645 } 8646 8647 
ji.CreationTimestamp.DeepCopyInto(&info.CreationTimestamp) 8648 8649 for _, task := range ji.Tasks </span><span class="cov0" title="0">{ 8650 info.AddTaskInfo(task.Clone()) 8651 }</span> 8652 8653 <span class="cov0" title="0">return info</span> 8654 } 8655 8656 // String returns a jobInfo object in string format 8657 func (ji JobInfo) String() string <span class="cov0" title="0">{ 8658 res := "" 8659 8660 i := 0 8661 for _, task := range ji.Tasks </span><span class="cov0" title="0">{ 8662 res += fmt.Sprintf("\n\t %d: %v", i, task) 8663 i++ 8664 }</span> 8665 8666 <span class="cov0" title="0">return fmt.Sprintf("Job (%v): namespace %v (%v), name %v, minAvailable %d, podGroup %+v, preemptable %+v, revocableZone %+v, minAvailable %+v, maxAvailable %+v", 8667 ji.UID, ji.Namespace, ji.Queue, ji.Name, ji.MinAvailable, ji.PodGroup, ji.Preemptable, ji.RevocableZone, ji.Budget.MinAvailable, ji.Budget.MaxUnavilable) + res</span> 8668 } 8669 8670 // FitError returns detailed information on why a job's task failed to fit on 8671 // each available node 8672 func (ji *JobInfo) FitError() string <span class="cov8" title="1">{ 8673 sortReasonsHistogram := func(reasons map[string]int) []string </span><span class="cov8" title="1">{ 8674 reasonStrings := []string{} 8675 for k, v := range reasons </span><span class="cov8" title="1">{ 8676 reasonStrings = append(reasonStrings, fmt.Sprintf("%v %v", v, k)) 8677 }</span> 8678 <span class="cov8" title="1">sort.Strings(reasonStrings) 8679 return reasonStrings</span> 8680 } 8681 8682 // Stat histogram for all tasks of the job 8683 <span class="cov8" title="1">reasons := make(map[string]int) 8684 for status, taskMap := range ji.TaskStatusIndex </span><span class="cov8" title="1">{ 8685 reasons[status.String()] += len(taskMap) 8686 }</span> 8687 <span class="cov8" title="1">reasons["minAvailable"] = int(ji.MinAvailable) 8688 reasonMsg := fmt.Sprintf("%v, %v", scheduling.PodGroupNotReady, strings.Join(sortReasonsHistogram(reasons), ", ")) 8689 
8690 // Stat histogram for pending tasks only 8691 reasons = make(map[string]int) 8692 for uid := range ji.TaskStatusIndex[Pending] </span><span class="cov8" title="1">{ 8693 reason, _ := ji.TaskSchedulingReason(uid) 8694 reasons[reason]++ 8695 }</span> 8696 <span class="cov8" title="1">if len(reasons) > 0 </span><span class="cov8" title="1">{ 8697 reasonMsg += "; " + fmt.Sprintf("%s: %s", Pending.String(), strings.Join(sortReasonsHistogram(reasons), ", ")) 8698 }</span> 8699 <span class="cov8" title="1">return reasonMsg</span> 8700 } 8701 8702 // TaskSchedulingReason get detailed reason and message of the given task 8703 // It returns detailed reason and message for tasks based on last scheduling transaction. 8704 func (ji *JobInfo) TaskSchedulingReason(tid TaskID) (reason string, msg string) <span class="cov8" title="1">{ 8705 taskInfo, exists := ji.Tasks[tid] 8706 if !exists </span><span class="cov0" title="0">{ 8707 return "", "" 8708 }</span> 8709 8710 // Get detailed scheduling reason based on LastTransaction 8711 <span class="cov8" title="1">ctx := taskInfo.GetTransactionContext() 8712 if taskInfo.LastTransaction != nil </span><span class="cov8" title="1">{ 8713 ctx = *taskInfo.LastTransaction 8714 }</span> 8715 8716 <span class="cov8" title="1">msg = ji.JobFitErrors 8717 switch status := ctx.Status; status </span>{ 8718 case Allocated, Pipelined:<span class="cov8" title="1"> 8719 // Pod is schedulable 8720 msg = fmt.Sprintf("Pod %s/%s can possibly be assigned to %s", taskInfo.Namespace, taskInfo.Name, ctx.NodeName) 8721 if status == Pipelined </span><span class="cov0" title="0">{ 8722 msg += " once resource is released" 8723 }</span> 8724 <span class="cov8" title="1">return PodReasonSchedulable, msg</span> 8725 case Pending:<span class="cov8" title="1"> 8726 if fe := ji.NodesFitErrors[tid]; fe != nil </span><span class="cov8" title="1">{ 8727 // Pod is not schedulable 8728 return PodReasonUnschedulable, fe.Error() 8729 }</span> 8730 // Pod is not scheduled 
yet 8731 <span class="cov8" title="1">return PodReasonUndetermined, msg</span> 8732 default:<span class="cov0" title="0"> 8733 return status.String(), msg</span> 8734 } 8735 } 8736 8737 // ReadyTaskNum returns the number of tasks that are ready or that is best-effort. 8738 func (ji *JobInfo) ReadyTaskNum() int32 <span class="cov0" title="0">{ 8739 occupied := 0 8740 occupied += len(ji.TaskStatusIndex[Bound]) 8741 occupied += len(ji.TaskStatusIndex[Binding]) 8742 occupied += len(ji.TaskStatusIndex[Running]) 8743 occupied += len(ji.TaskStatusIndex[Allocated]) 8744 occupied += len(ji.TaskStatusIndex[Succeeded]) 8745 8746 if tasks, found := ji.TaskStatusIndex[Pending]; found </span><span class="cov0" title="0">{ 8747 for _, task := range tasks </span><span class="cov0" title="0">{ 8748 if task.BestEffort </span><span class="cov0" title="0">{ 8749 occupied++ 8750 }</span> 8751 } 8752 } 8753 8754 <span class="cov0" title="0">return int32(occupied)</span> 8755 } 8756 8757 // WaitingTaskNum returns the number of tasks that are pipelined. 8758 func (ji *JobInfo) WaitingTaskNum() int32 <span class="cov0" title="0">{ 8759 return int32(len(ji.TaskStatusIndex[Pipelined])) 8760 }</span> 8761 8762 // CheckTaskMinAvailable returns whether each task of job is valid. 
8763 func (ji *JobInfo) CheckTaskMinAvailable() bool <span class="cov0" title="0">{ 8764 // if job minAvailable is less than sumof(task minAvailable), skip this check 8765 if ji.MinAvailable < ji.TaskMinAvailableTotal </span><span class="cov0" title="0">{ 8766 return true 8767 }</span> 8768 8769 <span class="cov0" title="0">actual := map[TaskID]int32{} 8770 for status, tasks := range ji.TaskStatusIndex </span><span class="cov0" title="0">{ 8771 if AllocatedStatus(status) || 8772 status == Succeeded || 8773 status == Pipelined || 8774 status == Pending </span><span class="cov0" title="0">{ 8775 for _, task := range tasks </span><span class="cov0" title="0">{ 8776 actual[getTaskID(task.Pod)]++ 8777 }</span> 8778 } 8779 } 8780 8781 <span class="cov0" title="0">klog.V(4).Infof("job %s/%s actual: %+v, ji.TaskMinAvailable: %+v", ji.Name, ji.Namespace, actual, ji.TaskMinAvailable) 8782 for task, minAvailable := range ji.TaskMinAvailable </span><span class="cov0" title="0">{ 8783 if act, ok := actual[task]; !ok || act < minAvailable </span><span class="cov0" title="0">{ 8784 return false 8785 }</span> 8786 } 8787 8788 <span class="cov0" title="0">return true</span> 8789 } 8790 8791 // CheckTaskMinAvailableReady return ready pods meet task minavaliable. 
8792 func (ji *JobInfo) CheckTaskMinAvailableReady() bool <span class="cov0" title="0">{ 8793 if ji.MinAvailable < ji.TaskMinAvailableTotal </span><span class="cov0" title="0">{ 8794 return true 8795 }</span> 8796 <span class="cov0" title="0">occupiedMap := map[TaskID]int32{} 8797 for status, tasks := range ji.TaskStatusIndex </span><span class="cov0" title="0">{ 8798 if AllocatedStatus(status) || 8799 status == Succeeded </span><span class="cov0" title="0">{ 8800 for _, task := range tasks </span><span class="cov0" title="0">{ 8801 occupiedMap[getTaskID(task.Pod)] += 1 8802 }</span> 8803 <span class="cov0" title="0">continue</span> 8804 } 8805 8806 <span class="cov0" title="0">if status == Pending </span><span class="cov0" title="0">{ 8807 for _, task := range tasks </span><span class="cov0" title="0">{ 8808 if task.InitResreq.IsEmpty() </span><span class="cov0" title="0">{ 8809 occupiedMap[getTaskID(task.Pod)] += 1 8810 }</span> 8811 } 8812 } 8813 } 8814 <span class="cov0" title="0">for taskId, minNum := range ji.TaskMinAvailable </span><span class="cov0" title="0">{ 8815 if occupiedMap[taskId] < minNum </span><span class="cov0" title="0">{ 8816 klog.V(4).Infof("Job %s/%s Task %s occupied %v less than task min avaliable", ji.Namespace, ji.Name, taskId, occupiedMap[taskId]) 8817 return false 8818 }</span> 8819 } 8820 <span class="cov0" title="0">return true</span> 8821 } 8822 8823 // CheckTaskMinAvailableReady return ready pods meet task minavaliable. 
8824 func (ji *JobInfo) CheckTaskMinAvailablePipelined() bool <span class="cov0" title="0">{ 8825 if ji.MinAvailable < ji.TaskMinAvailableTotal </span><span class="cov0" title="0">{ 8826 return true 8827 }</span> 8828 <span class="cov0" title="0">occupiedMap := map[TaskID]int32{} 8829 for status, tasks := range ji.TaskStatusIndex </span><span class="cov0" title="0">{ 8830 if AllocatedStatus(status) || 8831 status == Succeeded || 8832 status == Pipelined </span><span class="cov0" title="0">{ 8833 for _, task := range tasks </span><span class="cov0" title="0">{ 8834 occupiedMap[getTaskID(task.Pod)] += 1 8835 }</span> 8836 <span class="cov0" title="0">continue</span> 8837 } 8838 8839 <span class="cov0" title="0">if status == Pending </span><span class="cov0" title="0">{ 8840 for _, task := range tasks </span><span class="cov0" title="0">{ 8841 if task.InitResreq.IsEmpty() </span><span class="cov0" title="0">{ 8842 occupiedMap[getTaskID(task.Pod)] += 1 8843 }</span> 8844 } 8845 } 8846 } 8847 <span class="cov0" title="0">for taskId, minNum := range ji.TaskMinAvailable </span><span class="cov0" title="0">{ 8848 if occupiedMap[taskId] < minNum </span><span class="cov0" title="0">{ 8849 klog.V(4).Infof("Job %s/%s Task %s occupied %v less than task min avaliable", ji.Namespace, ji.Name, taskId, occupiedMap[taskId]) 8850 return false 8851 }</span> 8852 } 8853 <span class="cov0" title="0">return true</span> 8854 } 8855 8856 // ValidTaskNum returns the number of tasks that are valid. 
8857 func (ji *JobInfo) ValidTaskNum() int32 <span class="cov0" title="0">{ 8858 occupied := 0 8859 for status, tasks := range ji.TaskStatusIndex </span><span class="cov0" title="0">{ 8860 if AllocatedStatus(status) || 8861 status == Succeeded || 8862 status == Pipelined || 8863 status == Pending </span><span class="cov0" title="0">{ 8864 occupied += len(tasks) 8865 }</span> 8866 } 8867 8868 <span class="cov0" title="0">return int32(occupied)</span> 8869 } 8870 8871 // Ready returns whether job is ready for run 8872 func (ji *JobInfo) Ready() bool <span class="cov0" title="0">{ 8873 occupied := ji.ReadyTaskNum() 8874 8875 return occupied >= ji.MinAvailable 8876 }</span> 8877 8878 // IsPending returns whether job is in pending status 8879 func (ji *JobInfo) IsPending() bool <span class="cov0" title="0">{ 8880 if ji.PodGroup == nil || ji.PodGroup.Status.Phase == scheduling.PodGroupPending || ji.PodGroup.Status.Phase == "" </span><span class="cov0" title="0">{ 8881 return true 8882 }</span> 8883 8884 <span class="cov0" title="0">return false</span> 8885 } 8886 </pre> 8887 8888 <pre class="file" id="file47" style="display: none">/* 8889 Copyright 2018 The Volcano Authors. 8890 8891 Licensed under the Apache License, Version 2.0 (the "License"); 8892 you may not use this file except in compliance with the License. 8893 You may obtain a copy of the License at 8894 8895 http://www.apache.org/licenses/LICENSE-2.0 8896 8897 Unless required by applicable law or agreed to in writing, software 8898 distributed under the License is distributed on an "AS IS" BASIS, 8899 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 8900 See the License for the specific language governing permissions and 8901 limitations under the License. 
8902 */ 8903 8904 package api 8905 8906 import ( 8907 "fmt" 8908 8909 v1 "k8s.io/api/core/v1" 8910 "k8s.io/client-go/tools/cache" 8911 "k8s.io/klog" 8912 ) 8913 8914 // NamespaceName is name of namespace 8915 type NamespaceName string 8916 8917 const ( 8918 // NamespaceWeightKey is the key in ResourceQuota.spec.hard indicating the weight of this namespace 8919 NamespaceWeightKey = "volcano.sh/namespace.weight" 8920 // DefaultNamespaceWeight is the default weight of namespace 8921 DefaultNamespaceWeight = 1 8922 ) 8923 8924 // NamespaceInfo records information of namespace 8925 type NamespaceInfo struct { 8926 // Name is the name of this namespace 8927 Name NamespaceName 8928 // Weight is the highest weight among many ResourceQuota. 8929 Weight int64 8930 } 8931 8932 // GetWeight returns weight of a namespace, any invalid case would get default value 8933 func (n *NamespaceInfo) GetWeight() int64 <span class="cov0" title="0">{ 8934 if n == nil || n.Weight == 0 </span><span class="cov0" title="0">{ 8935 return DefaultNamespaceWeight 8936 }</span> 8937 <span class="cov0" title="0">return n.Weight</span> 8938 } 8939 8940 type quotaItem struct { 8941 name string 8942 weight int64 8943 } 8944 8945 func quotaItemKeyFunc(obj interface{}) (string, error) <span class="cov8" title="1">{ 8946 item, ok := obj.(*quotaItem) 8947 if !ok </span><span class="cov0" title="0">{ 8948 return "", fmt.Errorf("obj with type %T could not parse", obj) 8949 }</span> 8950 <span class="cov8" title="1">return item.name, nil</span> 8951 } 8952 8953 // for big root heap 8954 func quotaItemLessFunc(a interface{}, b interface{}) bool <span class="cov8" title="1">{ 8955 A := a.(*quotaItem) 8956 B := b.(*quotaItem) 8957 return A.weight > B.weight 8958 }</span> 8959 8960 // NamespaceCollection will record all details about namespace 8961 type NamespaceCollection struct { 8962 Name string 8963 8964 quotaWeight *cache.Heap 8965 } 8966 8967 // NewNamespaceCollection creates new NamespaceCollection object 
to record all information about a namespace 8968 func NewNamespaceCollection(name string) *NamespaceCollection <span class="cov8" title="1">{ 8969 n := &NamespaceCollection{ 8970 Name: name, 8971 quotaWeight: cache.NewHeap(quotaItemKeyFunc, quotaItemLessFunc), 8972 } 8973 return n 8974 }</span> 8975 8976 func (n *NamespaceCollection) deleteWeight(q *quotaItem) <span class="cov8" title="1">{ 8977 n.quotaWeight.Delete(q) 8978 }</span> 8979 8980 func (n *NamespaceCollection) updateWeight(q *quotaItem) <span class="cov8" title="1">{ 8981 n.quotaWeight.Update(q) 8982 }</span> 8983 8984 func itemFromQuota(quota *v1.ResourceQuota) *quotaItem <span class="cov8" title="1">{ 8985 var weight int64 = DefaultNamespaceWeight 8986 8987 quotaWeight, ok := quota.Spec.Hard[NamespaceWeightKey] 8988 if ok </span><span class="cov8" title="1">{ 8989 weight = quotaWeight.Value() 8990 }</span> 8991 8992 <span class="cov8" title="1">item := &quotaItem{ 8993 name: quota.Name, 8994 weight: weight, 8995 } 8996 return item</span> 8997 } 8998 8999 // Update modify the registered information according quota object 9000 func (n *NamespaceCollection) Update(quota *v1.ResourceQuota) <span class="cov8" title="1">{ 9001 n.updateWeight(itemFromQuota(quota)) 9002 }</span> 9003 9004 // Delete remove the registered information according quota object 9005 func (n *NamespaceCollection) Delete(quota *v1.ResourceQuota) <span class="cov8" title="1">{ 9006 n.deleteWeight(itemFromQuota(quota)) 9007 }</span> 9008 9009 // Snapshot will clone a NamespaceInfo without Heap according NamespaceCollection 9010 func (n *NamespaceCollection) Snapshot() *NamespaceInfo <span class="cov8" title="1">{ 9011 var weight int64 = DefaultNamespaceWeight 9012 9013 obj, err := n.quotaWeight.Pop() 9014 if err != nil </span><span class="cov0" title="0">{ 9015 klog.Warningf("namespace %s, quota weight meets error %v when pop", n.Name, err) 9016 }</span> else<span class="cov8" title="1"> { 9017 item := obj.(*quotaItem) 9018 weight = 
item.weight 9019 n.quotaWeight.Add(item) 9020 }</span> 9021 9022 <span class="cov8" title="1">return &NamespaceInfo{ 9023 Name: NamespaceName(n.Name), 9024 Weight: weight, 9025 }</span> 9026 } 9027 </pre> 9028 9029 <pre class="file" id="file48" style="display: none">/* 9030 Copyright 2021 The Volcano Authors. 9031 9032 Licensed under the Apache License, Version 2.0 (the "License"); 9033 you may not use this file except in compliance with the License. 9034 You may obtain a copy of the License at 9035 9036 http://www.apache.org/licenses/LICENSE-2.0 9037 9038 Unless required by applicable law or agreed to in writing, software 9039 distributed under the License is distributed on an "AS IS" BASIS, 9040 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9041 See the License for the specific language governing permissions and 9042 limitations under the License. 9043 */ 9044 9045 package api 9046 9047 import ( 9048 "fmt" 9049 "strconv" 9050 9051 v1 "k8s.io/api/core/v1" 9052 "k8s.io/klog" 9053 9054 "volcano.sh/apis/pkg/apis/scheduling/v1beta1" 9055 ) 9056 9057 type AllocateFailError struct { 9058 Reason string 9059 } 9060 9061 func (o *AllocateFailError) Error() string <span class="cov0" title="0">{ 9062 return o.Reason 9063 }</span> 9064 9065 // NodeInfo is node level aggregated information. 
9066 type NodeInfo struct { 9067 Name string 9068 Node *v1.Node 9069 9070 // The state of node 9071 State NodeState 9072 9073 // The releasing resource on that node 9074 Releasing *Resource 9075 // The pipelined resource on that node 9076 Pipelined *Resource 9077 // The idle resource on that node 9078 Idle *Resource 9079 // The used resource on that node, including running and terminating 9080 // pods 9081 Used *Resource 9082 9083 Allocatable *Resource 9084 Capability *Resource 9085 9086 Tasks map[TaskID]*TaskInfo 9087 NumaInfo *NumatopoInfo 9088 NumaChgFlag NumaChgFlag 9089 NumaSchedulerInfo *NumatopoInfo 9090 RevocableZone string 9091 9092 // Used to store custom information 9093 Others map[string]interface{} 9094 GPUDevices map[int]*GPUDevice 9095 9096 // enable node resource oversubscription 9097 OversubscriptionNode bool 9098 // OfflineJobEvicting true means node resource usage too high then dispatched pod can not use oversubscription resource 9099 OfflineJobEvicting bool 9100 9101 // Resource Oversubscription feature: the Oversubscription Resource reported in annotation 9102 OversubscriptionResource *Resource 9103 } 9104 9105 // FutureIdle returns resources that will be idle in the future: 9106 // 9107 // That is current idle resources plus released resources minus pipelined resources. 9108 func (ni *NodeInfo) FutureIdle() *Resource <span class="cov0" title="0">{ 9109 return ni.Idle.Clone().Add(ni.Releasing).Sub(ni.Pipelined) 9110 }</span> 9111 9112 // GetNodeAllocatable return node Allocatable without OversubscriptionResource resource 9113 func (ni *NodeInfo) GetNodeAllocatable() *Resource <span class="cov0" title="0">{ 9114 return NewResource(ni.Node.Status.Allocatable) 9115 }</span> 9116 9117 // NodeState defines the current state of node. 
9118 type NodeState struct { 9119 Phase NodePhase 9120 Reason string 9121 } 9122 9123 // NewNodeInfo is used to create new nodeInfo object 9124 func NewNodeInfo(node *v1.Node) *NodeInfo <span class="cov8" title="1">{ 9125 nodeInfo := &NodeInfo{ 9126 Releasing: EmptyResource(), 9127 Pipelined: EmptyResource(), 9128 Idle: EmptyResource(), 9129 Used: EmptyResource(), 9130 9131 Allocatable: EmptyResource(), 9132 Capability: EmptyResource(), 9133 9134 OversubscriptionResource: EmptyResource(), 9135 Tasks: make(map[TaskID]*TaskInfo), 9136 9137 GPUDevices: make(map[int]*GPUDevice), 9138 } 9139 9140 nodeInfo.setOversubscription(node) 9141 9142 if node != nil </span><span class="cov8" title="1">{ 9143 nodeInfo.Name = node.Name 9144 nodeInfo.Node = node 9145 nodeInfo.Idle = NewResource(node.Status.Allocatable).Add(nodeInfo.OversubscriptionResource) 9146 nodeInfo.Allocatable = NewResource(node.Status.Allocatable).Add(nodeInfo.OversubscriptionResource) 9147 nodeInfo.Capability = NewResource(node.Status.Capacity).Add(nodeInfo.OversubscriptionResource) 9148 }</span> 9149 <span class="cov8" title="1">nodeInfo.setNodeGPUInfo(node) 9150 nodeInfo.setNodeState(node) 9151 nodeInfo.setRevocableZone(node) 9152 9153 return nodeInfo</span> 9154 } 9155 9156 // RefreshNumaSchedulerInfoByCrd used to update scheduler numa information based the CRD numatopo 9157 func (ni *NodeInfo) RefreshNumaSchedulerInfoByCrd() <span class="cov0" title="0">{ 9158 if ni.NumaInfo == nil </span><span class="cov0" title="0">{ 9159 ni.NumaSchedulerInfo = nil 9160 return 9161 }</span> 9162 9163 <span class="cov0" title="0">tmp := ni.NumaInfo.DeepCopy() 9164 if ni.NumaChgFlag == NumaInfoMoreFlag </span><span class="cov0" title="0">{ 9165 ni.NumaSchedulerInfo = tmp 9166 }</span> else<span class="cov0" title="0"> if ni.NumaChgFlag == NumaInfoLessFlag </span><span class="cov0" title="0">{ 9167 numaResMap := ni.NumaSchedulerInfo.NumaResMap 9168 for resName, resInfo := range tmp.NumaResMap </span><span class="cov0" 
title="0">{ 9169 klog.V(5).Infof("resource %s Allocatable : current %v new %v on node %s", 9170 resName, numaResMap[resName], resInfo, ni.Name) 9171 if numaResMap[resName].Allocatable.Size() >= resInfo.Allocatable.Size() </span><span class="cov0" title="0">{ 9172 numaResMap[resName].Allocatable = resInfo.Allocatable.Clone() 9173 numaResMap[resName].Capacity = resInfo.Capacity 9174 }</span> 9175 } 9176 } 9177 9178 <span class="cov0" title="0">ni.NumaChgFlag = NumaInfoResetFlag</span> 9179 } 9180 9181 // Clone used to clone nodeInfo Object 9182 func (ni *NodeInfo) Clone() *NodeInfo <span class="cov8" title="1">{ 9183 res := NewNodeInfo(ni.Node) 9184 9185 for _, p := range ni.Tasks </span><span class="cov8" title="1">{ 9186 res.AddTask(p) 9187 }</span> 9188 <span class="cov8" title="1">if ni.NumaInfo != nil </span><span class="cov0" title="0">{ 9189 res.NumaInfo = ni.NumaInfo.DeepCopy() 9190 }</span> 9191 9192 <span class="cov8" title="1">if ni.NumaSchedulerInfo != nil </span><span class="cov0" title="0">{ 9193 res.NumaSchedulerInfo = ni.NumaSchedulerInfo.DeepCopy() 9194 klog.V(5).Infof("node[%s]", ni.Name) 9195 for resName, resInfo := range res.NumaSchedulerInfo.NumaResMap </span><span class="cov0" title="0">{ 9196 klog.V(5).Infof("current resource %s : %v", resName, resInfo) 9197 }</span> 9198 9199 <span class="cov0" title="0">klog.V(5).Infof("current Policies : %v", res.NumaSchedulerInfo.Policies)</span> 9200 } 9201 9202 <span class="cov8" title="1">res.Others = ni.Others 9203 return res</span> 9204 } 9205 9206 // Ready returns whether node is ready for scheduling 9207 func (ni *NodeInfo) Ready() bool <span class="cov8" title="1">{ 9208 return ni.State.Phase == Ready 9209 }</span> 9210 9211 func (ni *NodeInfo) setRevocableZone(node *v1.Node) <span class="cov8" title="1">{ 9212 if node == nil </span><span class="cov0" title="0">{ 9213 klog.Warningf("the argument node is null.") 9214 return 9215 }</span> 9216 9217 <span class="cov8" title="1">revocableZone := "" 9218 
if len(node.Labels) > 0 </span><span class="cov0" title="0">{ 9219 if value, found := node.Labels[v1beta1.RevocableZone]; found </span><span class="cov0" title="0">{ 9220 revocableZone = value 9221 }</span> 9222 } 9223 <span class="cov8" title="1">ni.RevocableZone = revocableZone</span> 9224 } 9225 9226 // setOversubscription reads the oversubscription label and annotations of the node and stores the parsed values on ni; a boolean that fails to parse is treated as false 9227 // Only cpu and memory oversubscription resources are supported in this version 9228 func (ni *NodeInfo) setOversubscription(node *v1.Node) <span class="cov8" title="1">{ 9229 if node == nil </span><span class="cov0" title="0">{ 9230 return 9231 }</span> 9232 9233 <span class="cov8" title="1">ni.OversubscriptionNode = false 9234 ni.OfflineJobEvicting = false 9235 if len(node.Labels) > 0 </span><span class="cov0" title="0">{ 9236 if value, found := node.Labels[OversubscriptionNode]; found </span><span class="cov0" title="0">{ 9237 b, err := strconv.ParseBool(value) 9238 if err == nil </span><span class="cov0" title="0">{ 9239 ni.OversubscriptionNode = b 9240 }</span> else<span class="cov0" title="0"> { 9241 ni.OversubscriptionNode = false 9242 }</span> 9243 <span class="cov0" title="0">klog.V(5).Infof("Set node %s Oversubscription to %v", node.Name, ni.OversubscriptionNode)</span> 9244 } 9245 } 9246 9247 <span class="cov8" title="1">if len(node.Annotations) > 0 </span><span class="cov0" title="0">{ 9248 if value, found := node.Annotations[OfflineJobEvicting]; found </span><span class="cov0" title="0">{ 9249 b, err := strconv.ParseBool(value) 9250 if err == nil </span><span class="cov0" title="0">{ 9251 ni.OfflineJobEvicting = b 9252 }</span> else<span class="cov0" title="0"> { 9253 ni.OfflineJobEvicting = false 9254 }</span> 9255 <span class="cov0" title="0">klog.V(5).Infof("Set node %s OfflineJobEvicting to %v", node.Name, ni.OfflineJobEvicting)</span> 9256 } 9257 <span class="cov0" title="0">if value, found := node.Annotations[OversubscriptionCPU]; found </span><span class="cov0" title="0">{ 
9258 ni.OversubscriptionResource.MilliCPU, _ = strconv.ParseFloat(value, 64) 9259 klog.V(5).Infof("Set node %s Oversubscription CPU to %v", node.Name, ni.OversubscriptionResource.MilliCPU) 9260 }</span> 9261 <span class="cov0" title="0">if value, found := node.Annotations[OversubscriptionMemory]; found </span><span class="cov0" title="0">{ 9262 ni.OversubscriptionResource.Memory, _ = strconv.ParseFloat(value, 64) 9263 klog.V(5).Infof("Set node %s Oversubscription Memory to %v", node.Name, ni.OversubscriptionResource.Memory) 9264 }</span> 9265 } 9266 } 9267 9268 func (ni *NodeInfo) setNodeState(node *v1.Node) <span class="cov8" title="1">{ 9269 // If node is nil, the node is un-initialized in cache: report NotReady with reason UnInitialized 9270 if node == nil </span><span class="cov0" title="0">{ 9271 ni.State = NodeState{ 9272 Phase: NotReady, 9273 Reason: "UnInitialized", 9274 } 9275 return 9276 }</span> 9277 9278 // If Used is not LessEqual to Allocatable, report NotReady with reason OutOfSync 9279 <span class="cov8" title="1">if !ni.Used.LessEqual(ni.Allocatable, Zero) </span><span class="cov8" title="1">{ 9280 ni.State = NodeState{ 9281 Phase: NotReady, 9282 Reason: "OutOfSync", 9283 } 9284 return 9285 }</span> 9286 9287 // If node not ready, e.g. 
power off 9288 <span class="cov8" title="1">for _, cond := range node.Status.Conditions </span><span class="cov0" title="0">{ 9289 if cond.Type == v1.NodeReady && cond.Status != v1.ConditionTrue </span><span class="cov0" title="0">{ 9290 ni.State = NodeState{ 9291 Phase: NotReady, 9292 Reason: "NotReady", 9293 } 9294 klog.Warningf("set the node %s status to %s.", node.Name, NotReady.String()) 9295 return 9296 }</span> 9297 } 9298 9299 // Node is ready (ignore node conditions because of taint/toleration) 9300 <span class="cov8" title="1">ni.State = NodeState{ 9301 Phase: Ready, 9302 Reason: "", 9303 } 9304 9305 klog.V(4).Infof("set the node %s status to %s.", node.Name, Ready.String())</span> 9306 } 9307 // setNodeGPUInfo registers one GPUDevice per card, each sized as the VolcanoGPUResource capacity divided by the VolcanoGPUNumber capacity; nothing is registered when either capacity is missing or the number is 0 9308 func (ni *NodeInfo) setNodeGPUInfo(node *v1.Node) <span class="cov8" title="1">{ 9309 if node == nil </span><span class="cov0" title="0">{ 9310 return 9311 }</span> 9312 <span class="cov8" title="1">memory, ok := node.Status.Capacity[VolcanoGPUResource] 9313 if !ok </span><span class="cov8" title="1">{ 9314 return 9315 }</span> 9316 <span class="cov0" title="0">totalMemory := memory.Value() 9317 9318 res, ok := node.Status.Capacity[VolcanoGPUNumber] 9319 if !ok </span><span class="cov0" title="0">{ 9320 return 9321 }</span> 9322 <span class="cov0" title="0">gpuNumber := res.Value() 9323 if gpuNumber == 0 </span><span class="cov0" title="0">{ 9324 klog.Warningf("invalid %s=%s", VolcanoGPUNumber, res.String()) 9325 return 9326 }</span> 9327 9328 <span class="cov0" title="0">memoryPerCard := uint(totalMemory / gpuNumber) 9329 for i := 0; i < int(gpuNumber); i++ </span><span class="cov0" title="0">{ 9330 ni.GPUDevices[i] = NewGPUDevice(i, memoryPerCard) 9331 }</span> 9332 } 9333 9334 // SetNode sets kubernetes node object to nodeInfo object; the update is first applied to a clone and only committed to ni when the clone stays Ready 9335 func (ni *NodeInfo) SetNode(node *v1.Node) <span class="cov8" title="1">{ 9336 ni.setNodeState(node) 9337 if !ni.Ready() </span><span class="cov0" title="0">{ 9338 klog.Warningf("Failed to set node info for %s, phase: %s, 
reason: %s", 9339 ni.Name, ni.State.Phase, ni.State.Reason) 9340 return 9341 }</span> 9342 9343 // Dry run on a clone so that, if the node turns out not ready, all fields other than `State` stay in the original state. 9344 <span class="cov8" title="1">copy := ni.Clone() 9345 copy.setNode(node) 9346 copy.setNodeState(node) 9347 if !copy.Ready() </span><span class="cov8" title="1">{ 9348 klog.Warningf("SetNode makes node %s not ready, phase: %s, reason: %s", 9349 copy.Name, copy.State.Phase, copy.State.Reason) 9350 // Set state of node to !Ready, leave other fields untouched 9351 ni.State = copy.State 9352 return 9353 }</span> 9354 9355 <span class="cov8" title="1">ni.setNode(node)</span> 9356 } 9357 9358 // setNode sets kubernetes node object to nodeInfo object without assertion and recomputes Idle, Used, Releasing and Pipelined from ni.Tasks 9359 func (ni *NodeInfo) setNode(node *v1.Node) <span class="cov8" title="1">{ 9360 ni.setOversubscription(node) 9361 ni.setNodeGPUInfo(node) 9362 ni.setRevocableZone(node) 9363 9364 ni.Name = node.Name 9365 ni.Node = node 9366 9367 ni.Allocatable = NewResource(node.Status.Allocatable).Add(ni.OversubscriptionResource) 9368 ni.Capability = NewResource(node.Status.Capacity).Add(ni.OversubscriptionResource) 9369 ni.Releasing = EmptyResource() 9370 ni.Pipelined = EmptyResource() 9371 ni.Idle = NewResource(node.Status.Allocatable).Add(ni.OversubscriptionResource) 9372 ni.Used = EmptyResource() 9373 9374 for _, ti := range ni.Tasks </span><span class="cov8" title="1">{ 9375 switch ti.Status </span>{ 9376 case Releasing:<span class="cov0" title="0"> 9377 ni.Idle.sub(ti.Resreq) // sub without assertion 9378 ni.Releasing.Add(ti.Resreq) 9379 ni.Used.Add(ti.Resreq) 9380 ni.AddGPUResource(ti.Pod)</span> 9381 case Pipelined:<span class="cov0" title="0"> 9382 ni.Pipelined.Add(ti.Resreq)</span> 9383 default:<span class="cov8" title="1"> 9384 ni.Idle.sub(ti.Resreq) // sub without assertion 9385 ni.Used.Add(ti.Resreq) 9386 ni.AddGPUResource(ti.Pod)</span> 9387 } 9388 } 9389 } 9390 // allocateIdleResource subtracts ti.Resreq from ni.Idle when the request fits, otherwise it returns an AllocateFailError 9391 func (ni *NodeInfo) allocateIdleResource(ti *TaskInfo) error <span 
class="cov8" title="1">{ 9392 if ti.Resreq.LessEqual(ni.Idle, Zero) </span><span class="cov8" title="1">{ 9393 ni.Idle.Sub(ti.Resreq) 9394 return nil 9395 }</span> 9396 9397 <span class="cov8" title="1">return &AllocateFailError{Reason: fmt.Sprintf( 9398 "cannot allocate resource, <%s> idle: %s <%s/%s> req: %s", 9399 ni.Name, ni.Idle.String(), ti.Namespace, ti.Name, ti.Resreq.String(), 9400 )}</span> 9401 } 9402 9403 // AddTask adds a clone of the task to nodeInfo and, when the node object is set, charges the task's request against the node resources 9404 // 9405 // If an error occurs both task and node are guaranteed to be in the original state. 9406 func (ni *NodeInfo) AddTask(task *TaskInfo) error <span class="cov8" title="1">{ 9407 if len(task.NodeName) > 0 && len(ni.Name) > 0 && task.NodeName != ni.Name </span><span class="cov0" title="0">{ 9408 return fmt.Errorf("task <%v/%v> already on different node <%v>", 9409 task.Namespace, task.Name, task.NodeName) 9410 }</span> 9411 9412 <span class="cov8" title="1">key := PodKey(task.Pod) 9413 if _, found := ni.Tasks[key]; found </span><span class="cov0" title="0">{ 9414 return fmt.Errorf("task <%v/%v> already on node <%v>", 9415 task.Namespace, task.Name, ni.Name) 9416 }</span> 9417 9418 // Node will hold a copy of task to make sure the status 9419 // change will not impact resource in node. 
9420 <span class="cov8" title="1">ti := task.Clone() 9421 9422 if ni.Node != nil </span><span class="cov8" title="1">{ 9423 switch ti.Status </span>{ 9424 case Releasing:<span class="cov0" title="0"> 9425 if err := ni.allocateIdleResource(ti); err != nil </span><span class="cov0" title="0">{ 9426 return err 9427 }</span> 9428 <span class="cov0" title="0">ni.Releasing.Add(ti.Resreq) 9429 ni.Used.Add(ti.Resreq) 9430 ni.AddGPUResource(ti.Pod)</span> 9431 case Pipelined:<span class="cov0" title="0"> 9432 ni.Pipelined.Add(ti.Resreq)</span> 9433 default:<span class="cov8" title="1"> 9434 if err := ni.allocateIdleResource(ti); err != nil </span><span class="cov8" title="1">{ 9435 return err 9436 }</span> 9437 <span class="cov8" title="1">ni.Used.Add(ti.Resreq) 9438 ni.AddGPUResource(ti.Pod)</span> 9439 } 9440 } 9441 9442 <span class="cov8" title="1">if ni.NumaInfo != nil </span><span class="cov0" title="0">{ 9443 ni.NumaInfo.AddTask(ti) 9444 }</span> 9445 9446 // Update task node name upon successful task addition. 9447 <span class="cov8" title="1">task.NodeName = ni.Name 9448 ti.NodeName = ni.Name 9449 ni.Tasks[key] = ti 9450 9451 return nil</span> 9452 } 9453 9454 // RemoveTask is used to remove a task from nodeInfo object; a task that is not found is logged and ignored. 9455 // 9456 // If an error occurs both task and node are guaranteed to be in the original state. 
9457 func (ni *NodeInfo) RemoveTask(ti *TaskInfo) error <span class="cov8" title="1">{ 9458 key := PodKey(ti.Pod) 9459 9460 task, found := ni.Tasks[key] 9461 if !found </span><span class="cov0" title="0">{ 9462 klog.Warningf("failed to find task <%v/%v> on host <%v>", 9463 ti.Namespace, ti.Name, ni.Name) 9464 return nil 9465 }</span> 9466 9467 <span class="cov8" title="1">if ni.Node != nil </span><span class="cov8" title="1">{ 9468 switch task.Status </span>{ 9469 case Releasing:<span class="cov0" title="0"> 9470 ni.Releasing.Sub(task.Resreq) 9471 ni.Idle.Add(task.Resreq) 9472 ni.Used.Sub(task.Resreq) 9473 ni.SubGPUResource(ti.Pod)</span> 9474 case Pipelined:<span class="cov0" title="0"> 9475 ni.Pipelined.Sub(task.Resreq)</span> 9476 default:<span class="cov8" title="1"> 9477 ni.Idle.Add(task.Resreq) 9478 ni.Used.Sub(task.Resreq) 9479 ni.SubGPUResource(ti.Pod)</span> 9480 } 9481 } 9482 9483 <span class="cov8" title="1">if ni.NumaInfo != nil </span><span class="cov0" title="0">{ 9484 ni.NumaInfo.RemoveTask(ti) 9485 }</span> 9486 9487 <span class="cov8" title="1">delete(ni.Tasks, key) 9488 9489 return nil</span> 9490 } 9491 9492 // UpdateTask is used to update a task in nodeInfo object by removing it and adding it back. 9493 // 9494 // An error from the removal is returned to the caller; a failure to add the task back is reported through klog.Fatalf, so the node is not restored to its original state in that case. 9495 func (ni *NodeInfo) UpdateTask(ti *TaskInfo) error <span class="cov0" title="0">{ 9496 if err := ni.RemoveTask(ti); err != nil </span><span class="cov0" title="0">{ 9497 return err 9498 }</span> 9499 9500 <span class="cov0" title="0">if err := ni.AddTask(ti); err != nil </span><span class="cov0" title="0">{ 9501 // This should never happen if task removal was successful, 9502 // because the only expected error during task addition is when task is still on a node (NOTE(review): AddTask can also return the allocation failure from allocateIdleResource). 
9503 klog.Fatalf("Failed to add Task <%s,%s> to Node <%s> during task update", 9504 ti.Namespace, ti.Name, ni.Name) 9505 }</span> 9506 <span class="cov0" title="0">return nil</span> 9507 } 9508 9509 // String returns nodeInfo details in string format. NOTE(review): it reads ni.Node.Spec.Taints without a nil check, so it assumes ni.Node is set 9510 func (ni NodeInfo) String() string <span class="cov0" title="0">{ 9511 tasks := "" 9512 9513 i := 0 9514 for _, task := range ni.Tasks </span><span class="cov0" title="0">{ 9515 tasks += fmt.Sprintf("\n\t %d: %v", i, task) 9516 i++ 9517 }</span> 9518 9519 <span class="cov0" title="0">return fmt.Sprintf("Node (%s): allocatable<%v> idle <%v>, used <%v>, releasing <%v>, oversubscribution <%v>, "+ 9520 "state <phase %s, reaseon %s>, oversubscributionNode <%v>, offlineJobEvicting <%v>,taints <%v>%s", 9521 ni.Name, ni.Allocatable, ni.Idle, ni.Used, ni.Releasing, ni.OversubscriptionResource, ni.State.Phase, ni.State.Reason, ni.OversubscriptionNode, ni.OfflineJobEvicting, ni.Node.Spec.Taints, tasks)</span> 9522 } 9523 9524 // Pods returns the pods of all tasks tracked on that node (nil when there are none) 9525 func (ni *NodeInfo) Pods() (pods []*v1.Pod) <span class="cov0" title="0">{ 9526 for _, t := range ni.Tasks </span><span class="cov0" title="0">{ 9527 pods = append(pods, t.Pod) 9528 }</span> 9529 9530 <span class="cov0" title="0">return</span> 9531 } 9532 9533 // GetDevicesIdleGPUMemory returns the idle GPU memory of each gpu card, keyed by device ID. 
9534 func (ni *NodeInfo) GetDevicesIdleGPUMemory() map[int]uint <span class="cov0" title="0">{ 9535 devicesAllGPUMemory := ni.getDevicesAllGPUMemory() 9536 devicesUsedGPUMemory := ni.getDevicesUsedGPUMemory() 9537 res := map[int]uint{} 9538 for id, allMemory := range devicesAllGPUMemory </span><span class="cov0" title="0">{ 9539 if usedMemory, found := devicesUsedGPUMemory[id]; found </span><span class="cov0" title="0">{ 9540 res[id] = allMemory - usedMemory 9541 }</span> else<span class="cov0" title="0"> { 9542 res[id] = allMemory 9543 }</span> 9544 } 9545 <span class="cov0" title="0">return res</span> 9546 } 9547 // getDevicesUsedGPUMemory returns the used GPU memory of each device, keyed by device ID 9548 func (ni *NodeInfo) getDevicesUsedGPUMemory() map[int]uint <span class="cov0" title="0">{ 9549 res := map[int]uint{} 9550 for _, device := range ni.GPUDevices </span><span class="cov0" title="0">{ 9551 res[device.ID] = device.getUsedGPUMemory() 9552 }</span> 9553 <span class="cov0" title="0">return res</span> 9554 } 9555 // getDevicesAllGPUMemory returns the total GPU memory of each device, keyed by device ID 9556 func (ni *NodeInfo) getDevicesAllGPUMemory() map[int]uint <span class="cov0" title="0">{ 9557 res := map[int]uint{} 9558 for _, device := range ni.GPUDevices </span><span class="cov0" title="0">{ 9559 res[device.ID] = device.Memory 9560 }</span> 9561 <span class="cov0" title="0">return res</span> 9562 } 9563 9564 // AddGPUResource adds the pod to the PodMap of the GPU device selected by GetGPUIndex, if the pod requests GPU resource and that device exists 9565 func (ni *NodeInfo) AddGPUResource(pod *v1.Pod) <span class="cov8" title="1">{ 9566 gpuRes := GetGPUResourceOfPod(pod) 9567 if gpuRes > 0 </span><span class="cov0" title="0">{ 9568 id := GetGPUIndex(pod) 9569 if dev := ni.GPUDevices[id]; dev != nil </span><span class="cov0" title="0">{ 9570 dev.PodMap[string(pod.UID)] = pod 9571 }</span> 9572 } 9573 } 9574 9575 // SubGPUResource frees the gpu held by the pod by deleting the pod from the PodMap of its GPU device 9576 func (ni *NodeInfo) SubGPUResource(pod *v1.Pod) <span class="cov8" title="1">{ 9577 gpuRes := GetGPUResourceOfPod(pod) 9578 if gpuRes > 0 </span><span class="cov0" title="0">{ 9579 id := GetGPUIndex(pod) 9580 if dev := ni.GPUDevices[id]; dev != 
nil </span><span class="cov0" title="0">{ 9581 delete(dev.PodMap, string(pod.UID)) 9582 }</span> 9583 } 9584 } 9585 </pre> 9586 9587 <pre class="file" id="file49" style="display: none">/* 9588 Copyright 2021 The Volcano Authors. 9589 9590 Licensed under the Apache License, Version 2.0 (the "License"); 9591 you may not use this file except in compliance with the License. 9592 You may obtain a copy of the License at 9593 9594 http://www.apache.org/licenses/LICENSE-2.0 9595 9596 Unless required by applicable law or agreed to in writing, software 9597 distributed under the License is distributed on an "AS IS" BASIS, 9598 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9599 See the License for the specific language governing permissions and 9600 limitations under the License. 9601 */ 9602 9603 package api 9604 9605 import ( 9606 "encoding/json" 9607 9608 v1 "k8s.io/api/core/v1" 9609 "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/topology" 9610 "k8s.io/kubernetes/pkg/kubelet/cm/cpuset" 9611 9612 nodeinfov1alpha1 "volcano.sh/apis/pkg/apis/nodeinfo/v1alpha1" 9613 ) 9614 9615 // NumaChgFlag indicates how the node numainfo changed 9616 type NumaChgFlag int 9617 9618 const ( 9619 // NumaInfoResetFlag indicates a reset operation 9620 NumaInfoResetFlag NumaChgFlag = 0b00 9621 // NumaInfoMoreFlag indicates the received allocatable resource is getting more 9622 NumaInfoMoreFlag NumaChgFlag = 0b11 9623 // NumaInfoLessFlag indicates the received allocatable resource is getting less 9624 NumaInfoLessFlag NumaChgFlag = 0b10 9625 ) 9626 9627 // PodResourceDecision is the resource allocation determined by the scheduler, 9628 // and passed to kubelet through pod annotation. 9629 type PodResourceDecision struct { 9630 // NUMAResources is resource list with numa info indexed by numa id. 
9631 NUMAResources map[int]v1.ResourceList `json:"numa,omitempty"` 9632 } 9633 9634 // ResourceInfo is the allocatable information for the resource 9635 type ResourceInfo struct { 9636 Allocatable cpuset.CPUSet 9637 Capacity int 9638 AllocatablePerNuma map[int]float64 // key: NUMA ID 9639 UsedPerNuma map[int]float64 // key: NUMA ID 9640 } 9641 9642 // NumatopoInfo is the information about topology manager on the node 9643 type NumatopoInfo struct { 9644 Namespace string 9645 Name string 9646 Policies map[nodeinfov1alpha1.PolicyName]string 9647 NumaResMap map[string]*ResourceInfo 9648 CPUDetail topology.CPUDetails 9649 ResReserved v1.ResourceList 9650 } 9651 9652 // DeepCopy returns a copy of NumatopoInfo whose maps and per-resource ResourceInfo entries are newly allocated 9653 func (info *NumatopoInfo) DeepCopy() *NumatopoInfo <span class="cov0" title="0">{ 9654 numaInfo := &NumatopoInfo{ 9655 Namespace: info.Namespace, 9656 Name: info.Name, 9657 Policies: make(map[nodeinfov1alpha1.PolicyName]string), 9658 NumaResMap: make(map[string]*ResourceInfo), 9659 CPUDetail: topology.CPUDetails{}, 9660 ResReserved: make(v1.ResourceList), 9661 } 9662 9663 policies := info.Policies 9664 for name, policy := range policies </span><span class="cov0" title="0">{ 9665 numaInfo.Policies[name] = policy 9666 }</span> 9667 9668 <span class="cov0" title="0">for resName, resInfo := range info.NumaResMap </span><span class="cov0" title="0">{ 9669 tmpInfo := &ResourceInfo{ 9670 AllocatablePerNuma: make(map[int]float64), 9671 UsedPerNuma: make(map[int]float64), 9672 } 9673 tmpInfo.Capacity = resInfo.Capacity 9674 tmpInfo.Allocatable = resInfo.Allocatable.Clone() 9675 9676 for numaId, data := range resInfo.AllocatablePerNuma </span><span class="cov0" title="0">{ 9677 tmpInfo.AllocatablePerNuma[numaId] = data 9678 }</span> 9679 9680 <span class="cov0" title="0">for numaID, data := range resInfo.UsedPerNuma </span><span class="cov0" title="0">{ 9681 tmpInfo.UsedPerNuma[numaID] = data 9682 }</span> 9683 9684 <span class="cov0" title="0">numaInfo.NumaResMap[resName] = 
tmpInfo</span> 9685 } 9686 9687 <span class="cov0" title="0">cpuDetail := info.CPUDetail 9688 for cpuID, detail := range cpuDetail </span><span class="cov0" title="0">{ 9689 numaInfo.CPUDetail[cpuID] = detail 9690 }</span> 9691 9692 <span class="cov0" title="0">resReserved := info.ResReserved 9693 for resName, res := range resReserved </span><span class="cov0" title="0">{ 9694 numaInfo.ResReserved[resName] = res 9695 }</span> 9696 9697 <span class="cov0" title="0">return numaInfo</span> 9698 } 9699 9700 // Compare is the function to show the change of the resource on kubelet 9701 // return val: 9702 // - true : at least one resource in info has an allocatable set in newInfo of the same size or larger 9703 // - false : every resource in info has a smaller allocatable set in newInfo (also returned when info has no resources) 9704 func (info *NumatopoInfo) Compare(newInfo *NumatopoInfo) bool <span class="cov0" title="0">{ 9705 for resName := range info.NumaResMap </span><span class="cov0" title="0">{ 9706 oldSize := info.NumaResMap[resName].Allocatable.Size() 9707 newSize := newInfo.NumaResMap[resName].Allocatable.Size() 9708 if oldSize <= newSize </span><span class="cov0" title="0">{ 9709 return true 9710 }</span> 9711 } 9712 9713 <span class="cov0" title="0">return false</span> 9714 } 9715 9716 // Allocate removes the allocated resource sets from the matching NumaResMap entries; NOTE(review): a resName missing from NumaResMap is dereferenced without a check 9717 func (info *NumatopoInfo) Allocate(resSets ResNumaSets) <span class="cov0" title="0">{ 9718 for resName := range resSets </span><span class="cov0" title="0">{ 9719 info.NumaResMap[resName].Allocatable = info.NumaResMap[resName].Allocatable.Difference(resSets[resName]) 9720 }</span> 9721 } 9722 9723 // Release gives the allocated resource sets back to the matching NumaResMap entries; NOTE(review): a resName missing from NumaResMap is dereferenced without a check 9724 func (info *NumatopoInfo) Release(resSets ResNumaSets) <span class="cov0" title="0">{ 9725 for resName := range resSets </span><span class="cov0" title="0">{ 9726 info.NumaResMap[resName].Allocatable = info.NumaResMap[resName].Allocatable.Union(resSets[resName]) 9727 }</span> 9728 } 9729 // GetPodResourceNumaInfo returns the task's per-NUMA resource map: ti.NumaInfo.ResMap when it is non-empty, otherwise the map decoded from the pod's topology decision annotation (nil when the annotation is absent or cannot be decoded) 9730 func GetPodResourceNumaInfo(ti *TaskInfo) 
map[int]v1.ResourceList <span class="cov0" title="0">{ 9731 if ti.NumaInfo != nil && len(ti.NumaInfo.ResMap) > 0 </span><span class="cov0" title="0">{ 9732 return ti.NumaInfo.ResMap 9733 }</span> 9734 9735 <span class="cov0" title="0">if _, ok := ti.Pod.Annotations[topologyDecisionAnnotation]; !ok </span><span class="cov0" title="0">{ 9736 return nil 9737 }</span> 9738 9739 <span class="cov0" title="0">decision := PodResourceDecision{} 9740 err := json.Unmarshal([]byte(ti.Pod.Annotations[topologyDecisionAnnotation]), &decision) 9741 if err != nil </span><span class="cov0" title="0">{ 9742 return nil 9743 }</span> 9744 9745 <span class="cov0" title="0">return decision.NUMAResources</span> 9746 } 9747 9748 // AddTask adds the task's per-NUMA resource usage, as returned by GetPodResourceNumaInfo, to UsedPerNuma 9749 func (info *NumatopoInfo) AddTask(ti *TaskInfo) <span class="cov0" title="0">{ 9750 numaInfo := GetPodResourceNumaInfo(ti) 9751 if numaInfo == nil </span><span class="cov0" title="0">{ 9752 return 9753 }</span> 9754 9755 <span class="cov0" title="0">for numaID, resList := range numaInfo </span><span class="cov0" title="0">{ 9756 for resName, quantity := range resList </span><span class="cov0" title="0">{ 9757 info.NumaResMap[string(resName)].UsedPerNuma[numaID] += ResQuantity2Float64(resName, quantity) 9758 }</span> 9759 } 9760 } 9761 9762 // RemoveTask subtracts the task's per-NUMA resource usage from UsedPerNuma; it iterates the map returned by GetPodResourceNumaInfo, exactly as AddTask does, so a nil ti.NumaInfo is never dereferenced 9763 func (info *NumatopoInfo) RemoveTask(ti *TaskInfo) <span class="cov0" title="0">{ 9764 decision := GetPodResourceNumaInfo(ti) 9765 if decision == nil </span><span class="cov0" title="0">{ 9766 return 9767 }</span> 9768 9769 <span class="cov0" title="0">for numaID, resList := range decision </span><span class="cov0" title="0">{ 9770 for resName, quantity := range resList </span><span class="cov0" title="0">{ 9771 info.NumaResMap[string(resName)].UsedPerNuma[numaID] -= ResQuantity2Float64(resName, quantity) 9772 }</span> 9773 } 9774 } 9775 9776 // 
GenerateNodeResNumaSets return the idle resource sets of all node 9777 func GenerateNodeResNumaSets(nodes map[string]*NodeInfo) map[string]ResNumaSets <span class="cov0" title="0">{ 9778 nodeSlice := make(map[string]ResNumaSets) 9779 for _, node := range nodes </span><span class="cov0" title="0">{ 9780 if node.NumaSchedulerInfo == nil </span><span class="cov0" title="0">{ 9781 continue</span> 9782 } 9783 9784 <span class="cov0" title="0">resMaps := make(ResNumaSets) 9785 for resName, resMap := range node.NumaSchedulerInfo.NumaResMap </span><span class="cov0" title="0">{ 9786 resMaps[resName] = resMap.Allocatable.Clone() 9787 }</span> 9788 9789 <span class="cov0" title="0">nodeSlice[node.Name] = resMaps</span> 9790 } 9791 9792 <span class="cov0" title="0">return nodeSlice</span> 9793 } 9794 9795 // GenerateNumaNodes returns the numa IDs of every node that has NumaSchedulerInfo, keyed by node name 9796 func GenerateNumaNodes(nodes map[string]*NodeInfo) map[string][]int <span class="cov0" title="0">{ 9797 nodeNumaMap := make(map[string][]int) 9798 9799 for _, node := range nodes </span><span class="cov0" title="0">{ 9800 if node.NumaSchedulerInfo == nil </span><span class="cov0" title="0">{ 9801 continue</span> 9802 } 9803 9804 <span class="cov0" title="0">nodeNumaMap[node.Name] = node.NumaSchedulerInfo.CPUDetail.NUMANodes().ToSlice()</span> 9805 } 9806 9807 <span class="cov0" title="0">return nodeNumaMap</span> 9808 } 9809 9810 // ResNumaSets maps a resource name to its cpuset 9811 type ResNumaSets map[string]cpuset.CPUSet 9812 9813 // Allocate removes the sets assigned to a task from resSets; resources that resSets does not track are skipped 9814 func (resSets ResNumaSets) Allocate(taskSets ResNumaSets) <span class="cov0" title="0">{ 9815 for resName := range taskSets </span><span class="cov0" title="0">{ 9816 if _, ok := resSets[resName]; !ok </span><span class="cov0" title="0">{ 9817 continue</span> 9818 } 9819 <span class="cov0" title="0">resSets[resName] = resSets[resName].Difference(taskSets[resName])</span> 9820 } 9821 } 9822 9823 // Release is to 
reclaim the allocated resource which is assigned to task 9824 func (resSets ResNumaSets) Release(taskSets ResNumaSets) <span class="cov0" title="0">{ 9825 for resName := range taskSets </span><span class="cov0" title="0">{ 9826 if _, ok := resSets[resName]; !ok </span><span class="cov0" title="0">{ 9827 continue</span> 9828 } 9829 <span class="cov0" title="0">resSets[resName] = resSets[resName].Union(taskSets[resName])</span> 9830 } 9831 } 9832 9833 // Clone returns a new ResNumaSets holding a clone of every cpuset 9834 func (resSets ResNumaSets) Clone() ResNumaSets <span class="cov0" title="0">{ 9835 newSets := make(ResNumaSets) 9836 for resName := range resSets </span><span class="cov0" title="0">{ 9837 newSets[resName] = resSets[resName].Clone() 9838 }</span> 9839 9840 <span class="cov0" title="0">return newSets</span> 9841 } 9842 </pre> 9843 9844 <pre class="file" id="file50" style="display: none">/* 9845 Copyright 2019 The Kubernetes Authors. 9846 9847 Licensed under the Apache License, Version 2.0 (the "License"); 9848 you may not use this file except in compliance with the License. 9849 You may obtain a copy of the License at 9850 9851 http://www.apache.org/licenses/LICENSE-2.0 9852 9853 Unless required by applicable law or agreed to in writing, software 9854 distributed under the License is distributed on an "AS IS" BASIS, 9855 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9856 See the License for the specific language governing permissions and 9857 limitations under the License. 9858 */ 9859 9860 package api 9861 9862 import ( 9863 "volcano.sh/apis/pkg/apis/scheduling" 9864 ) 9865 9866 // PodGroupPhase is the phase of a pod group at the current time. 9867 type PodGroupPhase string 9868 9869 // These are the supported versions of podGroups. 9870 const ( 9871 // PodGroupVersionV1Beta1 represents PodGroupVersion of v1beta1 9872 PodGroupVersionV1Beta1 string = "v1beta1" 9873 ) 9874 9875 // PodGroup is a collection of Pod; used for batch workload. 
9876 type PodGroup struct { 9877 scheduling.PodGroup 9878 9879 // Version represents the version of PodGroup 9880 Version string 9881 } 9882 // Clone returns a copy of the PodGroup built from a deep copy of the embedded scheduling.PodGroup 9883 func (pg *PodGroup) Clone() *PodGroup <span class="cov0" title="0">{ 9884 return &PodGroup{ 9885 PodGroup: *pg.PodGroup.DeepCopy(), 9886 Version: pg.Version, 9887 } 9888 }</span> 9889 </pre> 9890 9891 <pre class="file" id="file51" style="display: none">/* 9892 Copyright 2019 The Kubernetes Authors. 9893 9894 Licensed under the Apache License, Version 2.0 (the "License"); 9895 you may not use this file except in compliance with the License. 9896 You may obtain a copy of the License at 9897 9898 http://www.apache.org/licenses/LICENSE-2.0 9899 9900 Unless required by applicable law or agreed to in writing, software 9901 distributed under the License is distributed on an "AS IS" BASIS, 9902 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9903 See the License for the specific language governing permissions and 9904 limitations under the License. 9905 */ 9906 9907 package api 9908 9909 import ( 9910 "encoding/json" 9911 "fmt" 9912 "strconv" 9913 "strings" 9914 "time" 9915 9916 v1 "k8s.io/api/core/v1" 9917 "k8s.io/klog" 9918 9919 "volcano.sh/apis/pkg/apis/scheduling/v1beta1" 9920 ) 9921 9922 // Refer k8s.io/kubernetes/pkg/scheduler/algorithm/predicates/predicates.go#GetResourceRequest. 9923 // 9924 // GetResourceRequest returns a *Resource that covers the largest width in each resource dimension. 9925 // Because init-containers run sequentially, we collect the max in each dimension iteratively. 9926 // In contrast, we sum the resource vectors for regular containers since they run simultaneously. 9927 // 9928 // To be consistent with kubernetes default scheduler, it is only used for predicates of actions (e.g. 9929 // allocate, backfill, preempt, reclaim), please use GetPodResourceWithoutInitContainers for other cases. 
9930 // 9931 // Example: 9932 // 9933 // Pod: 9934 // InitContainers 9935 // IC1: 9936 // CPU: 2 9937 // Memory: 1G 9938 // IC2: 9939 // CPU: 2 9940 // Memory: 3G 9941 // Containers 9942 // C1: 9943 // CPU: 2 9944 // Memory: 1G 9945 // C2: 9946 // CPU: 1 9947 // Memory: 1G 9948 // 9949 // Result: CPU: 3, Memory: 3G 9950 9951 // GetPodResourceRequest returns the resource required for that pod: the sum over its containers, combined with each init container's request through SetMaxResource 9952 func GetPodResourceRequest(pod *v1.Pod) *Resource <span class="cov8" title="1">{ 9953 result := GetPodResourceWithoutInitContainers(pod) 9954 9955 // take max_resource(sum_pod, any_init_container) 9956 for _, container := range pod.Spec.InitContainers </span><span class="cov8" title="1">{ 9957 result.SetMaxResource(NewResource(container.Resources.Requests)) 9958 }</span> 9959 9960 <span class="cov8" title="1">return result</span> 9961 } 9962 9963 // GetPodPreemptable returns the volcano.sh/preemptable value for the pod; the annotation takes precedence over the label, and a missing or invalid value yields false 9964 func GetPodPreemptable(pod *v1.Pod) bool <span class="cov8" title="1">{ 9965 // check annotation first 9966 if len(pod.Annotations) > 0 </span><span class="cov8" title="1">{ 9967 if value, found := pod.Annotations[v1beta1.PodPreemptable]; found </span><span class="cov0" title="0">{ 9968 b, err := strconv.ParseBool(value) 9969 if err != nil </span><span class="cov0" title="0">{ 9970 klog.Warningf("invalid %s=%s", v1beta1.PodPreemptable, value) 9971 return false 9972 }</span> 9973 <span class="cov0" title="0">return b</span> 9974 } 9975 } 9976 9977 // if the annotation does not exist, check the label 9978 <span class="cov8" title="1">if len(pod.Labels) > 0 </span><span class="cov0" title="0">{ 9979 if value, found := pod.Labels[v1beta1.PodPreemptable]; found </span><span class="cov0" title="0">{ 9980 b, err := strconv.ParseBool(value) 9981 if err != nil </span><span class="cov0" title="0">{ 9982 klog.Warningf("invalid %s=%s", v1beta1.PodPreemptable, value) 9983 return false 9984 }</span> 9985 <span class="cov0" title="0">return b</span> 9986 } 9987 } 9988 9989 <span 
class="cov8" title="1">return false</span> 9990 } 9991 9992 // GetPodRevocableZone returns the revocable zone for the pod: "*" when the revocable-zone annotation is "*" or, without that annotation, when the preemptable annotation parses to true; otherwise "" 9993 func GetPodRevocableZone(pod *v1.Pod) string <span class="cov8" title="1">{ 9994 if len(pod.Annotations) > 0 </span><span class="cov8" title="1">{ 9995 if value, found := pod.Annotations[v1beta1.RevocableZone]; found </span><span class="cov0" title="0">{ 9996 if value != "*" </span><span class="cov0" title="0">{ 9997 return "" 9998 }</span> 9999 <span class="cov0" title="0">return value</span> 10000 } 10001 10002 <span class="cov8" title="1">if value, found := pod.Annotations[v1beta1.PodPreemptable]; found </span><span class="cov0" title="0">{ 10003 if b, err := strconv.ParseBool(value); err == nil && b </span><span class="cov0" title="0">{ 10004 return "*" 10005 }</span> 10006 } 10007 } 10008 <span class="cov8" title="1">return ""</span> 10009 } 10010 10011 // GetPodTopologyInfo returns the numa topology policy and the per-NUMA resource decision read from the pod annotations 10012 func GetPodTopologyInfo(pod *v1.Pod) *TopologyInfo <span class="cov8" title="1">{ 10013 info := TopologyInfo{ 10014 ResMap: make(map[int]v1.ResourceList), 10015 } 10016 10017 if len(pod.Annotations) > 0 </span><span class="cov8" title="1">{ 10018 if value, found := pod.Annotations[v1beta1.NumaPolicyKey]; found </span><span class="cov0" title="0">{ 10019 info.Policy = value 10020 }</span> 10021 10022 <span class="cov8" title="1">if value, found := pod.Annotations[topologyDecisionAnnotation]; found </span><span class="cov0" title="0">{ 10023 decision := PodResourceDecision{} 10024 err := json.Unmarshal([]byte(value), &decision) 10025 if err == nil </span><span class="cov0" title="0">{ 10026 info.ResMap = decision.NUMAResources 10027 }</span> 10028 } 10029 } 10030 10031 <span class="cov8" title="1">return &info</span> 10032 } 10033 10034 // GetPodResourceWithoutInitContainers returns Pod's resource request, it does not contain 10035 // init containers' resource request. 
10036 func GetPodResourceWithoutInitContainers(pod *v1.Pod) *Resource <span class="cov8" title="1">{ 10037 result := EmptyResource() 10038 for _, container := range pod.Spec.Containers </span><span class="cov8" title="1">{ 10039 result.Add(NewResource(container.Resources.Requests)) 10040 }</span> 10041 10042 <span class="cov8" title="1">return result</span> 10043 } 10044 10045 // GetGPUIndex returns the GPU index stored in the pod's GPUIndex annotation, or -1 when it is missing or not an integer 10046 func GetGPUIndex(pod *v1.Pod) int <span class="cov0" title="0">{ 10047 if len(pod.Annotations) > 0 </span><span class="cov0" title="0">{ 10048 value, found := pod.Annotations[GPUIndex] 10049 if found </span><span class="cov0" title="0">{ 10050 id, err := strconv.Atoi(value) 10051 if err != nil </span><span class="cov0" title="0">{ 10052 klog.Errorf("invalid %s=%s", GPUIndex, value) 10053 return -1 10054 }</span> 10055 <span class="cov0" title="0">return id</span> 10056 } 10057 } 10058 10059 <span class="cov0" title="0">return -1</span> 10060 } 10061 // escapeJSONPointer escapes "~" and "/" in p so it can be used as a JSON Pointer reference token 10062 func escapeJSONPointer(p string) string <span class="cov0" title="0">{ 10063 // Escaping reference name using https://tools.ietf.org/html/rfc6901 10064 p = strings.Replace(p, "~", "~0", -1) 10065 p = strings.Replace(p, "/", "~1", -1) 10066 return p 10067 }</span> 10068 10069 // AddGPUIndexPatch returns the JSON patch that adds the PredicateTime annotation (current UnixNano time) and the GPUIndex annotation (id) 10070 func AddGPUIndexPatch(id int) string <span class="cov0" title="0">{ 10071 return fmt.Sprintf(`[{"op": "add", "path": "/metadata/annotations/%s", "value":"%d"},`+ 10072 `{"op": "add", "path": "/metadata/annotations/%s", "value": "%d"}]`, 10073 escapeJSONPointer(PredicateTime), time.Now().UnixNano(), 10074 escapeJSONPointer(GPUIndex), id) 10075 }</span> 10076 10077 // RemoveGPUIndexPatch returns the JSON patch that removes the PredicateTime and GPUIndex annotations 10078 func RemoveGPUIndexPatch() string <span class="cov0" title="0">{ 10079 return fmt.Sprintf(`[{"op": "remove", "path": "/metadata/annotations/%s"},`+ 10080 `{"op": "remove", "path": "/metadata/annotations/%s"}]`, 
escapeJSONPointer(PredicateTime), escapeJSONPointer(GPUIndex)) 10081 }</span> 10082 </pre> 10083 10084 <pre class="file" id="file52" style="display: none">/* 10085 Copyright 2018 The Kubernetes Authors. 10086 10087 Licensed under the Apache License, Version 2.0 (the "License"); 10088 you may not use this file except in compliance with the License. 10089 You may obtain a copy of the License at 10090 10091 http://www.apache.org/licenses/LICENSE-2.0 10092 10093 Unless required by applicable law or agreed to in writing, software 10094 distributed under the License is distributed on an "AS IS" BASIS, 10095 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10096 See the License for the specific language governing permissions and 10097 limitations under the License. 10098 */ 10099 10100 package api 10101 10102 import ( 10103 "k8s.io/apimachinery/pkg/types" 10104 10105 "volcano.sh/apis/pkg/apis/scheduling" 10106 "volcano.sh/apis/pkg/apis/scheduling/v1beta1" 10107 ) 10108 10109 // QueueID is UID type, serves as unique ID for each queue 10110 type QueueID types.UID 10111 10112 // QueueInfo will have all details about queue 10113 type QueueInfo struct { 10114 UID QueueID 10115 Name string 10116 10117 Weight int32 10118 10119 // Weights is a list of slash separated float numbers. 10120 // Each of them is a weight corresponding to the 10121 // hierarchy level. 10122 Weights string 10123 // Hierarchy is a list of node names along the 10124 // path from the root to the node itself. 
10125 Hierarchy string 10126 10127 Queue *scheduling.Queue 10128 } 10129 10130 // NewQueueInfo creates a new queueInfo object from the queue; the queue name is used as UID and the hierarchy fields come from the queue annotations 10131 func NewQueueInfo(queue *scheduling.Queue) *QueueInfo <span class="cov0" title="0">{ 10132 return &QueueInfo{ 10133 UID: QueueID(queue.Name), 10134 Name: queue.Name, 10135 10136 Weight: queue.Spec.Weight, 10137 Hierarchy: queue.Annotations[v1beta1.KubeHierarchyAnnotationKey], 10138 Weights: queue.Annotations[v1beta1.KubeHierarchyWeightAnnotationKey], 10139 10140 Queue: queue, 10141 } 10142 }</span> 10143 10144 // Clone is used to clone queueInfo object; the clone shares the same Queue pointer as the original 10145 func (q *QueueInfo) Clone() *QueueInfo <span class="cov0" title="0">{ 10146 return &QueueInfo{ 10147 UID: q.UID, 10148 Name: q.Name, 10149 Weight: q.Weight, 10150 Hierarchy: q.Hierarchy, 10151 Weights: q.Weights, 10152 Queue: q.Queue, 10153 } 10154 }</span> 10155 10156 // Reclaimable returns whether the queue is reclaimable: false for a nil QueueInfo or nil Queue, true when Spec.Reclaimable is unset, otherwise the configured value 10157 func (q *QueueInfo) Reclaimable() bool <span class="cov0" title="0">{ 10158 if q == nil </span><span class="cov0" title="0">{ 10159 return false 10160 }</span> 10161 10162 <span class="cov0" title="0">if q.Queue == nil </span><span class="cov0" title="0">{ 10163 return false 10164 }</span> 10165 10166 <span class="cov0" title="0">if q.Queue.Spec.Reclaimable == nil </span><span class="cov0" title="0">{ 10167 return true 10168 }</span> 10169 10170 <span class="cov0" title="0">return *q.Queue.Spec.Reclaimable</span> 10171 } 10172 </pre> 10173 10174 <pre class="file" id="file53" style="display: none">/* 10175 Copyright 2017 The Kubernetes Authors. 10176 10177 Licensed under the Apache License, Version 2.0 (the "License"); 10178 you may not use this file except in compliance with the License. 
10179 You may obtain a copy of the License at 10180 10181 http://www.apache.org/licenses/LICENSE-2.0 10182 10183 Unless required by applicable law or agreed to in writing, software 10184 distributed under the License is distributed on an "AS IS" BASIS, 10185 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10186 See the License for the specific language governing permissions and 10187 limitations under the License. 10188 */ 10189 10190 package api 10191 10192 import ( 10193 "fmt" 10194 "math" 10195 10196 v1 "k8s.io/api/core/v1" 10197 "k8s.io/apimachinery/pkg/api/resource" 10198 v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper" 10199 10200 "volcano.sh/volcano/pkg/scheduler/util/assert" 10201 ) 10202 10203 const ( 10204 // GPUResourceName need to follow https://github.com/NVIDIA/k8s-device-plugin/blob/66a35b71ac4b5cbfb04714678b548bd77e5ba719/server.go#L20 10205 GPUResourceName = "nvidia.com/gpu" 10206 ) 10207 10208 const ( 10209 minResource float64 = 0.1 10210 ) 10211 10212 // DimensionDefaultValue means default value for black resource dimension 10213 type DimensionDefaultValue int 10214 10215 const ( 10216 // Zero means resource dimension not defined will be treated as zero 10217 Zero DimensionDefaultValue = 0 10218 // Infinity means resource dimension not defined will be treated as infinity 10219 Infinity DimensionDefaultValue = -1 10220 ) 10221 10222 // Resource struct defines all the resource type 10223 type Resource struct { 10224 MilliCPU float64 10225 Memory float64 10226 10227 // ScalarResources 10228 ScalarResources map[v1.ResourceName]float64 10229 10230 // MaxTaskNum is only used by predicates; it should NOT 10231 // be accounted in other operators, e.g. Add. 
10232 MaxTaskNum int 10233 } 10234 10235 // EmptyResource creates a empty resource object and returns 10236 func EmptyResource() *Resource <span class="cov8" title="1">{ 10237 return &Resource{} 10238 }</span> 10239 10240 // NewResource creates a new resource object from resource list 10241 func NewResource(rl v1.ResourceList) *Resource <span class="cov8" title="1">{ 10242 r := EmptyResource() 10243 for rName, rQuant := range rl </span><span class="cov8" title="1">{ 10244 switch rName </span>{ 10245 case v1.ResourceCPU:<span class="cov8" title="1"> 10246 r.MilliCPU += float64(rQuant.MilliValue())</span> 10247 case v1.ResourceMemory:<span class="cov8" title="1"> 10248 r.Memory += float64(rQuant.Value())</span> 10249 case v1.ResourcePods:<span class="cov0" title="0"> 10250 r.MaxTaskNum += int(rQuant.Value())</span> 10251 default:<span class="cov8" title="1"> 10252 //NOTE: When converting this back to k8s resource, we need record the format as well as / 1000 10253 if v1helper.IsScalarResourceName(rName) </span><span class="cov8" title="1">{ 10254 r.AddScalar(rName, float64(rQuant.MilliValue())) 10255 }</span> 10256 } 10257 } 10258 <span class="cov8" title="1">return r</span> 10259 } 10260 10261 // ResFloat642Quantity transform resource quantity 10262 func ResFloat642Quantity(resName v1.ResourceName, quantity float64) resource.Quantity <span class="cov0" title="0">{ 10263 var resQuantity *resource.Quantity 10264 switch resName </span>{ 10265 case v1.ResourceCPU:<span class="cov0" title="0"> 10266 resQuantity = resource.NewMilliQuantity(int64(quantity), resource.DecimalSI)</span> 10267 default:<span class="cov0" title="0"> 10268 resQuantity = resource.NewQuantity(int64(quantity), resource.BinarySI)</span> 10269 } 10270 10271 <span class="cov0" title="0">return *resQuantity</span> 10272 } 10273 10274 // ResQuantity2Float64 transform resource quantity 10275 func ResQuantity2Float64(resName v1.ResourceName, quantity resource.Quantity) float64 <span class="cov0" title="0">{ 
10276 var resQuantity float64 10277 switch resName </span>{ 10278 case v1.ResourceCPU:<span class="cov0" title="0"> 10279 resQuantity = float64(quantity.MilliValue())</span> 10280 default:<span class="cov0" title="0"> 10281 resQuantity = float64(quantity.Value())</span> 10282 } 10283 10284 <span class="cov0" title="0">return resQuantity</span> 10285 } 10286 10287 // Clone is used to clone a resource type, which is a deep copy function. 10288 func (r *Resource) Clone() *Resource <span class="cov8" title="1">{ 10289 clone := &Resource{ 10290 MilliCPU: r.MilliCPU, 10291 Memory: r.Memory, 10292 MaxTaskNum: r.MaxTaskNum, 10293 } 10294 10295 if r.ScalarResources != nil </span><span class="cov8" title="1">{ 10296 clone.ScalarResources = make(map[v1.ResourceName]float64) 10297 for k, v := range r.ScalarResources </span><span class="cov8" title="1">{ 10298 clone.ScalarResources[k] = v 10299 }</span> 10300 } 10301 10302 <span class="cov8" title="1">return clone</span> 10303 } 10304 10305 // String returns resource details in string format 10306 func (r *Resource) String() string <span class="cov8" title="1">{ 10307 str := fmt.Sprintf("cpu %0.2f, memory %0.2f", r.MilliCPU, r.Memory) 10308 for rName, rQuant := range r.ScalarResources </span><span class="cov0" title="0">{ 10309 str = fmt.Sprintf("%s, %s %0.2f", str, rName, rQuant) 10310 }</span> 10311 <span class="cov8" title="1">return str</span> 10312 } 10313 10314 // ResourceNames returns all resource types 10315 func (r *Resource) ResourceNames() ResourceNameList <span class="cov0" title="0">{ 10316 resNames := ResourceNameList{} 10317 10318 if r.MilliCPU >= minResource </span><span class="cov0" title="0">{ 10319 resNames = append(resNames, v1.ResourceCPU) 10320 }</span> 10321 10322 <span class="cov0" title="0">if r.Memory >= minResource </span><span class="cov0" title="0">{ 10323 resNames = append(resNames, v1.ResourceMemory) 10324 }</span> 10325 10326 <span class="cov0" title="0">for rName, rMount := range 
r.ScalarResources </span><span class="cov0" title="0">{ 10327 if rMount >= minResource </span><span class="cov0" title="0">{ 10328 resNames = append(resNames, rName) 10329 }</span> 10330 } 10331 10332 <span class="cov0" title="0">return resNames</span> 10333 } 10334 10335 // Get returns the resource value for that particular resource type 10336 func (r *Resource) Get(rn v1.ResourceName) float64 <span class="cov0" title="0">{ 10337 switch rn </span>{ 10338 case v1.ResourceCPU:<span class="cov0" title="0"> 10339 return r.MilliCPU</span> 10340 case v1.ResourceMemory:<span class="cov0" title="0"> 10341 return r.Memory</span> 10342 default:<span class="cov0" title="0"> 10343 if r.ScalarResources == nil </span><span class="cov0" title="0">{ 10344 return 0 10345 }</span> 10346 <span class="cov0" title="0">return r.ScalarResources[rn]</span> 10347 } 10348 } 10349 10350 // IsEmpty returns false if any kind of resource is not less than min value, otherwise returns true 10351 func (r *Resource) IsEmpty() bool <span class="cov8" title="1">{ 10352 if !(r.MilliCPU < minResource && r.Memory < minResource) </span><span class="cov8" title="1">{ 10353 return false 10354 }</span> 10355 10356 <span class="cov0" title="0">for _, rQuant := range r.ScalarResources </span><span class="cov0" title="0">{ 10357 if rQuant >= minResource </span><span class="cov0" title="0">{ 10358 return false 10359 }</span> 10360 } 10361 10362 <span class="cov0" title="0">return true</span> 10363 } 10364 10365 // IsZero returns false if the given kind of resource is not less than min value 10366 func (r *Resource) IsZero(rn v1.ResourceName) bool <span class="cov8" title="1">{ 10367 switch rn </span>{ 10368 case v1.ResourceCPU:<span class="cov8" title="1"> 10369 return r.MilliCPU < minResource</span> 10370 case v1.ResourceMemory:<span class="cov0" title="0"> 10371 return r.Memory < minResource</span> 10372 default:<span class="cov8" title="1"> 10373 if r.ScalarResources == nil </span><span class="cov0" 
title="0">{ 10374 return true 10375 }</span> 10376 10377 <span class="cov8" title="1">_, found := r.ScalarResources[rn] 10378 assert.Assertf(found, "unknown resource %s", rn) 10379 10380 return r.ScalarResources[rn] < minResource</span> 10381 } 10382 } 10383 10384 // Add is used to add two given resources 10385 func (r *Resource) Add(rr *Resource) *Resource <span class="cov8" title="1">{ 10386 r.MilliCPU += rr.MilliCPU 10387 r.Memory += rr.Memory 10388 10389 for rName, rQuant := range rr.ScalarResources </span><span class="cov8" title="1">{ 10390 if r.ScalarResources == nil </span><span class="cov8" title="1">{ 10391 r.ScalarResources = map[v1.ResourceName]float64{} 10392 }</span> 10393 <span class="cov8" title="1">r.ScalarResources[rName] += rQuant</span> 10394 } 10395 10396 <span class="cov8" title="1">return r</span> 10397 } 10398 10399 // Sub subtracts two Resource objects with assertion. 10400 func (r *Resource) Sub(rr *Resource) *Resource <span class="cov8" title="1">{ 10401 assert.Assertf(rr.LessEqual(r, Zero), "resource is not sufficient to do operation: <%v> sub <%v>", r, rr) 10402 return r.sub(rr) 10403 }</span> 10404 10405 // sub subtracts two Resource objects. 
10406 func (r *Resource) sub(rr *Resource) *Resource <span class="cov8" title="1">{ 10407 r.MilliCPU -= rr.MilliCPU 10408 r.Memory -= rr.Memory 10409 10410 if r.ScalarResources == nil </span><span class="cov8" title="1">{ 10411 return r 10412 }</span> 10413 <span class="cov8" title="1">for rrName, rrQuant := range rr.ScalarResources </span><span class="cov8" title="1">{ 10414 r.ScalarResources[rrName] -= rrQuant 10415 }</span> 10416 10417 <span class="cov8" title="1">return r</span> 10418 } 10419 10420 // Multi multiples the resource with ratio provided 10421 func (r *Resource) Multi(ratio float64) *Resource <span class="cov0" title="0">{ 10422 r.MilliCPU *= ratio 10423 r.Memory *= ratio 10424 for rName, rQuant := range r.ScalarResources </span><span class="cov0" title="0">{ 10425 r.ScalarResources[rName] = rQuant * ratio 10426 }</span> 10427 <span class="cov0" title="0">return r</span> 10428 } 10429 10430 // SetMaxResource compares with ResourceList and takes max value for each Resource. 
10431 func (r *Resource) SetMaxResource(rr *Resource) <span class="cov8" title="1">{ 10432 if r == nil || rr == nil </span><span class="cov0" title="0">{ 10433 return 10434 }</span> 10435 10436 <span class="cov8" title="1">if rr.MilliCPU > r.MilliCPU </span><span class="cov8" title="1">{ 10437 r.MilliCPU = rr.MilliCPU 10438 }</span> 10439 <span class="cov8" title="1">if rr.Memory > r.Memory </span><span class="cov8" title="1">{ 10440 r.Memory = rr.Memory 10441 }</span> 10442 10443 <span class="cov8" title="1">for rrName, rrQuant := range rr.ScalarResources </span><span class="cov8" title="1">{ 10444 if r.ScalarResources == nil </span><span class="cov8" title="1">{ 10445 r.ScalarResources = make(map[v1.ResourceName]float64) 10446 for k, v := range rr.ScalarResources </span><span class="cov8" title="1">{ 10447 r.ScalarResources[k] = v 10448 }</span> 10449 <span class="cov8" title="1">return</span> 10450 } 10451 <span class="cov8" title="1">_, ok := r.ScalarResources[rrName] 10452 if !ok || rrQuant > r.ScalarResources[rrName] </span><span class="cov8" title="1">{ 10453 r.ScalarResources[rrName] = rrQuant 10454 }</span> 10455 } 10456 } 10457 10458 //FitDelta Computes the delta between a resource object representing available 10459 //resources an operand representing resources being requested. Any 10460 //field that is less than 0 after the operation represents an 10461 //insufficient resource. 
10462 func (r *Resource) FitDelta(rr *Resource) *Resource <span class="cov0" title="0">{ 10463 if rr.MilliCPU > 0 </span><span class="cov0" title="0">{ 10464 r.MilliCPU -= rr.MilliCPU + minResource 10465 }</span> 10466 10467 <span class="cov0" title="0">if rr.Memory > 0 </span><span class="cov0" title="0">{ 10468 r.Memory -= rr.Memory + minResource 10469 }</span> 10470 10471 <span class="cov0" title="0">if r.ScalarResources == nil </span><span class="cov0" title="0">{ 10472 r.ScalarResources = make(map[v1.ResourceName]float64) 10473 }</span> 10474 10475 <span class="cov0" title="0">for rrName, rrQuant := range rr.ScalarResources </span><span class="cov0" title="0">{ 10476 if rrQuant > 0 </span><span class="cov0" title="0">{ 10477 _, ok := r.ScalarResources[rrName] 10478 if !ok </span><span class="cov0" title="0">{ 10479 r.ScalarResources[rrName] = 0 10480 }</span> 10481 <span class="cov0" title="0">r.ScalarResources[rrName] -= rrQuant + minResource</span> 10482 } 10483 } 10484 10485 <span class="cov0" title="0">return r</span> 10486 } 10487 10488 // Less returns true only on condition that all dimensions of resources in r are less than that of rr, 10489 // Otherwise returns false. 10490 // @param defaultValue "default value for resource dimension not defined in ScalarResources. 
Its value can only be one of 'Zero' and 'Infinity'" 10491 func (r *Resource) Less(rr *Resource, defaultValue DimensionDefaultValue) bool <span class="cov8" title="1">{ 10492 lessFunc := func(l, r float64) bool </span><span class="cov8" title="1">{ 10493 return l < r 10494 }</span> 10495 10496 <span class="cov8" title="1">if !lessFunc(r.MilliCPU, rr.MilliCPU) </span><span class="cov8" title="1">{ 10497 return false 10498 }</span> 10499 <span class="cov8" title="1">if !lessFunc(r.Memory, rr.Memory) </span><span class="cov0" title="0">{ 10500 return false 10501 }</span> 10502 10503 <span class="cov8" title="1">for resourceName, leftValue := range r.ScalarResources </span><span class="cov8" title="1">{ 10504 rightValue, ok := rr.ScalarResources[resourceName] 10505 if !ok && defaultValue == Infinity </span><span class="cov8" title="1">{ 10506 continue</span> 10507 } 10508 10509 <span class="cov8" title="1">if !lessFunc(leftValue, rightValue) </span><span class="cov8" title="1">{ 10510 return false 10511 }</span> 10512 } 10513 <span class="cov8" title="1">return true</span> 10514 } 10515 10516 // LessEqual returns true only on condition that all dimensions of resources in r are less than or equal with that of rr, 10517 // Otherwise returns false. 10518 // @param defaultValue "default value for resource dimension not defined in ScalarResources. 
Its value can only be one of 'Zero' and 'Infinity'" 10519 func (r *Resource) LessEqual(rr *Resource, defaultValue DimensionDefaultValue) bool <span class="cov8" title="1">{ 10520 lessEqualFunc := func(l, r, diff float64) bool </span><span class="cov8" title="1">{ 10521 if l < r || math.Abs(l-r) < diff </span><span class="cov8" title="1">{ 10522 return true 10523 }</span> 10524 <span class="cov8" title="1">return false</span> 10525 } 10526 10527 <span class="cov8" title="1">if !lessEqualFunc(r.MilliCPU, rr.MilliCPU, minResource) </span><span class="cov8" title="1">{ 10528 return false 10529 }</span> 10530 <span class="cov8" title="1">if !lessEqualFunc(r.Memory, rr.Memory, minResource) </span><span class="cov8" title="1">{ 10531 return false 10532 }</span> 10533 10534 <span class="cov8" title="1">for resourceName, leftValue := range r.ScalarResources </span><span class="cov8" title="1">{ 10535 rightValue, ok := rr.ScalarResources[resourceName] 10536 if !ok && defaultValue == Infinity </span><span class="cov0" title="0">{ 10537 continue</span> 10538 } 10539 10540 <span class="cov8" title="1">if !lessEqualFunc(leftValue, rightValue, minResource) </span><span class="cov8" title="1">{ 10541 return false 10542 }</span> 10543 } 10544 <span class="cov8" title="1">return true</span> 10545 } 10546 10547 // LessPartly returns true if there exists any dimension whose resource amount in r is less than that in rr. 10548 // Otherwise returns false. 10549 // @param defaultValue "default value for resource dimension not defined in ScalarResources. 
Its value can only be one of 'Zero' and 'Infinity'" 10550 func (r *Resource) LessPartly(rr *Resource, defaultValue DimensionDefaultValue) bool <span class="cov8" title="1">{ 10551 lessFunc := func(l, r float64) bool </span><span class="cov8" title="1">{ 10552 return l < r 10553 }</span> 10554 10555 <span class="cov8" title="1">if lessFunc(r.MilliCPU, rr.MilliCPU) || lessFunc(r.Memory, rr.Memory) </span><span class="cov8" title="1">{ 10556 return true 10557 }</span> 10558 10559 <span class="cov8" title="1">for resourceName, leftValue := range r.ScalarResources </span><span class="cov8" title="1">{ 10560 rightValue, ok := rr.ScalarResources[resourceName] 10561 if !ok && defaultValue == Infinity </span><span class="cov8" title="1">{ 10562 return true 10563 }</span> 10564 10565 <span class="cov8" title="1">if lessFunc(leftValue, rightValue) </span><span class="cov8" title="1">{ 10566 return true 10567 }</span> 10568 } 10569 <span class="cov8" title="1">return false</span> 10570 } 10571 10572 // LessEqualPartly returns true if there exists any dimension whose resource amount in r is less than or equal with that in rr. 10573 // Otherwise returns false. 10574 // @param defaultValue "default value for resource dimension not defined in ScalarResources. 
Its value can only be one of 'Zero' and 'Infinity'" 10575 func (r *Resource) LessEqualPartly(rr *Resource, defaultValue DimensionDefaultValue) bool <span class="cov8" title="1">{ 10576 lessEqualFunc := func(l, r, diff float64) bool </span><span class="cov8" title="1">{ 10577 if l < r || math.Abs(l-r) < diff </span><span class="cov8" title="1">{ 10578 return true 10579 }</span> 10580 <span class="cov8" title="1">return false</span> 10581 } 10582 10583 <span class="cov8" title="1">if lessEqualFunc(r.MilliCPU, rr.MilliCPU, minResource) || lessEqualFunc(r.Memory, rr.Memory, minResource) </span><span class="cov8" title="1">{ 10584 return true 10585 }</span> 10586 10587 <span class="cov8" title="1">for resourceName, leftValue := range r.ScalarResources </span><span class="cov8" title="1">{ 10588 rightValue, ok := rr.ScalarResources[resourceName] 10589 if !ok && defaultValue == Infinity </span><span class="cov0" title="0">{ 10590 return true 10591 }</span> 10592 10593 <span class="cov8" title="1">if lessEqualFunc(leftValue, rightValue, minResource) </span><span class="cov8" title="1">{ 10594 return true 10595 }</span> 10596 } 10597 <span class="cov8" title="1">return false</span> 10598 } 10599 10600 // Equal returns true only on condition that values in all dimension are equal with each other for r and rr 10601 // Otherwise returns false. 10602 // @param defaultValue "default value for resource dimension not defined in ScalarResources. 
Its value can only be one of 'Zero' and 'Infinity'" 10603 func (r *Resource) Equal(rr *Resource, defaultValue DimensionDefaultValue) bool <span class="cov8" title="1">{ 10604 equalFunc := func(l, r, diff float64) bool </span><span class="cov8" title="1">{ 10605 return l == r || math.Abs(l-r) < diff 10606 }</span> 10607 10608 <span class="cov8" title="1">if !equalFunc(r.MilliCPU, rr.MilliCPU, minResource) || !equalFunc(r.Memory, rr.Memory, minResource) </span><span class="cov8" title="1">{ 10609 return false 10610 }</span> 10611 10612 <span class="cov8" title="1">for resourceName, leftValue := range r.ScalarResources </span><span class="cov8" title="1">{ 10613 rightValue := rr.ScalarResources[resourceName] 10614 if !equalFunc(leftValue, rightValue, minResource) </span><span class="cov0" title="0">{ 10615 return false 10616 }</span> 10617 } 10618 <span class="cov8" title="1">return true</span> 10619 } 10620 10621 // Diff calculate the difference between two resource object 10622 // Note: if `defaultValue` equals `Infinity`, the difference between two values will be `Infinity`, marked as -1 10623 func (r *Resource) Diff(rr *Resource, defaultValue DimensionDefaultValue) (*Resource, *Resource) <span class="cov8" title="1">{ 10624 leftRes := r.Clone() 10625 rightRes := rr.Clone() 10626 increasedVal := EmptyResource() 10627 decreasedVal := EmptyResource() 10628 r.setDefaultValue(leftRes, rightRes, defaultValue) 10629 10630 if leftRes.MilliCPU > rightRes.MilliCPU </span><span class="cov8" title="1">{ 10631 increasedVal.MilliCPU = leftRes.MilliCPU - rightRes.MilliCPU 10632 }</span> else<span class="cov8" title="1"> { 10633 decreasedVal.MilliCPU = rightRes.MilliCPU - leftRes.MilliCPU 10634 }</span> 10635 10636 <span class="cov8" title="1">if leftRes.Memory > rightRes.Memory </span><span class="cov8" title="1">{ 10637 increasedVal.Memory = leftRes.Memory - rightRes.Memory 10638 }</span> else<span class="cov8" title="1"> { 10639 decreasedVal.Memory = rightRes.Memory - 
leftRes.Memory 10640 }</span> 10641 10642 <span class="cov8" title="1">increasedVal.ScalarResources = make(map[v1.ResourceName]float64) 10643 decreasedVal.ScalarResources = make(map[v1.ResourceName]float64) 10644 for lName, lQuant := range leftRes.ScalarResources </span><span class="cov8" title="1">{ 10645 rQuant := rightRes.ScalarResources[lName] 10646 if lQuant == -1 </span><span class="cov8" title="1">{ 10647 increasedVal.ScalarResources[lName] = -1 10648 continue</span> 10649 } 10650 <span class="cov8" title="1">if rQuant == -1 </span><span class="cov8" title="1">{ 10651 decreasedVal.ScalarResources[lName] = -1 10652 continue</span> 10653 } 10654 <span class="cov8" title="1">if lQuant > rQuant </span><span class="cov8" title="1">{ 10655 increasedVal.ScalarResources[lName] = lQuant - rQuant 10656 }</span> else<span class="cov8" title="1"> { 10657 decreasedVal.ScalarResources[lName] = rQuant - lQuant 10658 }</span> 10659 } 10660 10661 <span class="cov8" title="1">return increasedVal, decreasedVal</span> 10662 } 10663 10664 // AddScalar adds a resource by a scalar value of this resource. 10665 func (r *Resource) AddScalar(name v1.ResourceName, quantity float64) <span class="cov8" title="1">{ 10666 r.SetScalar(name, r.ScalarResources[name]+quantity) 10667 }</span> 10668 10669 // SetScalar sets a resource by a scalar value of this resource. 10670 func (r *Resource) SetScalar(name v1.ResourceName, quantity float64) <span class="cov8" title="1">{ 10671 // Lazily allocate scalar resource map. 
10672 if r.ScalarResources == nil </span><span class="cov8" title="1">{ 10673 r.ScalarResources = map[v1.ResourceName]float64{} 10674 }</span> 10675 <span class="cov8" title="1">r.ScalarResources[name] = quantity</span> 10676 } 10677 10678 // MinDimensionResource is used to reset the r resource dimension which is less than rr 10679 // e.g r resource is <cpu 2000.00, memory 4047845376.00, hugepages-2Mi 0.00, hugepages-1Gi 0.00> 10680 // rr resource is <cpu 3000.00, memory 1000.00> 10681 // return r resource is <cpu 2000.00, memory 1000.00, hugepages-2Mi 0.00, hugepages-1Gi 0.00> 10682 // @param defaultValue "default value for resource dimension not defined in ScalarResources. Its value can only be one of 'Zero' and 'Infinity'" 10683 func (r *Resource) MinDimensionResource(rr *Resource, defaultValue DimensionDefaultValue) *Resource <span class="cov8" title="1">{ 10684 if rr.MilliCPU < r.MilliCPU </span><span class="cov8" title="1">{ 10685 r.MilliCPU = rr.MilliCPU 10686 }</span> 10687 <span class="cov8" title="1">if rr.Memory < r.Memory </span><span class="cov8" title="1">{ 10688 r.Memory = rr.Memory 10689 }</span> 10690 10691 <span class="cov8" title="1">if r.ScalarResources == nil </span><span class="cov0" title="0">{ 10692 return r 10693 }</span> 10694 10695 <span class="cov8" title="1">if rr.ScalarResources == nil </span><span class="cov8" title="1">{ 10696 if defaultValue == Infinity </span><span class="cov8" title="1">{ 10697 return r 10698 }</span> 10699 10700 <span class="cov8" title="1">for name := range r.ScalarResources </span><span class="cov8" title="1">{ 10701 r.ScalarResources[name] = 0 10702 }</span> 10703 <span class="cov8" title="1">return r</span> 10704 } 10705 10706 <span class="cov8" title="1">for name, quant := range r.ScalarResources </span><span class="cov8" title="1">{ 10707 rQuant, ok := rr.ScalarResources[name] 10708 if ok </span><span class="cov8" title="1">{ 10709 r.ScalarResources[name] = math.Min(quant, rQuant) 10710 }</span> else<span 
class="cov8" title="1"> { 10711 if defaultValue == Infinity </span><span class="cov8" title="1">{ 10712 continue</span> 10713 } 10714 10715 <span class="cov8" title="1">r.ScalarResources[name] = 0</span> 10716 } 10717 } 10718 <span class="cov8" title="1">return r</span> 10719 } 10720 10721 // setDefaultValue sets default value for resource dimension not defined of ScalarResource in leftResource and rightResource 10722 // @param defaultValue "default value for resource dimension not defined in ScalarResources. It can only be one of 'Zero' or 'Infinity'" 10723 func (r *Resource) setDefaultValue(leftResource, rightResource *Resource, defaultValue DimensionDefaultValue) <span class="cov8" title="1">{ 10724 if leftResource.ScalarResources == nil </span><span class="cov8" title="1">{ 10725 leftResource.ScalarResources = map[v1.ResourceName]float64{} 10726 }</span> 10727 <span class="cov8" title="1">if rightResource.ScalarResources == nil </span><span class="cov8" title="1">{ 10728 rightResource.ScalarResources = map[v1.ResourceName]float64{} 10729 }</span> 10730 <span class="cov8" title="1">for resourceName := range leftResource.ScalarResources </span><span class="cov8" title="1">{ 10731 _, ok := rightResource.ScalarResources[resourceName] 10732 if !ok </span><span class="cov8" title="1">{ 10733 if defaultValue == Zero </span><span class="cov8" title="1">{ 10734 rightResource.ScalarResources[resourceName] = 0 10735 }</span> else<span class="cov8" title="1"> if defaultValue == Infinity </span><span class="cov8" title="1">{ 10736 rightResource.ScalarResources[resourceName] = -1 10737 }</span> 10738 } 10739 } 10740 10741 <span class="cov8" title="1">for resourceName := range rightResource.ScalarResources </span><span class="cov8" title="1">{ 10742 _, ok := leftResource.ScalarResources[resourceName] 10743 if !ok </span><span class="cov8" title="1">{ 10744 if defaultValue == Zero </span><span class="cov8" title="1">{ 10745 leftResource.ScalarResources[resourceName] = 0 10746 
}</span> else<span class="cov8" title="1"> if defaultValue == Infinity </span><span class="cov8" title="1">{ 10747 leftResource.ScalarResources[resourceName] = -1 10748 }</span> 10749 } 10750 } 10751 } 10752 10753 // ParseResourceList parses the given configuration map into an API 10754 // ResourceList or returns an error. 10755 func ParseResourceList(m map[string]string) (v1.ResourceList, error) <span class="cov0" title="0">{ 10756 if len(m) == 0 </span><span class="cov0" title="0">{ 10757 return nil, nil 10758 }</span> 10759 <span class="cov0" title="0">rl := make(v1.ResourceList) 10760 for k, v := range m </span><span class="cov0" title="0">{ 10761 switch v1.ResourceName(k) </span>{ 10762 // CPU, memory, local storage, and PID resources are supported. 10763 case v1.ResourceCPU, v1.ResourceMemory, v1.ResourceEphemeralStorage:<span class="cov0" title="0"> 10764 q, err := resource.ParseQuantity(v) 10765 if err != nil </span><span class="cov0" title="0">{ 10766 return nil, err 10767 }</span> 10768 <span class="cov0" title="0">if q.Sign() == -1 </span><span class="cov0" title="0">{ 10769 return nil, fmt.Errorf("resource quantity for %q cannot be negative: %v", k, v) 10770 }</span> 10771 <span class="cov0" title="0">rl[v1.ResourceName(k)] = q</span> 10772 default:<span class="cov0" title="0"> 10773 return nil, fmt.Errorf("cannot reserve %q resource", k)</span> 10774 } 10775 } 10776 <span class="cov0" title="0">return rl, nil</span> 10777 } 10778 10779 func GetMinResource() float64 <span class="cov0" title="0">{ 10780 return minResource 10781 }</span> 10782 10783 // ResourceNameList struct defines resource name collection 10784 type ResourceNameList []v1.ResourceName 10785 10786 // Contains judges whether rr is subset of r 10787 func (r ResourceNameList) Contains(rr ResourceNameList) bool <span class="cov0" title="0">{ 10788 for _, rrName := range ([]v1.ResourceName)(rr) </span><span class="cov0" title="0">{ 10789 isResourceExist := false 10790 for _, rName := range 
([]v1.ResourceName)(r) </span><span class="cov0" title="0">{ 10791 if rName == rrName </span><span class="cov0" title="0">{ 10792 isResourceExist = true 10793 break</span> 10794 } 10795 } 10796 <span class="cov0" title="0">if !isResourceExist </span><span class="cov0" title="0">{ 10797 return false 10798 }</span> 10799 } 10800 <span class="cov0" title="0">return true</span> 10801 } 10802 </pre> 10803 10804 <pre class="file" id="file54" style="display: none">/* 10805 Copyright 2021 The Volcano Authors. 10806 10807 Licensed under the Apache License, Version 2.0 (the "License"); 10808 you may not use this file except in compliance with the License. 10809 You may obtain a copy of the License at 10810 10811 http://www.apache.org/licenses/LICENSE-2.0 10812 10813 Unless required by applicable law or agreed to in writing, software 10814 distributed under the License is distributed on an "AS IS" BASIS, 10815 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10816 See the License for the specific language governing permissions and 10817 limitations under the License. 10818 */ 10819 10820 package api 10821 10822 import ( 10823 "k8s.io/apimachinery/pkg/types" 10824 10825 "volcano.sh/apis/pkg/apis/scheduling" 10826 ) 10827 10828 // ClusterID is UID type, serves as unique ID for each queue 10829 type ClusterID types.UID 10830 10831 // SiloClusterInfo will have all details about queue 10832 type SiloClusterInfo struct { 10833 UID ClusterID 10834 Cluster *scheduling.Cluster 10835 } 10836 10837 // NewSiloClusterInfo creates new queueInfo object 10838 func NewSiloClusterInfo(cluster *scheduling.Cluster) *SiloClusterInfo <span class="cov0" title="0">{ 10839 return &SiloClusterInfo{ 10840 UID: ClusterID(cluster.Name), 10841 Cluster: cluster, 10842 } 10843 }</span> 10844 </pre> 10845 10846 <pre class="file" id="file55" style="display: none">/* 10847 Copyright 2018 The Kubernetes Authors. 
10848 10849 Licensed under the Apache License, Version 2.0 (the "License"); 10850 you may not use this file except in compliance with the License. 10851 You may obtain a copy of the License at 10852 10853 http://www.apache.org/licenses/LICENSE-2.0 10854 10855 Unless required by applicable law or agreed to in writing, software 10856 distributed under the License is distributed on an "AS IS" BASIS, 10857 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10858 See the License for the specific language governing permissions and 10859 limitations under the License. 10860 */ 10861 10862 package api 10863 10864 import ( 10865 "fmt" 10866 10867 v1 "k8s.io/api/core/v1" 10868 "k8s.io/apimachinery/pkg/api/resource" 10869 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 10870 "k8s.io/apimachinery/pkg/types" 10871 ) 10872 10873 func buildNode(name string, alloc v1.ResourceList) *v1.Node <span class="cov8" title="1">{ 10874 return &v1.Node{ 10875 ObjectMeta: metav1.ObjectMeta{ 10876 Name: name, 10877 }, 10878 Status: v1.NodeStatus{ 10879 Capacity: alloc, 10880 Allocatable: alloc, 10881 }, 10882 } 10883 }</span> 10884 10885 func buildPod(ns, n, nn string, p v1.PodPhase, req v1.ResourceList, owner []metav1.OwnerReference, labels map[string]string) *v1.Pod <span class="cov8" title="1">{ 10886 return &v1.Pod{ 10887 ObjectMeta: metav1.ObjectMeta{ 10888 UID: types.UID(fmt.Sprintf("%v-%v", ns, n)), 10889 Name: n, 10890 Namespace: ns, 10891 OwnerReferences: owner, 10892 Labels: labels, 10893 }, 10894 Status: v1.PodStatus{ 10895 Phase: p, 10896 }, 10897 Spec: v1.PodSpec{ 10898 NodeName: nn, 10899 Containers: []v1.Container{ 10900 { 10901 Resources: v1.ResourceRequirements{ 10902 Requests: req, 10903 }, 10904 }, 10905 }, 10906 }, 10907 } 10908 }</span> 10909 10910 func buildResourceList(cpu string, memory string) v1.ResourceList <span class="cov8" title="1">{ 10911 return v1.ResourceList{ 10912 v1.ResourceCPU: resource.MustParse(cpu), 10913 v1.ResourceMemory: 
resource.MustParse(memory), 10914 } 10915 }</span> 10916 10917 func buildResource(cpu string, memory string) *Resource <span class="cov8" title="1">{ 10918 return NewResource(v1.ResourceList{ 10919 v1.ResourceCPU: resource.MustParse(cpu), 10920 v1.ResourceMemory: resource.MustParse(memory), 10921 }) 10922 }</span> 10923 10924 func buildOwnerReference(owner string) metav1.OwnerReference <span class="cov8" title="1">{ 10925 controller := true 10926 return metav1.OwnerReference{ 10927 Controller: &controller, 10928 UID: types.UID(owner), 10929 } 10930 }</span> 10931 </pre> 10932 10933 <pre class="file" id="file56" style="display: none">/* 10934 Copyright 2018 The Kubernetes Authors. 10935 10936 Licensed under the Apache License, Version 2.0 (the "License"); 10937 you may not use this file except in compliance with the License. 10938 You may obtain a copy of the License at 10939 10940 http://www.apache.org/licenses/LICENSE-2.0 10941 10942 Unless required by applicable law or agreed to in writing, software 10943 distributed under the License is distributed on an "AS IS" BASIS, 10944 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10945 See the License for the specific language governing permissions and 10946 limitations under the License. 10947 */ 10948 10949 package api 10950 10951 import ( 10952 k8sframework "k8s.io/kubernetes/pkg/scheduler/framework/v1alpha1" 10953 ) 10954 10955 // TaskStatus defines the status of a task/pod. 10956 type TaskStatus int 10957 10958 const ( 10959 // Pending means the task is pending in the apiserver. 10960 Pending TaskStatus = 1 << iota 10961 10962 // Allocated means the scheduler assigns a host to it. 10963 Allocated 10964 10965 // Pipelined means the scheduler assigns a host to wait for releasing resource. 10966 Pipelined 10967 10968 // Binding means the scheduler send Bind request to apiserver. 10969 Binding 10970 10971 // Bound means the task/Pod bounds to a host. 
10972 Bound 10973 10974 // Running means a task is running on the host. 10975 Running 10976 10977 // Releasing means a task/pod is deleted. 10978 Releasing 10979 10980 // Succeeded means that all containers in the pod have voluntarily terminated 10981 // with a container exit code of 0, and the system is not going to restart any of these containers. 10982 Succeeded 10983 10984 // Failed means that all containers in the pod have terminated, and at least one container has 10985 // terminated in a failure (exited with a non-zero exit code or was stopped by the system). 10986 Failed 10987 10988 // Unknown means the status of task/pod is unknown to the scheduler. 10989 Unknown 10990 ) 10991 10992 func (ts TaskStatus) String() string <span class="cov8" title="1">{ 10993 switch ts </span>{ 10994 case Pending:<span class="cov8" title="1"> 10995 return "Pending"</span> 10996 case Allocated:<span class="cov0" title="0"> 10997 return "Allocated"</span> 10998 case Pipelined:<span class="cov0" title="0"> 10999 return "Pipelined"</span> 11000 case Binding:<span class="cov0" title="0"> 11001 return "Binding"</span> 11002 case Bound:<span class="cov0" title="0"> 11003 return "Bound"</span> 11004 case Running:<span class="cov0" title="0"> 11005 return "Running"</span> 11006 case Releasing:<span class="cov0" title="0"> 11007 return "Releasing"</span> 11008 case Succeeded:<span class="cov0" title="0"> 11009 return "Succeeded"</span> 11010 case Failed:<span class="cov0" title="0"> 11011 return "Failed"</span> 11012 default:<span class="cov0" title="0"> 11013 return "Unknown"</span> 11014 } 11015 } 11016 11017 // NodePhase defines the phase of node 11018 type NodePhase int 11019 11020 const ( 11021 // Ready means the node is ready for scheduling 11022 Ready NodePhase = 1 << iota 11023 // NotReady means the node is not ready for scheduling 11024 NotReady 11025 ) 11026 11027 func (np NodePhase) String() string <span class="cov8" title="1">{ 11028 switch np </span>{ 11029 case Ready:<span 
class="cov8" title="1"> 11030 return "Ready"</span> 11031 case NotReady:<span class="cov8" title="1"> 11032 return "NotReady"</span> 11033 } 11034 11035 <span class="cov0" title="0">return "Unknown"</span> 11036 } 11037 11038 // validateStatusUpdate validates whether the status transfer is valid. 11039 func validateStatusUpdate(oldStatus, newStatus TaskStatus) error <span class="cov0" title="0">{ 11040 return nil 11041 }</span> 11042 11043 // LessFn is the func declaration used by sort or priority queue. 11044 type LessFn func(interface{}, interface{}) bool 11045 11046 // CompareFn is the func declaration used by sort or priority queue. 11047 type CompareFn func(interface{}, interface{}) int 11048 11049 // ValidateFn is the func declaration used to check object's status. 11050 type ValidateFn func(interface{}) bool 11051 11052 // ValidateResult is struct to which can used to determine the result 11053 type ValidateResult struct { 11054 Pass bool 11055 Reason string 11056 Message string 11057 } 11058 11059 // ValidateExFn is the func declaration used to validate the result. 11060 type ValidateExFn func(interface{}) *ValidateResult 11061 11062 // VoteFn is the func declaration used to check object's complicated status. 11063 type VoteFn func(interface{}) int 11064 11065 // JobEnqueuedFn is the func declaration used to call after job enqueued. 11066 type JobEnqueuedFn func(interface{}) 11067 11068 // PredicateFn is the func declaration used to predicate node for task. 11069 type PredicateFn func(*TaskInfo, *NodeInfo) error 11070 11071 // BestNodeFn is the func declaration used to return the nodeScores to plugins. 11072 type BestNodeFn func(*TaskInfo, map[float64][]*NodeInfo) *NodeInfo 11073 11074 // EvictableFn is the func declaration used to evict tasks. 11075 type EvictableFn func(*TaskInfo, []*TaskInfo) ([]*TaskInfo, int) 11076 11077 // NodeOrderFn is the func declaration used to get priority score for a node for a particular task. 
11078 type NodeOrderFn func(*TaskInfo, *NodeInfo) (float64, error) 11079 11080 // BatchNodeOrderFn is the func declaration used to get priority score for ALL nodes for a particular task. 11081 type BatchNodeOrderFn func(*TaskInfo, []*NodeInfo) (map[string]float64, error) 11082 11083 // NodeMapFn is the func declaration used to get priority score for a node for a particular task. 11084 type NodeMapFn func(*TaskInfo, *NodeInfo) (float64, error) 11085 11086 // NodeReduceFn is the func declaration used to reduce priority score for a node for a particular task. 11087 type NodeReduceFn func(*TaskInfo, k8sframework.NodeScoreList) error 11088 11089 // NodeOrderMapFn is the func declaration used to get priority score of all plugins for a node for a particular task. 11090 type NodeOrderMapFn func(*TaskInfo, *NodeInfo) (map[string]float64, float64, error) 11091 11092 // NodeOrderReduceFn is the func declaration used to reduce priority score of all nodes for a plugin for a particular task. 11093 type NodeOrderReduceFn func(*TaskInfo, map[string]k8sframework.NodeScoreList) (map[string]float64, error) 11094 11095 // TargetJobFn is the func declaration used to select the target job satisfies some conditions 11096 type TargetJobFn func([]*JobInfo) *JobInfo 11097 11098 // ReservedNodesFn is the func declaration used to select the reserved nodes 11099 type ReservedNodesFn func() 11100 11101 // VictimTasksFn is the func declaration used to select victim tasks 11102 type VictimTasksFn func() []*TaskInfo 11103 11104 // UnderUsedResourceFn is the func declaration used to get under used resource list for queue 11105 type UnderUsedResourceFn func(*QueueInfo) ResourceNameList 11106 </pre> 11107 11108 <pre class="file" id="file57" style="display: none">package api 11109 11110 import ( 11111 "fmt" 11112 "sort" 11113 "strings" 11114 ) 11115 11116 const ( 11117 // NodePodNumberExceeded means pods in node exceed the allocatable pod number 11118 NodePodNumberExceeded = "node(s) pod number 
exceeded" 11119 // NodeResourceFitFailed means node could not fit the request of pod 11120 NodeResourceFitFailed = "node(s) resource fit failed" 11121 11122 // AllNodeUnavailableMsg is the default error message 11123 AllNodeUnavailableMsg = "all nodes are unavailable" 11124 ) 11125 11126 // These are reasons for a pod's transition to a condition. 11127 const ( 11128 // PodReasonUnschedulable reason in PodScheduled PodCondition means that the scheduler 11129 // can't schedule the pod right now, for example due to insufficient resources in the cluster. 11130 PodReasonUnschedulable = "Unschedulable" 11131 // PodReasonSchedulable reason in PodScheduled PodCondition means that the scheduler 11132 // can schedule the pod right now, but not bind yet 11133 PodReasonSchedulable = "Schedulable" 11134 // PodReasonUndetermined reason in PodScheduled PodCondition means that the scheduler 11135 // skips scheduling the pod which left the pod `Undetermined`, for example due to unschedulable pod already occurred. 
11136 PodReasonUndetermined = "Undetermined" 11137 ) 11138 11139 // FitErrors is set of FitError on many nodes 11140 type FitErrors struct { 11141 nodes map[string]*FitError 11142 err string 11143 } 11144 11145 // NewFitErrors returns an FitErrors 11146 func NewFitErrors() *FitErrors <span class="cov0" title="0">{ 11147 f := new(FitErrors) 11148 f.nodes = make(map[string]*FitError) 11149 return f 11150 }</span> 11151 11152 // SetError set the common error message in FitErrors 11153 func (f *FitErrors) SetError(err string) <span class="cov0" title="0">{ 11154 f.err = err 11155 }</span> 11156 11157 // SetNodeError set the node error in FitErrors 11158 func (f *FitErrors) SetNodeError(nodeName string, err error) <span class="cov0" title="0">{ 11159 var fe *FitError 11160 switch obj := err.(type) </span>{ 11161 case *FitError:<span class="cov0" title="0"> 11162 obj.NodeName = nodeName 11163 fe = obj</span> 11164 default:<span class="cov0" title="0"> 11165 fe = &FitError{ 11166 NodeName: nodeName, 11167 Reasons: []string{obj.Error()}, 11168 }</span> 11169 } 11170 11171 <span class="cov0" title="0">f.nodes[nodeName] = fe</span> 11172 } 11173 11174 // Error returns the final error message 11175 func (f *FitErrors) Error() string <span class="cov8" title="1">{ 11176 reasons := make(map[string]int) 11177 11178 for _, node := range f.nodes </span><span class="cov8" title="1">{ 11179 for _, reason := range node.Reasons </span><span class="cov8" title="1">{ 11180 reasons[reason]++ 11181 }</span> 11182 } 11183 11184 <span class="cov8" title="1">sortReasonsHistogram := func() []string </span><span class="cov8" title="1">{ 11185 reasonStrings := []string{} 11186 for k, v := range reasons </span><span class="cov8" title="1">{ 11187 reasonStrings = append(reasonStrings, fmt.Sprintf("%v %v", v, k)) 11188 }</span> 11189 <span class="cov8" title="1">sort.Strings(reasonStrings) 11190 return reasonStrings</span> 11191 } 11192 <span class="cov8" title="1">if f.err == "" </span><span 
class="cov8" title="1">{ 11193 f.err = AllNodeUnavailableMsg 11194 }</span> 11195 <span class="cov8" title="1">reasonMsg := fmt.Sprintf(f.err+": %v.", strings.Join(sortReasonsHistogram(), ", ")) 11196 return reasonMsg</span> 11197 } 11198 11199 // FitError describe the reason why task could not fit that node 11200 type FitError struct { 11201 taskNamespace string 11202 taskName string 11203 NodeName string 11204 Reasons []string 11205 } 11206 11207 // NewFitError return FitError by message 11208 func NewFitError(task *TaskInfo, node *NodeInfo, message ...string) *FitError <span class="cov0" title="0">{ 11209 fe := &FitError{ 11210 taskName: task.Name, 11211 taskNamespace: task.Namespace, 11212 NodeName: node.Name, 11213 Reasons: message, 11214 } 11215 return fe 11216 }</span> 11217 11218 // Error returns the final error message 11219 func (f *FitError) Error() string <span class="cov0" title="0">{ 11220 return fmt.Sprintf("task %s/%s on node %s fit failed: %s", f.taskNamespace, f.taskName, f.NodeName, strings.Join(f.Reasons, ", ")) 11221 }</span> 11222 </pre> 11223 11224 <pre class="file" id="file58" style="display: none">/* 11225 Copyright 2021 The Volcano Authors. 11226 11227 Licensed under the Apache License, Version 2.0 (the "License"); 11228 you may not use this file except in compliance with the License. 11229 You may obtain a copy of the License at 11230 11231 http://www.apache.org/licenses/LICENSE-2.0 11232 11233 Unless required by applicable law or agreed to in writing, software 11234 distributed under the License is distributed on an "AS IS" BASIS, 11235 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11236 See the License for the specific language governing permissions and 11237 limitations under the License. 
11238 */ 11239 11240 package cache 11241 11242 import ( 11243 "context" 11244 "fmt" 11245 "os" 11246 "strconv" 11247 "strings" 11248 "sync" 11249 "time" 11250 11251 v1 "k8s.io/api/core/v1" 11252 schedulingv1 "k8s.io/api/scheduling/v1" 11253 apierrors "k8s.io/apimachinery/pkg/api/errors" 11254 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 11255 "k8s.io/apimachinery/pkg/runtime" 11256 utilruntime "k8s.io/apimachinery/pkg/util/runtime" 11257 "k8s.io/apimachinery/pkg/util/wait" 11258 "k8s.io/client-go/informers" 11259 infov1 "k8s.io/client-go/informers/core/v1" 11260 schedv1 "k8s.io/client-go/informers/scheduling/v1" 11261 storagev1 "k8s.io/client-go/informers/storage/v1" 11262 storagev1alpha1 "k8s.io/client-go/informers/storage/v1alpha1" 11263 "k8s.io/client-go/kubernetes" 11264 corev1 "k8s.io/client-go/kubernetes/typed/core/v1" 11265 "k8s.io/client-go/rest" 11266 "k8s.io/client-go/tools/cache" 11267 "k8s.io/client-go/tools/record" 11268 "k8s.io/client-go/util/workqueue" 11269 "k8s.io/klog" 11270 podutil "k8s.io/kubernetes/pkg/api/v1/pod" 11271 volumescheduling "k8s.io/kubernetes/pkg/controller/volume/scheduling" 11272 11273 batch "volcano.sh/apis/pkg/apis/batch/v1alpha1" 11274 "volcano.sh/apis/pkg/apis/scheduling" 11275 schedulingscheme "volcano.sh/apis/pkg/apis/scheduling/scheme" 11276 vcv1beta1 "volcano.sh/apis/pkg/apis/scheduling/v1beta1" 11277 vcclient "volcano.sh/apis/pkg/client/clientset/versioned" 11278 "volcano.sh/apis/pkg/client/clientset/versioned/scheme" 11279 vcinformer "volcano.sh/apis/pkg/client/informers/externalversions" 11280 cpuinformerv1 "volcano.sh/apis/pkg/client/informers/externalversions/nodeinfo/v1alpha1" 11281 vcinformerv1 "volcano.sh/apis/pkg/client/informers/externalversions/scheduling/v1beta1" 11282 "volcano.sh/volcano/cmd/scheduler/app/options" 11283 schedulingapi "volcano.sh/volcano/pkg/scheduler/api" 11284 "volcano.sh/volcano/pkg/scheduler/metrics" 11285 ) 11286 11287 func init() <span class="cov8" title="1">{ 11288 schemeBuilder := 
runtime.SchemeBuilder{ 11289 v1.AddToScheme, 11290 } 11291 11292 utilruntime.Must(schemeBuilder.AddToScheme(scheme.Scheme)) 11293 }</span> 11294 11295 // New returns a Cache implementation. 11296 func New(config *rest.Config, schedulerName string, defaultQueue string, nodeSelectors []string) Cache <span class="cov0" title="0">{ 11297 return newSchedulerCache(config, schedulerName, defaultQueue, nodeSelectors) 11298 }</span> 11299 11300 // SchedulerCache cache for the kube batch 11301 type SchedulerCache struct { 11302 sync.Mutex 11303 11304 kubeClient *kubernetes.Clientset 11305 vcClient *vcclient.Clientset 11306 defaultQueue string 11307 // schedulerName is the name for volcano scheduler 11308 schedulerName string 11309 nodeSelectorLabels map[string]string 11310 11311 podInformer infov1.PodInformer 11312 nodeInformer infov1.NodeInformer 11313 podGroupInformerV1beta1 vcinformerv1.PodGroupInformer 11314 queueInformerV1beta1 vcinformerv1.QueueInformer 11315 pvInformer infov1.PersistentVolumeInformer 11316 pvcInformer infov1.PersistentVolumeClaimInformer 11317 scInformer storagev1.StorageClassInformer 11318 pcInformer schedv1.PriorityClassInformer 11319 quotaInformer infov1.ResourceQuotaInformer 11320 csiNodeInformer storagev1.CSINodeInformer 11321 csiDriverInformer storagev1.CSIDriverInformer 11322 csiStorageCapacityInformer storagev1alpha1.CSIStorageCapacityInformer 11323 cpuInformer cpuinformerv1.NumatopologyInformer 11324 11325 Binder Binder 11326 Evictor Evictor 11327 StatusUpdater StatusUpdater 11328 PodGroupBinder BatchBinder 11329 VolumeBinder VolumeBinder 11330 11331 Recorder record.EventRecorder 11332 11333 Jobs map[schedulingapi.JobID]*schedulingapi.JobInfo 11334 Nodes map[string]*schedulingapi.NodeInfo 11335 Queues map[schedulingapi.QueueID]*schedulingapi.QueueInfo 11336 PriorityClasses map[string]*schedulingv1.PriorityClass 11337 NodeList []string 11338 defaultPriorityClass *schedulingv1.PriorityClass 11339 defaultPriority int32 11340 11341 
NamespaceCollection map[string]*schedulingapi.NamespaceCollection 11342 11343 errTasks workqueue.RateLimitingInterface 11344 deletedJobs workqueue.RateLimitingInterface 11345 11346 informerFactory informers.SharedInformerFactory 11347 vcInformerFactory vcinformer.SharedInformerFactory 11348 11349 BindFlowChannel chan *schedulingapi.TaskInfo 11350 bindCache []*schedulingapi.TaskInfo 11351 batchNum int 11352 } 11353 11354 type defaultBinder struct { 11355 kubeclient *kubernetes.Clientset 11356 } 11357 11358 //Bind will send bind request to api server 11359 func (db *defaultBinder) Bind(kubeClient *kubernetes.Clientset, tasks []*schedulingapi.TaskInfo) (error, []*schedulingapi.TaskInfo) <span class="cov0" title="0">{ 11360 var errTasks []*schedulingapi.TaskInfo 11361 for _, task := range tasks </span><span class="cov0" title="0">{ 11362 p := task.Pod 11363 if err := kubeClient.CoreV1().Pods(p.Namespace).Bind(context.TODO(), 11364 &v1.Binding{ 11365 ObjectMeta: metav1.ObjectMeta{Namespace: p.Namespace, Name: p.Name, UID: p.UID, Annotations: p.Annotations}, 11366 Target: v1.ObjectReference{ 11367 Kind: "Node", 11368 Name: task.NodeName, 11369 }, 11370 }, 11371 metav1.CreateOptions{}); err != nil </span><span class="cov0" title="0">{ 11372 klog.Errorf("Failed to bind pod <%v/%v> to node %s : %#v", p.Namespace, p.Name, task.NodeName, err) 11373 errTasks = append(errTasks, task) 11374 }</span> 11375 } 11376 11377 <span class="cov0" title="0">if len(errTasks) > 0 </span><span class="cov0" title="0">{ 11378 return fmt.Errorf("failed to bind pods"), errTasks 11379 }</span> 11380 11381 <span class="cov0" title="0">return nil, nil</span> 11382 } 11383 11384 func NewBinder() *defaultBinder <span class="cov8" title="1">{ 11385 return &defaultBinder{} 11386 }</span> 11387 11388 type defaultEvictor struct { 11389 kubeclient *kubernetes.Clientset 11390 recorder record.EventRecorder 11391 } 11392 11393 // Evict will send delete pod request to api server 11394 func (de 
*defaultEvictor) Evict(p *v1.Pod, reason string) error <span class="cov0" title="0">{ 11395 klog.V(3).Infof("Evicting pod %v/%v, because of %v", p.Namespace, p.Name, reason) 11396 11397 evictMsg := fmt.Sprintf("Pod is evicted, because of %v", reason) 11398 annotations := map[string]string{} 11399 // record that we are evicting the pod 11400 de.recorder.AnnotatedEventf(p, annotations, v1.EventTypeWarning, "Evict", evictMsg) 11401 11402 pod := p.DeepCopy() 11403 condition := &v1.PodCondition{ 11404 Type: v1.PodReady, 11405 Status: v1.ConditionFalse, 11406 Reason: "Evict", 11407 Message: evictMsg, 11408 } 11409 if !podutil.UpdatePodCondition(&pod.Status, condition) </span><span class="cov0" title="0">{ 11410 klog.V(1).Infof("UpdatePodCondition: existed condition, not update") 11411 klog.V(1).Infof("%+v", pod.Status.Conditions) 11412 return nil 11413 }</span> 11414 <span class="cov0" title="0">if _, err := de.kubeclient.CoreV1().Pods(p.Namespace).UpdateStatus(context.TODO(), pod, metav1.UpdateOptions{}); err != nil </span><span class="cov0" title="0">{ 11415 klog.Errorf("Failed to update pod <%v/%v> status: %v", pod.Namespace, pod.Name, err) 11416 return err 11417 }</span> 11418 <span class="cov0" title="0">if err := de.kubeclient.CoreV1().Pods(p.Namespace).Delete(context.TODO(), p.Name, metav1.DeleteOptions{}); err != nil </span><span class="cov0" title="0">{ 11419 klog.Errorf("Failed to evict pod <%v/%v>: %#v", p.Namespace, p.Name, err) 11420 return err 11421 }</span> 11422 11423 <span class="cov0" title="0">return nil</span> 11424 } 11425 11426 // defaultStatusUpdater is the default implementation of the StatusUpdater interface 11427 type defaultStatusUpdater struct { 11428 kubeclient *kubernetes.Clientset 11429 vcclient *vcclient.Clientset 11430 } 11431 11432 // following the same logic as podutil.UpdatePodCondition 11433 func podConditionHaveUpdate(status *v1.PodStatus, condition *v1.PodCondition) bool <span class="cov0" title="0">{ 11434 lastTransitionTime := 
metav1.Now() 11435 // Try to find this pod condition. 11436 _, oldCondition := podutil.GetPodCondition(status, condition.Type) 11437 11438 if oldCondition == nil </span><span class="cov0" title="0">{ 11439 // We are adding new pod condition. 11440 return true 11441 }</span> 11442 // We are updating an existing condition, so we need to check if it has changed. 11443 <span class="cov0" title="0">if condition.Status == oldCondition.Status </span><span class="cov0" title="0">{ 11444 lastTransitionTime = oldCondition.LastTransitionTime 11445 }</span> 11446 11447 <span class="cov0" title="0">isEqual := condition.Status == oldCondition.Status && 11448 condition.Reason == oldCondition.Reason && 11449 condition.Message == oldCondition.Message && 11450 condition.LastProbeTime.Equal(&oldCondition.LastProbeTime) && 11451 lastTransitionTime.Equal(&oldCondition.LastTransitionTime) 11452 11453 // Return true if one of the fields have changed. 11454 return !isEqual</span> 11455 } 11456 11457 // UpdatePodCondition will Update pod with podCondition 11458 func (su *defaultStatusUpdater) UpdatePodCondition(pod *v1.Pod, condition *v1.PodCondition) (*v1.Pod, error) <span class="cov0" title="0">{ 11459 klog.V(3).Infof("Updating pod condition for %s/%s to (%s==%s)", pod.Namespace, pod.Name, condition.Type, condition.Status) 11460 if podutil.UpdatePodCondition(&pod.Status, condition) </span><span class="cov0" title="0">{ 11461 return su.kubeclient.CoreV1().Pods(pod.Namespace).UpdateStatus(context.TODO(), pod, metav1.UpdateOptions{}) 11462 }</span> 11463 <span class="cov0" title="0">return pod, nil</span> 11464 } 11465 11466 // UpdatePodGroup will Update pod with podCondition 11467 func (su *defaultStatusUpdater) UpdatePodGroup(pg *schedulingapi.PodGroup) (*schedulingapi.PodGroup, error) <span class="cov0" title="0">{ 11468 podgroup := &vcv1beta1.PodGroup{} 11469 if err := schedulingscheme.Scheme.Convert(&pg.PodGroup, podgroup, nil); err != nil </span><span class="cov0" title="0">{ 11470 
klog.Errorf("Error while converting PodGroup to v1alpha1.PodGroup with error: %v", err) 11471 return nil, err 11472 }</span> 11473 11474 <span class="cov0" title="0">updated, err := su.vcclient.SchedulingV1beta1().PodGroups(podgroup.Namespace).Update(context.TODO(), podgroup, metav1.UpdateOptions{}) 11475 if err != nil </span><span class="cov0" title="0">{ 11476 klog.Errorf("Error while updating PodGroup with error: %v", err) 11477 return nil, err 11478 }</span> 11479 11480 <span class="cov0" title="0">podGroupInfo := &schedulingapi.PodGroup{Version: schedulingapi.PodGroupVersionV1Beta1} 11481 if err := schedulingscheme.Scheme.Convert(updated, &podGroupInfo.PodGroup, nil); err != nil </span><span class="cov0" title="0">{ 11482 klog.Errorf("Error while converting v1alpha.PodGroup to api.PodGroup with error: %v", err) 11483 return nil, err 11484 }</span> 11485 11486 <span class="cov0" title="0">return podGroupInfo, nil</span> 11487 } 11488 11489 type defaultVolumeBinder struct { 11490 volumeBinder volumescheduling.SchedulerVolumeBinder 11491 } 11492 11493 // AllocateVolumes allocates volume on the host to the task 11494 func (dvb *defaultVolumeBinder) AllocateVolumes(task *schedulingapi.TaskInfo, hostname string, podVolumes *volumescheduling.PodVolumes) error <span class="cov0" title="0">{ 11495 allBound, err := dvb.volumeBinder.AssumePodVolumes(task.Pod, hostname, podVolumes) 11496 task.VolumeReady = allBound 11497 11498 return err 11499 }</span> 11500 11501 // GetPodVolumes get pod volume on the host 11502 func (dvb *defaultVolumeBinder) GetPodVolumes(task *schedulingapi.TaskInfo, 11503 node *v1.Node) (podVolumes *volumescheduling.PodVolumes, err error) <span class="cov0" title="0">{ 11504 boundClaims, claimsToBind, _, err := dvb.volumeBinder.GetPodVolumes(task.Pod) 11505 if err != nil </span><span class="cov0" title="0">{ 11506 return nil, err 11507 }</span> 11508 11509 <span class="cov0" title="0">podVolumes, _, err = dvb.volumeBinder.FindPodVolumes(task.Pod, 
boundClaims, claimsToBind, node) 11510 return podVolumes, err</span> 11511 } 11512 11513 // BindVolumes binds volumes to the task 11514 func (dvb *defaultVolumeBinder) BindVolumes(task *schedulingapi.TaskInfo, podVolumes *volumescheduling.PodVolumes) error <span class="cov0" title="0">{ 11515 // If task's volumes are ready, did not bind them again. 11516 if task.VolumeReady </span><span class="cov0" title="0">{ 11517 return nil 11518 }</span> 11519 11520 <span class="cov0" title="0">return dvb.volumeBinder.BindPodVolumes(task.Pod, podVolumes)</span> 11521 } 11522 11523 type podgroupBinder struct { 11524 kubeclient *kubernetes.Clientset 11525 vcclient *vcclient.Clientset 11526 } 11527 11528 // Bind will add silo cluster annotaion on pod and podgroup 11529 func (pgb *podgroupBinder) Bind(job *schedulingapi.JobInfo, cluster string) (*schedulingapi.JobInfo, error) <span class="cov0" title="0">{ 11530 if len(job.Tasks) == 0 </span><span class="cov0" title="0">{ 11531 klog.V(4).Infof("Job pods have not been created yet") 11532 return job, nil 11533 }</span> 11534 <span class="cov0" title="0">for _, task := range job.Tasks </span><span class="cov0" title="0">{ 11535 pod := task.Pod 11536 pod.Annotations[batch.ForwardClusterKey] = cluster 11537 pod.ResourceVersion = "" 11538 _, err := pgb.kubeclient.CoreV1().Pods(pod.Namespace).UpdateStatus(context.TODO(), pod, metav1.UpdateOptions{}) 11539 if err != nil </span><span class="cov0" title="0">{ 11540 klog.Errorf("Error while update pod annotation with error: %v", err) 11541 return nil, err 11542 }</span> 11543 } 11544 11545 <span class="cov0" title="0">pg := job.PodGroup 11546 pg.Annotations[batch.ForwardClusterKey] = cluster 11547 podgroup := &vcv1beta1.PodGroup{} 11548 if err := schedulingscheme.Scheme.Convert(&pg.PodGroup, podgroup, nil); err != nil </span><span class="cov0" title="0">{ 11549 klog.Errorf("Error while converting PodGroup to v1alpha1.PodGroup with error: %v", err) 11550 return nil, err 11551 }</span> 11552 
<span class="cov0" title="0">newPg, err := pgb.vcclient.SchedulingV1beta1().PodGroups(pg.Namespace).Update(context.TODO(), podgroup, metav1.UpdateOptions{}) 11553 if err != nil </span><span class="cov0" title="0">{ 11554 klog.Errorf("Error while update PodGroup annotation with error: %v", err) 11555 return nil, err 11556 }</span> 11557 <span class="cov0" title="0">job.PodGroup.ResourceVersion = newPg.ResourceVersion 11558 klog.V(4).Infof("Bind PodGroup <%s> successfully", job.PodGroup.Name) 11559 return job, nil</span> 11560 } 11561 11562 func newSchedulerCache(config *rest.Config, schedulerName string, defaultQueue string, nodeSelectors []string) *SchedulerCache <span class="cov0" title="0">{ 11563 kubeClient, err := kubernetes.NewForConfig(config) 11564 if err != nil </span><span class="cov0" title="0">{ 11565 panic(fmt.Sprintf("failed init kubeClient, with err: %v", err))</span> 11566 } 11567 <span class="cov0" title="0">vcClient, err := vcclient.NewForConfig(config) 11568 if err != nil </span><span class="cov0" title="0">{ 11569 panic(fmt.Sprintf("failed init vcClient, with err: %v", err))</span> 11570 } 11571 <span class="cov0" title="0">eventClient, err := kubernetes.NewForConfig(config) 11572 if err != nil </span><span class="cov0" title="0">{ 11573 panic(fmt.Sprintf("failed init eventClient, with err: %v", err))</span> 11574 } 11575 11576 // create default queue 11577 <span class="cov0" title="0">reclaimable := true 11578 defaultQue := vcv1beta1.Queue{ 11579 ObjectMeta: metav1.ObjectMeta{ 11580 Name: defaultQueue, 11581 }, 11582 Spec: vcv1beta1.QueueSpec{ 11583 Reclaimable: &reclaimable, 11584 Weight: 1, 11585 }, 11586 } 11587 if _, err := vcClient.SchedulingV1beta1().Queues().Create(context.TODO(), &defaultQue, metav1.CreateOptions{}); err != nil && !apierrors.IsAlreadyExists(err) </span><span class="cov0" title="0">{ 11588 panic(fmt.Sprintf("failed init default queue, with err: %v", err))</span> 11589 } 11590 11591 <span class="cov0" title="0">sc := 
&SchedulerCache{ 11592 Jobs: make(map[schedulingapi.JobID]*schedulingapi.JobInfo), 11593 Nodes: make(map[string]*schedulingapi.NodeInfo), 11594 Queues: make(map[schedulingapi.QueueID]*schedulingapi.QueueInfo), 11595 PriorityClasses: make(map[string]*schedulingv1.PriorityClass), 11596 errTasks: workqueue.NewRateLimitingQueue(workqueue.DefaultControllerRateLimiter()), 11597 deletedJobs: workqueue.NewRateLimitingQueue(workqueue.DefaultControllerRateLimiter()), 11598 kubeClient: kubeClient, 11599 vcClient: vcClient, 11600 defaultQueue: defaultQueue, 11601 schedulerName: schedulerName, 11602 nodeSelectorLabels: make(map[string]string), 11603 NamespaceCollection: make(map[string]*schedulingapi.NamespaceCollection), 11604 11605 NodeList: []string{}, 11606 } 11607 if len(nodeSelectors) > 0 </span><span class="cov0" title="0">{ 11608 for _, nodeSelectorLabel := range nodeSelectors </span><span class="cov0" title="0">{ 11609 nodeSelectorLabelLen := len(nodeSelectorLabel) 11610 if nodeSelectorLabelLen <= 0 </span><span class="cov0" title="0">{ 11611 continue</span> 11612 } 11613 // check input 11614 <span class="cov0" title="0">index := strings.Index(nodeSelectorLabel, ":") 11615 if index < 0 || index >= (nodeSelectorLabelLen-1) </span><span class="cov0" title="0">{ 11616 continue</span> 11617 } 11618 <span class="cov0" title="0">nodeSelectorLabelName := strings.TrimSpace(nodeSelectorLabel[:index]) 11619 nodeSelectorLabelValue := strings.TrimSpace(nodeSelectorLabel[index+1:]) 11620 key := nodeSelectorLabelName + ":" + nodeSelectorLabelValue 11621 sc.nodeSelectorLabels[key] = ""</span> 11622 } 11623 11624 } 11625 // Prepare event clients. 
11626 <span class="cov0" title="0">broadcaster := record.NewBroadcaster() 11627 broadcaster.StartRecordingToSink(&corev1.EventSinkImpl{Interface: eventClient.CoreV1().Events("")}) 11628 sc.Recorder = broadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: schedulerName}) 11629 11630 sc.BindFlowChannel = make(chan *schedulingapi.TaskInfo, 5000) 11631 sc.Binder = GetBindMethod() 11632 11633 var batchNum int 11634 batchNum, err = strconv.Atoi(os.Getenv("BATCH_BIND_NUM")) 11635 if err == nil && batchNum > 0 </span><span class="cov0" title="0">{ 11636 sc.batchNum = batchNum 11637 }</span> else<span class="cov0" title="0"> { 11638 sc.batchNum = 1 11639 }</span> 11640 11641 <span class="cov0" title="0">sc.Evictor = &defaultEvictor{ 11642 kubeclient: sc.kubeClient, 11643 recorder: sc.Recorder, 11644 } 11645 11646 sc.StatusUpdater = &defaultStatusUpdater{ 11647 kubeclient: sc.kubeClient, 11648 vcclient: sc.vcClient, 11649 } 11650 11651 sc.PodGroupBinder = &podgroupBinder{ 11652 kubeclient: sc.kubeClient, 11653 vcclient: sc.vcClient, 11654 } 11655 11656 informerFactory := informers.NewSharedInformerFactory(sc.kubeClient, 0) 11657 sc.informerFactory = informerFactory 11658 mySchedulerPodName, c := getMultiSchedulerInfo() 11659 11660 // create informer for node information 11661 sc.nodeInformer = informerFactory.Core().V1().Nodes() 11662 sc.nodeInformer.Informer().AddEventHandlerWithResyncPeriod( 11663 cache.FilteringResourceEventHandler{ 11664 FilterFunc: func(obj interface{}) bool </span><span class="cov0" title="0">{ 11665 node, ok := obj.(*v1.Node) 11666 if !ok </span><span class="cov0" title="0">{ 11667 klog.Errorf("Cannot convert to *v1.Node: %v", obj) 11668 return false 11669 }</span> 11670 <span class="cov0" title="0">if !responsibleForNode(node.Name, mySchedulerPodName, c) </span><span class="cov0" title="0">{ 11671 return false 11672 }</span> 11673 <span class="cov0" title="0">if len(sc.nodeSelectorLabels) == 0 </span><span class="cov0" title="0">{ 11674 
return true 11675 }</span> 11676 <span class="cov0" title="0">for labelName, labelValue := range node.Labels </span><span class="cov0" title="0">{ 11677 key := labelName + ":" + labelValue 11678 if _, ok := sc.nodeSelectorLabels[key]; ok </span><span class="cov0" title="0">{ 11679 return true 11680 }</span> 11681 } 11682 <span class="cov0" title="0">klog.Infof("node %s ignore add/update/delete into schedulerCache", node.Name) 11683 return false</span> 11684 }, 11685 Handler: cache.ResourceEventHandlerFuncs{ 11686 AddFunc: sc.AddNode, 11687 UpdateFunc: sc.UpdateNode, 11688 DeleteFunc: sc.DeleteNode, 11689 }, 11690 }, 11691 0, 11692 ) 11693 11694 <span class="cov0" title="0">sc.podInformer = informerFactory.Core().V1().Pods() 11695 sc.pvcInformer = informerFactory.Core().V1().PersistentVolumeClaims() 11696 sc.pvInformer = informerFactory.Core().V1().PersistentVolumes() 11697 sc.scInformer = informerFactory.Storage().V1().StorageClasses() 11698 sc.csiNodeInformer = informerFactory.Storage().V1().CSINodes() 11699 sc.csiDriverInformer = informerFactory.Storage().V1().CSIDrivers() 11700 sc.csiStorageCapacityInformer = informerFactory.Storage().V1alpha1().CSIStorageCapacities() 11701 11702 var capacityCheck *volumescheduling.CapacityCheck 11703 if options.ServerOpts.EnableCSIStorage </span><span class="cov0" title="0">{ 11704 capacityCheck = &volumescheduling.CapacityCheck{ 11705 CSIDriverInformer: sc.csiDriverInformer, 11706 CSIStorageCapacityInformer: sc.csiStorageCapacityInformer, 11707 } 11708 }</span> else<span class="cov0" title="0"> { 11709 capacityCheck = nil 11710 }</span> 11711 11712 <span class="cov0" title="0">sc.VolumeBinder = &defaultVolumeBinder{ 11713 volumeBinder: volumescheduling.NewVolumeBinder( 11714 sc.kubeClient, 11715 sc.podInformer, 11716 sc.nodeInformer, 11717 sc.csiNodeInformer, 11718 sc.pvcInformer, 11719 sc.pvInformer, 11720 sc.scInformer, 11721 capacityCheck, 11722 30*time.Second, 11723 ), 11724 } 11725 11726 // create informer for pod 
information 11727 sc.podInformer.Informer().AddEventHandler( 11728 cache.FilteringResourceEventHandler{ 11729 FilterFunc: func(obj interface{}) bool </span><span class="cov0" title="0">{ 11730 switch v := obj.(type) </span>{ 11731 case *v1.Pod:<span class="cov0" title="0"> 11732 if !responsibleForPod(v, schedulerName, mySchedulerPodName, c) </span><span class="cov0" title="0">{ 11733 if len(v.Spec.NodeName) == 0 </span><span class="cov0" title="0">{ 11734 return false 11735 }</span> 11736 <span class="cov0" title="0">if !responsibleForNode(v.Spec.NodeName, mySchedulerPodName, c) </span><span class="cov0" title="0">{ 11737 return false 11738 }</span> 11739 } 11740 <span class="cov0" title="0">return true</span> 11741 default:<span class="cov0" title="0"> 11742 return false</span> 11743 } 11744 }, 11745 Handler: cache.ResourceEventHandlerFuncs{ 11746 AddFunc: sc.AddPod, 11747 UpdateFunc: sc.UpdatePod, 11748 DeleteFunc: sc.DeletePod, 11749 }, 11750 }) 11751 11752 <span class="cov0" title="0">if options.ServerOpts.EnablePriorityClass </span><span class="cov0" title="0">{ 11753 sc.pcInformer = informerFactory.Scheduling().V1().PriorityClasses() 11754 sc.pcInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ 11755 AddFunc: sc.AddPriorityClass, 11756 UpdateFunc: sc.UpdatePriorityClass, 11757 DeleteFunc: sc.DeletePriorityClass, 11758 }) 11759 }</span> 11760 11761 <span class="cov0" title="0">sc.quotaInformer = informerFactory.Core().V1().ResourceQuotas() 11762 sc.quotaInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ 11763 AddFunc: sc.AddResourceQuota, 11764 UpdateFunc: sc.UpdateResourceQuota, 11765 DeleteFunc: sc.DeleteResourceQuota, 11766 }) 11767 11768 vcinformers := vcinformer.NewSharedInformerFactory(sc.vcClient, 0) 11769 sc.vcInformerFactory = vcinformers 11770 11771 // create informer for PodGroup(v1beta1) information 11772 sc.podGroupInformerV1beta1 = vcinformers.Scheduling().V1beta1().PodGroups() 11773 
sc.podGroupInformerV1beta1.Informer().AddEventHandler( 11774 cache.FilteringResourceEventHandler{ 11775 FilterFunc: func(obj interface{}) bool </span><span class="cov0" title="0">{ 11776 switch v := obj.(type) </span>{ 11777 case *vcv1beta1.PodGroup:<span class="cov0" title="0"> 11778 return responsibleForPodGroup(v, mySchedulerPodName, c)</span> 11779 default:<span class="cov0" title="0"> 11780 return false</span> 11781 } 11782 }, 11783 Handler: cache.ResourceEventHandlerFuncs{ 11784 AddFunc: sc.AddPodGroupV1beta1, 11785 UpdateFunc: sc.UpdatePodGroupV1beta1, 11786 DeleteFunc: sc.DeletePodGroupV1beta1, 11787 }, 11788 }) 11789 11790 // create informer(v1beta1) for Queue information 11791 <span class="cov0" title="0">sc.queueInformerV1beta1 = vcinformers.Scheduling().V1beta1().Queues() 11792 sc.queueInformerV1beta1.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ 11793 AddFunc: sc.AddQueueV1beta1, 11794 UpdateFunc: sc.UpdateQueueV1beta1, 11795 DeleteFunc: sc.DeleteQueueV1beta1, 11796 }) 11797 11798 sc.cpuInformer = vcinformers.Nodeinfo().V1alpha1().Numatopologies() 11799 sc.cpuInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ 11800 AddFunc: sc.AddNumaInfoV1alpha1, 11801 UpdateFunc: sc.UpdateNumaInfoV1alpha1, 11802 DeleteFunc: sc.DeleteNumaInfoV1alpha1, 11803 }) 11804 return sc</span> 11805 } 11806 11807 // Run starts the schedulerCache 11808 func (sc *SchedulerCache) Run(stopCh <-chan struct{}) <span class="cov0" title="0">{ 11809 sc.informerFactory.Start(stopCh) 11810 sc.vcInformerFactory.Start(stopCh) 11811 // Re-sync error tasks. 11812 go wait.Until(sc.processResyncTask, 0, stopCh) 11813 11814 // Cleanup jobs. 
11815 go wait.Until(sc.processCleanupJob, 0, stopCh) 11816 11817 go wait.Until(sc.processBindTask, time.Millisecond*20, stopCh) 11818 }</span> 11819 11820 // WaitForCacheSync sync the cache with the api server 11821 func (sc *SchedulerCache) WaitForCacheSync(stopCh <-chan struct{}) <span class="cov0" title="0">{ 11822 sc.informerFactory.WaitForCacheSync(stopCh) 11823 sc.vcInformerFactory.WaitForCacheSync(stopCh) 11824 }</span> 11825 11826 // findJobAndTask returns job and the task info 11827 func (sc *SchedulerCache) findJobAndTask(taskInfo *schedulingapi.TaskInfo) (*schedulingapi.JobInfo, *schedulingapi.TaskInfo, error) <span class="cov8" title="1">{ 11828 job, found := sc.Jobs[taskInfo.Job] 11829 if !found </span><span class="cov0" title="0">{ 11830 return nil, nil, fmt.Errorf("failed to find Job %v for Task %v", 11831 taskInfo.Job, taskInfo.UID) 11832 }</span> 11833 11834 <span class="cov8" title="1">task, found := job.Tasks[taskInfo.UID] 11835 if !found </span><span class="cov0" title="0">{ 11836 return nil, nil, fmt.Errorf("failed to find task in status %v by id %v", 11837 taskInfo.Status, taskInfo.UID) 11838 }</span> 11839 11840 <span class="cov8" title="1">return job, task, nil</span> 11841 } 11842 11843 // Evict will evict the pod. 11844 // 11845 // If error occurs both task and job are guaranteed to be in the original state. 
11846 func (sc *SchedulerCache) Evict(taskInfo *schedulingapi.TaskInfo, reason string) error <span class="cov0" title="0">{ 11847 sc.Mutex.Lock() 11848 defer sc.Mutex.Unlock() 11849 11850 job, task, err := sc.findJobAndTask(taskInfo) 11851 11852 if err != nil </span><span class="cov0" title="0">{ 11853 return err 11854 }</span> 11855 11856 <span class="cov0" title="0">node, found := sc.Nodes[task.NodeName] 11857 if !found </span><span class="cov0" title="0">{ 11858 return fmt.Errorf("failed to bind Task %v to host %v, host does not exist", 11859 task.UID, task.NodeName) 11860 }</span> 11861 11862 <span class="cov0" title="0">originalStatus := task.Status 11863 if err := job.UpdateTaskStatus(task, schedulingapi.Releasing); err != nil </span><span class="cov0" title="0">{ 11864 return err 11865 }</span> 11866 11867 // Add new task to node. 11868 <span class="cov0" title="0">if err := node.UpdateTask(task); err != nil </span><span class="cov0" title="0">{ 11869 // After failing to update task to a node we need to revert task status from Releasing, 11870 // otherwise task might be stuck in the Releasing state indefinitely. 
11871 if err := job.UpdateTaskStatus(task, originalStatus); err != nil </span><span class="cov0" title="0">{ 11872 klog.Errorf("Task <%s/%s> will be resynchronized after failing to revert status "+ 11873 "from %s to %s after failing to update Task on Node <%s>: %v", 11874 task.Namespace, task.Name, task.Status, originalStatus, node.Name, err) 11875 sc.resyncTask(task) 11876 }</span> 11877 <span class="cov0" title="0">return err</span> 11878 } 11879 11880 <span class="cov0" title="0">p := task.Pod 11881 11882 go func() </span><span class="cov0" title="0">{ 11883 err := sc.Evictor.Evict(p, reason) 11884 if err != nil </span><span class="cov0" title="0">{ 11885 sc.resyncTask(task) 11886 }</span> 11887 }() 11888 11889 <span class="cov0" title="0">podgroup := &vcv1beta1.PodGroup{} 11890 if err := schedulingscheme.Scheme.Convert(&job.PodGroup.PodGroup, podgroup, nil); err != nil </span><span class="cov0" title="0">{ 11891 klog.Errorf("Error while converting PodGroup to v1alpha1.PodGroup with error: %v", err) 11892 return err 11893 }</span> 11894 <span class="cov0" title="0">sc.Recorder.Eventf(podgroup, v1.EventTypeNormal, "Evict", reason) 11895 return nil</span> 11896 } 11897 11898 // Bind binds task to the target host. 
11899 func (sc *SchedulerCache) Bind(tasks []*schedulingapi.TaskInfo) error <span class="cov0" title="0">{ 11900 go func(taskArray []*schedulingapi.TaskInfo) </span><span class="cov0" title="0">{ 11901 tmp := time.Now() 11902 err, errTasks := sc.Binder.Bind(sc.kubeClient, taskArray) 11903 if err == nil </span><span class="cov0" title="0">{ 11904 klog.V(3).Infof("bind ok, latency %v", time.Since(tmp)) 11905 for _, task := range tasks </span><span class="cov0" title="0">{ 11906 sc.Recorder.Eventf(task.Pod, v1.EventTypeNormal, "Scheduled", "Successfully assigned %v/%v to %v", 11907 task.Namespace, task.Name, task.NodeName) 11908 }</span> 11909 } else<span class="cov0" title="0"> { 11910 for _, task := range errTasks </span><span class="cov0" title="0">{ 11911 klog.V(2).Infof("resyncTask task %s", task.Name) 11912 sc.resyncTask(task) 11913 }</span> 11914 } 11915 }(tasks) 11916 11917 <span class="cov0" title="0">return nil</span> 11918 } 11919 11920 // BindPodGroup binds job to silo cluster 11921 func (sc *SchedulerCache) BindPodGroup(job *schedulingapi.JobInfo, cluster string) error <span class="cov0" title="0">{ 11922 if _, err := sc.PodGroupBinder.Bind(job, cluster); err != nil </span><span class="cov0" title="0">{ 11923 klog.Errorf("Bind job <%s> to cluster <%s> failed: %v", job.Name, cluster, err) 11924 return err 11925 }</span> 11926 <span class="cov0" title="0">return nil</span> 11927 } 11928 11929 // GetPodVolumes get pod volume on the host 11930 func (sc *SchedulerCache) GetPodVolumes(task *schedulingapi.TaskInfo, node *v1.Node) (*volumescheduling.PodVolumes, error) <span class="cov0" title="0">{ 11931 return sc.VolumeBinder.GetPodVolumes(task, node) 11932 }</span> 11933 11934 // AllocateVolumes allocates volume on the host to the task 11935 func (sc *SchedulerCache) AllocateVolumes(task *schedulingapi.TaskInfo, hostname string, podVolumes *volumescheduling.PodVolumes) error <span class="cov0" title="0">{ 11936 return sc.VolumeBinder.AllocateVolumes(task, 
hostname, podVolumes) 11937 }</span> 11938 11939 // BindVolumes binds volumes to the task 11940 func (sc *SchedulerCache) BindVolumes(task *schedulingapi.TaskInfo, podVolumes *volumescheduling.PodVolumes) error <span class="cov0" title="0">{ 11941 return sc.VolumeBinder.BindVolumes(task, podVolumes) 11942 }</span> 11943 11944 // Client returns the kubernetes clientSet 11945 func (sc *SchedulerCache) Client() kubernetes.Interface <span class="cov0" title="0">{ 11946 return sc.kubeClient 11947 }</span> 11948 11949 // SharedInformerFactory returns the scheduler SharedInformerFactory 11950 func (sc *SchedulerCache) SharedInformerFactory() informers.SharedInformerFactory <span class="cov0" title="0">{ 11951 return sc.informerFactory 11952 }</span> 11953 11954 // UpdateSchedulerNumaInfo used to update scheduler node cache NumaSchedulerInfo 11955 func (sc *SchedulerCache) UpdateSchedulerNumaInfo(AllocatedSets map[string]schedulingapi.ResNumaSets) error <span class="cov0" title="0">{ 11956 sc.Mutex.Lock() 11957 defer sc.Mutex.Unlock() 11958 11959 for nodeName, sets := range AllocatedSets </span><span class="cov0" title="0">{ 11960 if _, found := sc.Nodes[nodeName]; !found </span><span class="cov0" title="0">{ 11961 continue</span> 11962 } 11963 11964 <span class="cov0" title="0">numaInfo := sc.Nodes[nodeName].NumaSchedulerInfo 11965 if numaInfo == nil </span><span class="cov0" title="0">{ 11966 continue</span> 11967 } 11968 11969 <span class="cov0" title="0">numaInfo.Allocate(sets)</span> 11970 } 11971 <span class="cov0" title="0">return nil</span> 11972 } 11973 11974 // taskUnschedulable updates pod status of pending task 11975 func (sc *SchedulerCache) taskUnschedulable(task *schedulingapi.TaskInfo, reason, message string) error <span class="cov0" title="0">{ 11976 pod := task.Pod 11977 11978 condition := &v1.PodCondition{ 11979 Type: v1.PodScheduled, 11980 Status: v1.ConditionFalse, 11981 Reason: reason, // Add more reasons in order to distinguish more specific scenario 
of pending tasks 11982 Message: message, 11983 } 11984 11985 if podConditionHaveUpdate(&pod.Status, condition) </span><span class="cov0" title="0">{ 11986 pod = pod.DeepCopy() 11987 11988 // The reason field in 'Events' should be "FailedScheduling", there is not constants defined for this in 11989 // k8s core, so using the same string here. 11990 // The reason field in PodCondition can be "Unschedulable" 11991 sc.Recorder.Eventf(pod, v1.EventTypeWarning, "FailedScheduling", message) 11992 if _, err := sc.StatusUpdater.UpdatePodCondition(pod, condition); err != nil </span><span class="cov0" title="0">{ 11993 return err 11994 }</span> 11995 } else<span class="cov0" title="0"> { 11996 klog.V(4).Infof("task unscheduleable %s/%s, message: %s, skip by no condition update", pod.Namespace, pod.Name, message) 11997 }</span> 11998 11999 <span class="cov0" title="0">return nil</span> 12000 } 12001 12002 func (sc *SchedulerCache) deleteJob(job *schedulingapi.JobInfo) <span class="cov8" title="1">{ 12003 klog.V(3).Infof("Try to delete Job <%v:%v/%v>", job.UID, job.Namespace, job.Name) 12004 12005 sc.deletedJobs.AddRateLimited(job) 12006 }</span> 12007 12008 func (sc *SchedulerCache) processCleanupJob() <span class="cov0" title="0">{ 12009 obj, shutdown := sc.deletedJobs.Get() 12010 if shutdown </span><span class="cov0" title="0">{ 12011 return 12012 }</span> 12013 12014 <span class="cov0" title="0">defer sc.deletedJobs.Done(obj) 12015 12016 job, found := obj.(*schedulingapi.JobInfo) 12017 if !found </span><span class="cov0" title="0">{ 12018 klog.Errorf("Failed to convert <%v> to *JobInfo", obj) 12019 return 12020 }</span> 12021 12022 <span class="cov0" title="0">sc.Mutex.Lock() 12023 defer sc.Mutex.Unlock() 12024 12025 if schedulingapi.JobTerminated(job) </span><span class="cov0" title="0">{ 12026 delete(sc.Jobs, job.UID) 12027 klog.V(3).Infof("Job <%v:%v/%v> was deleted.", job.UID, job.Namespace, job.Name) 12028 }</span> else<span class="cov0" title="0"> { 12029 // Retry 
12030 sc.deleteJob(job) 12031 }</span> 12032 } 12033 12034 func (sc *SchedulerCache) resyncTask(task *schedulingapi.TaskInfo) <span class="cov0" title="0">{ 12035 sc.errTasks.AddRateLimited(task) 12036 }</span> 12037 12038 func (sc *SchedulerCache) processResyncTask() <span class="cov0" title="0">{ 12039 obj, shutdown := sc.errTasks.Get() 12040 if shutdown </span><span class="cov0" title="0">{ 12041 return 12042 }</span> 12043 12044 <span class="cov0" title="0">defer sc.errTasks.Done(obj) 12045 12046 task, ok := obj.(*schedulingapi.TaskInfo) 12047 if !ok </span><span class="cov0" title="0">{ 12048 klog.Errorf("failed to convert %v to *schedulingapi.TaskInfo", obj) 12049 return 12050 }</span> 12051 12052 <span class="cov0" title="0">if err := sc.syncTask(task); err != nil </span><span class="cov0" title="0">{ 12053 klog.Errorf("Failed to sync pod <%v/%v>, retry it.", task.Namespace, task.Name) 12054 sc.resyncTask(task) 12055 }</span> 12056 } 12057 12058 func (sc *SchedulerCache) AddBindTask(taskInfo *schedulingapi.TaskInfo) error <span class="cov8" title="1">{ 12059 klog.V(5).Infof("add bind task %v/%v", taskInfo.Namespace, taskInfo.Name) 12060 sc.Mutex.Lock() 12061 defer sc.Mutex.Unlock() 12062 job, task, err := sc.findJobAndTask(taskInfo) 12063 if err != nil </span><span class="cov0" title="0">{ 12064 return err 12065 }</span> 12066 12067 <span class="cov8" title="1">node, found := sc.Nodes[taskInfo.NodeName] 12068 if !found </span><span class="cov0" title="0">{ 12069 return fmt.Errorf("failed to bind Task %v to host %v, host does not exist", 12070 task.UID, taskInfo.NodeName) 12071 }</span> 12072 12073 <span class="cov8" title="1">originalStatus := task.Status 12074 if err := job.UpdateTaskStatus(task, schedulingapi.Binding); err != nil </span><span class="cov0" title="0">{ 12075 return err 12076 }</span> 12077 12078 // Add task to the node. 
12079 <span class="cov8" title="1">if err := node.AddTask(task); err != nil </span><span class="cov8" title="1">{ 12080 // After failing to update task to a node we need to revert task status from Releasing, 12081 // otherwise task might be stuck in the Releasing state indefinitely. 12082 if err := job.UpdateTaskStatus(task, originalStatus); err != nil </span><span class="cov0" title="0">{ 12083 klog.Errorf("Task <%s/%s> will be resynchronized after failing to revert status "+ 12084 "from %s to %s after failing to update Task on Node <%s>: %v", 12085 task.Namespace, task.Name, task.Status, originalStatus, node.Name, err) 12086 sc.resyncTask(task) 12087 }</span> 12088 <span class="cov8" title="1">return err</span> 12089 } 12090 12091 <span class="cov8" title="1">sc.BindFlowChannel <- taskInfo 12092 12093 return nil</span> 12094 } 12095 12096 func (sc *SchedulerCache) processBindTask() <span class="cov0" title="0">{ 12097 for </span><span class="cov0" title="0">{ 12098 select </span>{ 12099 case taskInfo, ok := <-sc.BindFlowChannel:<span class="cov0" title="0"> 12100 if !ok </span><span class="cov0" title="0">{ 12101 return 12102 }</span> 12103 12104 <span class="cov0" title="0">sc.bindCache = append(sc.bindCache, taskInfo) 12105 if len(sc.bindCache) == sc.batchNum </span><span class="cov0" title="0">{ 12106 sc.BindTask() 12107 }</span> 12108 } 12109 12110 <span class="cov0" title="0">if len(sc.BindFlowChannel) == 0 </span><span class="cov0" title="0">{ 12111 break</span> 12112 } 12113 } 12114 12115 <span class="cov0" title="0">if len(sc.bindCache) == 0 </span><span class="cov0" title="0">{ 12116 return 12117 }</span> 12118 12119 <span class="cov0" title="0">sc.BindTask()</span> 12120 } 12121 12122 func (sc *SchedulerCache) BindTask() <span class="cov0" title="0">{ 12123 klog.V(5).Infof("batch bind task count %d", len(sc.bindCache)) 12124 for _, task := range sc.bindCache </span><span class="cov0" title="0">{ 12125 if err := sc.BindVolumes(task, task.PodVolumes); err 
!= nil </span><span class="cov0" title="0">{ 12126 klog.Errorf("task %s/%s bind Volumes failed: %#v", task.Namespace, task.Name, err) 12127 sc.resyncTask(task) 12128 return 12129 }</span> 12130 } 12131 12132 <span class="cov0" title="0">bindTasks := make([]*schedulingapi.TaskInfo, len(sc.bindCache)) 12133 copy(bindTasks, sc.bindCache) 12134 if err := sc.Bind(bindTasks); err != nil </span><span class="cov0" title="0">{ 12135 return 12136 }</span> 12137 12138 <span class="cov0" title="0">for _, task := range sc.bindCache </span><span class="cov0" title="0">{ 12139 metrics.UpdateTaskScheduleDuration(metrics.Duration(task.Pod.CreationTimestamp.Time)) 12140 }</span> 12141 12142 <span class="cov0" title="0">sc.bindCache = sc.bindCache[0:0] 12143 return</span> 12144 } 12145 12146 // Snapshot returns the complete snapshot of the cluster from cache 12147 func (sc *SchedulerCache) Snapshot() *schedulingapi.ClusterInfo <span class="cov0" title="0">{ 12148 sc.Mutex.Lock() 12149 defer sc.Mutex.Unlock() 12150 12151 snapshot := &schedulingapi.ClusterInfo{ 12152 Nodes: make(map[string]*schedulingapi.NodeInfo), 12153 Jobs: make(map[schedulingapi.JobID]*schedulingapi.JobInfo), 12154 Queues: make(map[schedulingapi.QueueID]*schedulingapi.QueueInfo), 12155 NamespaceInfo: make(map[schedulingapi.NamespaceName]*schedulingapi.NamespaceInfo), 12156 RevocableNodes: make(map[string]*schedulingapi.NodeInfo), 12157 NodeList: make([]string, len(sc.NodeList)), 12158 } 12159 12160 copy(snapshot.NodeList, sc.NodeList) 12161 for _, value := range sc.Nodes </span><span class="cov0" title="0">{ 12162 value.RefreshNumaSchedulerInfoByCrd() 12163 }</span> 12164 12165 <span class="cov0" title="0">for _, value := range sc.Nodes </span><span class="cov0" title="0">{ 12166 if !value.Ready() </span><span class="cov0" title="0">{ 12167 continue</span> 12168 } 12169 12170 <span class="cov0" title="0">snapshot.Nodes[value.Name] = value.Clone() 12171 12172 if value.RevocableZone != "" </span><span class="cov0" 
title="0">{ 12173 snapshot.RevocableNodes[value.Name] = snapshot.Nodes[value.Name] 12174 }</span> 12175 } 12176 12177 <span class="cov0" title="0">for _, value := range sc.Queues </span><span class="cov0" title="0">{ 12178 snapshot.Queues[value.UID] = value.Clone() 12179 }</span> 12180 12181 <span class="cov0" title="0">var cloneJobLock sync.Mutex 12182 var wg sync.WaitGroup 12183 12184 cloneJob := func(value *schedulingapi.JobInfo) </span><span class="cov0" title="0">{ 12185 defer wg.Done() 12186 if value.PodGroup != nil </span><span class="cov0" title="0">{ 12187 value.Priority = sc.defaultPriority 12188 12189 priName := value.PodGroup.Spec.PriorityClassName 12190 if priorityClass, found := sc.PriorityClasses[priName]; found </span><span class="cov0" title="0">{ 12191 value.Priority = priorityClass.Value 12192 }</span> 12193 12194 <span class="cov0" title="0">klog.V(4).Infof("The priority of job <%s/%s> is <%s/%d>", 12195 value.Namespace, value.Name, priName, value.Priority)</span> 12196 } 12197 12198 <span class="cov0" title="0">clonedJob := value.Clone() 12199 12200 cloneJobLock.Lock() 12201 snapshot.Jobs[value.UID] = clonedJob 12202 cloneJobLock.Unlock()</span> 12203 } 12204 12205 <span class="cov0" title="0">for _, value := range sc.NamespaceCollection </span><span class="cov0" title="0">{ 12206 info := value.Snapshot() 12207 snapshot.NamespaceInfo[info.Name] = info 12208 klog.V(4).Infof("Namespace %s has weight %v", 12209 value.Name, info.GetWeight()) 12210 }</span> 12211 12212 <span class="cov0" title="0">for _, value := range sc.Jobs </span><span class="cov0" title="0">{ 12213 // If no scheduling spec, does not handle it. 
12214 if value.PodGroup == nil </span><span class="cov0" title="0">{ 12215 klog.V(4).Infof("The scheduling spec of Job <%v:%s/%s> is nil, ignore it.", 12216 value.UID, value.Namespace, value.Name) 12217 12218 continue</span> 12219 } 12220 12221 <span class="cov0" title="0">if _, found := snapshot.Queues[value.Queue]; !found </span><span class="cov0" title="0">{ 12222 klog.V(3).Infof("The Queue <%v> of Job <%v/%v> does not exist, ignore it.", 12223 value.Queue, value.Namespace, value.Name) 12224 continue</span> 12225 } 12226 12227 <span class="cov0" title="0">wg.Add(1) 12228 go cloneJob(value)</span> 12229 } 12230 <span class="cov0" title="0">wg.Wait() 12231 12232 klog.V(3).Infof("There are <%d> Jobs, <%d> Queues and <%d> Nodes in total for scheduling.", 12233 len(snapshot.Jobs), len(snapshot.Queues), len(snapshot.Nodes)) 12234 12235 return snapshot</span> 12236 } 12237 12238 // String returns information about the cache in a string format 12239 func (sc *SchedulerCache) String() string <span class="cov0" title="0">{ 12240 sc.Mutex.Lock() 12241 defer sc.Mutex.Unlock() 12242 12243 str := "Cache:\n" 12244 12245 if len(sc.Nodes) != 0 </span><span class="cov0" title="0">{ 12246 str += "Nodes:\n" 12247 for _, n := range sc.Nodes </span><span class="cov0" title="0">{ 12248 str += fmt.Sprintf("\t %s: idle(%v) used(%v) allocatable(%v) pods(%d)\n", 12249 n.Name, n.Idle, n.Used, n.Allocatable, len(n.Tasks)) 12250 12251 i := 0 12252 for _, p := range n.Tasks </span><span class="cov0" title="0">{ 12253 str += fmt.Sprintf("\t\t %d: %v\n", i, p) 12254 i++ 12255 }</span> 12256 } 12257 } 12258 12259 <span class="cov0" title="0">if len(sc.Jobs) != 0 </span><span class="cov0" title="0">{ 12260 str += "Jobs:\n" 12261 for _, job := range sc.Jobs </span><span class="cov0" title="0">{ 12262 str += fmt.Sprintf("\t %s\n", job) 12263 }</span> 12264 } 12265 12266 <span class="cov0" title="0">if len(sc.NamespaceCollection) != 0 </span><span class="cov0" title="0">{ 12267 str += 
"Namespaces:\n" 12268 for _, ns := range sc.NamespaceCollection </span><span class="cov0" title="0">{ 12269 info := ns.Snapshot() 12270 str += fmt.Sprintf("\t Namespace(%s) Weight(%v)\n", 12271 info.Name, info.Weight) 12272 }</span> 12273 } 12274 12275 <span class="cov0" title="0">if len(sc.NodeList) != 0 </span><span class="cov0" title="0">{ 12276 str += fmt.Sprintf("NodeList: %v\n", sc.NodeList) 12277 }</span> 12278 12279 <span class="cov0" title="0">return str</span> 12280 } 12281 12282 // RecordJobStatusEvent records related events according to job status. 12283 func (sc *SchedulerCache) RecordJobStatusEvent(job *schedulingapi.JobInfo) <span class="cov0" title="0">{ 12284 pgUnschedulable := job.PodGroup != nil && 12285 (job.PodGroup.Status.Phase == scheduling.PodGroupUnknown || 12286 job.PodGroup.Status.Phase == scheduling.PodGroupPending || 12287 job.PodGroup.Status.Phase == scheduling.PodGroupInqueue) 12288 12289 // If pending or unschedulable, record unschedulable event. 12290 if pgUnschedulable </span><span class="cov0" title="0">{ 12291 msg := fmt.Sprintf("%v/%v tasks in gang unschedulable: %v", 12292 len(job.TaskStatusIndex[schedulingapi.Pending]), 12293 len(job.Tasks), 12294 job.FitError()) 12295 sc.recordPodGroupEvent(job.PodGroup, v1.EventTypeWarning, string(scheduling.PodGroupUnschedulableType), msg) 12296 }</span> else<span class="cov0" title="0"> { 12297 sc.recordPodGroupEvent(job.PodGroup, v1.EventTypeNormal, string(scheduling.PodGroupScheduled), string(scheduling.PodGroupReady)) 12298 }</span> 12299 12300 <span class="cov0" title="0">baseErrorMessage := job.JobFitErrors 12301 if baseErrorMessage == "" </span><span class="cov0" title="0">{ 12302 baseErrorMessage = schedulingapi.AllNodeUnavailableMsg 12303 }</span> 12304 // Update podCondition for tasks Allocated and Pending before job discarded 12305 <span class="cov0" title="0">for _, status := range []schedulingapi.TaskStatus{schedulingapi.Allocated, schedulingapi.Pending, 
schedulingapi.Pipelined} </span><span class="cov0" title="0">{ 12306 for _, taskInfo := range job.TaskStatusIndex[status] </span><span class="cov0" title="0">{ 12307 reason, msg := job.TaskSchedulingReason(taskInfo.UID) 12308 if len(msg) == 0 </span><span class="cov0" title="0">{ 12309 msg = baseErrorMessage 12310 }</span> 12311 <span class="cov0" title="0">if err := sc.taskUnschedulable(taskInfo, reason, msg); err != nil </span><span class="cov0" title="0">{ 12312 klog.Errorf("Failed to update unschedulable task status <%s/%s>: %v", 12313 taskInfo.Namespace, taskInfo.Name, err) 12314 }</span> 12315 } 12316 } 12317 } 12318 12319 // UpdateJobStatus update the status of job and its tasks. 12320 func (sc *SchedulerCache) UpdateJobStatus(job *schedulingapi.JobInfo, updatePG bool) (*schedulingapi.JobInfo, error) <span class="cov0" title="0">{ 12321 if updatePG </span><span class="cov0" title="0">{ 12322 pg, err := sc.StatusUpdater.UpdatePodGroup(job.PodGroup) 12323 if err != nil </span><span class="cov0" title="0">{ 12324 return nil, err 12325 }</span> 12326 <span class="cov0" title="0">job.PodGroup = pg</span> 12327 } 12328 12329 <span class="cov0" title="0">sc.RecordJobStatusEvent(job) 12330 12331 return job, nil</span> 12332 } 12333 12334 func (sc *SchedulerCache) recordPodGroupEvent(podGroup *schedulingapi.PodGroup, eventType, reason, msg string) <span class="cov0" title="0">{ 12335 if podGroup == nil </span><span class="cov0" title="0">{ 12336 return 12337 }</span> 12338 12339 <span class="cov0" title="0">pg := &vcv1beta1.PodGroup{} 12340 if err := schedulingscheme.Scheme.Convert(&podGroup.PodGroup, pg, nil); err != nil </span><span class="cov0" title="0">{ 12341 klog.Errorf("Error while converting PodGroup to v1alpha1.PodGroup with error: %v", err) 12342 return 12343 }</span> 12344 <span class="cov0" title="0">sc.Recorder.Eventf(pg, eventType, reason, msg)</span> 12345 } 12346 </pre> 12347 12348 <pre class="file" id="file59" style="display: none">/* 12349 
Copyright 2017 The Kubernetes Authors. 12350 12351 Licensed under the Apache License, Version 2.0 (the "License"); 12352 you may not use this file except in compliance with the License. 12353 You may obtain a copy of the License at 12354 12355 http://www.apache.org/licenses/LICENSE-2.0 12356 12357 Unless required by applicable law or agreed to in writing, software 12358 distributed under the License is distributed on an "AS IS" BASIS, 12359 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12360 See the License for the specific language governing permissions and 12361 limitations under the License. 12362 */ 12363 12364 package cache 12365 12366 import ( 12367 "context" 12368 "fmt" 12369 "strconv" 12370 12371 v1 "k8s.io/api/core/v1" 12372 schedulingv1 "k8s.io/api/scheduling/v1" 12373 "k8s.io/apimachinery/pkg/api/errors" 12374 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 12375 "k8s.io/client-go/tools/cache" 12376 "k8s.io/klog" 12377 "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/topology" 12378 "k8s.io/kubernetes/pkg/kubelet/cm/cpuset" 12379 12380 nodeinfov1alpha1 "volcano.sh/apis/pkg/apis/nodeinfo/v1alpha1" 12381 "volcano.sh/apis/pkg/apis/scheduling" 12382 "volcano.sh/apis/pkg/apis/scheduling/scheme" 12383 schedulingv1beta1 "volcano.sh/apis/pkg/apis/scheduling/v1beta1" 12384 "volcano.sh/apis/pkg/apis/utils" 12385 schedulingapi "volcano.sh/volcano/pkg/scheduler/api" 12386 ) 12387 12388 func isTerminated(status schedulingapi.TaskStatus) bool <span class="cov8" title="1">{ 12389 return status == schedulingapi.Succeeded || status == schedulingapi.Failed 12390 }</span> 12391 12392 // getOrCreateJob will return corresponding Job for pi if it exists, or it will create a Job and return it if 12393 // pi.Pod.Spec.SchedulerName is same as volcano scheduler's name, otherwise it will return nil. 
12394 func (sc *SchedulerCache) getOrCreateJob(pi *schedulingapi.TaskInfo) *schedulingapi.JobInfo <span class="cov8" title="1">{ 12395 if len(pi.Job) == 0 </span><span class="cov8" title="1">{ 12396 if pi.Pod.Spec.SchedulerName != sc.schedulerName </span><span class="cov8" title="1">{ 12397 klog.V(4).Infof("Pod %s/%s will not scheduled by %s, skip creating PodGroup and Job for it", 12398 pi.Pod.Namespace, pi.Pod.Name, sc.schedulerName) 12399 }</span> 12400 <span class="cov8" title="1">return nil</span> 12401 } 12402 12403 <span class="cov8" title="1">if _, found := sc.Jobs[pi.Job]; !found </span><span class="cov8" title="1">{ 12404 sc.Jobs[pi.Job] = schedulingapi.NewJobInfo(pi.Job) 12405 }</span> 12406 12407 <span class="cov8" title="1">return sc.Jobs[pi.Job]</span> 12408 } 12409 12410 func (sc *SchedulerCache) addTask(pi *schedulingapi.TaskInfo) error <span class="cov8" title="1">{ 12411 if len(pi.NodeName) != 0 </span><span class="cov8" title="1">{ 12412 if _, found := sc.Nodes[pi.NodeName]; !found </span><span class="cov0" title="0">{ 12413 sc.Nodes[pi.NodeName] = schedulingapi.NewNodeInfo(nil) 12414 sc.Nodes[pi.NodeName].Name = pi.NodeName 12415 }</span> 12416 12417 <span class="cov8" title="1">node := sc.Nodes[pi.NodeName] 12418 if !isTerminated(pi.Status) </span><span class="cov8" title="1">{ 12419 if err := node.AddTask(pi); err != nil </span><span class="cov0" title="0">{ 12420 if _, outOfSync := err.(*schedulingapi.AllocateFailError); outOfSync </span><span class="cov0" title="0">{ 12421 node.State = schedulingapi.NodeState{ 12422 Phase: schedulingapi.NotReady, 12423 Reason: "OutOfSync", 12424 } 12425 }</span> 12426 <span class="cov0" title="0">return err</span> 12427 } 12428 } else<span class="cov8" title="1"> { 12429 klog.V(4).Infof("Pod <%v/%v> is in status %s.", pi.Namespace, pi.Name, pi.Status.String()) 12430 }</span> 12431 } 12432 12433 <span class="cov8" title="1">job := sc.getOrCreateJob(pi) 12434 if job != nil </span><span class="cov8" title="1">{ 
12435 job.AddTaskInfo(pi) 12436 }</span> 12437 12438 <span class="cov8" title="1">return nil</span> 12439 } 12440 12441 // Assumes that lock is already acquired. 12442 func (sc *SchedulerCache) addPod(pod *v1.Pod) error <span class="cov8" title="1">{ 12443 pi := schedulingapi.NewTaskInfo(pod) 12444 12445 return sc.addTask(pi) 12446 }</span> 12447 12448 func (sc *SchedulerCache) syncTask(oldTask *schedulingapi.TaskInfo) error <span class="cov0" title="0">{ 12449 newPod, err := sc.kubeClient.CoreV1().Pods(oldTask.Namespace).Get(context.TODO(), oldTask.Name, metav1.GetOptions{}) 12450 if err != nil </span><span class="cov0" title="0">{ 12451 if errors.IsNotFound(err) </span><span class="cov0" title="0">{ 12452 err := sc.deleteTask(oldTask) 12453 if err != nil </span><span class="cov0" title="0">{ 12454 klog.Errorf("Failed to delete Pod <%v/%v> and remove from cache: %s", oldTask.Namespace, oldTask.Name, err.Error()) 12455 return err 12456 }</span> 12457 <span class="cov0" title="0">klog.V(3).Infof("Pod <%v/%v> was deleted, removed from cache.", oldTask.Namespace, oldTask.Name) 12458 12459 return nil</span> 12460 } 12461 <span class="cov0" title="0">return fmt.Errorf("failed to get Pod <%v/%v>: err %v", oldTask.Namespace, oldTask.Name, err)</span> 12462 } 12463 12464 <span class="cov0" title="0">newTask := schedulingapi.NewTaskInfo(newPod) 12465 12466 sc.Mutex.Lock() 12467 defer sc.Mutex.Unlock() 12468 return sc.updateTask(oldTask, newTask)</span> 12469 } 12470 12471 func (sc *SchedulerCache) updateTask(oldTask, newTask *schedulingapi.TaskInfo) error <span class="cov8" title="1">{ 12472 if err := sc.deleteTask(oldTask); err != nil </span><span class="cov0" title="0">{ 12473 klog.Warningf("Failed to delete task: %v", err) 12474 }</span> 12475 12476 <span class="cov8" title="1">return sc.addTask(newTask)</span> 12477 } 12478 12479 // Check the pod allocated status in cache 12480 func (sc *SchedulerCache) allocatedPodInCache(pod *v1.Pod) bool <span class="cov8" 
title="1">{ 12481 pi := schedulingapi.NewTaskInfo(pod) 12482 12483 if job, found := sc.Jobs[pi.Job]; found </span><span class="cov0" title="0">{ 12484 if t, found := job.Tasks[pi.UID]; found </span><span class="cov0" title="0">{ 12485 return schedulingapi.AllocatedStatus(t.Status) 12486 }</span> 12487 } 12488 12489 <span class="cov8" title="1">return false</span> 12490 } 12491 12492 // Assumes that lock is already acquired. 12493 func (sc *SchedulerCache) updatePod(oldPod, newPod *v1.Pod) error <span class="cov8" title="1">{ 12494 //ignore the update event if pod is allocated in cache but not present in NodeName 12495 if sc.allocatedPodInCache(newPod) && newPod.Spec.NodeName == "" </span><span class="cov0" title="0">{ 12496 klog.V(4).Infof("Pod <%s/%v> already in cache with allocated status, ignore the update event", newPod.Namespace, newPod.Name) 12497 return nil 12498 }</span> 12499 12500 <span class="cov8" title="1">if err := sc.deletePod(oldPod); err != nil </span><span class="cov0" title="0">{ 12501 return err 12502 }</span> 12503 //when delete pod, the ownerreference of pod will be set nil,just as orphan pod 12504 <span class="cov8" title="1">if len(utils.GetController(newPod)) == 0 </span><span class="cov0" title="0">{ 12505 newPod.OwnerReferences = oldPod.OwnerReferences 12506 }</span> 12507 <span class="cov8" title="1">return sc.addPod(newPod)</span> 12508 } 12509 12510 func (sc *SchedulerCache) deleteTask(pi *schedulingapi.TaskInfo) error <span class="cov8" title="1">{ 12511 var jobErr, nodeErr, numaErr error 12512 12513 if len(pi.Job) != 0 </span><span class="cov0" title="0">{ 12514 if job, found := sc.Jobs[pi.Job]; found </span><span class="cov0" title="0">{ 12515 jobErr = job.DeleteTaskInfo(pi) 12516 }</span> else<span class="cov0" title="0"> { 12517 jobErr = fmt.Errorf("failed to find Job <%v> for Task %v/%v", 12518 pi.Job, pi.Namespace, pi.Name) 12519 }</span> 12520 } 12521 12522 <span class="cov8" title="1">if len(pi.NodeName) != 0 </span><span 
class="cov8" title="1">{ 12523 node := sc.Nodes[pi.NodeName] 12524 if node != nil </span><span class="cov8" title="1">{ 12525 nodeErr = node.RemoveTask(pi) 12526 }</span> 12527 } 12528 12529 <span class="cov8" title="1">if jobErr != nil || nodeErr != nil </span><span class="cov0" title="0">{ 12530 return schedulingapi.MergeErrors(jobErr, nodeErr, numaErr) 12531 }</span> 12532 12533 <span class="cov8" title="1">return nil</span> 12534 } 12535 12536 // Assumes that lock is already acquired. 12537 func (sc *SchedulerCache) deletePod(pod *v1.Pod) error <span class="cov8" title="1">{ 12538 pi := schedulingapi.NewTaskInfo(pod) 12539 12540 // Delete the Task in cache to handle Binding status. 12541 task := pi 12542 if job, found := sc.Jobs[pi.Job]; found </span><span class="cov0" title="0">{ 12543 if t, found := job.Tasks[pi.UID]; found </span><span class="cov0" title="0">{ 12544 task = t 12545 }</span> 12546 } 12547 <span class="cov8" title="1">if err := sc.deleteTask(task); err != nil </span><span class="cov0" title="0">{ 12548 klog.Warningf("Failed to delete task: %v", err) 12549 }</span> 12550 12551 // If job was terminated, delete it. 
12552 <span class="cov8" title="1">if job, found := sc.Jobs[pi.Job]; found && schedulingapi.JobTerminated(job) </span><span class="cov0" title="0">{ 12553 sc.deleteJob(job) 12554 }</span> 12555 12556 <span class="cov8" title="1">return nil</span> 12557 } 12558 12559 // AddPod add pod to scheduler cache 12560 func (sc *SchedulerCache) AddPod(obj interface{}) <span class="cov8" title="1">{ 12561 pod, ok := obj.(*v1.Pod) 12562 if !ok </span><span class="cov0" title="0">{ 12563 klog.Errorf("Cannot convert to *v1.Pod: %v", obj) 12564 return 12565 }</span> 12566 12567 <span class="cov8" title="1">sc.Mutex.Lock() 12568 defer sc.Mutex.Unlock() 12569 12570 err := sc.addPod(pod) 12571 if err != nil </span><span class="cov0" title="0">{ 12572 klog.Errorf("Failed to add pod <%s/%s> into cache: %v", 12573 pod.Namespace, pod.Name, err) 12574 return 12575 }</span> 12576 <span class="cov8" title="1">klog.V(3).Infof("Added pod <%s/%v> into cache.", pod.Namespace, pod.Name)</span> 12577 } 12578 12579 // UpdatePod update pod to scheduler cache 12580 func (sc *SchedulerCache) UpdatePod(oldObj, newObj interface{}) <span class="cov0" title="0">{ 12581 oldPod, ok := oldObj.(*v1.Pod) 12582 if !ok </span><span class="cov0" title="0">{ 12583 klog.Errorf("Cannot convert oldObj to *v1.Pod: %v", oldObj) 12584 return 12585 }</span> 12586 <span class="cov0" title="0">newPod, ok := newObj.(*v1.Pod) 12587 if !ok </span><span class="cov0" title="0">{ 12588 klog.Errorf("Cannot convert newObj to *v1.Pod: %v", newObj) 12589 return 12590 }</span> 12591 12592 <span class="cov0" title="0">sc.Mutex.Lock() 12593 defer sc.Mutex.Unlock() 12594 12595 err := sc.updatePod(oldPod, newPod) 12596 if err != nil </span><span class="cov0" title="0">{ 12597 klog.Errorf("Failed to update pod %v in cache: %v", oldPod.Name, err) 12598 return 12599 }</span> 12600 12601 <span class="cov0" title="0">klog.V(4).Infof("Updated pod <%s/%v> in cache.", oldPod.Namespace, oldPod.Name)</span> 12602 } 12603 12604 // DeletePod delete 
pod from scheduler cache 12605 func (sc *SchedulerCache) DeletePod(obj interface{}) <span class="cov0" title="0">{ 12606 var pod *v1.Pod 12607 switch t := obj.(type) </span>{ 12608 case *v1.Pod:<span class="cov0" title="0"> 12609 pod = t</span> 12610 case cache.DeletedFinalStateUnknown:<span class="cov0" title="0"> 12611 var ok bool 12612 pod, ok = t.Obj.(*v1.Pod) 12613 if !ok </span><span class="cov0" title="0">{ 12614 klog.Errorf("Cannot convert to *v1.Pod: %v", t.Obj) 12615 return 12616 }</span> 12617 default:<span class="cov0" title="0"> 12618 klog.Errorf("Cannot convert to *v1.Pod: %v", t) 12619 return</span> 12620 } 12621 12622 <span class="cov0" title="0">sc.Mutex.Lock() 12623 defer sc.Mutex.Unlock() 12624 12625 err := sc.deletePod(pod) 12626 if err != nil </span><span class="cov0" title="0">{ 12627 klog.Errorf("Failed to delete pod %v from cache: %v", pod.Name, err) 12628 return 12629 }</span> 12630 12631 <span class="cov0" title="0">klog.V(3).Infof("Deleted pod <%s/%v> from cache.", pod.Namespace, pod.Name)</span> 12632 } 12633 12634 // Assumes that lock is already acquired. 12635 func (sc *SchedulerCache) addNode(node *v1.Node) error <span class="cov8" title="1">{ 12636 if sc.Nodes[node.Name] != nil </span><span class="cov0" title="0">{ 12637 sc.Nodes[node.Name].SetNode(node) 12638 }</span> else<span class="cov8" title="1"> { 12639 sc.Nodes[node.Name] = schedulingapi.NewNodeInfo(node) 12640 }</span> 12641 <span class="cov8" title="1">return nil</span> 12642 } 12643 12644 // Assumes that lock is already acquired. 12645 func (sc *SchedulerCache) updateNode(oldNode, newNode *v1.Node) error <span class="cov0" title="0">{ 12646 if sc.Nodes[newNode.Name] != nil </span><span class="cov0" title="0">{ 12647 sc.Nodes[newNode.Name].SetNode(newNode) 12648 return nil 12649 }</span> 12650 12651 <span class="cov0" title="0">return fmt.Errorf("node <%s> does not exist", newNode.Name)</span> 12652 } 12653 12654 // Assumes that lock is already acquired. 
12655 func (sc *SchedulerCache) deleteNode(node *v1.Node) error <span class="cov8" title="1">{ 12656 if _, ok := sc.Nodes[node.Name]; !ok </span><span class="cov0" title="0">{ 12657 return fmt.Errorf("node <%s> does not exist", node.Name) 12658 }</span> 12659 12660 <span class="cov8" title="1">numaInfo := sc.Nodes[node.Name].NumaInfo 12661 if numaInfo != nil </span><span class="cov0" title="0">{ 12662 klog.V(3).Infof("delete numatopo <%s/%s>", numaInfo.Namespace, numaInfo.Name) 12663 err := sc.vcClient.NodeinfoV1alpha1().Numatopologies().Delete(context.TODO(), numaInfo.Name, metav1.DeleteOptions{}) 12664 if err != nil </span><span class="cov0" title="0">{ 12665 klog.Errorf("delete numatopo <%s/%s> failed.", numaInfo.Namespace, numaInfo.Name) 12666 }</span> 12667 } 12668 12669 <span class="cov8" title="1">delete(sc.Nodes, node.Name) 12670 12671 return nil</span> 12672 } 12673 12674 // AddNode add node to scheduler cache 12675 func (sc *SchedulerCache) AddNode(obj interface{}) <span class="cov8" title="1">{ 12676 node, ok := obj.(*v1.Node) 12677 if !ok </span><span class="cov0" title="0">{ 12678 klog.Errorf("Cannot convert to *v1.Node: %v", obj) 12679 return 12680 }</span> 12681 12682 <span class="cov8" title="1">sc.Mutex.Lock() 12683 defer sc.Mutex.Unlock() 12684 12685 err := sc.addNode(node) 12686 if err != nil </span><span class="cov0" title="0">{ 12687 klog.Errorf("Failed to add node %s into cache: %v", node.Name, err) 12688 return 12689 }</span> 12690 <span class="cov8" title="1">sc.NodeList = append(sc.NodeList, node.Name)</span> 12691 } 12692 12693 // UpdateNode update node to scheduler cache 12694 func (sc *SchedulerCache) UpdateNode(oldObj, newObj interface{}) <span class="cov0" title="0">{ 12695 oldNode, ok := oldObj.(*v1.Node) 12696 if !ok </span><span class="cov0" title="0">{ 12697 klog.Errorf("Cannot convert oldObj to *v1.Node: %v", oldObj) 12698 return 12699 }</span> 12700 <span class="cov0" title="0">newNode, ok := newObj.(*v1.Node) 12701 if !ok 
</span><span class="cov0" title="0">{ 12702 klog.Errorf("Cannot convert newObj to *v1.Node: %v", newObj) 12703 return 12704 }</span> 12705 12706 <span class="cov0" title="0">sc.Mutex.Lock() 12707 defer sc.Mutex.Unlock() 12708 12709 err := sc.updateNode(oldNode, newNode) 12710 if err != nil </span><span class="cov0" title="0">{ 12711 klog.Errorf("Failed to update node %v in cache: %v", oldNode.Name, err) 12712 return 12713 }</span> 12714 } 12715 12716 // DeleteNode delete node from scheduler cache 12717 func (sc *SchedulerCache) DeleteNode(obj interface{}) <span class="cov8" title="1">{ 12718 var node *v1.Node 12719 switch t := obj.(type) </span>{ 12720 case *v1.Node:<span class="cov8" title="1"> 12721 node = t</span> 12722 case cache.DeletedFinalStateUnknown:<span class="cov0" title="0"> 12723 var ok bool 12724 node, ok = t.Obj.(*v1.Node) 12725 if !ok </span><span class="cov0" title="0">{ 12726 klog.Errorf("Cannot convert to *v1.Node: %v", t.Obj) 12727 return 12728 }</span> 12729 default:<span class="cov0" title="0"> 12730 klog.Errorf("Cannot convert to *v1.Node: %v", t) 12731 return</span> 12732 } 12733 12734 <span class="cov8" title="1">sc.Mutex.Lock() 12735 defer sc.Mutex.Unlock() 12736 12737 err := sc.deleteNode(node) 12738 if err != nil </span><span class="cov0" title="0">{ 12739 klog.Errorf("Failed to delete node %s from cache: %v", node.Name, err) 12740 return 12741 }</span> 12742 12743 <span class="cov8" title="1">for i, name := range sc.NodeList </span><span class="cov8" title="1">{ 12744 if name == node.Name </span><span class="cov8" title="1">{ 12745 sc.NodeList = append(sc.NodeList[:i], sc.NodeList[i+1:]...) 12746 break</span> 12747 } 12748 } 12749 } 12750 12751 func getJobID(pg *schedulingapi.PodGroup) schedulingapi.JobID <span class="cov8" title="1">{ 12752 return schedulingapi.JobID(fmt.Sprintf("%s/%s", pg.Namespace, pg.Name)) 12753 }</span> 12754 12755 // Assumes that lock is already acquired. 
12756 func (sc *SchedulerCache) setPodGroup(ss *schedulingapi.PodGroup) error <span class="cov8" title="1">{ 12757 job := getJobID(ss) 12758 if _, found := sc.Jobs[job]; !found </span><span class="cov8" title="1">{ 12759 sc.Jobs[job] = schedulingapi.NewJobInfo(job) 12760 }</span> 12761 12762 <span class="cov8" title="1">sc.Jobs[job].SetPodGroup(ss) 12763 12764 // TODO(k82cn): set default queue in admission. 12765 if len(ss.Spec.Queue) == 0 </span><span class="cov8" title="1">{ 12766 sc.Jobs[job].Queue = schedulingapi.QueueID(sc.defaultQueue) 12767 }</span> 12768 12769 <span class="cov8" title="1">return nil</span> 12770 } 12771 12772 // Assumes that lock is already acquired. 12773 func (sc *SchedulerCache) updatePodGroup(newPodGroup *schedulingapi.PodGroup) error <span class="cov0" title="0">{ 12774 return sc.setPodGroup(newPodGroup) 12775 }</span> 12776 12777 // Assumes that lock is already acquired. 12778 func (sc *SchedulerCache) deletePodGroup(id schedulingapi.JobID) error <span class="cov8" title="1">{ 12779 job, found := sc.Jobs[id] 12780 if !found </span><span class="cov0" title="0">{ 12781 return fmt.Errorf("can not found job %v", id) 12782 }</span> 12783 12784 // Unset SchedulingSpec 12785 <span class="cov8" title="1">job.UnsetPodGroup() 12786 12787 sc.deleteJob(job) 12788 12789 return nil</span> 12790 } 12791 12792 // AddPodGroupV1beta1 add podgroup to scheduler cache 12793 func (sc *SchedulerCache) AddPodGroupV1beta1(obj interface{}) <span class="cov8" title="1">{ 12794 ss, ok := obj.(*schedulingv1beta1.PodGroup) 12795 if !ok </span><span class="cov0" title="0">{ 12796 klog.Errorf("Cannot convert to *schedulingv1beta1.PodGroup: %v", obj) 12797 return 12798 }</span> 12799 12800 <span class="cov8" title="1">podgroup := scheduling.PodGroup{} 12801 if err := scheme.Scheme.Convert(ss, &podgroup, nil); err != nil </span><span class="cov0" title="0">{ 12802 klog.Errorf("Failed to convert podgroup from %T to %T", ss, podgroup) 12803 return 12804 }</span> 12805 
12806 <span class="cov8" title="1">pg := &schedulingapi.PodGroup{PodGroup: podgroup, Version: schedulingapi.PodGroupVersionV1Beta1} 12807 klog.V(4).Infof("Add PodGroup(%s) into cache, spec(%#v)", ss.Name, ss.Spec) 12808 12809 sc.Mutex.Lock() 12810 defer sc.Mutex.Unlock() 12811 12812 if err := sc.setPodGroup(pg); err != nil </span><span class="cov0" title="0">{ 12813 klog.Errorf("Failed to add PodGroup %s into cache: %v", ss.Name, err) 12814 return 12815 }</span> 12816 } 12817 12818 // UpdatePodGroupV1beta1 add podgroup to scheduler cache 12819 func (sc *SchedulerCache) UpdatePodGroupV1beta1(oldObj, newObj interface{}) <span class="cov8" title="1">{ 12820 oldSS, ok := oldObj.(*schedulingv1beta1.PodGroup) 12821 if !ok </span><span class="cov0" title="0">{ 12822 klog.Errorf("Cannot convert oldObj to *schedulingv1beta1.SchedulingSpec: %v", oldObj) 12823 return 12824 }</span> 12825 <span class="cov8" title="1">newSS, ok := newObj.(*schedulingv1beta1.PodGroup) 12826 if !ok </span><span class="cov0" title="0">{ 12827 klog.Errorf("Cannot convert newObj to *schedulingv1beta1.SchedulingSpec: %v", newObj) 12828 return 12829 }</span> 12830 12831 <span class="cov8" title="1">if oldSS.ResourceVersion == newSS.ResourceVersion </span><span class="cov8" title="1">{ 12832 return 12833 }</span> 12834 12835 <span class="cov0" title="0">podgroup := scheduling.PodGroup{} 12836 if err := scheme.Scheme.Convert(newSS, &podgroup, nil); err != nil </span><span class="cov0" title="0">{ 12837 klog.Errorf("Failed to convert podgroup from %T to %T", newSS, podgroup) 12838 return 12839 }</span> 12840 12841 <span class="cov0" title="0">pg := &schedulingapi.PodGroup{PodGroup: podgroup, Version: schedulingapi.PodGroupVersionV1Beta1} 12842 12843 sc.Mutex.Lock() 12844 defer sc.Mutex.Unlock() 12845 12846 if err := sc.updatePodGroup(pg); err != nil </span><span class="cov0" title="0">{ 12847 klog.Errorf("Failed to update SchedulingSpec %s into cache: %v", pg.Name, err) 12848 return 12849 }</span> 12850 
} 12851 12852 // DeletePodGroupV1beta1 delete podgroup from scheduler cache 12853 func (sc *SchedulerCache) DeletePodGroupV1beta1(obj interface{}) <span class="cov8" title="1">{ 12854 var ss *schedulingv1beta1.PodGroup 12855 switch t := obj.(type) </span>{ 12856 case *schedulingv1beta1.PodGroup:<span class="cov8" title="1"> 12857 ss = t</span> 12858 case cache.DeletedFinalStateUnknown:<span class="cov0" title="0"> 12859 var ok bool 12860 ss, ok = t.Obj.(*schedulingv1beta1.PodGroup) 12861 if !ok </span><span class="cov0" title="0">{ 12862 klog.Errorf("Cannot convert to podgroup: %v", t.Obj) 12863 return 12864 }</span> 12865 default:<span class="cov0" title="0"> 12866 klog.Errorf("Cannot convert to podgroup: %v", t) 12867 return</span> 12868 } 12869 12870 <span class="cov8" title="1">jobID := schedulingapi.JobID(fmt.Sprintf("%s/%s", ss.Namespace, ss.Name)) 12871 12872 sc.Mutex.Lock() 12873 defer sc.Mutex.Unlock() 12874 12875 if err := sc.deletePodGroup(jobID); err != nil </span><span class="cov0" title="0">{ 12876 klog.Errorf("Failed to delete podgroup %s from cache: %v", ss.Name, err) 12877 return 12878 }</span> 12879 } 12880 12881 // AddQueueV1beta1 add queue to scheduler cache 12882 func (sc *SchedulerCache) AddQueueV1beta1(obj interface{}) <span class="cov8" title="1">{ 12883 ss, ok := obj.(*schedulingv1beta1.Queue) 12884 if !ok </span><span class="cov0" title="0">{ 12885 klog.Errorf("Cannot convert to *schedulingv1beta1.Queue: %v", obj) 12886 return 12887 }</span> 12888 12889 <span class="cov8" title="1">queue := &scheduling.Queue{} 12890 if err := scheme.Scheme.Convert(ss, queue, nil); err != nil </span><span class="cov0" title="0">{ 12891 klog.Errorf("Failed to convert queue from %T to %T", ss, queue) 12892 return 12893 }</span> 12894 12895 <span class="cov8" title="1">sc.Mutex.Lock() 12896 defer sc.Mutex.Unlock() 12897 12898 klog.V(4).Infof("Add Queue(%s) into cache, spec(%#v)", ss.Name, ss.Spec) 12899 sc.addQueue(queue)</span> 12900 } 12901 12902 // 
UpdateQueueV1beta1 update queue to scheduler cache 12903 func (sc *SchedulerCache) UpdateQueueV1beta1(oldObj, newObj interface{}) <span class="cov8" title="1">{ 12904 oldSS, ok := oldObj.(*schedulingv1beta1.Queue) 12905 if !ok </span><span class="cov0" title="0">{ 12906 klog.Errorf("Cannot convert oldObj to *schedulingv1beta1.Queue: %v", oldObj) 12907 return 12908 }</span> 12909 <span class="cov8" title="1">newSS, ok := newObj.(*schedulingv1beta1.Queue) 12910 if !ok </span><span class="cov0" title="0">{ 12911 klog.Errorf("Cannot convert newObj to *schedulingv1beta1.Queue: %v", newObj) 12912 return 12913 }</span> 12914 12915 <span class="cov8" title="1">if oldSS.ResourceVersion == newSS.ResourceVersion </span><span class="cov8" title="1">{ 12916 return 12917 }</span> 12918 12919 <span class="cov0" title="0">newQueue := &scheduling.Queue{} 12920 if err := scheme.Scheme.Convert(newSS, newQueue, nil); err != nil </span><span class="cov0" title="0">{ 12921 klog.Errorf("Failed to convert queue from %T to %T", newSS, newQueue) 12922 return 12923 }</span> 12924 12925 <span class="cov0" title="0">sc.Mutex.Lock() 12926 defer sc.Mutex.Unlock() 12927 sc.updateQueue(newQueue)</span> 12928 } 12929 12930 // DeleteQueueV1beta1 delete queue from the scheduler cache 12931 func (sc *SchedulerCache) DeleteQueueV1beta1(obj interface{}) <span class="cov8" title="1">{ 12932 var ss *schedulingv1beta1.Queue 12933 switch t := obj.(type) </span>{ 12934 case *schedulingv1beta1.Queue:<span class="cov8" title="1"> 12935 ss = t</span> 12936 case cache.DeletedFinalStateUnknown:<span class="cov0" title="0"> 12937 var ok bool 12938 ss, ok = t.Obj.(*schedulingv1beta1.Queue) 12939 if !ok </span><span class="cov0" title="0">{ 12940 klog.Errorf("Cannot convert to *schedulingv1beta1.Queue: %v", t.Obj) 12941 return 12942 }</span> 12943 default:<span class="cov0" title="0"> 12944 klog.Errorf("Cannot convert to *schedulingv1beta1.Queue: %v", t) 12945 return</span> 12946 } 12947 12948 <span class="cov8" 
title="1">sc.Mutex.Lock() 12949 defer sc.Mutex.Unlock() 12950 sc.deleteQueue(schedulingapi.QueueID(ss.Name))</span> 12951 } 12952 12953 func (sc *SchedulerCache) addQueue(queue *scheduling.Queue) <span class="cov8" title="1">{ 12954 qi := schedulingapi.NewQueueInfo(queue) 12955 sc.Queues[qi.UID] = qi 12956 }</span> 12957 12958 func (sc *SchedulerCache) updateQueue(queue *scheduling.Queue) <span class="cov0" title="0">{ 12959 sc.addQueue(queue) 12960 }</span> 12961 12962 func (sc *SchedulerCache) deleteQueue(id schedulingapi.QueueID) <span class="cov8" title="1">{ 12963 delete(sc.Queues, id) 12964 }</span> 12965 12966 //DeletePriorityClass delete priorityclass from the scheduler cache 12967 func (sc *SchedulerCache) DeletePriorityClass(obj interface{}) <span class="cov0" title="0">{ 12968 var ss *schedulingv1.PriorityClass 12969 switch t := obj.(type) </span>{ 12970 case *schedulingv1.PriorityClass:<span class="cov0" title="0"> 12971 ss = t</span> 12972 case cache.DeletedFinalStateUnknown:<span class="cov0" title="0"> 12973 var ok bool 12974 ss, ok = t.Obj.(*schedulingv1.PriorityClass) 12975 if !ok </span><span class="cov0" title="0">{ 12976 klog.Errorf("Cannot convert to *schedulingv1.PriorityClass: %v", t.Obj) 12977 return 12978 }</span> 12979 default:<span class="cov0" title="0"> 12980 klog.Errorf("Cannot convert to *schedulingv1.PriorityClass: %v", t) 12981 return</span> 12982 } 12983 12984 <span class="cov0" title="0">sc.Mutex.Lock() 12985 defer sc.Mutex.Unlock() 12986 12987 sc.deletePriorityClass(ss)</span> 12988 } 12989 12990 //UpdatePriorityClass update priorityclass to scheduler cache 12991 func (sc *SchedulerCache) UpdatePriorityClass(oldObj, newObj interface{}) <span class="cov0" title="0">{ 12992 oldSS, ok := oldObj.(*schedulingv1.PriorityClass) 12993 if !ok </span><span class="cov0" title="0">{ 12994 klog.Errorf("Cannot convert oldObj to *schedulingv1.PriorityClass: %v", oldObj) 12995 12996 return 12997 }</span> 12998 12999 <span class="cov0" 
title="0">newSS, ok := newObj.(*schedulingv1.PriorityClass) 13000 if !ok </span><span class="cov0" title="0">{ 13001 klog.Errorf("Cannot convert newObj to *schedulingv1.PriorityClass: %v", newObj) 13002 return 13003 }</span> 13004 13005 <span class="cov0" title="0">sc.Mutex.Lock() 13006 defer sc.Mutex.Unlock() 13007 13008 sc.deletePriorityClass(oldSS) 13009 sc.addPriorityClass(newSS)</span> 13010 } 13011 13012 //AddPriorityClass add priorityclass to scheduler cache 13013 func (sc *SchedulerCache) AddPriorityClass(obj interface{}) <span class="cov0" title="0">{ 13014 ss, ok := obj.(*schedulingv1.PriorityClass) 13015 if !ok </span><span class="cov0" title="0">{ 13016 klog.Errorf("Cannot convert to *schedulingv1.PriorityClass: %v", obj) 13017 return 13018 }</span> 13019 13020 <span class="cov0" title="0">sc.Mutex.Lock() 13021 defer sc.Mutex.Unlock() 13022 13023 sc.addPriorityClass(ss)</span> 13024 } 13025 13026 func (sc *SchedulerCache) deletePriorityClass(pc *schedulingv1.PriorityClass) <span class="cov0" title="0">{ 13027 if pc.GlobalDefault </span><span class="cov0" title="0">{ 13028 sc.defaultPriorityClass = nil 13029 sc.defaultPriority = 0 13030 }</span> 13031 13032 <span class="cov0" title="0">delete(sc.PriorityClasses, pc.Name)</span> 13033 } 13034 13035 func (sc *SchedulerCache) addPriorityClass(pc *schedulingv1.PriorityClass) <span class="cov0" title="0">{ 13036 if pc.GlobalDefault </span><span class="cov0" title="0">{ 13037 if sc.defaultPriorityClass != nil </span><span class="cov0" title="0">{ 13038 klog.Errorf("Updated default priority class from <%s> to <%s> forcefully.", 13039 sc.defaultPriorityClass.Name, pc.Name) 13040 }</span> 13041 <span class="cov0" title="0">sc.defaultPriorityClass = pc 13042 sc.defaultPriority = pc.Value</span> 13043 } 13044 13045 <span class="cov0" title="0">sc.PriorityClasses[pc.Name] = pc</span> 13046 } 13047 13048 func (sc *SchedulerCache) updateResourceQuota(quota *v1.ResourceQuota) <span class="cov0" title="0">{ 13049 
collection, ok := sc.NamespaceCollection[quota.Namespace] 13050 if !ok </span><span class="cov0" title="0">{ 13051 collection = schedulingapi.NewNamespaceCollection(quota.Namespace) 13052 sc.NamespaceCollection[quota.Namespace] = collection 13053 }</span> 13054 13055 <span class="cov0" title="0">collection.Update(quota)</span> 13056 } 13057 13058 func (sc *SchedulerCache) deleteResourceQuota(quota *v1.ResourceQuota) <span class="cov0" title="0">{ 13059 collection, ok := sc.NamespaceCollection[quota.Namespace] 13060 if !ok </span><span class="cov0" title="0">{ 13061 return 13062 }</span> 13063 13064 <span class="cov0" title="0">collection.Delete(quota)</span> 13065 } 13066 13067 // DeleteResourceQuota delete ResourceQuota from the scheduler cache 13068 func (sc *SchedulerCache) DeleteResourceQuota(obj interface{}) <span class="cov0" title="0">{ 13069 var r *v1.ResourceQuota 13070 switch t := obj.(type) </span>{ 13071 case *v1.ResourceQuota:<span class="cov0" title="0"> 13072 r = t</span> 13073 case cache.DeletedFinalStateUnknown:<span class="cov0" title="0"> 13074 var ok bool 13075 r, ok = t.Obj.(*v1.ResourceQuota) 13076 if !ok </span><span class="cov0" title="0">{ 13077 klog.Errorf("Cannot convert to *v1.ResourceQuota: %v", t.Obj) 13078 return 13079 }</span> 13080 default:<span class="cov0" title="0"> 13081 klog.Errorf("Cannot convert to *v1.ResourceQuota: %v", t) 13082 return</span> 13083 } 13084 13085 <span class="cov0" title="0">sc.Mutex.Lock() 13086 defer sc.Mutex.Unlock() 13087 13088 klog.V(3).Infof("Delete ResourceQuota <%s/%v> in cache", r.Namespace, r.Name) 13089 sc.deleteResourceQuota(r)</span> 13090 } 13091 13092 // UpdateResourceQuota update ResourceQuota to scheduler cache 13093 func (sc *SchedulerCache) UpdateResourceQuota(oldObj, newObj interface{}) <span class="cov0" title="0">{ 13094 newR, ok := newObj.(*v1.ResourceQuota) 13095 if !ok </span><span class="cov0" title="0">{ 13096 klog.Errorf("Cannot convert newObj to *v1.ResourceQuota: %v", newObj) 
13097 return 13098 }</span> 13099 13100 <span class="cov0" title="0">sc.Mutex.Lock() 13101 defer sc.Mutex.Unlock() 13102 13103 klog.V(3).Infof("Update ResourceQuota <%s/%v> in cache, with spec: %v.", newR.Namespace, newR.Name, newR.Spec.Hard) 13104 sc.updateResourceQuota(newR)</span> 13105 } 13106 13107 // AddResourceQuota add ResourceQuota to scheduler cache 13108 func (sc *SchedulerCache) AddResourceQuota(obj interface{}) <span class="cov0" title="0">{ 13109 var r *v1.ResourceQuota 13110 switch t := obj.(type) </span>{ 13111 case *v1.ResourceQuota:<span class="cov0" title="0"> 13112 r = t</span> 13113 default:<span class="cov0" title="0"> 13114 klog.Errorf("Cannot convert to *v1.ResourceQuota: %v", t) 13115 return</span> 13116 } 13117 13118 <span class="cov0" title="0">sc.Mutex.Lock() 13119 defer sc.Mutex.Unlock() 13120 13121 klog.V(3).Infof("Add ResourceQuota <%s/%v> in cache, with spec: %v.", r.Namespace, r.Name, r.Spec.Hard) 13122 sc.updateResourceQuota(r)</span> 13123 } 13124 13125 func getNumaInfo(srcInfo *nodeinfov1alpha1.Numatopology) *schedulingapi.NumatopoInfo <span class="cov0" title="0">{ 13126 numaInfo := &schedulingapi.NumatopoInfo{ 13127 Namespace: srcInfo.Namespace, 13128 Name: srcInfo.Name, 13129 Policies: make(map[nodeinfov1alpha1.PolicyName]string), 13130 NumaResMap: make(map[string]*schedulingapi.ResourceInfo), 13131 CPUDetail: topology.CPUDetails{}, 13132 ResReserved: make(v1.ResourceList), 13133 } 13134 13135 policies := srcInfo.Spec.Policies 13136 for name, policy := range policies </span><span class="cov0" title="0">{ 13137 numaInfo.Policies[name] = policy 13138 }</span> 13139 13140 <span class="cov0" title="0">numaResMap := srcInfo.Spec.NumaResMap 13141 for name, resInfo := range numaResMap </span><span class="cov0" title="0">{ 13142 tmp := schedulingapi.ResourceInfo{} 13143 tmp.Capacity = resInfo.Capacity 13144 tmp.Allocatable = cpuset.MustParse(resInfo.Allocatable) 13145 numaInfo.NumaResMap[name] = &tmp 13146 }</span> 13147 13148 <span 
class="cov0" title="0">cpuDetail := srcInfo.Spec.CPUDetail 13149 for key, detail := range cpuDetail </span><span class="cov0" title="0">{ 13150 cpuID, _ := strconv.Atoi(key) 13151 numaInfo.CPUDetail[cpuID] = topology.CPUInfo{ 13152 NUMANodeID: detail.NUMANodeID, 13153 SocketID: detail.SocketID, 13154 CoreID: detail.CoreID, 13155 } 13156 }</span> 13157 13158 <span class="cov0" title="0">resReserved, err := schedulingapi.ParseResourceList(srcInfo.Spec.ResReserved) 13159 if err != nil </span><span class="cov0" title="0">{ 13160 klog.Errorf("ParseResourceList failed, err=%v", err) 13161 }</span> else<span class="cov0" title="0"> { 13162 numaInfo.ResReserved = resReserved 13163 }</span> 13164 13165 <span class="cov0" title="0">return numaInfo</span> 13166 } 13167 13168 // Assumes that lock is already acquired. 13169 func (sc *SchedulerCache) addNumaInfo(info *nodeinfov1alpha1.Numatopology) error <span class="cov0" title="0">{ 13170 if sc.Nodes[info.Name] == nil </span><span class="cov0" title="0">{ 13171 sc.Nodes[info.Name] = schedulingapi.NewNodeInfo(nil) 13172 sc.Nodes[info.Name].Name = info.Name 13173 }</span> 13174 13175 <span class="cov0" title="0">if sc.Nodes[info.Name].NumaInfo == nil </span><span class="cov0" title="0">{ 13176 sc.Nodes[info.Name].NumaInfo = getNumaInfo(info) 13177 sc.Nodes[info.Name].NumaChgFlag = schedulingapi.NumaInfoMoreFlag 13178 }</span> else<span class="cov0" title="0"> { 13179 newLocalInfo := getNumaInfo(info) 13180 if sc.Nodes[info.Name].NumaInfo.Compare(newLocalInfo) </span><span class="cov0" title="0">{ 13181 sc.Nodes[info.Name].NumaChgFlag = schedulingapi.NumaInfoMoreFlag 13182 }</span> else<span class="cov0" title="0"> { 13183 sc.Nodes[info.Name].NumaChgFlag = schedulingapi.NumaInfoLessFlag 13184 }</span> 13185 13186 <span class="cov0" title="0">sc.Nodes[info.Name].NumaInfo = newLocalInfo</span> 13187 } 13188 13189 <span class="cov0" title="0">for resName, NumaResInfo := range sc.Nodes[info.Name].NumaInfo.NumaResMap </span><span 
class="cov0" title="0">{ 13190 klog.V(3).Infof("resource %s Allocatable %v on node[%s] into cache", resName, NumaResInfo, info.Name) 13191 }</span> 13192 13193 <span class="cov0" title="0">klog.V(3).Infof("Policies %v on node[%s] into cache, change= %v", 13194 sc.Nodes[info.Name].NumaInfo.Policies, info.Name, sc.Nodes[info.Name].NumaChgFlag) 13195 return nil</span> 13196 } 13197 13198 // Assumes that lock is already acquired. 13199 func (sc *SchedulerCache) deleteNumaInfo(info *nodeinfov1alpha1.Numatopology) <span class="cov0" title="0">{ 13200 if sc.Nodes[info.Name] != nil </span><span class="cov0" title="0">{ 13201 sc.Nodes[info.Name].NumaInfo = nil 13202 sc.Nodes[info.Name].NumaChgFlag = schedulingapi.NumaInfoResetFlag 13203 klog.V(3).Infof("delete numainfo in cahce for node<%s>", info.Name) 13204 }</span> 13205 } 13206 13207 // AddNumaInfoV1alpha1 add numa information to scheduler cache 13208 func (sc *SchedulerCache) AddNumaInfoV1alpha1(obj interface{}) <span class="cov0" title="0">{ 13209 ss, ok := obj.(*nodeinfov1alpha1.Numatopology) 13210 if !ok </span><span class="cov0" title="0">{ 13211 klog.Errorf("Cannot convert oldObj to *nodeinfov1alpha1.Numatopology: %v", obj) 13212 return 13213 }</span> 13214 13215 <span class="cov0" title="0">sc.Mutex.Lock() 13216 defer sc.Mutex.Unlock() 13217 13218 sc.addNumaInfo(ss)</span> 13219 } 13220 13221 // UpdateNumaInfoV1alpha1 update numa information to scheduler cache 13222 func (sc *SchedulerCache) UpdateNumaInfoV1alpha1(oldObj, newObj interface{}) <span class="cov0" title="0">{ 13223 ss, ok := newObj.(*nodeinfov1alpha1.Numatopology) 13224 if !ok </span><span class="cov0" title="0">{ 13225 klog.Errorf("Cannot convert oldObj to *nodeinfov1alpha1.Numatopology: %v", newObj) 13226 return 13227 }</span> 13228 13229 <span class="cov0" title="0">sc.Mutex.Lock() 13230 defer sc.Mutex.Unlock() 13231 sc.addNumaInfo(ss) 13232 klog.V(3).Infof("update numaInfo<%s> in cahce, with spec: Policy: %v, resMap: %v", ss.Name, 
ss.Spec.Policies, ss.Spec.NumaResMap)</span> 13233 } 13234 13235 // DeleteNumaInfoV1alpha1 delete numa information from scheduler cache 13236 func (sc *SchedulerCache) DeleteNumaInfoV1alpha1(obj interface{}) <span class="cov0" title="0">{ 13237 var ss *nodeinfov1alpha1.Numatopology 13238 switch t := obj.(type) </span>{ 13239 case *nodeinfov1alpha1.Numatopology:<span class="cov0" title="0"> 13240 ss = t</span> 13241 case cache.DeletedFinalStateUnknown:<span class="cov0" title="0"> 13242 var ok bool 13243 ss, ok = t.Obj.(*nodeinfov1alpha1.Numatopology) 13244 if !ok </span><span class="cov0" title="0">{ 13245 klog.Errorf("Cannot convert to Numatopo: %v", t.Obj) 13246 return 13247 }</span> 13248 default:<span class="cov0" title="0"> 13249 klog.Errorf("Cannot convert to Numatopo: %v", t) 13250 return</span> 13251 } 13252 13253 <span class="cov0" title="0">sc.Mutex.Lock() 13254 defer sc.Mutex.Unlock() 13255 13256 sc.deleteNumaInfo(ss) 13257 klog.V(3).Infof("Delete numaInfo<%s> from cahce, with spec: Policy: %v, resMap: %v", ss.Name, ss.Spec.Policies, ss.Spec.NumaResMap)</span> 13258 } 13259 13260 // AddJob add job to scheduler cache 13261 func (sc *SchedulerCache) AddJob(obj interface{}) <span class="cov0" title="0">{ 13262 job, ok := obj.(*schedulingapi.JobInfo) 13263 if !ok </span><span class="cov0" title="0">{ 13264 klog.Errorf("Cannot convert to *api.JobInfo: %v", obj) 13265 return 13266 }</span> 13267 <span class="cov0" title="0">sc.Mutex.Lock() 13268 defer sc.Mutex.Unlock() 13269 sc.Jobs[job.UID] = job</span> 13270 } 13271 </pre> 13272 13273 <pre class="file" id="file60" style="display: none">package cache 13274 13275 // bindMethodMap Binder management 13276 var bindMethodMap Binder 13277 13278 // RegisterBindMethod register Bind Method 13279 func RegisterBindMethod(binder Binder) <span class="cov8" title="1">{ 13280 bindMethodMap = binder 13281 }</span> 13282 13283 func GetBindMethod() Binder <span class="cov0" title="0">{ 13284 return bindMethodMap 13285 }</span> 
13286 13287 func init() <span class="cov8" title="1">{ 13288 RegisterBindMethod(NewBinder()) 13289 }</span> 13290 </pre> 13291 13292 <pre class="file" id="file61" style="display: none">/* 13293 Copyright 2021 The Volcano Authors. 13294 13295 Licensed under the Apache License, Version 2.0 (the "License"); 13296 you may not use this file except in compliance with the License. 13297 You may obtain a copy of the License at 13298 13299 http://www.apache.org/licenses/LICENSE-2.0 13300 13301 Unless required by applicable law or agreed to in writing, software 13302 distributed under the License is distributed on an "AS IS" BASIS, 13303 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13304 See the License for the specific language governing permissions and 13305 limitations under the License. 13306 */ 13307 13308 package cache 13309 13310 import ( 13311 "fmt" 13312 "os" 13313 "strconv" 13314 "strings" 13315 13316 v1 "k8s.io/api/core/v1" 13317 "k8s.io/klog" 13318 "stathat.com/c/consistent" 13319 13320 scheduling "volcano.sh/apis/pkg/apis/scheduling/v1beta1" 13321 ) 13322 13323 // responsibleForPod returns false at following conditions: 13324 // 1. The current scheduler is not specified scheduler in Pod's spec. 13325 // 2. 
The Job which the Pod belongs is not assigned to current scheduler based on the hash algorithm in multi-schedulers scenario 13326 func responsibleForPod(pod *v1.Pod, schedulerName string, mySchedulerPodName string, c *consistent.Consistent) bool <span class="cov0" title="0">{ 13327 if schedulerName != pod.Spec.SchedulerName </span><span class="cov0" title="0">{ 13328 return false 13329 }</span> 13330 <span class="cov0" title="0">if c != nil </span><span class="cov0" title="0">{ 13331 var key string 13332 if len(pod.OwnerReferences) != 0 </span><span class="cov0" title="0">{ 13333 key = pod.OwnerReferences[0].Name 13334 }</span> else<span class="cov0" title="0"> { 13335 key = pod.Name 13336 }</span> 13337 <span class="cov0" title="0">schedulerPodName, err := c.Get(key) 13338 if err != nil </span><span class="cov0" title="0">{ 13339 klog.Errorf("Failed to get scheduler by hash algorithm, err: %v", err) 13340 }</span> 13341 <span class="cov0" title="0">if schedulerPodName != mySchedulerPodName </span><span class="cov0" title="0">{ 13342 return false 13343 }</span> 13344 } 13345 13346 <span class="cov0" title="0">klog.V(4).Infof("schedulerPodName %v is responsible to Pod %v/%v", mySchedulerPodName, pod.Namespace, pod.Name) 13347 return true</span> 13348 } 13349 13350 // responsibleForNode returns true if the Node is assigned to current scheduler in multi-scheduler scenario 13351 func responsibleForNode(nodeName string, mySchedulerPodName string, c *consistent.Consistent) bool <span class="cov0" title="0">{ 13352 if c != nil </span><span class="cov0" title="0">{ 13353 schedulerPodName, err := c.Get(nodeName) 13354 if err != nil </span><span class="cov0" title="0">{ 13355 klog.Errorf("Failed to get scheduler by hash algorithm, err: %v", err) 13356 }</span> 13357 <span class="cov0" title="0">if schedulerPodName != mySchedulerPodName </span><span class="cov0" title="0">{ 13358 return false 13359 }</span> 13360 } 13361 13362 <span class="cov0" 
title="0">klog.V(4).Infof("schedulerPodName %v is responsible to Node %v", mySchedulerPodName, nodeName) 13363 return true</span> 13364 } 13365 13366 // responsibleForPodGroup returns true if Job which PodGroup belongs is assigned to current scheduler in multi-schedulers scenario 13367 func responsibleForPodGroup(pg *scheduling.PodGroup, mySchedulerPodName string, c *consistent.Consistent) bool <span class="cov0" title="0">{ 13368 if c != nil </span><span class="cov0" title="0">{ 13369 var key string 13370 if len(pg.OwnerReferences) != 0 </span><span class="cov0" title="0">{ 13371 key = pg.OwnerReferences[0].Name 13372 }</span> else<span class="cov0" title="0"> { 13373 key = pg.Name 13374 }</span> 13375 <span class="cov0" title="0">schedulerPodName, err := c.Get(key) 13376 if err != nil </span><span class="cov0" title="0">{ 13377 klog.Errorf("Failed to get scheduler by hash algorithm, err: %v", err) 13378 }</span> 13379 <span class="cov0" title="0">if schedulerPodName != mySchedulerPodName </span><span class="cov0" title="0">{ 13380 return false 13381 }</span> 13382 } 13383 13384 <span class="cov0" title="0">klog.V(4).Infof("schedulerPodName %v is responsible to PodGroup %v/%v", mySchedulerPodName, pg.Namespace, pg.Name) 13385 return true</span> 13386 } 13387 13388 // getMultiSchedulerInfo return the Pod name of current scheduler and the hash table for all schedulers 13389 func getMultiSchedulerInfo() (schedulerPodName string, c *consistent.Consistent) <span class="cov0" title="0">{ 13390 multiSchedulerEnable := os.Getenv("MULTI_SCHEDULER_ENABLE") 13391 mySchedulerPodName := os.Getenv("SCHEDULER_POD_NAME") 13392 c = nil 13393 if multiSchedulerEnable == "true" </span><span class="cov0" title="0">{ 13394 klog.V(3).Infof("multiSchedulerEnable true") 13395 schedulerNumStr := os.Getenv("SCHEDULER_NUM") 13396 schedulerNum, err := strconv.Atoi(schedulerNumStr) 13397 if err != nil </span><span class="cov0" title="0">{ 13398 schedulerNum = 1 13399 }</span> 13400 <span 
class="cov0" title="0">index := strings.LastIndex(mySchedulerPodName, "-") 13401 baseName := mySchedulerPodName[0:index] 13402 c = consistent.New() 13403 for i := 0; i < schedulerNum; i++ </span><span class="cov0" title="0">{ 13404 name := fmt.Sprintf("%s-%d", baseName, i) 13405 c.Add(name) 13406 }</span> 13407 } 13408 <span class="cov0" title="0">return mySchedulerPodName, c</span> 13409 } 13410 </pre> 13411 13412 <pre class="file" id="file62" style="display: none">/* 13413 Copyright 2019 The Kubernetes Authors. 13414 13415 Licensed under the Apache License, Version 2.0 (the "License"); 13416 you may not use this file except in compliance with the License. 13417 You may obtain a copy of the License at 13418 13419 http://www.apache.org/licenses/LICENSE-2.0 13420 13421 Unless required by applicable law or agreed to in writing, software 13422 distributed under the License is distributed on an "AS IS" BASIS, 13423 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13424 See the License for the specific language governing permissions and 13425 limitations under the License. 
13426 */ 13427 13428 package framework 13429 13430 import ( 13431 "strconv" 13432 13433 "k8s.io/klog" 13434 13435 "volcano.sh/volcano/pkg/scheduler/conf" 13436 ) 13437 13438 // Arguments map 13439 type Arguments map[string]string 13440 13441 // GetInt get the integer value from string 13442 func (a Arguments) GetInt(ptr *int, key string) <span class="cov8" title="1">{ 13443 if ptr == nil </span><span class="cov8" title="1">{ 13444 return 13445 }</span> 13446 13447 <span class="cov8" title="1">argv, ok := a[key] 13448 if !ok || argv == "" </span><span class="cov8" title="1">{ 13449 return 13450 }</span> 13451 13452 <span class="cov8" title="1">value, err := strconv.Atoi(argv) 13453 if err != nil </span><span class="cov8" title="1">{ 13454 klog.Warningf("Could not parse argument: %s for key %s, with err %v", argv, key, err) 13455 return 13456 }</span> 13457 13458 <span class="cov8" title="1">*ptr = value</span> 13459 } 13460 13461 // GetFloat64 get the float64 value from string 13462 func (a Arguments) GetFloat64(ptr *float64, key string) <span class="cov8" title="1">{ 13463 if ptr == nil </span><span class="cov0" title="0">{ 13464 return 13465 }</span> 13466 13467 <span class="cov8" title="1">argv, ok := a[key] 13468 if !ok || len(argv) == 0 </span><span class="cov8" title="1">{ 13469 return 13470 }</span> 13471 13472 <span class="cov8" title="1">value, err := strconv.ParseFloat(argv, 64) 13473 if err != nil </span><span class="cov8" title="1">{ 13474 klog.Warningf("Could not parse argument: %s for key %s, with err %v", argv, key, err) 13475 return 13476 }</span> 13477 13478 <span class="cov8" title="1">*ptr = value</span> 13479 } 13480 13481 // GetBool get the bool value from string 13482 func (a Arguments) GetBool(ptr *bool, key string) <span class="cov0" title="0">{ 13483 if ptr == nil </span><span class="cov0" title="0">{ 13484 return 13485 }</span> 13486 13487 <span class="cov0" title="0">argv, ok := a[key] 13488 if !ok || argv == "" </span><span class="cov0" 
title="0">{ 13489 return 13490 }</span> 13491 13492 <span class="cov0" title="0">value, err := strconv.ParseBool(argv) 13493 if err != nil </span><span class="cov0" title="0">{ 13494 klog.Warningf("Could not parse argument: %s for key %s, with err %v", argv, key, err) 13495 return 13496 }</span> 13497 13498 <span class="cov0" title="0">*ptr = value</span> 13499 } 13500 13501 // GetArgOfActionFromConf return argument of action reading from configuration of schedule 13502 func GetArgOfActionFromConf(configurations []conf.Configuration, actionName string) Arguments <span class="cov8" title="1">{ 13503 for _, c := range configurations </span><span class="cov8" title="1">{ 13504 if c.Name == actionName </span><span class="cov8" title="1">{ 13505 return c.Arguments 13506 }</span> 13507 } 13508 13509 <span class="cov8" title="1">return nil</span> 13510 } 13511 </pre> 13512 13513 <pre class="file" id="file63" style="display: none">/* 13514 Copyright 2018 The Kubernetes Authors. 13515 13516 Licensed under the Apache License, Version 2.0 (the "License"); 13517 you may not use this file except in compliance with the License. 13518 You may obtain a copy of the License at 13519 13520 http://www.apache.org/licenses/LICENSE-2.0 13521 13522 Unless required by applicable law or agreed to in writing, software 13523 distributed under the License is distributed on an "AS IS" BASIS, 13524 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13525 See the License for the specific language governing permissions and 13526 limitations under the License. 
13527 */ 13528 13529 package framework 13530 13531 import ( 13532 "time" 13533 13534 "k8s.io/klog" 13535 13536 "volcano.sh/volcano/pkg/scheduler/cache" 13537 "volcano.sh/volcano/pkg/scheduler/conf" 13538 "volcano.sh/volcano/pkg/scheduler/metrics" 13539 ) 13540 13541 // OpenSession start the session 13542 func OpenSession(cache cache.Cache, tiers []conf.Tier, configurations []conf.Configuration) *Session <span class="cov0" title="0">{ 13543 ssn := openSession(cache) 13544 ssn.Tiers = tiers 13545 ssn.Configurations = configurations 13546 13547 for _, tier := range tiers </span><span class="cov0" title="0">{ 13548 for _, plugin := range tier.Plugins </span><span class="cov0" title="0">{ 13549 if pb, found := GetPluginBuilder(plugin.Name); !found </span><span class="cov0" title="0">{ 13550 klog.Errorf("Failed to get plugin %s.", plugin.Name) 13551 }</span> else<span class="cov0" title="0"> { 13552 plugin := pb(plugin.Arguments) 13553 ssn.plugins[plugin.Name()] = plugin 13554 onSessionOpenStart := time.Now() 13555 plugin.OnSessionOpen(ssn) 13556 metrics.UpdatePluginDuration(plugin.Name(), metrics.OnSessionOpen, metrics.Duration(onSessionOpenStart)) 13557 }</span> 13558 } 13559 } 13560 <span class="cov0" title="0">return ssn</span> 13561 } 13562 13563 // CloseSession close the session 13564 func CloseSession(ssn *Session) <span class="cov0" title="0">{ 13565 for _, plugin := range ssn.plugins </span><span class="cov0" title="0">{ 13566 onSessionCloseStart := time.Now() 13567 plugin.OnSessionClose(ssn) 13568 metrics.UpdatePluginDuration(plugin.Name(), metrics.OnSessionClose, metrics.Duration(onSessionCloseStart)) 13569 }</span> 13570 13571 <span class="cov0" title="0">closeSession(ssn)</span> 13572 } 13573 </pre> 13574 13575 <pre class="file" id="file64" style="display: none">package framework 13576 13577 import ( 13578 "context" 13579 "math/rand" 13580 "reflect" 13581 "time" 13582 13583 "k8s.io/client-go/util/workqueue" 13584 "k8s.io/klog" 13585 13586 
"volcano.sh/apis/pkg/apis/scheduling" 13587 "volcano.sh/volcano/pkg/scheduler/api" 13588 ) 13589 13590 const ( 13591 jobUpdaterWorker = 16 13592 13593 jobConditionUpdateTime = time.Minute 13594 jobConditionUpdateTimeJitter = 30 * time.Second 13595 ) 13596 13597 // TimeJitterAfter means: new after old + duration + jitter 13598 func TimeJitterAfter(new, old time.Time, duration, maxJitter time.Duration) bool <span class="cov0" title="0">{ 13599 var jitter int64 13600 if maxJitter > 0 </span><span class="cov0" title="0">{ 13601 jitter = rand.Int63n(int64(maxJitter)) 13602 }</span> 13603 <span class="cov0" title="0">return new.After(old.Add(duration + time.Duration(jitter)))</span> 13604 } 13605 13606 type jobUpdater struct { 13607 ssn *Session 13608 jobQueue []*api.JobInfo 13609 } 13610 13611 func newJobUpdater(ssn *Session) *jobUpdater <span class="cov0" title="0">{ 13612 queue := make([]*api.JobInfo, 0, len(ssn.Jobs)) 13613 for _, job := range ssn.Jobs </span><span class="cov0" title="0">{ 13614 queue = append(queue, job) 13615 }</span> 13616 13617 <span class="cov0" title="0">ju := &jobUpdater{ 13618 ssn: ssn, 13619 jobQueue: queue, 13620 } 13621 return ju</span> 13622 } 13623 13624 func (ju *jobUpdater) UpdateAll() <span class="cov0" title="0">{ 13625 workqueue.ParallelizeUntil(context.TODO(), jobUpdaterWorker, len(ju.jobQueue), ju.updateJob) 13626 }</span> 13627 13628 func isPodGroupConditionsUpdated(newCondition, oldCondition []scheduling.PodGroupCondition) bool <span class="cov0" title="0">{ 13629 if len(newCondition) != len(oldCondition) </span><span class="cov0" title="0">{ 13630 return true 13631 }</span> 13632 13633 <span class="cov0" title="0">for index, newCond := range newCondition </span><span class="cov0" title="0">{ 13634 oldCond := oldCondition[index] 13635 13636 newTime := newCond.LastTransitionTime 13637 oldTime := oldCond.LastTransitionTime 13638 if TimeJitterAfter(newTime.Time, oldTime.Time, jobConditionUpdateTime, jobConditionUpdateTimeJitter) 
</span><span class="cov0" title="0">{ 13639 return true 13640 }</span> 13641 13642 // if newCond is not new enough, we treat it the same as the old one 13643 <span class="cov0" title="0">newCond.LastTransitionTime = oldTime 13644 13645 // comparing should ignore the TransitionID 13646 newTransitionID := newCond.TransitionID 13647 newCond.TransitionID = oldCond.TransitionID 13648 13649 shouldUpdate := !reflect.DeepEqual(&newCond, &oldCond) 13650 13651 newCond.LastTransitionTime = newTime 13652 newCond.TransitionID = newTransitionID 13653 if shouldUpdate </span><span class="cov0" title="0">{ 13654 return true 13655 }</span> 13656 } 13657 13658 <span class="cov0" title="0">return false</span> 13659 } 13660 13661 func isPodGroupStatusUpdated(newStatus, oldStatus scheduling.PodGroupStatus) bool <span class="cov0" title="0">{ 13662 newCondition := newStatus.Conditions 13663 newStatus.Conditions = nil 13664 oldCondition := oldStatus.Conditions 13665 oldStatus.Conditions = nil 13666 13667 return !reflect.DeepEqual(newStatus, oldStatus) || isPodGroupConditionsUpdated(newCondition, oldCondition) 13668 }</span> 13669 13670 // updateJob update specified job 13671 func (ju *jobUpdater) updateJob(index int) <span class="cov0" title="0">{ 13672 job := ju.jobQueue[index] 13673 ssn := ju.ssn 13674 13675 job.PodGroup.Status = jobStatus(ssn, job) 13676 oldStatus, found := ssn.podGroupStatus[job.UID] 13677 updatePG := !found || isPodGroupStatusUpdated(job.PodGroup.Status, oldStatus) 13678 if _, err := ssn.cache.UpdateJobStatus(job, updatePG); err != nil </span><span class="cov0" title="0">{ 13679 klog.Errorf("Failed to update job <%s/%s>: %v", 13680 job.Namespace, job.Name, err) 13681 }</span> 13682 } 13683 </pre> 13684 13685 <pre class="file" id="file65" style="display: none">/* 13686 Copyright 2018 The Kubernetes Authors. 13687 13688 Licensed under the Apache License, Version 2.0 (the "License"); 13689 you may not use this file except in compliance with the License. 
13690 You may obtain a copy of the License at 13691 13692 http://www.apache.org/licenses/LICENSE-2.0 13693 13694 Unless required by applicable law or agreed to in writing, software 13695 distributed under the License is distributed on an "AS IS" BASIS, 13696 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13697 See the License for the specific language governing permissions and 13698 limitations under the License. 13699 */ 13700 13701 package framework 13702 13703 import ( 13704 "fmt" 13705 "path/filepath" 13706 "plugin" 13707 "strings" 13708 "sync" 13709 13710 "k8s.io/klog" 13711 ) 13712 13713 var pluginMutex sync.Mutex 13714 13715 // PluginBuilder plugin management 13716 type PluginBuilder = func(Arguments) Plugin 13717 13718 // Plugin management 13719 var pluginBuilders = map[string]PluginBuilder{} 13720 13721 // RegisterPluginBuilder register the plugin 13722 func RegisterPluginBuilder(name string, pc PluginBuilder) <span class="cov0" title="0">{ 13723 pluginMutex.Lock() 13724 defer pluginMutex.Unlock() 13725 13726 pluginBuilders[name] = pc 13727 }</span> 13728 13729 // CleanupPluginBuilders cleans up all the plugin 13730 func CleanupPluginBuilders() <span class="cov0" title="0">{ 13731 pluginMutex.Lock() 13732 defer pluginMutex.Unlock() 13733 13734 pluginBuilders = map[string]PluginBuilder{} 13735 }</span> 13736 13737 // GetPluginBuilder get the pluginbuilder by name 13738 func GetPluginBuilder(name string) (PluginBuilder, bool) <span class="cov0" title="0">{ 13739 pluginMutex.Lock() 13740 defer pluginMutex.Unlock() 13741 13742 pb, found := pluginBuilders[name] 13743 return pb, found 13744 }</span> 13745 13746 // LoadCustomPlugins loads custom implement plugins 13747 func LoadCustomPlugins(pluginsDir string) error <span class="cov0" title="0">{ 13748 pluginPaths, _ := filepath.Glob(fmt.Sprintf("%s/*.so", pluginsDir)) 13749 for _, pluginPath := range pluginPaths </span><span class="cov0" title="0">{ 13750 pluginBuilder, err := 
loadPluginBuilder(pluginPath) 13751 if err != nil </span><span class="cov0" title="0">{ 13752 return err 13753 }</span> 13754 <span class="cov0" title="0">pluginName := getPluginName(pluginPath) 13755 RegisterPluginBuilder(pluginName, pluginBuilder) 13756 klog.V(4).Infof("Custom plugin %s loaded", pluginName)</span> 13757 } 13758 13759 <span class="cov0" title="0">return nil</span> 13760 } 13761 13762 func getPluginName(pluginPath string) string <span class="cov8" title="1">{ 13763 return strings.TrimSuffix(filepath.Base(pluginPath), filepath.Ext(pluginPath)) 13764 }</span> 13765 13766 func loadPluginBuilder(pluginPath string) (PluginBuilder, error) <span class="cov0" title="0">{ 13767 plug, err := plugin.Open(pluginPath) 13768 if err != nil </span><span class="cov0" title="0">{ 13769 return nil, err 13770 }</span> 13771 13772 <span class="cov0" title="0">symBuilder, err := plug.Lookup("New") 13773 if err != nil </span><span class="cov0" title="0">{ 13774 return nil, err 13775 }</span> 13776 13777 <span class="cov0" title="0">builder, ok := symBuilder.(PluginBuilder) 13778 if !ok </span><span class="cov0" title="0">{ 13779 return nil, fmt.Errorf("unexpected plugin: %s, failed to convert PluginBuilder `New`", pluginPath) 13780 }</span> 13781 13782 <span class="cov0" title="0">return builder, nil</span> 13783 } 13784 13785 // Action management 13786 var actionMap = map[string]Action{} 13787 13788 // RegisterAction register action 13789 func RegisterAction(act Action) <span class="cov0" title="0">{ 13790 pluginMutex.Lock() 13791 defer pluginMutex.Unlock() 13792 13793 actionMap[act.Name()] = act 13794 }</span> 13795 13796 // GetAction get the action by name 13797 func GetAction(name string) (Action, bool) <span class="cov0" title="0">{ 13798 pluginMutex.Lock() 13799 defer pluginMutex.Unlock() 13800 13801 act, found := actionMap[name] 13802 return act, found 13803 }</span> 13804 </pre> 13805 13806 <pre class="file" id="file66" style="display: none">/* 13807 Copyright 
2018 The Kubernetes Authors. 13808 13809 Licensed under the Apache License, Version 2.0 (the "License"); 13810 you may not use this file except in compliance with the License. 13811 You may obtain a copy of the License at 13812 13813 http://www.apache.org/licenses/LICENSE-2.0 13814 13815 Unless required by applicable law or agreed to in writing, software 13816 distributed under the License is distributed on an "AS IS" BASIS, 13817 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13818 See the License for the specific language governing permissions and 13819 limitations under the License. 13820 */ 13821 13822 package framework 13823 13824 import ( 13825 "fmt" 13826 13827 v1 "k8s.io/api/core/v1" 13828 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 13829 "k8s.io/apimachinery/pkg/types" 13830 "k8s.io/apimachinery/pkg/util/uuid" 13831 "k8s.io/client-go/informers" 13832 "k8s.io/client-go/kubernetes" 13833 "k8s.io/klog" 13834 volumescheduling "k8s.io/kubernetes/pkg/controller/volume/scheduling" 13835 13836 "volcano.sh/apis/pkg/apis/scheduling" 13837 "volcano.sh/volcano/pkg/scheduler/api" 13838 "volcano.sh/volcano/pkg/scheduler/cache" 13839 "volcano.sh/volcano/pkg/scheduler/conf" 13840 "volcano.sh/volcano/pkg/scheduler/metrics" 13841 "volcano.sh/volcano/pkg/scheduler/util" 13842 ) 13843 13844 // Session information for the current session 13845 type Session struct { 13846 UID types.UID 13847 13848 kubeClient kubernetes.Interface 13849 cache cache.Cache 13850 informerFactory informers.SharedInformerFactory 13851 13852 TotalResource *api.Resource 13853 // podGroupStatus cache podgroup status during schedule 13854 // This should not be mutated after initiated 13855 podGroupStatus map[api.JobID]scheduling.PodGroupStatus 13856 13857 Jobs map[api.JobID]*api.JobInfo 13858 Nodes map[string]*api.NodeInfo 13859 RevocableNodes map[string]*api.NodeInfo 13860 Queues map[api.QueueID]*api.QueueInfo 13861 NamespaceInfo map[api.NamespaceName]*api.NamespaceInfo 13862 
13863 Tiers []conf.Tier 13864 Configurations []conf.Configuration 13865 NodeList []*api.NodeInfo 13866 13867 plugins map[string]Plugin 13868 eventHandlers []*EventHandler 13869 jobOrderFns map[string]api.CompareFn 13870 queueOrderFns map[string]api.CompareFn 13871 taskOrderFns map[string]api.CompareFn 13872 namespaceOrderFns map[string]api.CompareFn 13873 clusterOrderFns map[string]api.CompareFn 13874 predicateFns map[string]api.PredicateFn 13875 bestNodeFns map[string]api.BestNodeFn 13876 nodeOrderFns map[string]api.NodeOrderFn 13877 batchNodeOrderFns map[string]api.BatchNodeOrderFn 13878 nodeMapFns map[string]api.NodeMapFn 13879 nodeReduceFns map[string]api.NodeReduceFn 13880 preemptableFns map[string]api.EvictableFn 13881 reclaimableFns map[string]api.EvictableFn 13882 overusedFns map[string]api.ValidateFn 13883 underUsedFns map[string]api.UnderUsedResourceFn 13884 jobReadyFns map[string]api.ValidateFn 13885 jobPipelinedFns map[string]api.VoteFn 13886 jobValidFns map[string]api.ValidateExFn 13887 jobEnqueueableFns map[string]api.VoteFn 13888 jobEnqueuedFns map[string]api.JobEnqueuedFn 13889 targetJobFns map[string]api.TargetJobFn 13890 reservedNodesFns map[string]api.ReservedNodesFn 13891 victimTasksFns map[string]api.VictimTasksFn 13892 jobStarvingFns map[string]api.ValidateFn 13893 } 13894 13895 func openSession(cache cache.Cache) *Session <span class="cov0" title="0">{ 13896 ssn := &Session{ 13897 UID: uuid.NewUUID(), 13898 kubeClient: cache.Client(), 13899 cache: cache, 13900 informerFactory: cache.SharedInformerFactory(), 13901 13902 TotalResource: api.EmptyResource(), 13903 podGroupStatus: map[api.JobID]scheduling.PodGroupStatus{}, 13904 13905 Jobs: map[api.JobID]*api.JobInfo{}, 13906 Nodes: map[string]*api.NodeInfo{}, 13907 RevocableNodes: map[string]*api.NodeInfo{}, 13908 Queues: map[api.QueueID]*api.QueueInfo{}, 13909 13910 plugins: map[string]Plugin{}, 13911 jobOrderFns: map[string]api.CompareFn{}, 13912 queueOrderFns: map[string]api.CompareFn{}, 13913 
taskOrderFns: map[string]api.CompareFn{}, 13914 namespaceOrderFns: map[string]api.CompareFn{}, 13915 clusterOrderFns: map[string]api.CompareFn{}, 13916 predicateFns: map[string]api.PredicateFn{}, 13917 bestNodeFns: map[string]api.BestNodeFn{}, 13918 nodeOrderFns: map[string]api.NodeOrderFn{}, 13919 batchNodeOrderFns: map[string]api.BatchNodeOrderFn{}, 13920 nodeMapFns: map[string]api.NodeMapFn{}, 13921 nodeReduceFns: map[string]api.NodeReduceFn{}, 13922 preemptableFns: map[string]api.EvictableFn{}, 13923 reclaimableFns: map[string]api.EvictableFn{}, 13924 overusedFns: map[string]api.ValidateFn{}, 13925 underUsedFns: map[string]api.UnderUsedResourceFn{}, 13926 jobReadyFns: map[string]api.ValidateFn{}, 13927 jobPipelinedFns: map[string]api.VoteFn{}, 13928 jobValidFns: map[string]api.ValidateExFn{}, 13929 jobEnqueueableFns: map[string]api.VoteFn{}, 13930 jobEnqueuedFns: map[string]api.JobEnqueuedFn{}, 13931 targetJobFns: map[string]api.TargetJobFn{}, 13932 reservedNodesFns: map[string]api.ReservedNodesFn{}, 13933 victimTasksFns: map[string]api.VictimTasksFn{}, 13934 jobStarvingFns: map[string]api.ValidateFn{}, 13935 } 13936 13937 snapshot := cache.Snapshot() 13938 13939 ssn.Jobs = snapshot.Jobs 13940 for _, job := range ssn.Jobs </span><span class="cov0" title="0">{ 13941 // only conditions will be updated periodically 13942 if job.PodGroup != nil && job.PodGroup.Status.Conditions != nil </span><span class="cov0" title="0">{ 13943 ssn.podGroupStatus[job.UID] = *job.PodGroup.Status.DeepCopy() 13944 }</span> 13945 13946 <span class="cov0" title="0">if vjr := ssn.JobValid(job); vjr != nil </span><span class="cov0" title="0">{ 13947 if !vjr.Pass </span><span class="cov0" title="0">{ 13948 jc := &scheduling.PodGroupCondition{ 13949 Type: scheduling.PodGroupUnschedulableType, 13950 Status: v1.ConditionTrue, 13951 LastTransitionTime: metav1.Now(), 13952 TransitionID: string(ssn.UID), 13953 Reason: vjr.Reason, 13954 Message: vjr.Message, 13955 } 13956 13957 if err := 
ssn.UpdatePodGroupCondition(job, jc); err != nil </span><span class="cov0" title="0">{ 13958 klog.Errorf("Failed to update job condition: %v", err) 13959 }</span> 13960 } 13961 13962 <span class="cov0" title="0">delete(ssn.Jobs, job.UID)</span> 13963 } 13964 } 13965 <span class="cov0" title="0">ssn.NodeList = util.GetNodeList(snapshot.Nodes, snapshot.NodeList) 13966 ssn.Nodes = snapshot.Nodes 13967 ssn.RevocableNodes = snapshot.RevocableNodes 13968 ssn.Queues = snapshot.Queues 13969 ssn.NamespaceInfo = snapshot.NamespaceInfo 13970 // calculate all nodes' resource only once in each schedule cycle, other plugins can clone it when need 13971 for _, n := range ssn.Nodes </span><span class="cov0" title="0">{ 13972 ssn.TotalResource.Add(n.Allocatable) 13973 }</span> 13974 13975 <span class="cov0" title="0">klog.V(3).Infof("Open Session %v with <%d> Job and <%d> Queues", 13976 ssn.UID, len(ssn.Jobs), len(ssn.Queues)) 13977 13978 return ssn</span> 13979 } 13980 13981 func closeSession(ssn *Session) <span class="cov0" title="0">{ 13982 ju := newJobUpdater(ssn) 13983 ju.UpdateAll() 13984 13985 ssn.Jobs = nil 13986 ssn.Nodes = nil 13987 ssn.RevocableNodes = nil 13988 ssn.plugins = nil 13989 ssn.eventHandlers = nil 13990 ssn.jobOrderFns = nil 13991 ssn.namespaceOrderFns = nil 13992 ssn.queueOrderFns = nil 13993 ssn.clusterOrderFns = nil 13994 ssn.NodeList = nil 13995 ssn.TotalResource = nil 13996 13997 klog.V(3).Infof("Close Session %v", ssn.UID) 13998 }</span> 13999 14000 func jobStatus(ssn *Session, jobInfo *api.JobInfo) scheduling.PodGroupStatus <span class="cov0" title="0">{ 14001 status := jobInfo.PodGroup.Status 14002 14003 unschedulable := false 14004 for _, c := range status.Conditions </span><span class="cov0" title="0">{ 14005 if c.Type == scheduling.PodGroupUnschedulableType && 14006 c.Status == v1.ConditionTrue && 14007 c.TransitionID == string(ssn.UID) </span><span class="cov0" title="0">{ 14008 unschedulable = true 14009 break</span> 14010 } 14011 } 14012 14013 
// If running tasks && unschedulable, unknown phase 14014 <span class="cov0" title="0">if len(jobInfo.TaskStatusIndex[api.Running]) != 0 && unschedulable </span><span class="cov0" title="0">{ 14015 status.Phase = scheduling.PodGroupUnknown 14016 }</span> else<span class="cov0" title="0"> { 14017 allocated := 0 14018 for status, tasks := range jobInfo.TaskStatusIndex </span><span class="cov0" title="0">{ 14019 if api.AllocatedStatus(status) || status == api.Succeeded </span><span class="cov0" title="0">{ 14020 allocated += len(tasks) 14021 }</span> 14022 } 14023 14024 // If there're enough allocated resource, it's running 14025 <span class="cov0" title="0">if int32(allocated) >= jobInfo.PodGroup.Spec.MinMember </span><span class="cov0" title="0">{ 14026 status.Phase = scheduling.PodGroupRunning 14027 }</span> else<span class="cov0" title="0"> if jobInfo.PodGroup.Status.Phase != scheduling.PodGroupInqueue </span><span class="cov0" title="0">{ 14028 status.Phase = scheduling.PodGroupPending 14029 }</span> 14030 } 14031 14032 <span class="cov0" title="0">status.Running = int32(len(jobInfo.TaskStatusIndex[api.Running])) 14033 status.Failed = int32(len(jobInfo.TaskStatusIndex[api.Failed])) 14034 status.Succeeded = int32(len(jobInfo.TaskStatusIndex[api.Succeeded])) 14035 14036 return status</span> 14037 } 14038 14039 // Statement returns new statement object 14040 func (ssn *Session) Statement() *Statement <span class="cov0" title="0">{ 14041 return &Statement{ 14042 ssn: ssn, 14043 } 14044 }</span> 14045 14046 // Pipeline the task to the node in the session 14047 func (ssn *Session) Pipeline(task *api.TaskInfo, hostname string) error <span class="cov0" title="0">{ 14048 // Only update status in session 14049 job, found := ssn.Jobs[task.Job] 14050 if found </span><span class="cov0" title="0">{ 14051 if err := job.UpdateTaskStatus(task, api.Pipelined); err != nil </span><span class="cov0" title="0">{ 14052 klog.Errorf("Failed to update task <%v/%v> status to %v in Session 
<%v>: %v", 14053 task.Namespace, task.Name, api.Pipelined, ssn.UID, err) 14054 return err 14055 }</span> 14056 } else<span class="cov0" title="0"> { 14057 klog.Errorf("Failed to find Job <%s> in Session <%s> index when binding.", 14058 task.Job, ssn.UID) 14059 return fmt.Errorf("failed to find job %s when binding", task.Job) 14060 }</span> 14061 14062 <span class="cov0" title="0">task.NodeName = hostname 14063 14064 if node, found := ssn.Nodes[hostname]; found </span><span class="cov0" title="0">{ 14065 if err := node.AddTask(task); err != nil </span><span class="cov0" title="0">{ 14066 klog.Errorf("Failed to add task <%v/%v> to node <%v> in Session <%v>: %v", 14067 task.Namespace, task.Name, hostname, ssn.UID, err) 14068 return err 14069 }</span> 14070 <span class="cov0" title="0">klog.V(3).Infof("After added Task <%v/%v> to Node <%v>: idle <%v>, used <%v>, releasing <%v>", 14071 task.Namespace, task.Name, node.Name, node.Idle, node.Used, node.Releasing)</span> 14072 } else<span class="cov0" title="0"> { 14073 klog.Errorf("Failed to find Node <%s> in Session <%s> index when binding.", 14074 hostname, ssn.UID) 14075 return fmt.Errorf("failed to find node %s", hostname) 14076 }</span> 14077 14078 <span class="cov0" title="0">for _, eh := range ssn.eventHandlers </span><span class="cov0" title="0">{ 14079 if eh.AllocateFunc != nil </span><span class="cov0" title="0">{ 14080 eh.AllocateFunc(&Event{ 14081 Task: task, 14082 }) 14083 }</span> 14084 } 14085 14086 <span class="cov0" title="0">return nil</span> 14087 } 14088 14089 //Allocate the task to the node in the session 14090 func (ssn *Session) Allocate(task *api.TaskInfo, nodeInfo *api.NodeInfo) error <span class="cov0" title="0">{ 14091 podVolumes, err := ssn.cache.GetPodVolumes(task, nodeInfo.Node) 14092 if err != nil </span><span class="cov0" title="0">{ 14093 return err 14094 }</span> 14095 14096 <span class="cov0" title="0">hostname := nodeInfo.Name 14097 if err := ssn.cache.AllocateVolumes(task, hostname, 
podVolumes); err != nil </span><span class="cov0" title="0">{ 14098 return err 14099 }</span> 14100 14101 <span class="cov0" title="0">task.Pod.Spec.NodeName = hostname 14102 task.PodVolumes = podVolumes 14103 14104 // Only update status in session 14105 job, found := ssn.Jobs[task.Job] 14106 if found </span><span class="cov0" title="0">{ 14107 if err := job.UpdateTaskStatus(task, api.Allocated); err != nil </span><span class="cov0" title="0">{ 14108 klog.Errorf("Failed to update task <%v/%v> status to %v in Session <%v>: %v", 14109 task.Namespace, task.Name, api.Allocated, ssn.UID, err) 14110 return err 14111 }</span> 14112 } else<span class="cov0" title="0"> { 14113 klog.Errorf("Failed to find Job <%s> in Session <%s> index when binding.", 14114 task.Job, ssn.UID) 14115 return fmt.Errorf("failed to find job %s", task.Job) 14116 }</span> 14117 14118 <span class="cov0" title="0">task.NodeName = hostname 14119 14120 if node, found := ssn.Nodes[hostname]; found </span><span class="cov0" title="0">{ 14121 if err := node.AddTask(task); err != nil </span><span class="cov0" title="0">{ 14122 klog.Errorf("Failed to add task <%v/%v> to node <%v> in Session <%v>: %v", 14123 task.Namespace, task.Name, hostname, ssn.UID, err) 14124 return err 14125 }</span> 14126 <span class="cov0" title="0">klog.V(3).Infof("After allocated Task <%v/%v> to Node <%v>: idle <%v>, used <%v>, releasing <%v>", 14127 task.Namespace, task.Name, node.Name, node.Idle, node.Used, node.Releasing)</span> 14128 } else<span class="cov0" title="0"> { 14129 klog.Errorf("Failed to find Node <%s> in Session <%s> index when binding.", 14130 hostname, ssn.UID) 14131 return fmt.Errorf("failed to find node %s", hostname) 14132 }</span> 14133 14134 // Callbacks 14135 <span class="cov0" title="0">for _, eh := range ssn.eventHandlers </span><span class="cov0" title="0">{ 14136 if eh.AllocateFunc != nil </span><span class="cov0" title="0">{ 14137 eh.AllocateFunc(&Event{ 14138 Task: task, 14139 }) 14140 }</span> 14141 
} 14142 14143 <span class="cov0" title="0">if ssn.JobReady(job) </span><span class="cov0" title="0">{ 14144 for _, task := range job.TaskStatusIndex[api.Allocated] </span><span class="cov0" title="0">{ 14145 if err := ssn.dispatch(task, podVolumes); err != nil </span><span class="cov0" title="0">{ 14146 klog.Errorf("Failed to dispatch task <%v/%v>: %v", 14147 task.Namespace, task.Name, err) 14148 return err 14149 }</span> 14150 } 14151 } 14152 14153 <span class="cov0" title="0">return nil</span> 14154 } 14155 14156 func (ssn *Session) dispatch(task *api.TaskInfo, volumes *volumescheduling.PodVolumes) error <span class="cov0" title="0">{ 14157 if err := ssn.cache.AddBindTask(task); err != nil </span><span class="cov0" title="0">{ 14158 return err 14159 }</span> 14160 14161 // Update status in session 14162 <span class="cov0" title="0">if job, found := ssn.Jobs[task.Job]; found </span><span class="cov0" title="0">{ 14163 if err := job.UpdateTaskStatus(task, api.Binding); err != nil </span><span class="cov0" title="0">{ 14164 klog.Errorf("Failed to update task <%v/%v> status to %v in Session <%v>: %v", 14165 task.Namespace, task.Name, api.Binding, ssn.UID, err) 14166 return err 14167 }</span> 14168 } else<span class="cov0" title="0"> { 14169 klog.Errorf("Failed to find Job <%s> in Session <%s> index when binding.", 14170 task.Job, ssn.UID) 14171 return fmt.Errorf("failed to find job %s", task.Job) 14172 }</span> 14173 14174 <span class="cov0" title="0">metrics.UpdateTaskScheduleDuration(metrics.Duration(task.Pod.CreationTimestamp.Time)) 14175 return nil</span> 14176 } 14177 14178 //Evict the task in the session 14179 func (ssn *Session) Evict(reclaimee *api.TaskInfo, reason string) error <span class="cov0" title="0">{ 14180 if err := ssn.cache.Evict(reclaimee, reason); err != nil </span><span class="cov0" title="0">{ 14181 return err 14182 }</span> 14183 14184 // Update status in session 14185 <span class="cov0" title="0">job, found := ssn.Jobs[reclaimee.Job] 14186 if 
found </span><span class="cov0" title="0">{ 14187 if err := job.UpdateTaskStatus(reclaimee, api.Releasing); err != nil </span><span class="cov0" title="0">{ 14188 klog.Errorf("Failed to update task <%v/%v> status to %v in Session <%v>: %v", 14189 reclaimee.Namespace, reclaimee.Name, api.Releasing, ssn.UID, err) 14190 return err 14191 }</span> 14192 } else<span class="cov0" title="0"> { 14193 klog.Errorf("Failed to find Job <%s> in Session <%s> index when binding.", 14194 reclaimee.Job, ssn.UID) 14195 return fmt.Errorf("failed to find job %s", reclaimee.Job) 14196 }</span> 14197 14198 // Update task in node. 14199 <span class="cov0" title="0">if node, found := ssn.Nodes[reclaimee.NodeName]; found </span><span class="cov0" title="0">{ 14200 if err := node.UpdateTask(reclaimee); err != nil </span><span class="cov0" title="0">{ 14201 klog.Errorf("Failed to update task <%v/%v> in Session <%v>: %v", 14202 reclaimee.Namespace, reclaimee.Name, ssn.UID, err) 14203 return err 14204 }</span> 14205 } 14206 14207 <span class="cov0" title="0">for _, eh := range ssn.eventHandlers </span><span class="cov0" title="0">{ 14208 if eh.DeallocateFunc != nil </span><span class="cov0" title="0">{ 14209 eh.DeallocateFunc(&Event{ 14210 Task: reclaimee, 14211 }) 14212 }</span> 14213 } 14214 14215 <span class="cov0" title="0">return nil</span> 14216 } 14217 14218 // BindPodGroup bind PodGroup to specified cluster 14219 func (ssn *Session) BindPodGroup(job *api.JobInfo, cluster string) error <span class="cov0" title="0">{ 14220 return ssn.cache.BindPodGroup(job, cluster) 14221 }</span> 14222 14223 // UpdatePodGroupCondition update job condition accordingly. 
14224 func (ssn *Session) UpdatePodGroupCondition(jobInfo *api.JobInfo, cond *scheduling.PodGroupCondition) error <span class="cov0" title="0">{ 14225 job, ok := ssn.Jobs[jobInfo.UID] 14226 if !ok </span><span class="cov0" title="0">{ 14227 return fmt.Errorf("failed to find job <%s/%s>", jobInfo.Namespace, jobInfo.Name) 14228 }</span> 14229 14230 <span class="cov0" title="0">index := -1 14231 for i, c := range job.PodGroup.Status.Conditions </span><span class="cov0" title="0">{ 14232 if c.Type == cond.Type </span><span class="cov0" title="0">{ 14233 index = i 14234 break</span> 14235 } 14236 } 14237 14238 // Update condition to the new condition. 14239 <span class="cov0" title="0">if index < 0 </span><span class="cov0" title="0">{ 14240 job.PodGroup.Status.Conditions = append(job.PodGroup.Status.Conditions, *cond) 14241 }</span> else<span class="cov0" title="0"> { 14242 job.PodGroup.Status.Conditions[index] = *cond 14243 }</span> 14244 14245 <span class="cov0" title="0">return nil</span> 14246 } 14247 14248 // AddEventHandler add event handlers 14249 func (ssn *Session) AddEventHandler(eh *EventHandler) <span class="cov0" title="0">{ 14250 ssn.eventHandlers = append(ssn.eventHandlers, eh) 14251 }</span> 14252 14253 // UpdateSchedulerNumaInfo update SchedulerNumaInfo 14254 func (ssn *Session) UpdateSchedulerNumaInfo(AllocatedSets map[string]api.ResNumaSets) <span class="cov0" title="0">{ 14255 ssn.cache.UpdateSchedulerNumaInfo(AllocatedSets) 14256 }</span> 14257 14258 // KubeClient returns the kubernetes client 14259 func (ssn Session) KubeClient() kubernetes.Interface <span class="cov0" title="0">{ 14260 return ssn.kubeClient 14261 }</span> 14262 14263 // InformerFactory returns the scheduler ShareInformerFactory 14264 func (ssn Session) InformerFactory() informers.SharedInformerFactory <span class="cov0" title="0">{ 14265 return ssn.informerFactory 14266 }</span> 14267 14268 //String return nodes and jobs information in the session 14269 func (ssn Session) 
String() string <span class="cov0" title="0">{ 14270 msg := fmt.Sprintf("Session %v: \n", ssn.UID) 14271 14272 for _, job := range ssn.Jobs </span><span class="cov0" title="0">{ 14273 msg = fmt.Sprintf("%s%v\n", msg, job) 14274 }</span> 14275 14276 <span class="cov0" title="0">for _, node := range ssn.Nodes </span><span class="cov0" title="0">{ 14277 msg = fmt.Sprintf("%s%v\n", msg, node) 14278 }</span> 14279 14280 <span class="cov0" title="0">return msg</span> 14281 } 14282 </pre> 14283 14284 <pre class="file" id="file67" style="display: none">/* 14285 Copyright 2018 The Kubernetes Authors. 14286 14287 Licensed under the Apache License, Version 2.0 (the "License"); 14288 you may not use this file except in compliance with the License. 14289 You may obtain a copy of the License at 14290 14291 http://www.apache.org/licenses/LICENSE-2.0 14292 14293 Unless required by applicable law or agreed to in writing, software 14294 distributed under the License is distributed on an "AS IS" BASIS, 14295 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14296 See the License for the specific language governing permissions and 14297 limitations under the License. 
14298 */ 14299 14300 package framework 14301 14302 import ( 14303 k8sframework "k8s.io/kubernetes/pkg/scheduler/framework/v1alpha1" 14304 14305 "volcano.sh/apis/pkg/apis/scheduling" 14306 "volcano.sh/volcano/pkg/controllers/job/helpers" 14307 "volcano.sh/volcano/pkg/scheduler/api" 14308 ) 14309 14310 // AddJobOrderFn add job order function 14311 func (ssn *Session) AddJobOrderFn(name string, cf api.CompareFn) <span class="cov0" title="0">{ 14312 ssn.jobOrderFns[name] = cf 14313 }</span> 14314 14315 // AddQueueOrderFn add queue order function 14316 func (ssn *Session) AddQueueOrderFn(name string, qf api.CompareFn) <span class="cov0" title="0">{ 14317 ssn.queueOrderFns[name] = qf 14318 }</span> 14319 14320 // AddClusterOrderFn add queue order function 14321 func (ssn *Session) AddClusterOrderFn(name string, qf api.CompareFn) <span class="cov0" title="0">{ 14322 ssn.clusterOrderFns[name] = qf 14323 }</span> 14324 14325 // AddTaskOrderFn add task order function 14326 func (ssn *Session) AddTaskOrderFn(name string, cf api.CompareFn) <span class="cov0" title="0">{ 14327 ssn.taskOrderFns[name] = cf 14328 }</span> 14329 14330 // AddNamespaceOrderFn add namespace order function 14331 func (ssn *Session) AddNamespaceOrderFn(name string, cf api.CompareFn) <span class="cov0" title="0">{ 14332 ssn.namespaceOrderFns[name] = cf 14333 }</span> 14334 14335 // AddPreemptableFn add preemptable function 14336 func (ssn *Session) AddPreemptableFn(name string, cf api.EvictableFn) <span class="cov0" title="0">{ 14337 ssn.preemptableFns[name] = cf 14338 }</span> 14339 14340 // AddReclaimableFn add Reclaimable function 14341 func (ssn *Session) AddReclaimableFn(name string, rf api.EvictableFn) <span class="cov0" title="0">{ 14342 ssn.reclaimableFns[name] = rf 14343 }</span> 14344 14345 // AddJobReadyFn add JobReady function 14346 func (ssn *Session) AddJobReadyFn(name string, vf api.ValidateFn) <span class="cov0" title="0">{ 14347 ssn.jobReadyFns[name] = vf 14348 }</span> 14349 14350 // 
AddJobPipelinedFn add pipelined function 14351 func (ssn *Session) AddJobPipelinedFn(name string, vf api.VoteFn) <span class="cov0" title="0">{ 14352 ssn.jobPipelinedFns[name] = vf 14353 }</span> 14354 14355 // AddPredicateFn add Predicate function 14356 func (ssn *Session) AddPredicateFn(name string, pf api.PredicateFn) <span class="cov0" title="0">{ 14357 ssn.predicateFns[name] = pf 14358 }</span> 14359 14360 // AddBestNodeFn add BestNode function 14361 func (ssn *Session) AddBestNodeFn(name string, pf api.BestNodeFn) <span class="cov0" title="0">{ 14362 ssn.bestNodeFns[name] = pf 14363 }</span> 14364 14365 // AddNodeOrderFn add Node order function 14366 func (ssn *Session) AddNodeOrderFn(name string, pf api.NodeOrderFn) <span class="cov0" title="0">{ 14367 ssn.nodeOrderFns[name] = pf 14368 }</span> 14369 14370 // AddBatchNodeOrderFn add Batch Node order function 14371 func (ssn *Session) AddBatchNodeOrderFn(name string, pf api.BatchNodeOrderFn) <span class="cov0" title="0">{ 14372 ssn.batchNodeOrderFns[name] = pf 14373 }</span> 14374 14375 // AddNodeMapFn add Node map function 14376 func (ssn *Session) AddNodeMapFn(name string, pf api.NodeMapFn) <span class="cov0" title="0">{ 14377 ssn.nodeMapFns[name] = pf 14378 }</span> 14379 14380 // AddNodeReduceFn add Node reduce function 14381 func (ssn *Session) AddNodeReduceFn(name string, pf api.NodeReduceFn) <span class="cov0" title="0">{ 14382 ssn.nodeReduceFns[name] = pf 14383 }</span> 14384 14385 // AddOverusedFn add overused function 14386 func (ssn *Session) AddOverusedFn(name string, fn api.ValidateFn) <span class="cov0" title="0">{ 14387 ssn.overusedFns[name] = fn 14388 }</span> 14389 14390 // AddUnderusedResourceFn add underused function 14391 func (ssn *Session) AddUnderusedResourceFn(name string, fn api.UnderUsedResourceFn) <span class="cov0" title="0">{ 14392 ssn.underUsedFns[name] = fn 14393 }</span> 14394 14395 // AddJobValidFn add jobvalid function 14396 func (ssn *Session) AddJobValidFn(name string, fn 
api.ValidateExFn) <span class="cov0" title="0">{ 14397 ssn.jobValidFns[name] = fn 14398 }</span> 14399 14400 // AddJobEnqueueableFn add jobenqueueable function 14401 func (ssn *Session) AddJobEnqueueableFn(name string, fn api.VoteFn) <span class="cov0" title="0">{ 14402 ssn.jobEnqueueableFns[name] = fn 14403 }</span> 14404 14405 // AddJobEnqueuedFn add jobEnqueued function 14406 func (ssn *Session) AddJobEnqueuedFn(name string, fn api.JobEnqueuedFn) <span class="cov0" title="0">{ 14407 ssn.jobEnqueuedFns[name] = fn 14408 }</span> 14409 14410 // AddTargetJobFn add targetjob function 14411 func (ssn *Session) AddTargetJobFn(name string, fn api.TargetJobFn) <span class="cov0" title="0">{ 14412 ssn.targetJobFns[name] = fn 14413 }</span> 14414 14415 // AddReservedNodesFn add reservedNodesFn function 14416 func (ssn *Session) AddReservedNodesFn(name string, fn api.ReservedNodesFn) <span class="cov0" title="0">{ 14417 ssn.reservedNodesFns[name] = fn 14418 }</span> 14419 14420 // AddVictimTasksFns add victimTasksFns function 14421 func (ssn *Session) AddVictimTasksFns(name string, fn api.VictimTasksFn) <span class="cov0" title="0">{ 14422 ssn.victimTasksFns[name] = fn 14423 }</span> 14424 14425 // AddJobStarvingFns add jobStarvingFns function 14426 func (ssn *Session) AddJobStarvingFns(name string, fn api.ValidateFn) <span class="cov0" title="0">{ 14427 ssn.jobStarvingFns[name] = fn 14428 }</span> 14429 14430 // Reclaimable invoke reclaimable function of the plugins 14431 func (ssn *Session) Reclaimable(reclaimer *api.TaskInfo, reclaimees []*api.TaskInfo) []*api.TaskInfo <span class="cov0" title="0">{ 14432 var victims []*api.TaskInfo 14433 var init bool 14434 14435 for _, tier := range ssn.Tiers </span><span class="cov0" title="0">{ 14436 for _, plugin := range tier.Plugins </span><span class="cov0" title="0">{ 14437 if !isEnabled(plugin.EnabledReclaimable) </span><span class="cov0" title="0">{ 14438 continue</span> 14439 } 14440 <span class="cov0" title="0">rf, found := 
ssn.reclaimableFns[plugin.Name] 14441 if !found </span><span class="cov0" title="0">{ 14442 continue</span> 14443 } 14444 14445 <span class="cov0" title="0">candidates, abstain := rf(reclaimer, reclaimees) 14446 if abstain == 0 </span><span class="cov0" title="0">{ 14447 continue</span> 14448 } 14449 <span class="cov0" title="0">if len(candidates) == 0 </span><span class="cov0" title="0">{ 14450 victims = nil 14451 break</span> 14452 } 14453 <span class="cov0" title="0">if !init </span><span class="cov0" title="0">{ 14454 victims = candidates 14455 init = true 14456 }</span> else<span class="cov0" title="0"> { 14457 var intersection []*api.TaskInfo 14458 // Get intersection of victims and candidates. 14459 for _, v := range victims </span><span class="cov0" title="0">{ 14460 for _, c := range candidates </span><span class="cov0" title="0">{ 14461 if v.UID == c.UID </span><span class="cov0" title="0">{ 14462 intersection = append(intersection, v) 14463 }</span> 14464 } 14465 } 14466 14467 // Update victims to intersection 14468 <span class="cov0" title="0">victims = intersection</span> 14469 } 14470 } 14471 // Plugins in this tier made decision if victims is not nil 14472 <span class="cov0" title="0">if victims != nil </span><span class="cov0" title="0">{ 14473 return victims 14474 }</span> 14475 } 14476 14477 <span class="cov0" title="0">return victims</span> 14478 } 14479 14480 // Preemptable invoke preemptable function of the plugins 14481 func (ssn *Session) Preemptable(preemptor *api.TaskInfo, preemptees []*api.TaskInfo) []*api.TaskInfo <span class="cov0" title="0">{ 14482 var victims []*api.TaskInfo 14483 var init bool 14484 14485 for _, tier := range ssn.Tiers </span><span class="cov0" title="0">{ 14486 for _, plugin := range tier.Plugins </span><span class="cov0" title="0">{ 14487 if !isEnabled(plugin.EnabledPreemptable) </span><span class="cov0" title="0">{ 14488 continue</span> 14489 } 14490 14491 <span class="cov0" title="0">pf, found := 
ssn.preemptableFns[plugin.Name] 14492 if !found </span><span class="cov0" title="0">{ 14493 continue</span> 14494 } 14495 <span class="cov0" title="0">candidates, abstain := pf(preemptor, preemptees) 14496 if abstain == 0 </span><span class="cov0" title="0">{ 14497 continue</span> 14498 } 14499 // intersection will be nil if length is 0, don't need to do any more check 14500 <span class="cov0" title="0">if len(candidates) == 0 </span><span class="cov0" title="0">{ 14501 victims = nil 14502 break</span> 14503 } 14504 14505 <span class="cov0" title="0">if !init </span><span class="cov0" title="0">{ 14506 victims = candidates 14507 init = true 14508 }</span> else<span class="cov0" title="0"> { 14509 var intersection []*api.TaskInfo 14510 // Get intersection of victims and candidates. 14511 for _, v := range victims </span><span class="cov0" title="0">{ 14512 for _, c := range candidates </span><span class="cov0" title="0">{ 14513 if v.UID == c.UID </span><span class="cov0" title="0">{ 14514 intersection = append(intersection, v) 14515 }</span> 14516 } 14517 } 14518 14519 // Update victims to intersection 14520 <span class="cov0" title="0">victims = intersection</span> 14521 } 14522 } 14523 // Plugins in this tier made decision if victims is not nil 14524 <span class="cov0" title="0">if victims != nil </span><span class="cov0" title="0">{ 14525 return victims 14526 }</span> 14527 } 14528 14529 <span class="cov0" title="0">return victims</span> 14530 } 14531 14532 // Overused invoke overused function of the plugins 14533 func (ssn *Session) Overused(queue *api.QueueInfo) bool <span class="cov0" title="0">{ 14534 for _, tier := range ssn.Tiers </span><span class="cov0" title="0">{ 14535 for _, plugin := range tier.Plugins </span><span class="cov0" title="0">{ 14536 of, found := ssn.overusedFns[plugin.Name] 14537 if !found </span><span class="cov0" title="0">{ 14538 continue</span> 14539 } 14540 <span class="cov0" title="0">if of(queue) </span><span class="cov0" 
title="0">{ 14541 return true 14542 }</span> 14543 } 14544 } 14545 14546 <span class="cov0" title="0">return false</span> 14547 } 14548 14549 // UnderusedResources invoke underused function of the plugins 14550 // Returns: 14551 // * nil if no `UnderUsedResourceFn` is registered 14552 // * [] if no under-used resources 14553 func (ssn *Session) UnderusedResources(queue *api.QueueInfo) api.ResourceNameList <span class="cov0" title="0">{ 14554 if len(ssn.underUsedFns) == 0 </span><span class="cov0" title="0">{ 14555 return nil 14556 }</span> 14557 <span class="cov0" title="0">for _, tier := range ssn.Tiers </span><span class="cov0" title="0">{ 14558 for _, plugin := range tier.Plugins </span><span class="cov0" title="0">{ 14559 of, found := ssn.underUsedFns[plugin.Name] 14560 if !found </span><span class="cov0" title="0">{ 14561 continue</span> 14562 } 14563 <span class="cov0" title="0">underUsedResourceList := of(queue) 14564 return underUsedResourceList</span> 14565 } 14566 } 14567 14568 <span class="cov0" title="0">return api.ResourceNameList{}</span> 14569 } 14570 14571 // JobReady invoke jobready function of the plugins 14572 func (ssn *Session) JobReady(obj interface{}) bool <span class="cov0" title="0">{ 14573 for _, tier := range ssn.Tiers </span><span class="cov0" title="0">{ 14574 for _, plugin := range tier.Plugins </span><span class="cov0" title="0">{ 14575 if !isEnabled(plugin.EnabledJobReady) </span><span class="cov0" title="0">{ 14576 continue</span> 14577 } 14578 <span class="cov0" title="0">jrf, found := ssn.jobReadyFns[plugin.Name] 14579 if !found </span><span class="cov0" title="0">{ 14580 continue</span> 14581 } 14582 14583 <span class="cov0" title="0">if !jrf(obj) </span><span class="cov0" title="0">{ 14584 return false 14585 }</span> 14586 } 14587 } 14588 14589 <span class="cov0" title="0">return true</span> 14590 } 14591 14592 // JobPipelined invoke pipelined function of the plugins 14593 // Check if job has get enough resource to run 14594 
func (ssn *Session) JobPipelined(obj interface{}) bool <span class="cov0" title="0">{ 14595 var hasFound bool 14596 for _, tier := range ssn.Tiers </span><span class="cov0" title="0">{ 14597 for _, plugin := range tier.Plugins </span><span class="cov0" title="0">{ 14598 if !isEnabled(plugin.EnabledJobPipelined) </span><span class="cov0" title="0">{ 14599 continue</span> 14600 } 14601 <span class="cov0" title="0">jrf, found := ssn.jobPipelinedFns[plugin.Name] 14602 if !found </span><span class="cov0" title="0">{ 14603 continue</span> 14604 } 14605 14606 <span class="cov0" title="0">res := jrf(obj) 14607 if res < 0 </span><span class="cov0" title="0">{ 14608 return false 14609 }</span> 14610 <span class="cov0" title="0">if res > 0 </span><span class="cov0" title="0">{ 14611 hasFound = true 14612 }</span> 14613 } 14614 // if plugin exists that votes permit, meanwhile other plugin votes abstention, 14615 // permit job to be pipelined, do not check next tier 14616 <span class="cov0" title="0">if hasFound </span><span class="cov0" title="0">{ 14617 return true 14618 }</span> 14619 } 14620 14621 <span class="cov0" title="0">return true</span> 14622 } 14623 14624 // JobStarving invoke jobStarving function of the plugins 14625 // Check if job still need more resource 14626 func (ssn *Session) JobStarving(obj interface{}) bool <span class="cov0" title="0">{ 14627 var hasFound bool 14628 for _, tier := range ssn.Tiers </span><span class="cov0" title="0">{ 14629 for _, plugin := range tier.Plugins </span><span class="cov0" title="0">{ 14630 if !isEnabled(plugin.EnabledJobStarving) </span><span class="cov0" title="0">{ 14631 continue</span> 14632 } 14633 <span class="cov0" title="0">jrf, found := ssn.jobStarvingFns[plugin.Name] 14634 if !found </span><span class="cov0" title="0">{ 14635 continue</span> 14636 } 14637 <span class="cov0" title="0">hasFound = true 14638 14639 if !jrf(obj) </span><span class="cov0" title="0">{ 14640 return false 14641 }</span> 14642 } 14643 // this 
tier registered function 14644 <span class="cov0" title="0">if hasFound </span><span class="cov0" title="0">{ 14645 return true 14646 }</span> 14647 } 14648 14649 <span class="cov0" title="0">return false</span> 14650 } 14651 14652 // JobValid invoke jobvalid function of the plugins 14653 func (ssn *Session) JobValid(obj interface{}) *api.ValidateResult <span class="cov0" title="0">{ 14654 for _, tier := range ssn.Tiers </span><span class="cov0" title="0">{ 14655 for _, plugin := range tier.Plugins </span><span class="cov0" title="0">{ 14656 jrf, found := ssn.jobValidFns[plugin.Name] 14657 if !found </span><span class="cov0" title="0">{ 14658 continue</span> 14659 } 14660 14661 <span class="cov0" title="0">if vr := jrf(obj); vr != nil && !vr.Pass </span><span class="cov0" title="0">{ 14662 return vr 14663 }</span> 14664 } 14665 } 14666 14667 <span class="cov0" title="0">return nil</span> 14668 } 14669 14670 // JobEnqueueable invoke jobEnqueueableFns function of the plugins 14671 func (ssn *Session) JobEnqueueable(obj interface{}) bool <span class="cov0" title="0">{ 14672 var hasFound bool 14673 for _, tier := range ssn.Tiers </span><span class="cov0" title="0">{ 14674 for _, plugin := range tier.Plugins </span><span class="cov0" title="0">{ 14675 if !isEnabled(plugin.EnabledJobEnqueued) </span><span class="cov0" title="0">{ 14676 continue</span> 14677 } 14678 <span class="cov0" title="0">fn, found := ssn.jobEnqueueableFns[plugin.Name] 14679 if !found </span><span class="cov0" title="0">{ 14680 continue</span> 14681 } 14682 14683 <span class="cov0" title="0">res := fn(obj) 14684 if res < 0 </span><span class="cov0" title="0">{ 14685 return false 14686 }</span> 14687 <span class="cov0" title="0">if res > 0 </span><span class="cov0" title="0">{ 14688 hasFound = true 14689 }</span> 14690 } 14691 // if plugin exists that votes permit, meanwhile other plugin votes abstention, 14692 // permit job to be enqueueable, do not check next tier 14693 <span class="cov0" 
title="0">if hasFound </span><span class="cov0" title="0">{ 14694 return true 14695 }</span> 14696 } 14697 14698 <span class="cov0" title="0">return true</span> 14699 } 14700 14701 // JobEnqueued invoke jobEnqueuedFns function of the plugins 14702 func (ssn *Session) JobEnqueued(obj interface{}) <span class="cov0" title="0">{ 14703 for _, tier := range ssn.Tiers </span><span class="cov0" title="0">{ 14704 for _, plugin := range tier.Plugins </span><span class="cov0" title="0">{ 14705 if !isEnabled(plugin.EnabledJobEnqueued) </span><span class="cov0" title="0">{ 14706 continue</span> 14707 } 14708 <span class="cov0" title="0">fn, found := ssn.jobEnqueuedFns[plugin.Name] 14709 if !found </span><span class="cov0" title="0">{ 14710 continue</span> 14711 } 14712 14713 <span class="cov0" title="0">fn(obj)</span> 14714 } 14715 } 14716 } 14717 14718 // TargetJob invoke targetJobFns function of the plugins 14719 func (ssn *Session) TargetJob(jobs []*api.JobInfo) *api.JobInfo <span class="cov0" title="0">{ 14720 for _, tier := range ssn.Tiers </span><span class="cov0" title="0">{ 14721 for _, plugin := range tier.Plugins </span><span class="cov0" title="0">{ 14722 if !isEnabled(plugin.EnabledTargetJob) </span><span class="cov0" title="0">{ 14723 continue</span> 14724 } 14725 <span class="cov0" title="0">fn, found := ssn.targetJobFns[plugin.Name] 14726 if !found </span><span class="cov0" title="0">{ 14727 continue</span> 14728 } 14729 <span class="cov0" title="0">return fn(jobs)</span> 14730 } 14731 } 14732 <span class="cov0" title="0">return nil</span> 14733 } 14734 14735 // VictimTasks invoke ReservedNodes function of the plugins 14736 func (ssn *Session) VictimTasks() []*api.TaskInfo <span class="cov0" title="0">{ 14737 var victims []*api.TaskInfo 14738 var init bool 14739 14740 for _, tier := range ssn.Tiers </span><span class="cov0" title="0">{ 14741 for _, plugin := range tier.Plugins </span><span class="cov0" title="0">{ 14742 if !isEnabled(plugin.EnabledVictim) 
</span><span class="cov0" title="0">{ 14743 continue</span> 14744 } 14745 14746 <span class="cov0" title="0">pf, found := ssn.victimTasksFns[plugin.Name] 14747 if !found </span><span class="cov0" title="0">{ 14748 continue</span> 14749 } 14750 <span class="cov0" title="0">candidates := pf() 14751 if !init </span><span class="cov0" title="0">{ 14752 victims = candidates 14753 init = true 14754 }</span> else<span class="cov0" title="0"> { 14755 var intersection []*api.TaskInfo 14756 // Get intersection of victims and candidates. 14757 for _, v := range victims </span><span class="cov0" title="0">{ 14758 for _, c := range candidates </span><span class="cov0" title="0">{ 14759 if v.UID == c.UID </span><span class="cov0" title="0">{ 14760 intersection = append(intersection, v) 14761 }</span> 14762 } 14763 } 14764 14765 // Update victims to intersection 14766 <span class="cov0" title="0">victims = intersection</span> 14767 } 14768 } 14769 // Plugins in this tier made decision if victims is not nil 14770 <span class="cov0" title="0">if victims != nil </span><span class="cov0" title="0">{ 14771 return victims 14772 }</span> 14773 } 14774 14775 <span class="cov0" title="0">return victims</span> 14776 } 14777 14778 // ReservedNodes invoke ReservedNodes function of the plugins 14779 func (ssn *Session) ReservedNodes() <span class="cov0" title="0">{ 14780 for _, tier := range ssn.Tiers </span><span class="cov0" title="0">{ 14781 for _, plugin := range tier.Plugins </span><span class="cov0" title="0">{ 14782 if !isEnabled(plugin.EnabledReservedNodes) </span><span class="cov0" title="0">{ 14783 continue</span> 14784 } 14785 <span class="cov0" title="0">fn, found := ssn.reservedNodesFns[plugin.Name] 14786 if !found </span><span class="cov0" title="0">{ 14787 continue</span> 14788 } 14789 <span class="cov0" title="0">fn()</span> 14790 } 14791 } 14792 } 14793 14794 // JobOrderFn invoke joborder function of the plugins 14795 func (ssn *Session) JobOrderFn(l, r interface{}) bool 
<span class="cov0" title="0">{ 14796 for _, tier := range ssn.Tiers </span><span class="cov0" title="0">{ 14797 for _, plugin := range tier.Plugins </span><span class="cov0" title="0">{ 14798 if !isEnabled(plugin.EnabledJobOrder) </span><span class="cov0" title="0">{ 14799 continue</span> 14800 } 14801 <span class="cov0" title="0">jof, found := ssn.jobOrderFns[plugin.Name] 14802 if !found </span><span class="cov0" title="0">{ 14803 continue</span> 14804 } 14805 <span class="cov0" title="0">if j := jof(l, r); j != 0 </span><span class="cov0" title="0">{ 14806 return j < 0 14807 }</span> 14808 } 14809 } 14810 14811 // If no job order funcs, order job by CreationTimestamp first, then by UID. 14812 <span class="cov0" title="0">lv := l.(*api.JobInfo) 14813 rv := r.(*api.JobInfo) 14814 if lv.CreationTimestamp.Equal(&rv.CreationTimestamp) </span><span class="cov0" title="0">{ 14815 return lv.UID < rv.UID 14816 }</span> 14817 <span class="cov0" title="0">return lv.CreationTimestamp.Before(&rv.CreationTimestamp)</span> 14818 } 14819 14820 // NamespaceOrderFn invoke namespaceorder function of the plugins 14821 func (ssn *Session) NamespaceOrderFn(l, r interface{}) bool <span class="cov0" title="0">{ 14822 for _, tier := range ssn.Tiers </span><span class="cov0" title="0">{ 14823 for _, plugin := range tier.Plugins </span><span class="cov0" title="0">{ 14824 if !isEnabled(plugin.EnabledNamespaceOrder) </span><span class="cov0" title="0">{ 14825 continue</span> 14826 } 14827 <span class="cov0" title="0">nof, found := ssn.namespaceOrderFns[plugin.Name] 14828 if !found </span><span class="cov0" title="0">{ 14829 continue</span> 14830 } 14831 <span class="cov0" title="0">if j := nof(l, r); j != 0 </span><span class="cov0" title="0">{ 14832 return j < 0 14833 }</span> 14834 } 14835 } 14836 14837 // TODO(lminzhw): if all NamespaceOrderFn treat these two namespace as the same, 14838 // we should make the job order have its affect among namespaces. 
14839 // or just schedule namespace one by one 14840 <span class="cov0" title="0">lv := l.(api.NamespaceName) 14841 rv := r.(api.NamespaceName) 14842 return lv < rv</span> 14843 } 14844 14845 // ClusterOrderFn invoke ClusterOrderFn function of the plugins 14846 func (ssn *Session) ClusterOrderFn(l, r interface{}) bool <span class="cov0" title="0">{ 14847 for _, tier := range ssn.Tiers </span><span class="cov0" title="0">{ 14848 for _, plugin := range tier.Plugins </span><span class="cov0" title="0">{ 14849 if !isEnabled(plugin.EnabledClusterOrder) </span><span class="cov0" title="0">{ 14850 continue</span> 14851 } 14852 <span class="cov0" title="0">cof, found := ssn.clusterOrderFns[plugin.Name] 14853 if !found </span><span class="cov0" title="0">{ 14854 continue</span> 14855 } 14856 <span class="cov0" title="0">if j := cof(l, r); j != 0 </span><span class="cov0" title="0">{ 14857 return j < 0 14858 }</span> 14859 } 14860 } 14861 14862 // If no cluster order funcs, order cluster by ClusterID 14863 <span class="cov0" title="0">lv := l.(*scheduling.Cluster) 14864 rv := r.(*scheduling.Cluster) 14865 return lv.Name < rv.Name</span> 14866 } 14867 14868 // QueueOrderFn invoke queueorder function of the plugins 14869 func (ssn *Session) QueueOrderFn(l, r interface{}) bool <span class="cov0" title="0">{ 14870 for _, tier := range ssn.Tiers </span><span class="cov0" title="0">{ 14871 for _, plugin := range tier.Plugins </span><span class="cov0" title="0">{ 14872 if !isEnabled(plugin.EnabledQueueOrder) </span><span class="cov0" title="0">{ 14873 continue</span> 14874 } 14875 <span class="cov0" title="0">qof, found := ssn.queueOrderFns[plugin.Name] 14876 if !found </span><span class="cov0" title="0">{ 14877 continue</span> 14878 } 14879 <span class="cov0" title="0">if j := qof(l, r); j != 0 </span><span class="cov0" title="0">{ 14880 return j < 0 14881 }</span> 14882 } 14883 } 14884 14885 // If no queue order funcs, order queue by CreationTimestamp first, then by UID. 
14886 <span class="cov0" title="0">lv := l.(*api.QueueInfo) 14887 rv := r.(*api.QueueInfo) 14888 if lv.Queue.CreationTimestamp.Equal(&rv.Queue.CreationTimestamp) </span><span class="cov0" title="0">{ 14889 return lv.UID < rv.UID 14890 }</span> 14891 <span class="cov0" title="0">return lv.Queue.CreationTimestamp.Before(&rv.Queue.CreationTimestamp)</span> 14892 } 14893 14894 // TaskCompareFns invoke taskorder function of the plugins 14895 func (ssn *Session) TaskCompareFns(l, r interface{}) int <span class="cov0" title="0">{ 14896 for _, tier := range ssn.Tiers </span><span class="cov0" title="0">{ 14897 for _, plugin := range tier.Plugins </span><span class="cov0" title="0">{ 14898 if !isEnabled(plugin.EnabledTaskOrder) </span><span class="cov0" title="0">{ 14899 continue</span> 14900 } 14901 <span class="cov0" title="0">tof, found := ssn.taskOrderFns[plugin.Name] 14902 if !found </span><span class="cov0" title="0">{ 14903 continue</span> 14904 } 14905 <span class="cov0" title="0">if j := tof(l, r); j != 0 </span><span class="cov0" title="0">{ 14906 return j 14907 }</span> 14908 } 14909 } 14910 14911 <span class="cov0" title="0">return 0</span> 14912 } 14913 14914 // TaskOrderFn invoke taskorder function of the plugins 14915 func (ssn *Session) TaskOrderFn(l, r interface{}) bool <span class="cov0" title="0">{ 14916 if res := ssn.TaskCompareFns(l, r); res != 0 </span><span class="cov0" title="0">{ 14917 return res < 0 14918 }</span> 14919 14920 // If no task order funcs, order task by default func. 
14921 <span class="cov0" title="0">lv := l.(*api.TaskInfo) 14922 rv := r.(*api.TaskInfo) 14923 return helpers.CompareTask(lv, rv)</span> 14924 } 14925 14926 // PredicateFn invoke predicate function of the plugins 14927 func (ssn *Session) PredicateFn(task *api.TaskInfo, node *api.NodeInfo) error <span class="cov0" title="0">{ 14928 for _, tier := range ssn.Tiers </span><span class="cov0" title="0">{ 14929 for _, plugin := range tier.Plugins </span><span class="cov0" title="0">{ 14930 if !isEnabled(plugin.EnabledPredicate) </span><span class="cov0" title="0">{ 14931 continue</span> 14932 } 14933 <span class="cov0" title="0">pfn, found := ssn.predicateFns[plugin.Name] 14934 if !found </span><span class="cov0" title="0">{ 14935 continue</span> 14936 } 14937 <span class="cov0" title="0">err := pfn(task, node) 14938 if err != nil </span><span class="cov0" title="0">{ 14939 return err 14940 }</span> 14941 } 14942 } 14943 <span class="cov0" title="0">return nil</span> 14944 } 14945 14946 // BestNodeFn invoke bestNode function of the plugins 14947 func (ssn *Session) BestNodeFn(task *api.TaskInfo, nodeScores map[float64][]*api.NodeInfo) *api.NodeInfo <span class="cov0" title="0">{ 14948 for _, tier := range ssn.Tiers </span><span class="cov0" title="0">{ 14949 for _, plugin := range tier.Plugins </span><span class="cov0" title="0">{ 14950 if !isEnabled(plugin.EnabledBestNode) </span><span class="cov0" title="0">{ 14951 continue</span> 14952 } 14953 <span class="cov0" title="0">pfn, found := ssn.bestNodeFns[plugin.Name] 14954 if !found </span><span class="cov0" title="0">{ 14955 continue</span> 14956 } 14957 // Only the first plugin that enables and realizes bestNodeFn is allowed to choose best node for task 14958 <span class="cov0" title="0">if bestNode := pfn(task, nodeScores); bestNode != nil </span><span class="cov0" title="0">{ 14959 return bestNode 14960 }</span> 14961 } 14962 } 14963 <span class="cov0" title="0">return nil</span> 14964 } 14965 14966 // NodeOrderFn 
invoke node order function of the plugins 14967 func (ssn *Session) NodeOrderFn(task *api.TaskInfo, node *api.NodeInfo) (float64, error) <span class="cov0" title="0">{ 14968 priorityScore := 0.0 14969 for _, tier := range ssn.Tiers </span><span class="cov0" title="0">{ 14970 for _, plugin := range tier.Plugins </span><span class="cov0" title="0">{ 14971 if !isEnabled(plugin.EnabledNodeOrder) </span><span class="cov0" title="0">{ 14972 continue</span> 14973 } 14974 <span class="cov0" title="0">pfn, found := ssn.nodeOrderFns[plugin.Name] 14975 if !found </span><span class="cov0" title="0">{ 14976 continue</span> 14977 } 14978 <span class="cov0" title="0">score, err := pfn(task, node) 14979 if err != nil </span><span class="cov0" title="0">{ 14980 return 0, err 14981 }</span> 14982 <span class="cov0" title="0">priorityScore += score</span> 14983 } 14984 } 14985 <span class="cov0" title="0">return priorityScore, nil</span> 14986 } 14987 14988 // BatchNodeOrderFn invoke node order function of the plugins 14989 func (ssn *Session) BatchNodeOrderFn(task *api.TaskInfo, nodes []*api.NodeInfo) (map[string]float64, error) <span class="cov0" title="0">{ 14990 priorityScore := make(map[string]float64, len(nodes)) 14991 for _, tier := range ssn.Tiers </span><span class="cov0" title="0">{ 14992 for _, plugin := range tier.Plugins </span><span class="cov0" title="0">{ 14993 if !isEnabled(plugin.EnabledNodeOrder) </span><span class="cov0" title="0">{ 14994 continue</span> 14995 } 14996 <span class="cov0" title="0">pfn, found := ssn.batchNodeOrderFns[plugin.Name] 14997 if !found </span><span class="cov0" title="0">{ 14998 continue</span> 14999 } 15000 <span class="cov0" title="0">score, err := pfn(task, nodes) 15001 if err != nil </span><span class="cov0" title="0">{ 15002 return nil, err 15003 }</span> 15004 <span class="cov0" title="0">for nodeName, score := range score </span><span class="cov0" title="0">{ 15005 priorityScore[nodeName] += score 15006 }</span> 15007 } 15008 } 
15009 <span class="cov0" title="0">return priorityScore, nil</span> 15010 } 15011 15012 func isEnabled(enabled *bool) bool <span class="cov0" title="0">{ 15013 return enabled != nil && *enabled 15014 }</span> 15015 15016 // NodeOrderMapFn invoke node order function of the plugins 15017 func (ssn *Session) NodeOrderMapFn(task *api.TaskInfo, node *api.NodeInfo) (map[string]float64, float64, error) <span class="cov0" title="0">{ 15018 nodeScoreMap := map[string]float64{} 15019 var priorityScore float64 15020 for _, tier := range ssn.Tiers </span><span class="cov0" title="0">{ 15021 for _, plugin := range tier.Plugins </span><span class="cov0" title="0">{ 15022 if !isEnabled(plugin.EnabledNodeOrder) </span><span class="cov0" title="0">{ 15023 continue</span> 15024 } 15025 <span class="cov0" title="0">if pfn, found := ssn.nodeOrderFns[plugin.Name]; found </span><span class="cov0" title="0">{ 15026 score, err := pfn(task, node) 15027 if err != nil </span><span class="cov0" title="0">{ 15028 return nodeScoreMap, priorityScore, err 15029 }</span> 15030 <span class="cov0" title="0">priorityScore += score</span> 15031 } 15032 <span class="cov0" title="0">if pfn, found := ssn.nodeMapFns[plugin.Name]; found </span><span class="cov0" title="0">{ 15033 score, err := pfn(task, node) 15034 if err != nil </span><span class="cov0" title="0">{ 15035 return nodeScoreMap, priorityScore, err 15036 }</span> 15037 <span class="cov0" title="0">nodeScoreMap[plugin.Name] = score</span> 15038 } 15039 } 15040 } 15041 <span class="cov0" title="0">return nodeScoreMap, priorityScore, nil</span> 15042 } 15043 15044 // NodeOrderReduceFn invoke node order function of the plugins 15045 func (ssn *Session) NodeOrderReduceFn(task *api.TaskInfo, pluginNodeScoreMap map[string]k8sframework.NodeScoreList) (map[string]float64, error) <span class="cov0" title="0">{ 15046 nodeScoreMap := map[string]float64{} 15047 for _, tier := range ssn.Tiers </span><span class="cov0" title="0">{ 15048 for _, plugin := 
range tier.Plugins </span><span class="cov0" title="0">{ 15049 if !isEnabled(plugin.EnabledNodeOrder) </span><span class="cov0" title="0">{ 15050 continue</span> 15051 } 15052 <span class="cov0" title="0">pfn, found := ssn.nodeReduceFns[plugin.Name] 15053 if !found </span><span class="cov0" title="0">{ 15054 continue</span> 15055 } 15056 <span class="cov0" title="0">if err := pfn(task, pluginNodeScoreMap[plugin.Name]); err != nil </span><span class="cov0" title="0">{ 15057 return nodeScoreMap, err 15058 }</span> 15059 <span class="cov0" title="0">for _, hp := range pluginNodeScoreMap[plugin.Name] </span><span class="cov0" title="0">{ 15060 nodeScoreMap[hp.Name] += float64(hp.Score) 15061 }</span> 15062 } 15063 } 15064 <span class="cov0" title="0">return nodeScoreMap, nil</span> 15065 } 15066 </pre> 15067 15068 <pre class="file" id="file68" style="display: none">/* 15069 Copyright 2018 The Kubernetes Authors. 15070 15071 Licensed under the Apache License, Version 2.0 (the "License"); 15072 you may not use this file except in compliance with the License. 15073 You may obtain a copy of the License at 15074 15075 http://www.apache.org/licenses/LICENSE-2.0 15076 15077 Unless required by applicable law or agreed to in writing, software 15078 distributed under the License is distributed on an "AS IS" BASIS, 15079 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15080 See the License for the specific language governing permissions and 15081 limitations under the License. 
15082 */ 15083 15084 package framework 15085 15086 import ( 15087 "fmt" 15088 15089 "k8s.io/klog" 15090 15091 "volcano.sh/volcano/pkg/scheduler/api" 15092 "volcano.sh/volcano/pkg/scheduler/metrics" 15093 ) 15094 15095 // Operation type 15096 type Operation int8 15097 15098 const ( 15099 // Evict op 15100 Evict = iota 15101 // Pipeline op 15102 Pipeline 15103 // Allocate op 15104 Allocate 15105 ) 15106 15107 type operation struct { 15108 name Operation 15109 task *api.TaskInfo 15110 reason string 15111 } 15112 15113 // Statement structure 15114 type Statement struct { 15115 operations []operation 15116 ssn *Session 15117 } 15118 15119 // NewStatement returns new statement object 15120 func NewStatement(ssn *Session) *Statement <span class="cov0" title="0">{ 15121 return &Statement{ 15122 ssn: ssn, 15123 } 15124 }</span> 15125 15126 // Evict the pod 15127 func (s *Statement) Evict(reclaimee *api.TaskInfo, reason string) error <span class="cov0" title="0">{ 15128 // Update status in session 15129 if job, found := s.ssn.Jobs[reclaimee.Job]; found </span><span class="cov0" title="0">{ 15130 if err := job.UpdateTaskStatus(reclaimee, api.Releasing); err != nil </span><span class="cov0" title="0">{ 15131 klog.Errorf("Failed to update task <%v/%v> status to %v in Session <%v>: %v", 15132 reclaimee.Namespace, reclaimee.Name, api.Releasing, s.ssn.UID, err) 15133 }</span> 15134 } else<span class="cov0" title="0"> { 15135 klog.Errorf("Failed to find Job <%s> in Session <%s> index when binding.", 15136 reclaimee.Job, s.ssn.UID) 15137 }</span> 15138 15139 // Update task in node. 
15140 <span class="cov0" title="0">if node, found := s.ssn.Nodes[reclaimee.NodeName]; found </span><span class="cov0" title="0">{ 15141 err := node.UpdateTask(reclaimee) 15142 if err != nil </span><span class="cov0" title="0">{ 15143 klog.Errorf("Failed to update task <%v/%v> in node %v for: %s", 15144 reclaimee.Namespace, reclaimee.Name, reclaimee.NodeName, err.Error()) 15145 return err 15146 }</span> 15147 } 15148 15149 <span class="cov0" title="0">for _, eh := range s.ssn.eventHandlers </span><span class="cov0" title="0">{ 15150 if eh.DeallocateFunc != nil </span><span class="cov0" title="0">{ 15151 eh.DeallocateFunc(&Event{ 15152 Task: reclaimee, 15153 }) 15154 }</span> 15155 } 15156 15157 <span class="cov0" title="0">s.operations = append(s.operations, operation{ 15158 name: Evict, 15159 task: reclaimee, 15160 reason: reason, 15161 }) 15162 15163 return nil</span> 15164 } 15165 15166 func (s *Statement) evict(reclaimee *api.TaskInfo, reason string) error <span class="cov0" title="0">{ 15167 if err := s.ssn.cache.Evict(reclaimee, reason); err != nil </span><span class="cov0" title="0">{ 15168 if e := s.unevict(reclaimee); e != nil </span><span class="cov0" title="0">{ 15169 klog.Errorf("Faled to unevict task <%v/%v>: %v.", 15170 reclaimee.Namespace, reclaimee.Name, e) 15171 }</span> 15172 <span class="cov0" title="0">return err</span> 15173 } 15174 15175 <span class="cov0" title="0">return nil</span> 15176 } 15177 15178 func (s *Statement) unevict(reclaimee *api.TaskInfo) error <span class="cov0" title="0">{ 15179 // Update status in session 15180 job, found := s.ssn.Jobs[reclaimee.Job] 15181 if found </span><span class="cov0" title="0">{ 15182 if err := job.UpdateTaskStatus(reclaimee, api.Running); err != nil </span><span class="cov0" title="0">{ 15183 klog.Errorf("Failed to update task <%v/%v> status to %v in Session <%v>: %v", 15184 reclaimee.Namespace, reclaimee.Name, api.Releasing, s.ssn.UID, err) 15185 }</span> 15186 } else<span class="cov0" title="0"> { 
15187 klog.Errorf("Failed to find Job <%s> in Session <%s> index when binding.", 15188 reclaimee.Job, s.ssn.UID) 15189 }</span> 15190 15191 // Update task in node. 15192 <span class="cov0" title="0">if node, found := s.ssn.Nodes[reclaimee.NodeName]; found </span><span class="cov0" title="0">{ 15193 err := node.UpdateTask(reclaimee) 15194 if err != nil </span><span class="cov0" title="0">{ 15195 klog.Errorf("Failed to update task <%v/%v> in node %v for: %s", 15196 reclaimee.Namespace, reclaimee.Name, reclaimee.NodeName, err.Error()) 15197 return err 15198 }</span> 15199 } 15200 15201 <span class="cov0" title="0">for _, eh := range s.ssn.eventHandlers </span><span class="cov0" title="0">{ 15202 if eh.AllocateFunc != nil </span><span class="cov0" title="0">{ 15203 eh.AllocateFunc(&Event{ 15204 Task: reclaimee, 15205 }) 15206 }</span> 15207 } 15208 15209 <span class="cov0" title="0">return nil</span> 15210 } 15211 15212 // Pipeline the task for the node 15213 func (s *Statement) Pipeline(task *api.TaskInfo, hostname string) error <span class="cov0" title="0">{ 15214 job, found := s.ssn.Jobs[task.Job] 15215 if found </span><span class="cov0" title="0">{ 15216 if err := job.UpdateTaskStatus(task, api.Pipelined); err != nil </span><span class="cov0" title="0">{ 15217 klog.Errorf("Failed to update task <%v/%v> status to %v in Session <%v>: %v", 15218 task.Namespace, task.Name, api.Pipelined, s.ssn.UID, err) 15219 }</span> 15220 } else<span class="cov0" title="0"> { 15221 klog.Errorf("Failed to find Job <%s> in Session <%s> index when binding.", 15222 task.Job, s.ssn.UID) 15223 }</span> 15224 15225 <span class="cov0" title="0">task.NodeName = hostname 15226 15227 if node, found := s.ssn.Nodes[hostname]; found </span><span class="cov0" title="0">{ 15228 if err := node.AddTask(task); err != nil </span><span class="cov0" title="0">{ 15229 klog.Errorf("Failed to pipeline task <%v/%v> to node <%v> in Session <%v>: %v", 15230 task.Namespace, task.Name, hostname, s.ssn.UID, err) 
15231 }</span> 15232 <span class="cov0" title="0">klog.V(3).Infof("After pipelined Task <%v/%v> to Node <%v>: idle <%v>, used <%v>, releasing <%v>", 15233 task.Namespace, task.Name, node.Name, node.Idle, node.Used, node.Releasing)</span> 15234 } else<span class="cov0" title="0"> { 15235 klog.Errorf("Failed to find Node <%s> in Session <%s> index when binding.", 15236 hostname, s.ssn.UID) 15237 }</span> 15238 15239 <span class="cov0" title="0">for _, eh := range s.ssn.eventHandlers </span><span class="cov0" title="0">{ 15240 if eh.AllocateFunc != nil </span><span class="cov0" title="0">{ 15241 eh.AllocateFunc(&Event{ 15242 Task: task, 15243 }) 15244 }</span> 15245 } 15246 15247 <span class="cov0" title="0">s.operations = append(s.operations, operation{ 15248 name: Pipeline, 15249 task: task, 15250 }) 15251 15252 return nil</span> 15253 } 15254 15255 func (s *Statement) pipeline(task *api.TaskInfo) {<span class="cov0" title="0"> 15256 }</span> 15257 15258 func (s *Statement) unpipeline(task *api.TaskInfo) error <span class="cov0" title="0">{ 15259 job, found := s.ssn.Jobs[task.Job] 15260 if found </span><span class="cov0" title="0">{ 15261 if err := job.UpdateTaskStatus(task, api.Pending); err != nil </span><span class="cov0" title="0">{ 15262 klog.Errorf("Failed to update task <%v/%v> status to %v in Session <%v>: %v", 15263 task.Namespace, task.Name, api.Pipelined, s.ssn.UID, err) 15264 }</span> 15265 } else<span class="cov0" title="0"> { 15266 klog.Errorf("Failed to find Job <%s> in Session <%s> index when binding.", 15267 task.Job, s.ssn.UID) 15268 }</span> 15269 15270 <span class="cov0" title="0">if node, found := s.ssn.Nodes[task.NodeName]; found </span><span class="cov0" title="0">{ 15271 if err := node.RemoveTask(task); err != nil </span><span class="cov0" title="0">{ 15272 klog.Errorf("Failed to pipeline task <%v/%v> to node <%v> in Session <%v>: %v", 15273 task.Namespace, task.Name, task.NodeName, s.ssn.UID, err) 15274 }</span> 15275 <span class="cov0" 
title="0">klog.V(3).Infof("After pipelined Task <%v/%v> to Node <%v>: idle <%v>, used <%v>, releasing <%v>", 15276 task.Namespace, task.Name, node.Name, node.Idle, node.Used, node.Releasing)</span> 15277 } else<span class="cov0" title="0"> { 15278 klog.Errorf("Failed to find Node <%s> in Session <%s> index when binding.", 15279 task.NodeName, s.ssn.UID) 15280 }</span> 15281 15282 <span class="cov0" title="0">for _, eh := range s.ssn.eventHandlers </span><span class="cov0" title="0">{ 15283 if eh.DeallocateFunc != nil </span><span class="cov0" title="0">{ 15284 eh.DeallocateFunc(&Event{ 15285 Task: task, 15286 }) 15287 }</span> 15288 } 15289 <span class="cov0" title="0">task.NodeName = "" 15290 15291 return nil</span> 15292 } 15293 15294 // Allocate the task to node 15295 func (s *Statement) Allocate(task *api.TaskInfo, nodeInfo *api.NodeInfo) error <span class="cov0" title="0">{ 15296 podVolumes, err := s.ssn.cache.GetPodVolumes(task, nodeInfo.Node) 15297 if err != nil </span><span class="cov0" title="0">{ 15298 return err 15299 }</span> 15300 15301 <span class="cov0" title="0">hostname := nodeInfo.Name 15302 if err := s.ssn.cache.AllocateVolumes(task, hostname, podVolumes); err != nil </span><span class="cov0" title="0">{ 15303 return err 15304 }</span> 15305 15306 <span class="cov0" title="0">task.Pod.Spec.NodeName = hostname 15307 task.PodVolumes = podVolumes 15308 15309 // Only update status in session 15310 job, found := s.ssn.Jobs[task.Job] 15311 if found </span><span class="cov0" title="0">{ 15312 if err := job.UpdateTaskStatus(task, api.Allocated); err != nil </span><span class="cov0" title="0">{ 15313 klog.Errorf("Failed to update task <%v/%v> status to %v in Session <%v>: %v", 15314 task.Namespace, task.Name, api.Allocated, s.ssn.UID, err) 15315 return err 15316 }</span> 15317 } else<span class="cov0" title="0"> { 15318 klog.Errorf("Failed to find Job <%s> in Session <%s> index when binding.", 15319 task.Job, s.ssn.UID) 15320 return fmt.Errorf("failed to 
find job %s", task.Job) 15321 }</span> 15322 15323 <span class="cov0" title="0">task.NodeName = hostname 15324 if node, found := s.ssn.Nodes[hostname]; found </span><span class="cov0" title="0">{ 15325 if err := node.AddTask(task); err != nil </span><span class="cov0" title="0">{ 15326 klog.Errorf("Failed to add task <%v/%v> to node <%v> in Session <%v>: %v", 15327 task.Namespace, task.Name, hostname, s.ssn.UID, err) 15328 return err 15329 }</span> 15330 <span class="cov0" title="0">klog.V(3).Infof("After allocated Task <%v/%v> to Node <%v>: idle <%v>, used <%v>, releasing <%v>", 15331 task.Namespace, task.Name, node.Name, node.Idle, node.Used, node.Releasing)</span> 15332 } else<span class="cov0" title="0"> { 15333 klog.Errorf("Failed to find Node <%s> in Session <%s> index when binding.", 15334 hostname, s.ssn.UID) 15335 return fmt.Errorf("failed to find node %s", hostname) 15336 }</span> 15337 15338 // Callbacks 15339 <span class="cov0" title="0">for _, eh := range s.ssn.eventHandlers </span><span class="cov0" title="0">{ 15340 if eh.AllocateFunc != nil </span><span class="cov0" title="0">{ 15341 eh.AllocateFunc(&Event{ 15342 Task: task, 15343 }) 15344 }</span> 15345 } 15346 15347 // Update status in session 15348 <span class="cov0" title="0">klog.V(3).Info("Allocating operations ...") 15349 s.operations = append(s.operations, operation{ 15350 name: Allocate, 15351 task: task, 15352 }) 15353 15354 return nil</span> 15355 } 15356 15357 func (s *Statement) allocate(task *api.TaskInfo) error <span class="cov0" title="0">{ 15358 if err := s.ssn.cache.AddBindTask(task); err != nil </span><span class="cov0" title="0">{ 15359 return err 15360 }</span> 15361 15362 <span class="cov0" title="0">if job, found := s.ssn.Jobs[task.Job]; found </span><span class="cov0" title="0">{ 15363 if err := job.UpdateTaskStatus(task, api.Binding); err != nil </span><span class="cov0" title="0">{ 15364 klog.Errorf("Failed to update task <%v/%v> status to %v in Session <%v>: %v", 15365 
task.Namespace, task.Name, api.Binding, s.ssn.UID, err) 15366 return err 15367 }</span> 15368 } else<span class="cov0" title="0"> { 15369 klog.Errorf("Failed to find Job <%s> in Session <%s> index when binding.", 15370 task.Job, s.ssn.UID) 15371 return fmt.Errorf("failed to find job %s", task.Job) 15372 }</span> 15373 15374 <span class="cov0" title="0">metrics.UpdateTaskScheduleDuration(metrics.Duration(task.Pod.CreationTimestamp.Time)) 15375 return nil</span> 15376 } 15377 15378 // unallocate the pod for task 15379 func (s *Statement) unallocate(task *api.TaskInfo) error <span class="cov0" title="0">{ 15380 // Update status in session 15381 job, found := s.ssn.Jobs[task.Job] 15382 if found </span><span class="cov0" title="0">{ 15383 if err := job.UpdateTaskStatus(task, api.Pending); err != nil </span><span class="cov0" title="0">{ 15384 klog.Errorf("Failed to update task <%v/%v> status to %v in Session <%v>: %v", 15385 task.Namespace, task.Name, api.Pending, s.ssn.UID, err) 15386 }</span> 15387 } else<span class="cov0" title="0"> { 15388 klog.Errorf("Failed to find Job <%s> in Session <%s> index when unallocating.", 15389 task.Job, s.ssn.UID) 15390 }</span> 15391 15392 <span class="cov0" title="0">if node, found := s.ssn.Nodes[task.NodeName]; found </span><span class="cov0" title="0">{ 15393 klog.V(3).Infof("Remove Task <%v> on node <%v>", task.Name, task.NodeName) 15394 err := node.RemoveTask(task) 15395 if err != nil </span><span class="cov0" title="0">{ 15396 klog.Errorf("Failed to remove Task <%v> on node <%v>: %s", task.Name, task.NodeName, err.Error()) 15397 }</span> 15398 } 15399 15400 <span class="cov0" title="0">for _, eh := range s.ssn.eventHandlers </span><span class="cov0" title="0">{ 15401 if eh.DeallocateFunc != nil </span><span class="cov0" title="0">{ 15402 eh.DeallocateFunc(&Event{ 15403 Task: task, 15404 }) 15405 }</span> 15406 } 15407 <span class="cov0" title="0">task.NodeName = "" 15408 15409 return nil</span> 15410 } 15411 15412 // Discard 
operation for evict, pipeline and allocate 15413 func (s *Statement) Discard() <span class="cov0" title="0">{ 15414 klog.V(3).Info("Discarding operations ...") 15415 for i := len(s.operations) - 1; i >= 0; i-- </span><span class="cov0" title="0">{ 15416 op := s.operations[i] 15417 op.task.GenerateLastTxContext() 15418 switch op.name </span>{ 15419 case Evict:<span class="cov0" title="0"> 15420 err := s.unevict(op.task) 15421 if err != nil </span><span class="cov0" title="0">{ 15422 klog.Errorf("Failed to unevict task: %s", err.Error()) 15423 }</span> 15424 case Pipeline:<span class="cov0" title="0"> 15425 err := s.unpipeline(op.task) 15426 if err != nil </span><span class="cov0" title="0">{ 15427 klog.Errorf("Failed to unpipeline task: %s", err.Error()) 15428 }</span> 15429 case Allocate:<span class="cov0" title="0"> 15430 err := s.unallocate(op.task) 15431 if err != nil </span><span class="cov0" title="0">{ 15432 klog.Errorf("Failed to unallocate task: %s", err.Error()) 15433 }</span> 15434 } 15435 } 15436 } 15437 15438 // Commit operation for evict and pipeline 15439 func (s *Statement) Commit() <span class="cov0" title="0">{ 15440 klog.V(3).Info("Committing operations ...") 15441 for _, op := range s.operations </span><span class="cov0" title="0">{ 15442 op.task.ClearLastTxContext() 15443 switch op.name </span>{ 15444 case Evict:<span class="cov0" title="0"> 15445 err := s.evict(op.task, op.reason) 15446 if err != nil </span><span class="cov0" title="0">{ 15447 klog.Errorf("Failed to evict task: %s", err.Error()) 15448 }</span> 15449 case Pipeline:<span class="cov0" title="0"> 15450 s.pipeline(op.task)</span> 15451 case Allocate:<span class="cov0" title="0"> 15452 err := s.allocate(op.task) 15453 if err != nil </span><span class="cov0" title="0">{ 15454 klog.Errorf("Failed to allocate task: for %s", err.Error()) 15455 }</span> 15456 } 15457 } 15458 } 15459 </pre> 15460 15461 <pre class="file" id="file69" style="display: none">/* 15462 Copyright 2019 The Volcano 
Authors. 15463 15464 Licensed under the Apache License, Version 2.0 (the "License"); 15465 you may not use this file except in compliance with the License. 15466 You may obtain a copy of the License at 15467 15468 http://www.apache.org/licenses/LICENSE-2.0 15469 15470 Unless required by applicable law or agreed to in writing, software 15471 distributed under the License is distributed on an "AS IS" BASIS, 15472 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15473 See the License for the specific language governing permissions and 15474 limitations under the License. 15475 */ 15476 15477 package binpack 15478 15479 import ( 15480 "fmt" 15481 "strings" 15482 15483 v1 "k8s.io/api/core/v1" 15484 "k8s.io/klog" 15485 "k8s.io/kubernetes/pkg/scheduler/framework/v1alpha1" 15486 15487 "volcano.sh/volcano/pkg/scheduler/api" 15488 "volcano.sh/volcano/pkg/scheduler/framework" 15489 ) 15490 15491 const ( 15492 // PluginName indicates name of volcano scheduler plugin. 15493 PluginName = "binpack" 15494 ) 15495 15496 const ( 15497 // BinpackWeight is the key for providing Binpack Priority Weight in YAML 15498 BinpackWeight = "binpack.weight" 15499 // BinpackCPU is the key for weight of cpu 15500 BinpackCPU = "binpack.cpu" 15501 // BinpackMemory is the key for weight of memory 15502 BinpackMemory = "binpack.memory" 15503 15504 // BinpackResources is the key for additional resource key name 15505 BinpackResources = "binpack.resources" 15506 // BinpackResourcesPrefix is the key prefix for additional resource key name 15507 BinpackResourcesPrefix = BinpackResources + "." 
15508 15509 resourceFmt = "%s[%d]" 15510 ) 15511 15512 type priorityWeight struct { 15513 BinPackingWeight int 15514 BinPackingCPU int 15515 BinPackingMemory int 15516 BinPackingResources map[v1.ResourceName]int 15517 } 15518 15519 func (w *priorityWeight) String() string <span class="cov0" title="0">{ 15520 length := 3 15521 if extendLength := len(w.BinPackingResources); extendLength == 0 </span><span class="cov0" title="0">{ 15522 length++ 15523 }</span> else<span class="cov0" title="0"> { 15524 length += extendLength 15525 }</span> 15526 <span class="cov0" title="0">msg := make([]string, 0, length) 15527 msg = append(msg, 15528 fmt.Sprintf(resourceFmt, BinpackWeight, w.BinPackingWeight), 15529 fmt.Sprintf(resourceFmt, BinpackCPU, w.BinPackingCPU), 15530 fmt.Sprintf(resourceFmt, BinpackMemory, w.BinPackingMemory), 15531 ) 15532 15533 if len(w.BinPackingResources) == 0 </span><span class="cov0" title="0">{ 15534 msg = append(msg, "no extend resources.") 15535 }</span> else<span class="cov0" title="0"> { 15536 for name, weight := range w.BinPackingResources </span><span class="cov0" title="0">{ 15537 msg = append(msg, fmt.Sprintf(resourceFmt, name, weight)) 15538 }</span> 15539 } 15540 <span class="cov0" title="0">return strings.Join(msg, ", ")</span> 15541 } 15542 15543 type binpackPlugin struct { 15544 // Arguments given for the plugin 15545 weight priorityWeight 15546 } 15547 15548 //New function returns prioritizePlugin object 15549 func New(aruguments framework.Arguments) framework.Plugin <span class="cov8" title="1">{ 15550 weight := calculateWeight(aruguments) 15551 return &binpackPlugin{weight: weight} 15552 }</span> 15553 15554 func calculateWeight(args framework.Arguments) priorityWeight <span class="cov8" title="1">{ 15555 /* 15556 User Should give priorityWeight in this format(binpack.weight, binpack.cpu, binpack.memory). 15557 Support change the weight about cpu, memory and additional resource by arguments. 
15558 15559 actions: "enqueue, reclaim, allocate, backfill, preempt" 15560 tiers: 15561 - plugins: 15562 - name: binpack 15563 arguments: 15564 binpack.weight: 10 15565 binpack.cpu: 5 15566 binpack.memory: 1 15567 binpack.resources: nvidia.com/gpu, example.com/foo 15568 binpack.resources.nvidia.com/gpu: 2 15569 binpack.resources.example.com/foo: 3 15570 */ 15571 // Values are initialized to 1. 15572 weight := priorityWeight{ 15573 BinPackingWeight: 1, 15574 BinPackingCPU: 1, 15575 BinPackingMemory: 1, 15576 BinPackingResources: make(map[v1.ResourceName]int), 15577 } 15578 15579 // Checks whether binpack.weight is provided or not, if given, modifies the value in weight struct. 15580 args.GetInt(&weight.BinPackingWeight, BinpackWeight) 15581 // Checks whether binpack.cpu is provided or not, if given, modifies the value in weight struct. 15582 args.GetInt(&weight.BinPackingCPU, BinpackCPU) 15583 if weight.BinPackingCPU < 0 </span><span class="cov0" title="0">{ 15584 weight.BinPackingCPU = 1 15585 }</span> 15586 // Checks whether binpack.memory is provided or not, if given, modifies the value in weight struct. 
15587 <span class="cov8" title="1">args.GetInt(&weight.BinPackingMemory, BinpackMemory) 15588 if weight.BinPackingMemory < 0 </span><span class="cov0" title="0">{ 15589 weight.BinPackingMemory = 1 15590 }</span> 15591 15592 <span class="cov8" title="1">resourcesStr := args[BinpackResources] 15593 resources := strings.Split(resourcesStr, ",") 15594 for _, resource := range resources </span><span class="cov8" title="1">{ 15595 resource = strings.TrimSpace(resource) 15596 if resource == "" </span><span class="cov0" title="0">{ 15597 continue</span> 15598 } 15599 15600 // binpack.resources.[ResourceName] 15601 <span class="cov8" title="1">resourceKey := BinpackResourcesPrefix + resource 15602 resourceWeight := 1 15603 args.GetInt(&resourceWeight, resourceKey) 15604 if resourceWeight < 0 </span><span class="cov8" title="1">{ 15605 resourceWeight = 1 15606 }</span> 15607 <span class="cov8" title="1">weight.BinPackingResources[v1.ResourceName(resource)] = resourceWeight</span> 15608 } 15609 15610 <span class="cov8" title="1">return weight</span> 15611 } 15612 15613 func (bp *binpackPlugin) Name() string <span class="cov8" title="1">{ 15614 return PluginName 15615 }</span> 15616 15617 func (bp *binpackPlugin) OnSessionOpen(ssn *framework.Session) <span class="cov8" title="1">{ 15618 klog.V(4).Infof("Enter binpack plugin ...") 15619 if klog.V(4) </span><span class="cov0" title="0">{ 15620 defer func() </span><span class="cov0" title="0">{ 15621 klog.V(4).Infof("Leaving binpack plugin. 
%s ...", bp.weight.String()) 15622 }</span>() 15623 15624 <span class="cov0" title="0">notFoundResource := []string{} 15625 for resource := range bp.weight.BinPackingResources </span><span class="cov0" title="0">{ 15626 found := false 15627 for _, nodeInfo := range ssn.Nodes </span><span class="cov0" title="0">{ 15628 if nodeInfo.Allocatable.Get(resource) > 0 </span><span class="cov0" title="0">{ 15629 found = true 15630 break</span> 15631 } 15632 } 15633 <span class="cov0" title="0">if !found </span><span class="cov0" title="0">{ 15634 notFoundResource = append(notFoundResource, string(resource)) 15635 }</span> 15636 } 15637 <span class="cov0" title="0">klog.V(4).Infof("resources [%s] record in weight but not found on any node", strings.Join(notFoundResource, ", "))</span> 15638 } 15639 15640 <span class="cov8" title="1">nodeOrderFn := func(task *api.TaskInfo, node *api.NodeInfo) (float64, error) </span><span class="cov8" title="1">{ 15641 binPackingScore := BinPackingScore(task, node, bp.weight) 15642 15643 klog.V(4).Infof("Binpack score for Task %s/%s on node %s is: %v", task.Namespace, task.Name, node.Name, binPackingScore) 15644 return binPackingScore, nil 15645 }</span> 15646 <span class="cov8" title="1">if bp.weight.BinPackingWeight != 0 </span><span class="cov8" title="1">{ 15647 ssn.AddNodeOrderFn(bp.Name(), nodeOrderFn) 15648 }</span> else<span class="cov0" title="0"> { 15649 klog.Infof("binpack weight is zero, skip node order function") 15650 }</span> 15651 } 15652 15653 func (bp *binpackPlugin) OnSessionClose(ssn *framework.Session) {<span class="cov8" title="1"> 15654 }</span> 15655 15656 // BinPackingScore use the best fit polices during scheduling. 
15657 // Goals: 15658 // - Schedule Jobs using BestFit Policy using Resource Bin Packing Priority Function 15659 // - Reduce Fragmentation of scarce resources on the Cluster 15660 func BinPackingScore(task *api.TaskInfo, node *api.NodeInfo, weight priorityWeight) float64 <span class="cov8" title="1">{ 15661 score := 0.0 15662 weightSum := 0 15663 requested := task.Resreq 15664 allocatable := node.Allocatable 15665 used := node.Used 15666 15667 for _, resource := range requested.ResourceNames() </span><span class="cov8" title="1">{ 15668 request := requested.Get(resource) 15669 if request == 0 </span><span class="cov0" title="0">{ 15670 continue</span> 15671 } 15672 <span class="cov8" title="1">allocate := allocatable.Get(resource) 15673 nodeUsed := used.Get(resource) 15674 15675 resourceWeight := 0 15676 found := false 15677 switch resource </span>{ 15678 case v1.ResourceCPU:<span class="cov8" title="1"> 15679 resourceWeight = weight.BinPackingCPU 15680 found = true</span> 15681 case v1.ResourceMemory:<span class="cov8" title="1"> 15682 resourceWeight = weight.BinPackingMemory 15683 found = true</span> 15684 default:<span class="cov8" title="1"> 15685 resourceWeight, found = weight.BinPackingResources[resource]</span> 15686 } 15687 <span class="cov8" title="1">if !found </span><span class="cov8" title="1">{ 15688 continue</span> 15689 } 15690 15691 <span class="cov8" title="1">resourceScore := ResourceBinPackingScore(request, allocate, nodeUsed, resourceWeight) 15692 klog.V(5).Infof("task %s/%s on node %s resource %s, need %f, used %f, allocatable %f, weight %d, score %f", task.Namespace, task.Name, node.Name, resource, request, nodeUsed, allocate, resourceWeight, resourceScore) 15693 15694 score += resourceScore 15695 weightSum += resourceWeight</span> 15696 } 15697 15698 // mapping the result from [0, weightSum] to [0, 10(MaxPriority)] 15699 <span class="cov8" title="1">if weightSum > 0 </span><span class="cov8" title="1">{ 15700 score /= float64(weightSum) 15701 
}</span> 15702 <span class="cov8" title="1">score *= float64(v1alpha1.MaxNodeScore * int64(weight.BinPackingWeight)) 15703 15704 return score</span> 15705 } 15706 15707 // ResourceBinPackingScore calculate the binpack score for resource with provided info 15708 func ResourceBinPackingScore(requested, capacity, used float64, weight int) float64 <span class="cov8" title="1">{ 15709 if capacity == 0 || weight == 0 </span><span class="cov8" title="1">{ 15710 return 0 15711 }</span> 15712 15713 <span class="cov8" title="1">usedFinally := requested + used 15714 if usedFinally > capacity </span><span class="cov8" title="1">{ 15715 return 0 15716 }</span> 15717 15718 <span class="cov8" title="1">score := usedFinally * float64(weight) / capacity 15719 return score</span> 15720 } 15721 </pre> 15722 15723 <pre class="file" id="file70" style="display: none">/* 15724 Copyright 2018 The Kubernetes Authors. 15725 15726 Licensed under the Apache License, Version 2.0 (the "License"); 15727 you may not use this file except in compliance with the License. 15728 You may obtain a copy of the License at 15729 15730 http://www.apache.org/licenses/LICENSE-2.0 15731 15732 Unless required by applicable law or agreed to in writing, software 15733 distributed under the License is distributed on an "AS IS" BASIS, 15734 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15735 See the License for the specific language governing permissions and 15736 limitations under the License. 15737 */ 15738 15739 package drf 15740 15741 import ( 15742 "fmt" 15743 "math" 15744 "strconv" 15745 "strings" 15746 15747 v1 "k8s.io/api/core/v1" 15748 "k8s.io/klog" 15749 15750 "volcano.sh/volcano/pkg/scheduler/api" 15751 "volcano.sh/volcano/pkg/scheduler/api/helpers" 15752 "volcano.sh/volcano/pkg/scheduler/framework" 15753 "volcano.sh/volcano/pkg/scheduler/metrics" 15754 "volcano.sh/volcano/pkg/scheduler/plugins/util" 15755 ) 15756 15757 // PluginName indicates name of volcano scheduler plugin. 
15758 const PluginName = "drf" 15759 15760 var shareDelta = 0.000001 15761 15762 // hierarchicalNode represents the node hierarchy 15763 // and the corresponding weight and drf attribute 15764 type hierarchicalNode struct { 15765 parent *hierarchicalNode 15766 attr *drfAttr 15767 // If the node is a leaf node, 15768 // request represents the request of the job. 15769 request *api.Resource 15770 weight float64 15771 saturated bool 15772 hierarchy string 15773 children map[string]*hierarchicalNode 15774 } 15775 15776 func (node *hierarchicalNode) Clone(parent *hierarchicalNode) *hierarchicalNode <span class="cov0" title="0">{ 15777 newNode := &hierarchicalNode{ 15778 parent: parent, 15779 attr: &drfAttr{ 15780 share: node.attr.share, 15781 dominantResource: node.attr.dominantResource, 15782 allocated: node.attr.allocated.Clone(), 15783 }, 15784 request: node.request.Clone(), 15785 weight: node.weight, 15786 saturated: node.saturated, 15787 hierarchy: node.hierarchy, 15788 children: nil, 15789 } 15790 if node.children != nil </span><span class="cov0" title="0">{ 15791 newNode.children = map[string]*hierarchicalNode{} 15792 for _, child := range node.children </span><span class="cov0" title="0">{ 15793 newNode.children[child.hierarchy] = child.Clone(newNode) 15794 }</span> 15795 } 15796 <span class="cov0" title="0">return newNode</span> 15797 } 15798 15799 // resourceSaturated returns true if any resource of the job is saturated or the job demands fully allocated resource 15800 func resourceSaturated(allocated *api.Resource, 15801 jobRequest *api.Resource, demandingResources map[v1.ResourceName]bool) bool <span class="cov8" title="1">{ 15802 for _, rn := range allocated.ResourceNames() </span><span class="cov8" title="1">{ 15803 if allocated.Get(rn) != 0 && jobRequest.Get(rn) != 0 && 15804 allocated.Get(rn) >= jobRequest.Get(rn) </span><span class="cov0" title="0">{ 15805 return true 15806 }</span> 15807 <span class="cov8" title="1">if !demandingResources[rn] && 
jobRequest.Get(rn) != 0 </span><span class="cov8" title="1">{ 15808 return true 15809 }</span> 15810 } 15811 <span class="cov8" title="1">return false</span> 15812 } 15813 15814 type drfAttr struct { 15815 share float64 15816 dominantResource string 15817 allocated *api.Resource 15818 } 15819 15820 func (attr *drfAttr) String() string <span class="cov8" title="1">{ 15821 return fmt.Sprintf("dominant resource <%s>, dominant share %f, allocated %s", 15822 attr.dominantResource, attr.share, attr.allocated) 15823 }</span> 15824 15825 type drfPlugin struct { 15826 totalResource *api.Resource 15827 totalAllocated *api.Resource 15828 15829 // Key is Job ID 15830 jobAttrs map[api.JobID]*drfAttr 15831 15832 // map[namespaceName]->attr 15833 namespaceOpts map[string]*drfAttr 15834 15835 // hierarchical tree root 15836 hierarchicalRoot *hierarchicalNode 15837 15838 // Arguments given for the plugin 15839 pluginArguments framework.Arguments 15840 } 15841 15842 // New return drf plugin 15843 func New(arguments framework.Arguments) framework.Plugin <span class="cov8" title="1">{ 15844 return &drfPlugin{ 15845 totalResource: api.EmptyResource(), 15846 totalAllocated: api.EmptyResource(), 15847 jobAttrs: map[api.JobID]*drfAttr{}, 15848 namespaceOpts: map[string]*drfAttr{}, 15849 hierarchicalRoot: &hierarchicalNode{ 15850 attr: &drfAttr{allocated: api.EmptyResource()}, 15851 request: api.EmptyResource(), 15852 hierarchy: "root", 15853 weight: 1, 15854 children: map[string]*hierarchicalNode{}, 15855 }, 15856 pluginArguments: arguments, 15857 } 15858 }</span> 15859 15860 func (drf *drfPlugin) Name() string <span class="cov8" title="1">{ 15861 return PluginName 15862 }</span> 15863 15864 // HierarchyEnabled returns if hierarchy is enabled 15865 func (drf *drfPlugin) HierarchyEnabled(ssn *framework.Session) bool <span class="cov8" title="1">{ 15866 for _, tier := range ssn.Tiers </span><span class="cov8" title="1">{ 15867 for _, plugin := range tier.Plugins </span><span class="cov8" 
title="1">{ 15868 if plugin.Name != PluginName </span><span class="cov0" title="0">{ 15869 continue</span> 15870 } 15871 <span class="cov8" title="1">return plugin.EnabledHierarchy != nil && *plugin.EnabledHierarchy</span> 15872 } 15873 } 15874 <span class="cov0" title="0">return false</span> 15875 } 15876 15877 // NamespaceOrderEnabled returns the NamespaceOrder for this plugin is enabled in this session or not 15878 func (drf *drfPlugin) NamespaceOrderEnabled(ssn *framework.Session) bool <span class="cov8" title="1">{ 15879 for _, tier := range ssn.Tiers </span><span class="cov8" title="1">{ 15880 for _, plugin := range tier.Plugins </span><span class="cov8" title="1">{ 15881 if plugin.Name != PluginName </span><span class="cov0" title="0">{ 15882 continue</span> 15883 } 15884 <span class="cov8" title="1">return plugin.EnabledNamespaceOrder != nil && *plugin.EnabledNamespaceOrder</span> 15885 } 15886 } 15887 <span class="cov0" title="0">return false</span> 15888 } 15889 15890 func (drf *drfPlugin) compareQueues(root *hierarchicalNode, lqueue *api.QueueInfo, rqueue *api.QueueInfo) float64 <span class="cov8" title="1">{ 15891 lnode := root 15892 lpaths := strings.Split(lqueue.Hierarchy, "/") 15893 rnode := root 15894 rpaths := strings.Split(rqueue.Hierarchy, "/") 15895 depth := 0 15896 if len(lpaths) < len(rpaths) </span><span class="cov8" title="1">{ 15897 depth = len(lpaths) 15898 }</span> else<span class="cov8" title="1"> { 15899 depth = len(rpaths) 15900 }</span> 15901 <span class="cov8" title="1">for i := 0; i < depth; i++ </span><span class="cov8" title="1">{ 15902 // Saturated nodes have minumun prioirty, 15903 // so that demanding nodes will be poped first. 
15904 if !lnode.saturated && rnode.saturated </span><span class="cov0" title="0">{ 15905 return -1 15906 }</span> 15907 <span class="cov8" title="1">if lnode.saturated && !rnode.saturated </span><span class="cov0" title="0">{ 15908 return 1 15909 }</span> 15910 <span class="cov8" title="1">if lnode.attr.share/lnode.weight == rnode.attr.share/rnode.weight </span><span class="cov8" title="1">{ 15911 if i < depth-1 </span><span class="cov8" title="1">{ 15912 lnode = lnode.children[lpaths[i+1]] 15913 rnode = rnode.children[rpaths[i+1]] 15914 }</span> 15915 } else<span class="cov8" title="1"> { 15916 return lnode.attr.share/lnode.weight - rnode.attr.share/rnode.weight 15917 }</span> 15918 } 15919 <span class="cov8" title="1">return 0</span> 15920 } 15921 15922 func (drf *drfPlugin) OnSessionOpen(ssn *framework.Session) <span class="cov8" title="1">{ 15923 // Prepare scheduling data for this session. 15924 drf.totalResource.Add(ssn.TotalResource) 15925 15926 klog.V(4).Infof("Total Allocatable %s", drf.totalResource) 15927 15928 namespaceOrderEnabled := drf.NamespaceOrderEnabled(ssn) 15929 hierarchyEnabled := drf.HierarchyEnabled(ssn) 15930 15931 for _, job := range ssn.Jobs </span><span class="cov8" title="1">{ 15932 attr := &drfAttr{ 15933 allocated: api.EmptyResource(), 15934 } 15935 15936 for status, tasks := range job.TaskStatusIndex </span><span class="cov8" title="1">{ 15937 if api.AllocatedStatus(status) </span><span class="cov0" title="0">{ 15938 for _, t := range tasks </span><span class="cov0" title="0">{ 15939 attr.allocated.Add(t.Resreq) 15940 }</span> 15941 } 15942 } 15943 15944 // Calculate the init share of Job 15945 <span class="cov8" title="1">drf.updateJobShare(job.Namespace, job.Name, attr) 15946 15947 drf.jobAttrs[job.UID] = attr 15948 15949 if namespaceOrderEnabled </span><span class="cov0" title="0">{ 15950 nsOpts, found := drf.namespaceOpts[job.Namespace] 15951 if !found </span><span class="cov0" title="0">{ 15952 nsOpts = &drfAttr{ 15953 
allocated: api.EmptyResource(), 15954 } 15955 drf.namespaceOpts[job.Namespace] = nsOpts 15956 }</span> 15957 // all task in job should have the same namespace with job 15958 <span class="cov0" title="0">nsOpts.allocated.Add(attr.allocated) 15959 drf.updateNamespaceShare(job.Namespace, nsOpts)</span> 15960 } 15961 <span class="cov8" title="1">if hierarchyEnabled </span><span class="cov8" title="1">{ 15962 queue := ssn.Queues[job.Queue] 15963 drf.totalAllocated.Add(attr.allocated) 15964 drf.UpdateHierarchicalShare(drf.hierarchicalRoot, drf.totalAllocated, job, attr, queue.Hierarchy, queue.Weights) 15965 }</span> 15966 } 15967 15968 <span class="cov8" title="1">preemptableFn := func(preemptor *api.TaskInfo, preemptees []*api.TaskInfo) ([]*api.TaskInfo, int) </span><span class="cov0" title="0">{ 15969 var victims []*api.TaskInfo 15970 15971 addVictim := func(candidate *api.TaskInfo) </span><span class="cov0" title="0">{ 15972 victims = append(victims, candidate) 15973 }</span> 15974 15975 <span class="cov0" title="0">if namespaceOrderEnabled </span><span class="cov0" title="0">{ 15976 // apply the namespace share policy on preemptee firstly 15977 15978 lWeight := ssn.NamespaceInfo[api.NamespaceName(preemptor.Namespace)].GetWeight() 15979 lNsAtt := drf.namespaceOpts[preemptor.Namespace] 15980 lNsAlloc := lNsAtt.allocated.Clone().Add(preemptor.Resreq) 15981 _, lNsShare := drf.calculateShare(lNsAlloc, drf.totalResource) 15982 lNsShareWeighted := lNsShare / float64(lWeight) 15983 15984 namespaceAllocation := map[string]*api.Resource{} 15985 15986 // undecidedPreemptees means this policy could not judge preemptee is preemptable or not 15987 // and left it to next policy 15988 undecidedPreemptees := []*api.TaskInfo{} 15989 15990 for _, preemptee := range preemptees </span><span class="cov0" title="0">{ 15991 if preemptor.Namespace == preemptee.Namespace </span><span class="cov0" title="0">{ 15992 // policy is disabled when they are in the same namespace 15993 
undecidedPreemptees = append(undecidedPreemptees, preemptee) 15994 continue</span> 15995 } 15996 15997 // compute the preemptee namespace weighted share after preemption 15998 <span class="cov0" title="0">nsAllocation, found := namespaceAllocation[preemptee.Namespace] 15999 if !found </span><span class="cov0" title="0">{ 16000 rNsAtt := drf.namespaceOpts[preemptee.Namespace] 16001 nsAllocation = rNsAtt.allocated.Clone() 16002 namespaceAllocation[preemptee.Namespace] = nsAllocation 16003 }</span> 16004 <span class="cov0" title="0">rWeight := ssn.NamespaceInfo[api.NamespaceName(preemptee.Namespace)].GetWeight() 16005 rNsAlloc := nsAllocation.Sub(preemptee.Resreq) 16006 _, rNsShare := drf.calculateShare(rNsAlloc, drf.totalResource) 16007 rNsShareWeighted := rNsShare / float64(rWeight) 16008 16009 // to avoid ping pong actions, the preemptee namespace should 16010 // have the higher weighted share after preemption. 16011 if lNsShareWeighted < rNsShareWeighted </span><span class="cov0" title="0">{ 16012 addVictim(preemptee) 16013 continue</span> 16014 } 16015 <span class="cov0" title="0">if lNsShareWeighted-rNsShareWeighted > shareDelta </span><span class="cov0" title="0">{ 16016 continue</span> 16017 } 16018 16019 // equal namespace order leads to judgement of jobOrder 16020 <span class="cov0" title="0">undecidedPreemptees = append(undecidedPreemptees, preemptee)</span> 16021 } 16022 16023 <span class="cov0" title="0">preemptees = undecidedPreemptees</span> 16024 } 16025 16026 <span class="cov0" title="0">latt := drf.jobAttrs[preemptor.Job] 16027 lalloc := latt.allocated.Clone().Add(preemptor.Resreq) 16028 _, ls := drf.calculateShare(lalloc, drf.totalResource) 16029 16030 allocations := map[api.JobID]*api.Resource{} 16031 16032 for _, preemptee := range preemptees </span><span class="cov0" title="0">{ 16033 if _, found := allocations[preemptee.Job]; !found </span><span class="cov0" title="0">{ 16034 ratt := drf.jobAttrs[preemptee.Job] 16035 allocations[preemptee.Job] = 
ratt.allocated.Clone() 16036 }</span> 16037 <span class="cov0" title="0">ralloc := allocations[preemptee.Job].Sub(preemptee.Resreq) 16038 _, rs := drf.calculateShare(ralloc, drf.totalResource) 16039 16040 if ls < rs || math.Abs(ls-rs) <= shareDelta </span><span class="cov0" title="0">{ 16041 addVictim(preemptee) 16042 }</span> 16043 } 16044 16045 <span class="cov0" title="0">klog.V(4).Infof("Victims from DRF plugins are %+v", victims) 16046 16047 return victims, util.Permit</span> 16048 } 16049 16050 <span class="cov8" title="1">ssn.AddPreemptableFn(drf.Name(), preemptableFn) 16051 16052 if hierarchyEnabled </span><span class="cov8" title="1">{ 16053 queueOrderFn := func(l interface{}, r interface{}) int </span><span class="cov8" title="1">{ 16054 lv := l.(*api.QueueInfo) 16055 rv := r.(*api.QueueInfo) 16056 ret := drf.compareQueues(drf.hierarchicalRoot, lv, rv) 16057 if ret < 0 </span><span class="cov8" title="1">{ 16058 return -1 16059 }</span> 16060 <span class="cov8" title="1">if ret > 0 </span><span class="cov8" title="1">{ 16061 return 1 16062 }</span> 16063 <span class="cov8" title="1">return 0</span> 16064 } 16065 <span class="cov8" title="1">ssn.AddQueueOrderFn(drf.Name(), queueOrderFn) 16066 16067 reclaimFn := func(reclaimer *api.TaskInfo, reclaimees []*api.TaskInfo) ([]*api.TaskInfo, int) </span><span class="cov0" title="0">{ 16068 var victims []*api.TaskInfo 16069 // clone hdrf tree 16070 totalAllocated := drf.totalAllocated.Clone() 16071 root := drf.hierarchicalRoot.Clone(nil) 16072 16073 // update reclaimer hdrf 16074 ljob := ssn.Jobs[reclaimer.Job] 16075 lqueue := ssn.Queues[ljob.Queue] 16076 ljob = ljob.Clone() 16077 attr := drf.jobAttrs[ljob.UID] 16078 lattr := &drfAttr{ 16079 allocated: attr.allocated.Clone(), 16080 } 16081 lattr.allocated.Add(reclaimer.Resreq) 16082 totalAllocated.Add(reclaimer.Resreq) 16083 drf.updateShare(lattr) 16084 drf.UpdateHierarchicalShare(root, totalAllocated, ljob, lattr, lqueue.Hierarchy, lqueue.Weights) 16085 16086 
for _, preemptee := range reclaimees </span><span class="cov0" title="0">{ 16087 rjob := ssn.Jobs[preemptee.Job] 16088 rqueue := ssn.Queues[rjob.Queue] 16089 16090 // update hdrf of reclaimee job 16091 totalAllocated.Sub(preemptee.Resreq) 16092 rjob = rjob.Clone() 16093 attr := drf.jobAttrs[rjob.UID] 16094 rattr := &drfAttr{ 16095 allocated: attr.allocated.Clone(), 16096 } 16097 rattr.allocated.Sub(preemptee.Resreq) 16098 drf.updateShare(rattr) 16099 drf.UpdateHierarchicalShare(root, totalAllocated, rjob, rattr, rqueue.Hierarchy, rqueue.Weights) 16100 16101 // compare hdrf of queues 16102 ret := drf.compareQueues(root, lqueue, rqueue) 16103 16104 // resume hdrf of reclaimee job 16105 totalAllocated.Add(preemptee.Resreq) 16106 rattr.allocated.Add(preemptee.Resreq) 16107 drf.updateShare(rattr) 16108 drf.UpdateHierarchicalShare(root, totalAllocated, rjob, rattr, rqueue.Hierarchy, rqueue.Weights) 16109 16110 if ret < 0 </span><span class="cov0" title="0">{ 16111 victims = append(victims, preemptee) 16112 }</span> 16113 16114 <span class="cov0" title="0">if ret > shareDelta </span><span class="cov0" title="0">{ 16115 continue</span> 16116 } 16117 } 16118 16119 <span class="cov0" title="0">klog.V(4).Infof("Victims from HDRF plugins are %+v", victims) 16120 16121 return victims, util.Permit</span> 16122 } 16123 <span class="cov8" title="1">ssn.AddReclaimableFn(drf.Name(), reclaimFn)</span> 16124 } 16125 16126 <span class="cov8" title="1">jobOrderFn := func(l interface{}, r interface{}) int </span><span class="cov0" title="0">{ 16127 lv := l.(*api.JobInfo) 16128 rv := r.(*api.JobInfo) 16129 16130 klog.V(4).Infof("DRF JobOrderFn: <%v/%v> share state: %v, <%v/%v> share state: %v", 16131 lv.Namespace, lv.Name, drf.jobAttrs[lv.UID].share, rv.Namespace, rv.Name, drf.jobAttrs[rv.UID].share) 16132 16133 if drf.jobAttrs[lv.UID].share == drf.jobAttrs[rv.UID].share </span><span class="cov0" title="0">{ 16134 return 0 16135 }</span> 16136 16137 <span class="cov0" title="0">if 
drf.jobAttrs[lv.UID].share < drf.jobAttrs[rv.UID].share </span><span class="cov0" title="0">{ 16138 return -1 16139 }</span> 16140 16141 <span class="cov0" title="0">return 1</span> 16142 } 16143 16144 <span class="cov8" title="1">ssn.AddJobOrderFn(drf.Name(), jobOrderFn) 16145 16146 namespaceOrderFn := func(l interface{}, r interface{}) int </span><span class="cov0" title="0">{ 16147 lv := l.(api.NamespaceName) 16148 rv := r.(api.NamespaceName) 16149 16150 lOpt := drf.namespaceOpts[string(lv)] 16151 rOpt := drf.namespaceOpts[string(rv)] 16152 16153 lWeight := ssn.NamespaceInfo[lv].GetWeight() 16154 rWeight := ssn.NamespaceInfo[rv].GetWeight() 16155 16156 klog.V(4).Infof("DRF NamespaceOrderFn: <%v> share state: %f, weight %v, <%v> share state: %f, weight %v", 16157 lv, lOpt.share, lWeight, rv, rOpt.share, rWeight) 16158 16159 lWeightedShare := lOpt.share / float64(lWeight) 16160 rWeightedShare := rOpt.share / float64(rWeight) 16161 16162 metrics.UpdateNamespaceWeight(string(lv), lWeight) 16163 metrics.UpdateNamespaceWeight(string(rv), rWeight) 16164 metrics.UpdateNamespaceWeightedShare(string(lv), lWeightedShare) 16165 metrics.UpdateNamespaceWeightedShare(string(rv), rWeightedShare) 16166 16167 if lWeightedShare == rWeightedShare </span><span class="cov0" title="0">{ 16168 return 0 16169 }</span> 16170 16171 <span class="cov0" title="0">if lWeightedShare < rWeightedShare </span><span class="cov0" title="0">{ 16172 return -1 16173 }</span> 16174 16175 <span class="cov0" title="0">return 1</span> 16176 } 16177 16178 <span class="cov8" title="1">if namespaceOrderEnabled </span><span class="cov0" title="0">{ 16179 ssn.AddNamespaceOrderFn(drf.Name(), namespaceOrderFn) 16180 }</span> 16181 16182 // Register event handlers. 
16183 <span class="cov8" title="1">ssn.AddEventHandler(&framework.EventHandler{ 16184 AllocateFunc: func(event *framework.Event) </span><span class="cov8" title="1">{ 16185 attr := drf.jobAttrs[event.Task.Job] 16186 attr.allocated.Add(event.Task.Resreq) 16187 16188 job := ssn.Jobs[event.Task.Job] 16189 drf.updateJobShare(job.Namespace, job.Name, attr) 16190 16191 nsShare := -1.0 16192 if namespaceOrderEnabled </span><span class="cov0" title="0">{ 16193 nsOpt := drf.namespaceOpts[event.Task.Namespace] 16194 nsOpt.allocated.Add(event.Task.Resreq) 16195 16196 drf.updateNamespaceShare(event.Task.Namespace, nsOpt) 16197 nsShare = nsOpt.share 16198 }</span> 16199 <span class="cov8" title="1">if hierarchyEnabled </span><span class="cov8" title="1">{ 16200 queue := ssn.Queues[job.Queue] 16201 16202 drf.totalAllocated.Add(event.Task.Resreq) 16203 drf.UpdateHierarchicalShare(drf.hierarchicalRoot, drf.totalAllocated, job, attr, queue.Hierarchy, queue.Weights) 16204 }</span> 16205 16206 <span class="cov8" title="1">klog.V(4).Infof("DRF AllocateFunc: task <%v/%v>, resreq <%v>, share <%v>, namespace share <%v>", 16207 event.Task.Namespace, event.Task.Name, event.Task.Resreq, attr.share, nsShare)</span> 16208 }, 16209 DeallocateFunc: func(event *framework.Event) <span class="cov0" title="0">{ 16210 attr := drf.jobAttrs[event.Task.Job] 16211 attr.allocated.Sub(event.Task.Resreq) 16212 16213 job := ssn.Jobs[event.Task.Job] 16214 drf.updateJobShare(job.Namespace, job.Name, attr) 16215 16216 nsShare := -1.0 16217 if namespaceOrderEnabled </span><span class="cov0" title="0">{ 16218 nsOpt := drf.namespaceOpts[event.Task.Namespace] 16219 nsOpt.allocated.Sub(event.Task.Resreq) 16220 16221 drf.updateNamespaceShare(event.Task.Namespace, nsOpt) 16222 nsShare = nsOpt.share 16223 }</span> 16224 16225 <span class="cov0" title="0">if hierarchyEnabled </span><span class="cov0" title="0">{ 16226 queue := ssn.Queues[job.Queue] 16227 drf.totalAllocated.Sub(event.Task.Resreq) 16228 
drf.UpdateHierarchicalShare(drf.hierarchicalRoot, drf.totalAllocated, job, attr, queue.Hierarchy, queue.Weights) 16229 }</span> 16230 16231 <span class="cov0" title="0">klog.V(4).Infof("DRF EvictFunc: task <%v/%v>, resreq <%v>, share <%v>, namespace share <%v>", 16232 event.Task.Namespace, event.Task.Name, event.Task.Resreq, attr.share, nsShare)</span> 16233 }, 16234 }) 16235 } 16236 16237 func (drf *drfPlugin) updateNamespaceShare(namespaceName string, attr *drfAttr) <span class="cov0" title="0">{ 16238 drf.updateShare(attr) 16239 metrics.UpdateNamespaceShare(namespaceName, attr.share) 16240 }</span> 16241 16242 // build hierarchy if the node does not exist 16243 func (drf *drfPlugin) buildHierarchy(root *hierarchicalNode, job *api.JobInfo, attr *drfAttr, 16244 hierarchy, hierarchicalWeights string) <span class="cov8" title="1">{ 16245 inode := root 16246 paths := strings.Split(hierarchy, "/") 16247 weights := strings.Split(hierarchicalWeights, "/") 16248 16249 for i := 1; i < len(paths); i++ </span><span class="cov8" title="1">{ 16250 if child, ok := inode.children[paths[i]]; ok </span><span class="cov8" title="1">{ 16251 inode = child 16252 }</span> else<span class="cov8" title="1"> { 16253 fweight, _ := strconv.ParseFloat(weights[i], 64) 16254 if fweight < 1 </span><span class="cov0" title="0">{ 16255 fweight = 1 16256 }</span> 16257 <span class="cov8" title="1">child = &hierarchicalNode{ 16258 weight: fweight, 16259 hierarchy: paths[i], 16260 request: api.EmptyResource(), 16261 attr: &drfAttr{ 16262 allocated: api.EmptyResource(), 16263 }, 16264 children: make(map[string]*hierarchicalNode), 16265 } 16266 klog.V(4).Infof("Node %s added to %s, weight %f", 16267 child.hierarchy, inode.hierarchy, fweight) 16268 inode.children[paths[i]] = child 16269 child.parent = inode 16270 inode = child</span> 16271 } 16272 } 16273 16274 <span class="cov8" title="1">child := &hierarchicalNode{ 16275 weight: 1, 16276 attr: attr, 16277 hierarchy: string(job.UID), 16278 request: 
job.TotalRequest.Clone(), 16279 children: nil, 16280 } 16281 inode.children[string(job.UID)] = child 16282 // update drf attribute bottom up 16283 klog.V(4).Infof("Job <%s/%s> added to %s, weights %s, attr %v, total request: %s", 16284 job.Namespace, job.Name, inode.hierarchy, hierarchicalWeights, child.attr, job.TotalRequest)</span> 16285 } 16286 16287 // updateNamespaceShare updates the node attribute recursively 16288 func (drf *drfPlugin) updateHierarchicalShare(node *hierarchicalNode, 16289 demandingResources map[v1.ResourceName]bool) <span class="cov8" title="1">{ 16290 if node.children == nil </span><span class="cov8" title="1">{ 16291 node.saturated = resourceSaturated(node.attr.allocated, 16292 node.request, demandingResources) 16293 klog.V(4).Infof("Update hierarchical node %s, share %f, dominant %s, resource %v, saturated: %t", 16294 node.hierarchy, node.attr.share, node.attr.dominantResource, node.attr.allocated, node.saturated) 16295 }</span> else<span class="cov8" title="1"> { 16296 var mdr float64 = 1 16297 // get minimun dominant resource share 16298 for _, child := range node.children </span><span class="cov8" title="1">{ 16299 drf.updateHierarchicalShare(child, demandingResources) 16300 // skip empty child and saturated child 16301 if child.attr.share != 0 && !child.saturated </span><span class="cov8" title="1">{ 16302 _, resShare := drf.calculateShare(child.attr.allocated, drf.totalResource) 16303 if resShare < mdr </span><span class="cov8" title="1">{ 16304 mdr = resShare 16305 }</span> 16306 } 16307 } 16308 16309 <span class="cov8" title="1">node.attr.allocated = api.EmptyResource() 16310 saturated := true 16311 for _, child := range node.children </span><span class="cov8" title="1">{ 16312 if !child.saturated </span><span class="cov8" title="1">{ 16313 saturated = false 16314 }</span> 16315 // only consider non-empty children 16316 <span class="cov8" title="1">if child.attr.share != 0 </span><span class="cov8" title="1">{ 16317 // saturated 
child is not scaled 16318 if child.saturated </span><span class="cov8" title="1">{ 16319 t := child.attr.allocated 16320 node.attr.allocated.Add(t) 16321 }</span> else<span class="cov8" title="1"> { 16322 t := child.attr.allocated.Clone().Multi(mdr / child.attr.share) 16323 node.attr.allocated.Add(t) 16324 }</span> 16325 } 16326 } 16327 <span class="cov8" title="1">node.attr.dominantResource, node.attr.share = drf.calculateShare( 16328 node.attr.allocated, drf.totalResource) 16329 node.saturated = saturated 16330 klog.V(4).Infof("Update hierarchical node %s, share %f, dominant resource %s, resource %v, saturated: %t", 16331 node.hierarchy, node.attr.share, node.attr.dominantResource, node.attr.allocated, node.saturated)</span> 16332 } 16333 } 16334 16335 func (drf *drfPlugin) UpdateHierarchicalShare(root *hierarchicalNode, totalAllocated *api.Resource, job *api.JobInfo, attr *drfAttr, hierarchy, hierarchicalWeights string) <span class="cov8" title="1">{ 16336 // filter out demanding resources 16337 demandingResources := map[v1.ResourceName]bool{} 16338 for _, rn := range drf.totalResource.ResourceNames() </span><span class="cov8" title="1">{ 16339 if totalAllocated.Get(rn) < drf.totalResource.Get(rn) </span><span class="cov8" title="1">{ 16340 demandingResources[rn] = true 16341 }</span> 16342 } 16343 <span class="cov8" title="1">drf.buildHierarchy(root, job, attr, hierarchy, hierarchicalWeights) 16344 drf.updateHierarchicalShare(root, demandingResources)</span> 16345 } 16346 16347 func (drf *drfPlugin) updateJobShare(jobNs, jobName string, attr *drfAttr) <span class="cov8" title="1">{ 16348 drf.updateShare(attr) 16349 metrics.UpdateJobShare(jobNs, jobName, attr.share) 16350 }</span> 16351 16352 func (drf *drfPlugin) updateShare(attr *drfAttr) <span class="cov8" title="1">{ 16353 attr.dominantResource, attr.share = drf.calculateShare(attr.allocated, drf.totalResource) 16354 }</span> 16355 16356 func (drf *drfPlugin) calculateShare(allocated, totalResource 
*api.Resource) (string, float64) <span class="cov8" title="1">{ 16357 res := float64(0) 16358 dominantResource := "" 16359 for _, rn := range totalResource.ResourceNames() </span><span class="cov8" title="1">{ 16360 share := helpers.Share(allocated.Get(rn), totalResource.Get(rn)) 16361 if share > res </span><span class="cov8" title="1">{ 16362 res = share 16363 dominantResource = string(rn) 16364 }</span> 16365 } 16366 16367 <span class="cov8" title="1">return dominantResource, res</span> 16368 } 16369 16370 func (drf *drfPlugin) OnSessionClose(session *framework.Session) <span class="cov8" title="1">{ 16371 // Clean schedule data. 16372 drf.totalResource = api.EmptyResource() 16373 drf.totalAllocated = api.EmptyResource() 16374 drf.jobAttrs = map[api.JobID]*drfAttr{} 16375 }</span> 16376 </pre> 16377 16378 <pre class="file" id="file71" style="display: none">/* 16379 Copyright 2021 The Volcano Authors. 16380 16381 Licensed under the Apache License, Version 2.0 (the "License"); 16382 you may not use this file except in compliance with the License. 16383 You may obtain a copy of the License at 16384 16385 http://www.apache.org/licenses/LICENSE-2.0 16386 16387 Unless required by applicable law or agreed to in writing, software 16388 distributed under the License is distributed on an "AS IS" BASIS, 16389 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16390 See the License for the specific language governing permissions and 16391 limitations under the License. 
16392 */ 16393 16394 package policy 16395 16396 import ( 16397 v1 "k8s.io/api/core/v1" 16398 "k8s.io/kubernetes/pkg/kubelet/cm/cpuset" 16399 "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask" 16400 16401 batch "volcano.sh/apis/pkg/apis/batch/v1alpha1" 16402 nodeinfov1alpha1 "volcano.sh/apis/pkg/apis/nodeinfo/v1alpha1" 16403 "volcano.sh/volcano/pkg/scheduler/api" 16404 ) 16405 16406 // TopologyHint is a struct containing the NUMANodeAffinity for a Container 16407 type TopologyHint struct { 16408 NUMANodeAffinity bitmask.BitMask 16409 // Preferred is set to true when the NUMANodeAffinity encodes a preferred 16410 // allocation for the Container. It is set to false otherwise. 16411 Preferred bool 16412 } 16413 16414 // Policy is an interface for topology manager policy 16415 type Policy interface { 16416 // Predicate Get the best hit. 16417 Predicate(providersHints []map[string][]TopologyHint) (TopologyHint, bool) 16418 } 16419 16420 // HintProvider is an interface for components that want to collaborate to 16421 // achieve globally optimal concrete resource alignment with respect to 16422 // NUMA locality. 16423 type HintProvider interface { 16424 // Name returns provider name used for register and logging. 
16425 Name() string 16426 // GetTopologyHints returns hints if this hint provider has a preference, 16427 GetTopologyHints(container *v1.Container, topoInfo *api.NumatopoInfo, resNumaSets api.ResNumaSets) map[string][]TopologyHint 16428 Allocate(container *v1.Container, bestHit *TopologyHint, topoInfo *api.NumatopoInfo, resNumaSets api.ResNumaSets) map[string]cpuset.CPUSet 16429 } 16430 16431 // GetPolicy return the interface matched the input task topology config 16432 func GetPolicy(node *api.NodeInfo, numaNodes []int) Policy <span class="cov0" title="0">{ 16433 switch batch.NumaPolicy(node.NumaSchedulerInfo.Policies[nodeinfov1alpha1.TopologyManagerPolicy]) </span>{ 16434 case batch.None:<span class="cov0" title="0"> 16435 return NewPolicyNone(numaNodes)</span> 16436 case batch.BestEffort:<span class="cov0" title="0"> 16437 return NewPolicyBestEffort(numaNodes)</span> 16438 case batch.Restricted:<span class="cov0" title="0"> 16439 return NewPolicyRestricted(numaNodes)</span> 16440 case batch.SingleNumaNode:<span class="cov0" title="0"> 16441 return NewPolicySingleNumaNode(numaNodes)</span> 16442 } 16443 16444 <span class="cov0" title="0">return &policyNone{}</span> 16445 } 16446 16447 // AccumulateProvidersHints return all TopologyHint collection from different providers 16448 func AccumulateProvidersHints(container *v1.Container, 16449 topoInfo *api.NumatopoInfo, resNumaSets api.ResNumaSets, 16450 hintProviders []HintProvider) (providersHints []map[string][]TopologyHint) <span class="cov0" title="0">{ 16451 for _, provider := range hintProviders </span><span class="cov0" title="0">{ 16452 hints := provider.GetTopologyHints(container, topoInfo, resNumaSets) 16453 providersHints = append(providersHints, hints) 16454 }</span> 16455 16456 <span class="cov0" title="0">return providersHints</span> 16457 } 16458 16459 // Allocate return all resource assignment collection from different providers 16460 func Allocate(container *v1.Container, bestHit *TopologyHint, 16461 
topoInfo *api.NumatopoInfo, resNumaSets api.ResNumaSets, hintProviders []HintProvider) map[string]cpuset.CPUSet <span class="cov0" title="0">{ 16462 allResAlloc := make(map[string]cpuset.CPUSet) 16463 for _, provider := range hintProviders </span><span class="cov0" title="0">{ 16464 resAlloc := provider.Allocate(container, bestHit, topoInfo, resNumaSets) 16465 for resName, assign := range resAlloc </span><span class="cov0" title="0">{ 16466 allResAlloc[resName] = assign 16467 }</span> 16468 } 16469 16470 <span class="cov0" title="0">return allResAlloc</span> 16471 } 16472 </pre> 16473 16474 <pre class="file" id="file72" style="display: none">/* 16475 Copyright 2021 The Volcano Authors. 16476 16477 Licensed under the Apache License, Version 2.0 (the "License"); 16478 you may not use this file except in compliance with the License. 16479 You may obtain a copy of the License at 16480 16481 http://www.apache.org/licenses/LICENSE-2.0 16482 16483 Unless required by applicable law or agreed to in writing, software 16484 distributed under the License is distributed on an "AS IS" BASIS, 16485 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16486 See the License for the specific language governing permissions and 16487 limitations under the License. 16488 */ 16489 16490 package policy 16491 16492 import ( 16493 "k8s.io/klog" 16494 "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask" 16495 ) 16496 16497 func filterProvidersHints(providersHints []map[string][]TopologyHint) [][]TopologyHint <span class="cov8" title="1">{ 16498 var allProviderHints [][]TopologyHint 16499 for _, hints := range providersHints </span><span class="cov8" title="1">{ 16500 // If hints is nil, insert a single, preferred any-numa hint into allProviderHints. 
16501 if len(hints) == 0 </span><span class="cov0" title="0">{ 16502 klog.Infof("[numatopo] Hint Provider has no preference for NUMA affinity with any resource") 16503 allProviderHints = append(allProviderHints, []TopologyHint{{nil, true}}) 16504 continue</span> 16505 } 16506 16507 // Otherwise, accumulate the hints for each resource type into allProviderHints. 16508 <span class="cov8" title="1">for resource := range hints </span><span class="cov8" title="1">{ 16509 if hints[resource] == nil </span><span class="cov0" title="0">{ 16510 klog.Infof("[numatopo] Hint Provider has no preference for NUMA affinity with resource '%s'", resource) 16511 allProviderHints = append(allProviderHints, []TopologyHint{{nil, true}}) 16512 continue</span> 16513 } 16514 16515 <span class="cov8" title="1">if len(hints[resource]) == 0 </span><span class="cov0" title="0">{ 16516 klog.Infof("[numatopo] Hint Provider has no possible NUMA affinities for resource '%s'", resource) 16517 allProviderHints = append(allProviderHints, []TopologyHint{{nil, false}}) 16518 continue</span> 16519 } 16520 16521 <span class="cov8" title="1">allProviderHints = append(allProviderHints, hints[resource])</span> 16522 } 16523 } 16524 <span class="cov8" title="1">return allProviderHints</span> 16525 } 16526 16527 func mergeFilteredHints(numaNodes []int, filteredHints [][]TopologyHint) TopologyHint <span class="cov8" title="1">{ 16528 // Set the default affinity as an any-numa affinity containing the list 16529 // of NUMA Nodes available on this machine. 16530 defaultAffinity, _ := bitmask.NewBitMask(numaNodes...) 16531 16532 // Set the bestHint to return from this function as {nil false}. 16533 // This will only be returned if no better hint can be found when 16534 // merging hints from each hint provider. 
16535 bestHint := TopologyHint{defaultAffinity, false} 16536 iterateAllProviderTopologyHints(filteredHints, func(permutation []TopologyHint) </span><span class="cov8" title="1">{ 16537 // Get the NUMANodeAffinity from each hint in the permutation and see if any 16538 // of them encode unpreferred allocations. 16539 mergedHint := mergePermutation(numaNodes, permutation) 16540 // Only consider mergedHints that result in a NUMANodeAffinity > 0 to 16541 // replace the current bestHint. 16542 if mergedHint.NUMANodeAffinity.Count() == 0 </span><span class="cov8" title="1">{ 16543 return 16544 }</span> 16545 16546 // If the current bestHint is non-preferred and the new mergedHint is 16547 // preferred, always choose the preferred hint over the non-preferred one. 16548 <span class="cov8" title="1">if mergedHint.Preferred && !bestHint.Preferred </span><span class="cov8" title="1">{ 16549 bestHint = mergedHint 16550 return 16551 }</span> 16552 16553 // If the current bestHint is preferred and the new mergedHint is 16554 // non-preferred, never update bestHint, regardless of mergedHint's 16555 // narrowness. 16556 <span class="cov8" title="1">if !mergedHint.Preferred && bestHint.Preferred </span><span class="cov8" title="1">{ 16557 return 16558 }</span> 16559 16560 // If mergedHint and bestHint have the same preference, only consider 16561 // mergedHints that have a narrower NUMANodeAffinity than the 16562 // NUMANodeAffinity in the current bestHint. 16563 <span class="cov8" title="1">if !mergedHint.NUMANodeAffinity.IsNarrowerThan(bestHint.NUMANodeAffinity) </span><span class="cov8" title="1">{ 16564 return 16565 }</span> 16566 16567 // In all other cases, update bestHint to the current mergedHint 16568 <span class="cov8" title="1">bestHint = mergedHint</span> 16569 }) 16570 16571 <span class="cov8" title="1">return bestHint</span> 16572 } 16573 16574 // Iterate over all permutations of hints in 'allProviderHints [][]TopologyHint'.
16575 // 16576 // This procedure is implemented as a recursive function over the set of hints 16577 // in 'allProviderHints[i]'. It applies the function 'callback' to each 16578 // permutation as it is found. It is the equivalent of: 16579 // 16580 // for i := 0; i < len(providerHints[0]); i++ 16581 // for j := 0; j < len(providerHints[1]); j++ 16582 // for k := 0; k < len(providerHints[2]); k++ 16583 // ... 16584 // for z := 0; z < len(providerHints[-1]); z++ 16585 // permutation := []TopologyHint{ 16586 // providerHints[0][i], 16587 // providerHints[1][j], 16588 // providerHints[2][k], 16589 // ... 16590 // providerHints[-1][z] 16591 // } 16592 // callback(permutation) 16593 func iterateAllProviderTopologyHints(allProviderHints [][]TopologyHint, callback func([]TopologyHint)) <span class="cov8" title="1">{ 16594 // Internal helper function to accumulate the permutation before calling the callback. 16595 var iterate func(i int, accum []TopologyHint) 16596 iterate = func(i int, accum []TopologyHint) </span><span class="cov8" title="1">{ 16597 // Base case: we have looped through all providers and have a full permutation. 16598 if i == len(allProviderHints) </span><span class="cov8" title="1">{ 16599 callback(accum) 16600 return 16601 }</span> 16602 16603 // Loop through all hints for provider 'i', and recurse to build the 16604 // permutation of this hint with all hints from providers 'i+1'. 16605 <span class="cov8" title="1">for j := range allProviderHints[i] </span><span class="cov8" title="1">{ 16606 iterate(i+1, append(accum, allProviderHints[i][j])) 16607 }</span> 16608 } 16609 <span class="cov8" title="1">iterate(0, []TopologyHint{})</span> 16610 } 16611 16612 // Merge a TopologyHints permutation to a single hint by performing a bitwise-AND 16613 // of their affinity masks. The hint shall be preferred if all hints in the permutation 16614 // are preferred.
16615 func mergePermutation(numaNodes []int, permutation []TopologyHint) TopologyHint <span class="cov8" title="1">{ 16616 // Get the NUMANodeAffinity from each hint in the permutation and see if any 16617 // of them encode unpreferred allocations. 16618 preferred := true 16619 defaultAffinity, _ := bitmask.NewBitMask(numaNodes...) 16620 var numaAffinities []bitmask.BitMask 16621 for _, hint := range permutation </span><span class="cov8" title="1">{ 16622 // Only consider hints that have an actual NUMANodeAffinity set. 16623 if hint.NUMANodeAffinity == nil </span><span class="cov0" title="0">{ 16624 numaAffinities = append(numaAffinities, defaultAffinity) 16625 }</span> else<span class="cov8" title="1"> { 16626 numaAffinities = append(numaAffinities, hint.NUMANodeAffinity) 16627 }</span> 16628 16629 <span class="cov8" title="1">if !hint.Preferred </span><span class="cov8" title="1">{ 16630 preferred = false 16631 }</span> 16632 } 16633 16634 // Merge the affinities using a bitwise-and operation. 16635 <span class="cov8" title="1">mergedAffinity := bitmask.And(defaultAffinity, numaAffinities...) 16636 // Build a mergedHint from the merged affinity mask, indicating if an 16637 // preferred allocation was used to generate the affinity mask or not. 16638 return TopologyHint{mergedAffinity, preferred}</span> 16639 } 16640 </pre> 16641 16642 <pre class="file" id="file73" style="display: none">/* 16643 Copyright 2021 The Volcano Authors. 16644 16645 Licensed under the Apache License, Version 2.0 (the "License"); 16646 you may not use this file except in compliance with the License. 16647 You may obtain a copy of the License at 16648 16649 http://www.apache.org/licenses/LICENSE-2.0 16650 16651 Unless required by applicable law or agreed to in writing, software 16652 distributed under the License is distributed on an "AS IS" BASIS, 16653 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
16654 See the License for the specific language governing permissions and 16655 limitations under the License. 16656 */ 16657 16658 package policy 16659 16660 import "k8s.io/klog" 16661 16662 type policyBestEffort struct { 16663 numaNodes []int 16664 } 16665 16666 // NewPolicyBestEffort return a new policy interface 16667 func NewPolicyBestEffort(numaNodes []int) Policy <span class="cov8" title="1">{ 16668 return &policyBestEffort{numaNodes: numaNodes} 16669 }</span> 16670 16671 func (p *policyBestEffort) canAdmitPodResult(hint *TopologyHint) bool <span class="cov8" title="1">{ 16672 return true 16673 }</span> 16674 16675 func (p *policyBestEffort) Predicate(providersHints []map[string][]TopologyHint) (TopologyHint, bool) <span class="cov8" title="1">{ 16676 filteredProvidersHints := filterProvidersHints(providersHints) 16677 bestHint := mergeFilteredHints(p.numaNodes, filteredProvidersHints) 16678 admit := p.canAdmitPodResult(&bestHint) 16679 16680 klog.V(4).Infof("bestHint: %v admit %v\n", bestHint, admit) 16681 return bestHint, admit 16682 }</span> 16683 </pre> 16684 16685 <pre class="file" id="file74" style="display: none">/* 16686 Copyright 2021 The Volcano Authors. 16687 16688 Licensed under the Apache License, Version 2.0 (the "License"); 16689 you may not use this file except in compliance with the License. 16690 You may obtain a copy of the License at 16691 16692 http://www.apache.org/licenses/LICENSE-2.0 16693 16694 Unless required by applicable law or agreed to in writing, software 16695 distributed under the License is distributed on an "AS IS" BASIS, 16696 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16697 See the License for the specific language governing permissions and 16698 limitations under the License. 
16699 */ 16700 16701 package policy 16702 16703 type policyNone struct { 16704 numaNodes []int 16705 } 16706 16707 // NewPolicyNone return a new policy interface 16708 func NewPolicyNone(numaNodes []int) Policy <span class="cov0" title="0">{ 16709 return &policyNone{numaNodes: numaNodes} 16710 }</span> 16711 16712 func (policy *policyNone) canAdmitPodResult(hint *TopologyHint) bool <span class="cov0" title="0">{ 16713 return true 16714 }</span> 16715 16716 func (policy *policyNone) Predicate(providersHints []map[string][]TopologyHint) (TopologyHint, bool) <span class="cov0" title="0">{ 16717 return TopologyHint{}, policy.canAdmitPodResult(nil) 16718 }</span> 16719 </pre> 16720 16721 <pre class="file" id="file75" style="display: none">/* 16722 Copyright 2021 The Volcano Authors. 16723 16724 Licensed under the Apache License, Version 2.0 (the "License"); 16725 you may not use this file except in compliance with the License. 16726 You may obtain a copy of the License at 16727 16728 http://www.apache.org/licenses/LICENSE-2.0 16729 16730 Unless required by applicable law or agreed to in writing, software 16731 distributed under the License is distributed on an "AS IS" BASIS, 16732 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16733 See the License for the specific language governing permissions and 16734 limitations under the License. 
16735 */ 16736 16737 package policy 16738 16739 import "k8s.io/klog" 16740 16741 type policyRestricted struct { 16742 numaNodes []int 16743 } 16744 16745 // NewPolicyRestricted return a new policy interface 16746 func NewPolicyRestricted(numaNodes []int) Policy <span class="cov8" title="1">{ 16747 return &policyRestricted{numaNodes: numaNodes} 16748 }</span> 16749 16750 func (p *policyRestricted) canAdmitPodResult(hint *TopologyHint) bool <span class="cov8" title="1">{ 16751 return hint.Preferred 16752 }</span> 16753 16754 func (p *policyRestricted) Predicate(providersHints []map[string][]TopologyHint) (TopologyHint, bool) <span class="cov8" title="1">{ 16755 filteredHints := filterProvidersHints(providersHints) 16756 bestHint := mergeFilteredHints(p.numaNodes, filteredHints) 16757 admit := p.canAdmitPodResult(&bestHint) 16758 16759 klog.V(4).Infof("bestHint: %v admit %v\n", bestHint, admit) 16760 return bestHint, admit 16761 }</span> 16762 </pre> 16763 16764 <pre class="file" id="file76" style="display: none">/* 16765 Copyright 2021 The Volcano Authors. 16766 16767 Licensed under the Apache License, Version 2.0 (the "License"); 16768 you may not use this file except in compliance with the License. 16769 You may obtain a copy of the License at 16770 16771 http://www.apache.org/licenses/LICENSE-2.0 16772 16773 Unless required by applicable law or agreed to in writing, software 16774 distributed under the License is distributed on an "AS IS" BASIS, 16775 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16776 See the License for the specific language governing permissions and 16777 limitations under the License. 
16778 */ 16779 16780 package policy 16781 16782 import "k8s.io/klog" 16783 16784 type policySingleNumaNode struct { 16785 numaNodes []int 16786 } 16787 16788 // NewPolicySingleNumaNode return a new policy interface 16789 func NewPolicySingleNumaNode(numaNodes []int) Policy <span class="cov8" title="1">{ 16790 return &policySingleNumaNode{numaNodes: numaNodes} 16791 }</span> 16792 16793 func (policy *policySingleNumaNode) canAdmitPodResult(hint *TopologyHint) bool <span class="cov8" title="1">{ 16794 return hint.Preferred 16795 }</span> 16796 16797 // Return hints that have valid bitmasks with exactly one bit set. 16798 func filterSingleNumaHints(allResourcesHints [][]TopologyHint) [][]TopologyHint <span class="cov8" title="1">{ 16799 var filteredResourcesHints [][]TopologyHint 16800 for _, oneResourceHints := range allResourcesHints </span><span class="cov8" title="1">{ 16801 var filtered []TopologyHint 16802 for _, hint := range oneResourceHints </span><span class="cov8" title="1">{ 16803 if hint.NUMANodeAffinity == nil && hint.Preferred </span><span class="cov0" title="0">{ 16804 filtered = append(filtered, hint) 16805 }</span> 16806 <span class="cov8" title="1">if hint.NUMANodeAffinity != nil && hint.NUMANodeAffinity.Count() == 1 && hint.Preferred </span><span class="cov8" title="1">{ 16807 filtered = append(filtered, hint) 16808 }</span> 16809 } 16810 <span class="cov8" title="1">filteredResourcesHints = append(filteredResourcesHints, filtered)</span> 16811 } 16812 <span class="cov8" title="1">return filteredResourcesHints</span> 16813 } 16814 16815 func (policy *policySingleNumaNode) Predicate(providersHints []map[string][]TopologyHint) (TopologyHint, bool) <span class="cov8" title="1">{ 16816 filteredHints := filterProvidersHints(providersHints) 16817 singleNumaHints := filterSingleNumaHints(filteredHints) 16818 bestHint := mergeFilteredHints(policy.numaNodes, singleNumaHints) 16819 klog.V(4).Infof("bestHint: %v\n", bestHint) 16820 admit := 
policy.canAdmitPodResult(&bestHint) 16821 return bestHint, admit 16822 }</span> 16823 </pre> 16824 16825 <pre class="file" id="file77" style="display: none">/* 16826 Copyright 2021 The Volcano Authors. 16827 16828 Licensed under the Apache License, Version 2.0 (the "License"); 16829 you may not use this file except in compliance with the License. 16830 You may obtain a copy of the License at 16831 16832 http://www.apache.org/licenses/LICENSE-2.0 16833 16834 Unless required by applicable law or agreed to in writing, software 16835 distributed under the License is distributed on an "AS IS" BASIS, 16836 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16837 See the License for the specific language governing permissions and 16838 limitations under the License. 16839 */ 16840 16841 package cpumanager 16842 16843 import ( 16844 "fmt" 16845 "sort" 16846 16847 "k8s.io/klog" 16848 "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/topology" 16849 "k8s.io/kubernetes/pkg/kubelet/cm/cpuset" 16850 ) 16851 16852 type cpuAccumulator struct { 16853 topo *topology.CPUTopology 16854 details topology.CPUDetails 16855 numCPUsNeeded int 16856 result cpuset.CPUSet 16857 } 16858 16859 func newCPUAccumulator(topo *topology.CPUTopology, availableCPUs cpuset.CPUSet, numCPUs int) *cpuAccumulator <span class="cov8" title="1">{ 16860 return &cpuAccumulator{ 16861 topo: topo, 16862 details: topo.CPUDetails.KeepOnly(availableCPUs), 16863 numCPUsNeeded: numCPUs, 16864 result: cpuset.NewCPUSet(), 16865 } 16866 }</span> 16867 16868 func (a *cpuAccumulator) take(cpus cpuset.CPUSet) <span class="cov8" title="1">{ 16869 a.result = a.result.Union(cpus) 16870 a.details = a.details.KeepOnly(a.details.CPUs().Difference(a.result)) 16871 a.numCPUsNeeded -= cpus.Size() 16872 }</span> 16873 16874 // isSocketFree Returns true if the supplied socket is fully available in `topoDetails`. 
16875 func (a *cpuAccumulator) isSocketFree(socketID int) bool <span class="cov8" title="1">{ 16876 return a.details.CPUsInSockets(socketID).Size() == a.topo.CPUsPerSocket() 16877 }</span> 16878 16879 // isCoreFree Returns true if the supplied core is fully available in `topoDetails`. 16880 func (a *cpuAccumulator) isCoreFree(coreID int) bool <span class="cov8" title="1">{ 16881 return a.details.CPUsInCores(coreID).Size() == a.topo.CPUsPerCore() 16882 }</span> 16883 16884 // freeSockets Returns free socket IDs as a slice sorted by: 16885 // - socket ID, ascending. 16886 func (a *cpuAccumulator) freeSockets() []int <span class="cov8" title="1">{ 16887 return a.details.Sockets().Filter(a.isSocketFree).ToSlice() 16888 }</span> 16889 16890 // freeCores Returns core IDs as a slice sorted by: 16891 // - the number of whole available cores on the socket, ascending 16892 // - socket ID, ascending 16893 // - core ID, ascending 16894 func (a *cpuAccumulator) freeCores() []int <span class="cov8" title="1">{ 16895 socketIDs := a.details.Sockets().ToSliceNoSort() 16896 sort.Slice(socketIDs, 16897 func(i, j int) bool </span><span class="cov0" title="0">{ 16898 iCores := a.details.CoresInSockets(socketIDs[i]).Filter(a.isCoreFree) 16899 jCores := a.details.CoresInSockets(socketIDs[j]).Filter(a.isCoreFree) 16900 return iCores.Size() < jCores.Size() || socketIDs[i] < socketIDs[j] 16901 }</span>) 16902 16903 <span class="cov8" title="1">coreIDs := []int{} 16904 for _, s := range socketIDs </span><span class="cov8" title="1">{ 16905 coreIDs = append(coreIDs, a.details.CoresInSockets(s).Filter(a.isCoreFree).ToSlice()...) 16906 }</span> 16907 <span class="cov8" title="1">return coreIDs</span> 16908 } 16909 16910 // freeCPUs Returns CPU IDs as a slice sorted by: 16911 // - socket affinity with result 16912 // - number of CPUs available on the same socket 16913 // - number of CPUs available on the same core 16914 // - socket ID. 16915 // - core ID. 
16916 func (a *cpuAccumulator) freeCPUs() []int <span class="cov8" title="1">{ 16917 result := []int{} 16918 cores := a.details.Cores().ToSlice() 16919 16920 sort.Slice( 16921 cores, 16922 func(i, j int) bool </span><span class="cov8" title="1">{ 16923 iCore := cores[i] 16924 jCore := cores[j] 16925 16926 iCPUs := a.topo.CPUDetails.CPUsInCores(iCore).ToSlice() 16927 jCPUs := a.topo.CPUDetails.CPUsInCores(jCore).ToSlice() 16928 16929 iSocket := a.topo.CPUDetails[iCPUs[0]].SocketID 16930 jSocket := a.topo.CPUDetails[jCPUs[0]].SocketID 16931 16932 // Compute the number of CPUs in the result reside on the same socket 16933 // as each core. 16934 iSocketColoScore := a.topo.CPUDetails.CPUsInSockets(iSocket).Intersection(a.result).Size() 16935 jSocketColoScore := a.topo.CPUDetails.CPUsInSockets(jSocket).Intersection(a.result).Size() 16936 16937 // Compute the number of available CPUs available on the same socket 16938 // as each core. 16939 iSocketFreeScore := a.details.CPUsInSockets(iSocket).Size() 16940 jSocketFreeScore := a.details.CPUsInSockets(jSocket).Size() 16941 16942 // Compute the number of available CPUs on each core. 16943 iCoreFreeScore := a.details.CPUsInCores(iCore).Size() 16944 jCoreFreeScore := a.details.CPUsInCores(jCore).Size() 16945 16946 return iSocketColoScore > jSocketColoScore || 16947 iSocketFreeScore < jSocketFreeScore || 16948 iCoreFreeScore < jCoreFreeScore || 16949 iSocket < jSocket || 16950 iCore < jCore 16951 }</span>) 16952 16953 // For each core, append sorted CPU IDs to result. 16954 <span class="cov8" title="1">for _, core := range cores </span><span class="cov8" title="1">{ 16955 result = append(result, a.details.CPUsInCores(core).ToSlice()...) 
16956 }</span> 16957 <span class="cov8" title="1">return result</span> 16958 } 16959 16960 func (a *cpuAccumulator) needs(n int) bool <span class="cov8" title="1">{ 16961 return a.numCPUsNeeded >= n 16962 }</span> 16963 16964 func (a *cpuAccumulator) isSatisfied() bool <span class="cov8" title="1">{ 16965 return a.numCPUsNeeded < 1 16966 }</span> 16967 16968 func (a *cpuAccumulator) isFailed() bool <span class="cov8" title="1">{ 16969 return a.numCPUsNeeded > a.details.CPUs().Size() 16970 }</span> 16971 16972 // takeByTopology return the assigned cpuset 16973 func takeByTopology(topo *topology.CPUTopology, availableCPUs cpuset.CPUSet, numCPUs int) (cpuset.CPUSet, error) <span class="cov8" title="1">{ 16974 acc := newCPUAccumulator(topo, availableCPUs, numCPUs) 16975 if acc.isSatisfied() </span><span class="cov8" title="1">{ 16976 return acc.result, nil 16977 }</span> 16978 <span class="cov8" title="1">if acc.isFailed() </span><span class="cov8" title="1">{ 16979 return cpuset.NewCPUSet(), fmt.Errorf("not enough cpus available to satisfy request") 16980 }</span> 16981 16982 // Algorithm: topology-aware best-fit 16983 // 1. Acquire whole sockets, if available and the container requires at 16984 // least a socket's-worth of CPUs. 16985 <span class="cov8" title="1">if acc.needs(acc.topo.CPUsPerSocket()) </span><span class="cov8" title="1">{ 16986 for _, s := range acc.freeSockets() </span><span class="cov8" title="1">{ 16987 klog.V(4).Infof("[cpumanager] takeByTopology: claiming socket [%d]", s) 16988 acc.take(acc.details.CPUsInSockets(s)) 16989 if acc.isSatisfied() </span><span class="cov8" title="1">{ 16990 return acc.result, nil 16991 }</span> 16992 <span class="cov8" title="1">if !acc.needs(acc.topo.CPUsPerSocket()) </span><span class="cov8" title="1">{ 16993 break</span> 16994 } 16995 } 16996 } 16997 16998 // 2. Acquire whole cores, if available and the container requires at least 16999 // a core's-worth of CPUs. 
17000 <span class="cov8" title="1">if acc.needs(acc.topo.CPUsPerCore()) </span><span class="cov8" title="1">{ 17001 for _, c := range acc.freeCores() </span><span class="cov8" title="1">{ 17002 klog.V(4).Infof("[cpumanager] takeByTopology: claiming core [%d]", c) 17003 acc.take(acc.details.CPUsInCores(c)) 17004 if acc.isSatisfied() </span><span class="cov8" title="1">{ 17005 return acc.result, nil 17006 }</span> 17007 <span class="cov8" title="1">if !acc.needs(acc.topo.CPUsPerCore()) </span><span class="cov0" title="0">{ 17008 break</span> 17009 } 17010 } 17011 } 17012 17013 // 3. Acquire single threads, preferring to fill partially-allocated cores 17014 // on the same sockets as the whole cores we have already taken in this 17015 // allocation. 17016 <span class="cov8" title="1">for _, c := range acc.freeCPUs() </span><span class="cov8" title="1">{ 17017 klog.V(4).Infof("[cpumanager] takeByTopology: claiming CPU [%d]", c) 17018 if acc.needs(1) </span><span class="cov8" title="1">{ 17019 acc.take(cpuset.NewCPUSet(c)) 17020 }</span> 17021 <span class="cov8" title="1">if acc.isSatisfied() </span><span class="cov8" title="1">{ 17022 return acc.result, nil 17023 }</span> 17024 } 17025 17026 <span class="cov0" title="0">return cpuset.NewCPUSet(), fmt.Errorf("failed to allocate cpus")</span> 17027 } 17028 </pre> 17029 17030 <pre class="file" id="file78" style="display: none">/* 17031 Copyright 2021 The Volcano Authors. 17032 17033 Licensed under the Apache License, Version 2.0 (the "License"); 17034 you may not use this file except in compliance with the License. 17035 You may obtain a copy of the License at 17036 17037 http://www.apache.org/licenses/LICENSE-2.0 17038 17039 Unless required by applicable law or agreed to in writing, software 17040 distributed under the License is distributed on an "AS IS" BASIS, 17041 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
17042 See the License for the specific language governing permissions and 17043 limitations under the License. 17044 */ 17045 17046 package cpumanager 17047 17048 import ( 17049 "math" 17050 17051 v1 "k8s.io/api/core/v1" 17052 "k8s.io/klog" 17053 "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/topology" 17054 "k8s.io/kubernetes/pkg/kubelet/cm/cpuset" 17055 "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask" 17056 17057 "volcano.sh/volcano/pkg/scheduler/api" 17058 "volcano.sh/volcano/pkg/scheduler/plugins/numaaware/policy" 17059 ) 17060 17061 type cpuMng struct { 17062 } 17063 17064 // NewProvider returns a new provider 17065 func NewProvider() policy.HintProvider <span class="cov8" title="1">{ 17066 return &cpuMng{} 17067 }</span> 17068 17069 // Name returns the cpu manager name 17070 func (mng *cpuMng) Name() string <span class="cov0" title="0">{ 17071 return "cpuMng" 17072 }</span> 17073 17074 // guaranteedCPUs returns the integer number of CPUs requested by the container, or 0 if the request is not a whole number of CPUs 17075 func guaranteedCPUs(container *v1.Container) int <span class="cov8" title="1">{ 17076 cpuQuantity := container.Resources.Requests[v1.ResourceCPU] 17077 if cpuQuantity.Value()*1000 != cpuQuantity.MilliValue() </span><span class="cov0" title="0">{ 17078 return 0 17079 }</span> 17080 17081 <span class="cov8" title="1">return int(cpuQuantity.Value())</span> 17082 } 17083 17084 // generateCPUTopologyHints returns the numa topology hints based on 17085 // - availableCPUs 17086 func generateCPUTopologyHints(availableCPUs cpuset.CPUSet, CPUDetails topology.CPUDetails, request int) []policy.TopologyHint <span class="cov8" title="1">{ 17087 minAffinitySize := CPUDetails.NUMANodes().Size() 17088 hints := []policy.TopologyHint{} 17089 bitmask.IterateBitMasks(CPUDetails.NUMANodes().ToSlice(), func(mask bitmask.BitMask) </span><span class="cov8" title="1">{ 17090 // First, update minAffinitySize for the current request size.
17091 cpusInMask := CPUDetails.CPUsInNUMANodes(mask.GetBits()...).Size() 17092 if cpusInMask >= request && mask.Count() < minAffinitySize </span><span class="cov8" title="1">{ 17093 minAffinitySize = mask.Count() 17094 }</span> 17095 17096 // Then check to see if we have enough CPUs available on the current 17097 // numa node bitmask to satisfy the CPU request. 17098 <span class="cov8" title="1">numMatching := 0 17099 // Finally, check to see if enough available CPUs remain on the current 17100 // NUMA node combination to satisfy the CPU request. 17101 for _, c := range availableCPUs.ToSlice() </span><span class="cov8" title="1">{ 17102 if mask.IsSet(CPUDetails[c].NUMANodeID) </span><span class="cov8" title="1">{ 17103 numMatching++ 17104 }</span> 17105 } 17106 17107 // If they don't, then move onto the next combination. 17108 <span class="cov8" title="1">if numMatching < request </span><span class="cov8" title="1">{ 17109 return 17110 }</span> 17111 17112 // Otherwise, create a new hint from the numa node bitmask and add it to the 17113 // list of hints. We set all hint preferences to 'false' on the first 17114 // pass through. 17115 <span class="cov8" title="1">hints = append(hints, policy.TopologyHint{ 17116 NUMANodeAffinity: mask, 17117 Preferred: false, 17118 })</span> 17119 }) 17120 17121 // Loop back through all hints and update the 'Preferred' field based on 17122 // counting the number of bits sets in the affinity mask and comparing it 17123 // to the minAffinitySize. Only those with an equal number of bits set (and 17124 // with a minimal set of numa nodes) will be considered preferred. 
17125 <span class="cov8" title="1">for i := range hints </span><span class="cov8" title="1">{ 17126 if hints[i].NUMANodeAffinity.Count() == minAffinitySize </span><span class="cov8" title="1">{ 17127 hints[i].Preferred = true 17128 }</span> 17129 } 17130 17131 <span class="cov8" title="1">return hints</span> 17132 } 17133 17134 func (mng *cpuMng) GetTopologyHints(container *v1.Container, 17135 topoInfo *api.NumatopoInfo, resNumaSets api.ResNumaSets) map[string][]policy.TopologyHint <span class="cov8" title="1">{ 17136 if _, ok := container.Resources.Requests[v1.ResourceCPU]; !ok </span><span class="cov0" title="0">{ 17137 klog.Warningf("container %s has no cpu request", container.Name) 17138 return nil 17139 }</span> 17140 17141 <span class="cov8" title="1">requestNum := guaranteedCPUs(container) 17142 if requestNum == 0 </span><span class="cov0" title="0">{ 17143 klog.Warningf(" the cpu request isn't integer in container %s", container.Name) 17144 return nil 17145 }</span> 17146 17147 <span class="cov8" title="1">cputopo := &topology.CPUTopology{ 17148 NumCPUs: topoInfo.CPUDetail.CPUs().Size(), 17149 NumCores: topoInfo.CPUDetail.Cores().Size() * topoInfo.CPUDetail.Sockets().Size(), 17150 NumSockets: topoInfo.CPUDetail.Sockets().Size(), 17151 CPUDetails: topoInfo.CPUDetail, 17152 } 17153 17154 reserved := cpuset.NewCPUSet() 17155 reservedCPUs, ok := topoInfo.ResReserved[v1.ResourceCPU] 17156 if ok </span><span class="cov0" title="0">{ 17157 // Take the ceiling of the reservation, since fractional CPUs cannot be 17158 // exclusively allocated. 
17159 reservedCPUsFloat := float64(reservedCPUs.MilliValue()) / 1000 17160 numReservedCPUs := int(math.Ceil(reservedCPUsFloat)) 17161 reserved, _ = takeByTopology(cputopo, cputopo.CPUDetails.CPUs(), numReservedCPUs) 17162 klog.V(4).Infof("[cpumanager] reserve cpuset :%v", reserved) 17163 }</span> 17164 17165 <span class="cov8" title="1">availableCPUSet, ok := resNumaSets[string(v1.ResourceCPU)] 17166 if !ok </span><span class="cov0" title="0">{ 17167 klog.Warningf("no cpu resource") 17168 return nil 17169 }</span> 17170 17171 <span class="cov8" title="1">availableCPUSet = availableCPUSet.Difference(reserved) 17172 klog.V(4).Infof("requested: %d, availableCPUSet: %v", requestNum, availableCPUSet) 17173 return map[string][]policy.TopologyHint{ 17174 string(v1.ResourceCPU): generateCPUTopologyHints(availableCPUSet, topoInfo.CPUDetail, requestNum), 17175 }</span> 17176 } 17177 17178 func (mng *cpuMng) Allocate(container *v1.Container, bestHit *policy.TopologyHint, 17179 topoInfo *api.NumatopoInfo, resNumaSets api.ResNumaSets) map[string]cpuset.CPUSet <span class="cov8" title="1">{ 17180 cputopo := &topology.CPUTopology{ 17181 NumCPUs: topoInfo.CPUDetail.CPUs().Size(), 17182 NumCores: topoInfo.CPUDetail.Cores().Size() * topoInfo.CPUDetail.Sockets().Size(), 17183 NumSockets: topoInfo.CPUDetail.Sockets().Size(), 17184 CPUDetails: topoInfo.CPUDetail, 17185 } 17186 17187 reserved := cpuset.NewCPUSet() 17188 reservedCPUs, ok := topoInfo.ResReserved[v1.ResourceCPU] 17189 if ok </span><span class="cov0" title="0">{ 17190 // Take the ceiling of the reservation, since fractional CPUs cannot be 17191 // exclusively allocated. 
17192 reservedCPUsFloat := float64(reservedCPUs.MilliValue()) / 1000 17193 numReservedCPUs := int(math.Ceil(reservedCPUsFloat)) 17194 reserved, _ = takeByTopology(cputopo, cputopo.CPUDetails.CPUs(), numReservedCPUs) 17195 klog.V(3).Infof("[cpumanager] reserve cpuset :%v", reserved) 17196 }</span> 17197 17198 <span class="cov8" title="1">requestNum := guaranteedCPUs(container) 17199 availableCPUSet := resNumaSets[string(v1.ResourceCPU)] 17200 availableCPUSet = availableCPUSet.Difference(reserved) 17201 17202 klog.V(4).Infof("alignedCPUs: %v requestNum: %v bestHit %v", availableCPUSet, requestNum, bestHit) 17203 17204 result := cpuset.NewCPUSet() 17205 if bestHit.NUMANodeAffinity != nil </span><span class="cov8" title="1">{ 17206 alignedCPUs := cpuset.NewCPUSet() 17207 for _, numaNodeID := range bestHit.NUMANodeAffinity.GetBits() </span><span class="cov8" title="1">{ 17208 alignedCPUs = alignedCPUs.Union(availableCPUSet.Intersection(cputopo.CPUDetails.CPUsInNUMANodes(numaNodeID))) 17209 }</span> 17210 17211 <span class="cov8" title="1">numAlignedToAlloc := alignedCPUs.Size() 17212 if requestNum < numAlignedToAlloc </span><span class="cov8" title="1">{ 17213 numAlignedToAlloc = requestNum 17214 }</span> 17215 17216 <span class="cov8" title="1">alignedCPUs, err := takeByTopology(cputopo, alignedCPUs, numAlignedToAlloc) 17217 if err != nil </span><span class="cov0" title="0">{ 17218 return map[string]cpuset.CPUSet{ 17219 string(v1.ResourceCPU): cpuset.NewCPUSet(), 17220 } 17221 }</span> 17222 17223 <span class="cov8" title="1">result = result.Union(alignedCPUs)</span> 17224 } 17225 17226 // Get any remaining CPUs from what's leftover after attempting to grab aligned ones. 
17227 <span class="cov8" title="1">remainingCPUs, err := takeByTopology(cputopo, availableCPUSet.Difference(result), requestNum-result.Size()) 17228 if err != nil </span><span class="cov8" title="1">{ 17229 return map[string]cpuset.CPUSet{ 17230 string(v1.ResourceCPU): cpuset.NewCPUSet(), 17231 } 17232 }</span> 17233 17234 <span class="cov8" title="1">result = result.Union(remainingCPUs) 17235 17236 return map[string]cpuset.CPUSet{ 17237 string(v1.ResourceCPU): result, 17238 }</span> 17239 } 17240 </pre> 17241 17242 <pre class="file" id="file79" style="display: none">/* 17243 Copyright 2020 The Volcano Authors. 17244 17245 Licensed under the Apache License, Version 2.0 (the "License"); 17246 you may not use this file except in compliance with the License. 17247 You may obtain a copy of the License at 17248 17249 http://www.apache.org/licenses/LICENSE-2.0 17250 17251 Unless required by applicable law or agreed to in writing, software 17252 distributed under the License is distributed on an "AS IS" BASIS, 17253 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17254 See the License for the specific language governing permissions and 17255 limitations under the License. 
17256 */ 17257 17258 package predicates 17259 17260 import ( 17261 "fmt" 17262 "sync" 17263 17264 v1 "k8s.io/api/core/v1" 17265 "k8s.io/klog" 17266 17267 batch "volcano.sh/apis/pkg/apis/batch/v1alpha1" 17268 ) 17269 17270 type predicateCache struct { 17271 sync.RWMutex 17272 cache map[string]map[string]bool //key_1: nodename key_2:pod uid 17273 } 17274 17275 // predicateCacheNew return cache map 17276 func predicateCacheNew() *predicateCache <span class="cov8" title="1">{ 17277 return &predicateCache{ 17278 cache: make(map[string]map[string]bool), 17279 } 17280 }</span> 17281 17282 // getPodTemplateUID return pod template key 17283 func getPodTemplateUID(pod *v1.Pod) string <span class="cov0" title="0">{ 17284 uid, found := pod.Annotations[batch.PodTemplateKey] 17285 if !found </span><span class="cov0" title="0">{ 17286 return "" 17287 }</span> 17288 17289 <span class="cov0" title="0">return uid</span> 17290 } 17291 17292 // PredicateWithCache: check the predicate result existed in cache 17293 func (pc *predicateCache) PredicateWithCache(nodeName string, pod *v1.Pod) (bool, error) <span class="cov0" title="0">{ 17294 podTemplateUID := getPodTemplateUID(pod) 17295 if podTemplateUID == "" </span><span class="cov0" title="0">{ 17296 return false, fmt.Errorf("no anonation of volcano.sh/template-uid in pod %s", pod.Name) 17297 }</span> 17298 17299 <span class="cov0" title="0">pc.RLock() 17300 defer pc.RUnlock() 17301 if nodeCache, exist := pc.cache[nodeName]; exist </span><span class="cov0" title="0">{ 17302 if result, exist := nodeCache[podTemplateUID]; exist </span><span class="cov0" title="0">{ 17303 klog.V(4).Infof("Predicate node %s and pod %s result %v", nodeName, pod.Name, result) 17304 return result, nil 17305 }</span> 17306 } 17307 17308 <span class="cov0" title="0">return false, fmt.Errorf("no information of node %s and pod %s in predicate cache", nodeName, pod.Name)</span> 17309 } 17310 17311 // UpdateCache update cache data 17312 func (pc *predicateCache) 
UpdateCache(nodeName string, pod *v1.Pod, fit bool) <span class="cov0" title="0">{ 17313 podTemplateUID := getPodTemplateUID(pod) 17314 if podTemplateUID == "" </span><span class="cov0" title="0">{ 17315 klog.V(3).Infof("Don't find pod %s template uid", pod.Name) 17316 return 17317 }</span> 17318 17319 <span class="cov0" title="0">pc.Lock() 17320 defer pc.Unlock() 17321 17322 if _, exist := pc.cache[nodeName]; !exist </span><span class="cov0" title="0">{ 17323 podCache := make(map[string]bool) 17324 podCache[podTemplateUID] = fit 17325 pc.cache[nodeName] = podCache 17326 }</span> else<span class="cov0" title="0"> { 17327 pc.cache[nodeName][podTemplateUID] = fit 17328 }</span> 17329 } 17330 </pre> 17331 17332 <pre class="file" id="file80" style="display: none">/* 17333 Copyright 2020 The Kubernetes Authors. 17334 17335 Licensed under the Apache License, Version 2.0 (the "License"); 17336 you may not use this file except in compliance with the License. 17337 You may obtain a copy of the License at 17338 17339 http://www.apache.org/licenses/LICENSE-2.0 17340 17341 Unless required by applicable law or agreed to in writing, software 17342 distributed under the License is distributed on an "AS IS" BASIS, 17343 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17344 See the License for the specific language governing permissions and 17345 limitations under the License. 17346 */ 17347 17348 package predicates 17349 17350 import ( 17351 "fmt" 17352 17353 v1 "k8s.io/api/core/v1" 17354 17355 "volcano.sh/volcano/pkg/scheduler/api" 17356 ) 17357 17358 // checkNodeGPUSharingPredicate checks if a gpu sharing pod can be scheduled on a node. 
17359 func checkNodeGPUSharingPredicate(pod *v1.Pod, nodeInfo *api.NodeInfo) (bool, error) <span class="cov0" title="0">{ 17360 // no gpu sharing request 17361 if api.GetGPUResourceOfPod(pod) <= 0 </span><span class="cov0" title="0">{ 17362 return true, nil 17363 }</span> 17364 17365 <span class="cov0" title="0">id := predicateGPU(pod, nodeInfo) 17366 if id < 0 </span><span class="cov0" title="0">{ 17367 return false, fmt.Errorf("no enough gpu memory on single device of node %s", nodeInfo.Name) 17368 }</span> 17369 <span class="cov0" title="0">return true, nil</span> 17370 } 17371 17372 // predicateGPU returns the available GPU ID 17373 func predicateGPU(pod *v1.Pod, node *api.NodeInfo) int <span class="cov0" title="0">{ 17374 gpuRequest := api.GetGPUResourceOfPod(pod) 17375 allocatableGPUs := node.GetDevicesIdleGPUMemory() 17376 17377 for devID := 0; devID < len(allocatableGPUs); devID++ </span><span class="cov0" title="0">{ 17378 availableGPU, ok := allocatableGPUs[devID] 17379 if ok </span><span class="cov0" title="0">{ 17380 if availableGPU >= gpuRequest </span><span class="cov0" title="0">{ 17381 return devID 17382 }</span> 17383 } 17384 } 17385 17386 <span class="cov0" title="0">return -1</span> 17387 } 17388 </pre> 17389 17390 <pre class="file" id="file81" style="display: none">/* 17391 Copyright 2018 The Kubernetes Authors. 17392 17393 Licensed under the Apache License, Version 2.0 (the "License"); 17394 you may not use this file except in compliance with the License. 17395 You may obtain a copy of the License at 17396 17397 http://www.apache.org/licenses/LICENSE-2.0 17398 17399 Unless required by applicable law or agreed to in writing, software 17400 distributed under the License is distributed on an "AS IS" BASIS, 17401 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17402 See the License for the specific language governing permissions and 17403 limitations under the License. 
17404 */ 17405 17406 package predicates 17407 17408 import ( 17409 "context" 17410 "fmt" 17411 "strings" 17412 17413 v1 "k8s.io/api/core/v1" 17414 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 17415 "k8s.io/apimachinery/pkg/types" 17416 "k8s.io/klog" 17417 "k8s.io/kubernetes/pkg/scheduler/apis/config" 17418 "k8s.io/kubernetes/pkg/scheduler/framework/plugins/interpodaffinity" 17419 "k8s.io/kubernetes/pkg/scheduler/framework/plugins/nodeaffinity" 17420 "k8s.io/kubernetes/pkg/scheduler/framework/plugins/nodeports" 17421 "k8s.io/kubernetes/pkg/scheduler/framework/plugins/nodeunschedulable" 17422 "k8s.io/kubernetes/pkg/scheduler/framework/plugins/tainttoleration" 17423 k8sframework "k8s.io/kubernetes/pkg/scheduler/framework/v1alpha1" 17424 17425 "volcano.sh/volcano/pkg/scheduler/api" 17426 "volcano.sh/volcano/pkg/scheduler/framework" 17427 "volcano.sh/volcano/pkg/scheduler/plugins/util" 17428 "volcano.sh/volcano/pkg/scheduler/plugins/util/k8s" 17429 ) 17430 17431 const ( 17432 // PluginName indicates name of volcano scheduler plugin. 17433 PluginName = "predicates" 17434 17435 // GPUSharingPredicate is the key for enabling GPU Sharing Predicate in YAML 17436 GPUSharingPredicate = "predicate.GPUSharingEnable" 17437 17438 // CachePredicate control cache predicate feature 17439 CachePredicate = "predicate.CacheEnable" 17440 17441 // ProportionalPredicate is the key for enabling Proportional Predicate in YAML 17442 ProportionalPredicate = "predicate.ProportionalEnable" 17443 // ProportionalResource is the key for additional resource key name 17444 ProportionalResource = "predicate.resources" 17445 // ProportionalResourcesPrefix is the key prefix for additional resource key name 17446 ProportionalResourcesPrefix = ProportionalResource + "." 
17447 ) 17448 17449 type predicatesPlugin struct { 17450 // Arguments given for the plugin 17451 pluginArguments framework.Arguments 17452 } 17453 17454 // New return predicate plugin 17455 func New(arguments framework.Arguments) framework.Plugin <span class="cov8" title="1">{ 17456 return &predicatesPlugin{pluginArguments: arguments} 17457 }</span> 17458 17459 func (pp *predicatesPlugin) Name() string <span class="cov8" title="1">{ 17460 return PluginName 17461 }</span> 17462 17463 type baseResource struct { 17464 CPU float64 17465 Memory float64 17466 } 17467 17468 type predicateEnable struct { 17469 gpuSharingEnable bool 17470 cacheEnable bool 17471 proportionalEnable bool 17472 proportional map[v1.ResourceName]baseResource 17473 } 17474 17475 func enablePredicate(args framework.Arguments) predicateEnable <span class="cov8" title="1">{ 17476 /* 17477 User Should give predicatesEnable in this format(predicate.GPUSharingEnable). 17478 Currently supported only GPUSharing predicate checks. 17479 17480 actions: "reclaim, allocate, backfill, preempt" 17481 tiers: 17482 - plugins: 17483 - name: priority 17484 - name: gang 17485 - name: conformance 17486 - plugins: 17487 - name: drf 17488 - name: predicates 17489 arguments: 17490 predicate.GPUSharingEnable: true 17491 predicate.CacheEnable: true 17492 predicate.ProportionalEnable: true 17493 predicate.resources: nvidia.com/gpu 17494 predicate.resources.nvidia.com/gpu.cpu: 4 17495 predicate.resources.nvidia.com/gpu.memory: 8 17496 - name: proportion 17497 - name: nodeorder 17498 */ 17499 17500 predicate := predicateEnable{ 17501 gpuSharingEnable: false, 17502 cacheEnable: false, 17503 proportionalEnable: false, 17504 } 17505 17506 // Checks whether predicate.GPUSharingEnable is provided or not, if given, modifies the value in predicateEnable struct. 
17507 args.GetBool(&predicate.gpuSharingEnable, GPUSharingPredicate) 17508 args.GetBool(&predicate.cacheEnable, CachePredicate) 17509 // Checks whether predicate.ProportionalEnable is provided or not, if given, modifies the value in predicateEnable struct. 17510 args.GetBool(&predicate.proportionalEnable, ProportionalPredicate) 17511 resourcesProportional := make(map[v1.ResourceName]baseResource) 17512 resourcesStr := args[ProportionalResource] 17513 resources := strings.Split(resourcesStr, ",") 17514 for _, resource := range resources </span><span class="cov8" title="1">{ 17515 resource = strings.TrimSpace(resource) 17516 if resource == "" </span><span class="cov8" title="1">{ 17517 continue</span> 17518 } 17519 // proportional.resources.[ResourceName] 17520 <span class="cov0" title="0">cpuResourceKey := ProportionalResourcesPrefix + resource + ".cpu" 17521 cpuResourceRate := 1.0 17522 args.GetFloat64(&cpuResourceRate, cpuResourceKey) 17523 if cpuResourceRate < 0 </span><span class="cov0" title="0">{ 17524 cpuResourceRate = 1.0 17525 }</span> 17526 <span class="cov0" title="0">memoryResourceKey := ProportionalResourcesPrefix + resource + ".memory" 17527 memoryResourceRate := 1.0 17528 args.GetFloat64(&memoryResourceRate, memoryResourceKey) 17529 if memoryResourceRate < 0 </span><span class="cov0" title="0">{ 17530 memoryResourceRate = 1.0 17531 }</span> 17532 <span class="cov0" title="0">r := baseResource{ 17533 CPU: cpuResourceRate, 17534 Memory: memoryResourceRate, 17535 } 17536 resourcesProportional[v1.ResourceName(resource)] = r</span> 17537 } 17538 <span class="cov8" title="1">predicate.proportional = resourcesProportional 17539 17540 return predicate</span> 17541 } 17542 17543 func (pp *predicatesPlugin) OnSessionOpen(ssn *framework.Session) <span class="cov8" title="1">{ 17544 pl := util.NewPodListerFromNode(ssn) 17545 nodeMap := util.GenerateNodeMapAndSlice(ssn.Nodes) 17546 17547 pCache := predicateCacheNew() 17548 predicate := 
enablePredicate(pp.pluginArguments) 17549 17550 kubeClient := ssn.KubeClient() 17551 // Register event handlers to update task info in PodLister & nodeMap 17552 ssn.AddEventHandler(&framework.EventHandler{ 17553 AllocateFunc: func(event *framework.Event) </span><span class="cov8" title="1">{ 17554 pod := pl.UpdateTask(event.Task, event.Task.NodeName) 17555 17556 nodeName := event.Task.NodeName 17557 node, found := nodeMap[nodeName] 17558 if !found </span><span class="cov0" title="0">{ 17559 klog.Errorf("predicates, update pod %s/%s allocate to NOT EXIST node [%s]", pod.Namespace, pod.Name, nodeName) 17560 return 17561 }</span> 17562 17563 <span class="cov8" title="1">if predicate.gpuSharingEnable && api.GetGPUResourceOfPod(pod) > 0 </span><span class="cov0" title="0">{ 17564 nodeInfo, ok := ssn.Nodes[nodeName] 17565 if !ok </span><span class="cov0" title="0">{ 17566 klog.Errorf("Failed to get node %s info from cache", nodeName) 17567 return 17568 }</span> 17569 17570 <span class="cov0" title="0">id := predicateGPU(pod, nodeInfo) 17571 if id < 0 </span><span class="cov0" title="0">{ 17572 klog.Errorf("The node %s can't place the pod %s in ns %s", pod.Spec.NodeName, pod.Name, pod.Namespace) 17573 return 17574 }</span> 17575 <span class="cov0" title="0">dev, ok := nodeInfo.GPUDevices[id] 17576 if !ok </span><span class="cov0" title="0">{ 17577 klog.Errorf("Failed to get GPU %d from node %s", id, nodeName) 17578 return 17579 }</span> 17580 <span class="cov0" title="0">patch := api.AddGPUIndexPatch(id) 17581 pod, err := kubeClient.CoreV1().Pods(pod.Namespace).Patch(context.TODO(), pod.Name, types.JSONPatchType, []byte(patch), metav1.PatchOptions{}) 17582 if err != nil </span><span class="cov0" title="0">{ 17583 klog.Errorf("Patch pod %s failed with patch %s: %v", pod.Name, patch, err) 17584 return 17585 }</span> 17586 <span class="cov0" title="0">dev.PodMap[string(pod.UID)] = pod 17587 klog.V(4).Infof("predicates with gpu sharing, update pod %s/%s allocate to node 
[%s]", pod.Namespace, pod.Name, nodeName)</span> 17588 } 17589 17590 <span class="cov8" title="1">node.AddPod(pod) 17591 klog.V(4).Infof("predicates, update pod %s/%s allocate to node [%s]", pod.Namespace, pod.Name, nodeName)</span> 17592 }, 17593 DeallocateFunc: func(event *framework.Event) <span class="cov8" title="1">{ 17594 pod := pl.UpdateTask(event.Task, "") 17595 nodeName := event.Task.NodeName 17596 node, found := nodeMap[nodeName] 17597 if !found </span><span class="cov0" title="0">{ 17598 klog.Errorf("predicates, update pod %s/%s allocate from NOT EXIST node [%s]", pod.Namespace, pod.Name, nodeName) 17599 return 17600 }</span> 17601 17602 <span class="cov8" title="1">if predicate.gpuSharingEnable && api.GetGPUResourceOfPod(pod) > 0 </span><span class="cov0" title="0">{ 17603 // deallocate pod gpu id 17604 id := api.GetGPUIndex(pod) 17605 patch := api.RemoveGPUIndexPatch() 17606 _, err := kubeClient.CoreV1().Pods(pod.Namespace).Patch(context.TODO(), pod.Name, types.JSONPatchType, []byte(patch), metav1.PatchOptions{}) 17607 if err != nil </span><span class="cov0" title="0">{ 17608 klog.Errorf("Patch pod %s failed with patch %s: %v", pod.Name, patch, err) 17609 return 17610 }</span> 17611 17612 <span class="cov0" title="0">nodeInfo, ok := ssn.Nodes[nodeName] 17613 if !ok </span><span class="cov0" title="0">{ 17614 klog.Errorf("Failed to get node %s info from cache", nodeName) 17615 return 17616 }</span> 17617 <span class="cov0" title="0">if dev, ok := nodeInfo.GPUDevices[id]; ok </span><span class="cov0" title="0">{ 17618 delete(dev.PodMap, string(pod.UID)) 17619 }</span> 17620 17621 <span class="cov0" title="0">klog.V(4).Infof("predicates with gpu sharing, update pod %s/%s deallocate from node [%s]", pod.Namespace, pod.Name, nodeName)</span> 17622 } 17623 17624 <span class="cov8" title="1">err := node.RemovePod(pod) 17625 if err != nil </span><span class="cov0" title="0">{ 17626 klog.Errorf("predicates, remove pod %s/%s from node [%s] error: %v", 
pod.Namespace, pod.Name, nodeName, err) 17627 return 17628 }</span> 17629 <span class="cov8" title="1">klog.V(4).Infof("predicates, update pod %s/%s deallocate from node [%s]", pod.Namespace, pod.Name, nodeName)</span> 17630 }, 17631 }) 17632 17633 // Initialize k8s plugins 17634 // TODO: Add more predicates, k8s.io/kubernetes/pkg/scheduler/framework/plugins/legacy_registry.go 17635 <span class="cov8" title="1">handle := k8s.NewFrameworkHandle(nodeMap, ssn.KubeClient(), ssn.InformerFactory()) 17636 // 1. NodeUnschedulable 17637 plugin, _ := nodeunschedulable.New(nil, handle) 17638 nodeUnscheduleFilter := plugin.(*nodeunschedulable.NodeUnschedulable) 17639 // 2. NodeAffinity 17640 plugin, _ = nodeaffinity.New(nil, handle) 17641 nodeAffinityFilter := plugin.(*nodeaffinity.NodeAffinity) 17642 // 3. NodePorts 17643 plugin, _ = nodeports.New(nil, handle) 17644 nodePortFilter := plugin.(*nodeports.NodePorts) 17645 // 4. TaintToleration 17646 plugin, _ = tainttoleration.New(nil, handle) 17647 tolerationFilter := plugin.(*tainttoleration.TaintToleration) 17648 // 5. 
InterPodAffinity 17649 plArgs := &config.InterPodAffinityArgs{} 17650 plugin, _ = interpodaffinity.New(plArgs, handle) 17651 podAffinityFilter := plugin.(*interpodaffinity.InterPodAffinity) 17652 17653 ssn.AddPredicateFn(pp.Name(), func(task *api.TaskInfo, node *api.NodeInfo) error </span><span class="cov8" title="1">{ 17654 nodeInfo, found := nodeMap[node.Name] 17655 if !found </span><span class="cov0" title="0">{ 17656 return fmt.Errorf("failed to predicates, node info for %s not found", node.Name) 17657 }</span> 17658 17659 <span class="cov8" title="1">if node.Allocatable.MaxTaskNum <= len(nodeInfo.Pods) </span><span class="cov0" title="0">{ 17660 klog.V(4).Infof("NodePodNumber predicates Task <%s/%s> on Node <%s> failed", 17661 task.Namespace, task.Name, node.Name) 17662 return api.NewFitError(task, node, api.NodePodNumberExceeded) 17663 }</span> 17664 17665 <span class="cov8" title="1">state := k8sframework.NewCycleState() 17666 predicateByStablefilter := func(pod *v1.Pod, nodeInfo *k8sframework.NodeInfo) (bool, error) </span><span class="cov8" title="1">{ 17667 // CheckNodeUnschedulable 17668 status := nodeUnscheduleFilter.Filter(context.TODO(), state, task.Pod, nodeInfo) 17669 if !status.IsSuccess() </span><span class="cov0" title="0">{ 17670 return false, fmt.Errorf("plugin %s predicates failed %s", nodeunschedulable.Name, status.Message()) 17671 }</span> 17672 17673 // Check NodeAffinity 17674 <span class="cov8" title="1">status = nodeAffinityFilter.Filter(context.TODO(), state, task.Pod, nodeInfo) 17675 if !status.IsSuccess() </span><span class="cov8" title="1">{ 17676 return false, fmt.Errorf("plugin %s predicates failed %s", nodeaffinity.Name, status.Message()) 17677 }</span> 17678 17679 // PodToleratesNodeTaints: TaintToleration 17680 <span class="cov8" title="1">status = tolerationFilter.Filter(context.TODO(), state, task.Pod, nodeInfo) 17681 if !status.IsSuccess() </span><span class="cov0" title="0">{ 17682 return false, fmt.Errorf("plugin %s 
predicates failed %s", tainttoleration.Name, status.Message()) 17683 }</span> 17684 17685 <span class="cov8" title="1">return true, nil</span> 17686 } 17687 17688 // Check PredicateWithCache 17689 <span class="cov8" title="1">{ 17690 var err error 17691 var fit bool 17692 if predicate.cacheEnable </span><span class="cov0" title="0">{ 17693 fit, err = pCache.PredicateWithCache(node.Name, task.Pod) 17694 if err != nil </span><span class="cov0" title="0">{ 17695 fit, err = predicateByStablefilter(task.Pod, nodeInfo) 17696 pCache.UpdateCache(node.Name, task.Pod, fit) 17697 }</span> else<span class="cov0" title="0"> { 17698 if !fit </span><span class="cov0" title="0">{ 17699 err = fmt.Errorf("plugin equivalence cache predicates failed") 17700 }</span> 17701 } 17702 } else<span class="cov8" title="1"> { 17703 fit, err = predicateByStablefilter(task.Pod, nodeInfo) 17704 }</span> 17705 17706 <span class="cov8" title="1">if !fit </span><span class="cov8" title="1">{ 17707 return err 17708 }</span> 17709 } 17710 17711 // Check NodePorts 17712 <span class="cov8" title="1">nodePortFilter.PreFilter(context.TODO(), state, task.Pod) 17713 status := nodePortFilter.Filter(context.TODO(), state, nil, nodeInfo) 17714 if !status.IsSuccess() </span><span class="cov0" title="0">{ 17715 return fmt.Errorf("plugin %s predicates failed %s", nodeaffinity.Name, status.Message()) 17716 }</span> 17717 17718 // InterPodAffinity Predicate 17719 <span class="cov8" title="1">status = podAffinityFilter.PreFilter(context.TODO(), state, task.Pod) 17720 if !status.IsSuccess() </span><span class="cov0" title="0">{ 17721 return fmt.Errorf("plugin %s pre-predicates failed %s", interpodaffinity.Name, status.Message()) 17722 }</span> 17723 17724 <span class="cov8" title="1">status = podAffinityFilter.Filter(context.TODO(), state, task.Pod, nodeInfo) 17725 if !status.IsSuccess() </span><span class="cov0" title="0">{ 17726 return fmt.Errorf("plugin %s predicates failed %s", interpodaffinity.Name, 
status.Message()) 17727 }</span> 17728 17729 <span class="cov8" title="1">if predicate.gpuSharingEnable </span><span class="cov0" title="0">{ 17730 // CheckGPUSharingPredicate 17731 fit, err := checkNodeGPUSharingPredicate(task.Pod, node) 17732 if err != nil </span><span class="cov0" title="0">{ 17733 return err 17734 }</span> 17735 17736 <span class="cov0" title="0">klog.V(4).Infof("checkNodeGPUSharingPredicate predicates Task <%s/%s> on Node <%s>: fit %v", 17737 task.Namespace, task.Name, node.Name, fit)</span> 17738 } 17739 <span class="cov8" title="1">if predicate.proportionalEnable </span><span class="cov0" title="0">{ 17740 // Check ProportionalPredicate 17741 fit, err := checkNodeResourceIsProportional(task, node, predicate.proportional) 17742 if err != nil </span><span class="cov0" title="0">{ 17743 return err 17744 }</span> 17745 <span class="cov0" title="0">klog.V(4).Infof("checkNodeResourceIsProportional predicates Task <%s/%s> on Node <%s>: fit %v", 17746 task.Namespace, task.Name, node.Name, fit)</span> 17747 } 17748 <span class="cov8" title="1">return nil</span> 17749 }) 17750 } 17751 17752 func (pp *predicatesPlugin) OnSessionClose(ssn *framework.Session) {<span class="cov8" title="1">}</span> 17753 </pre> 17754 17755 <pre class="file" id="file82" style="display: none">/* 17756 Copyright 2018 The Kubernetes Authors. 17757 17758 Licensed under the Apache License, Version 2.0 (the "License"); 17759 you may not use this file except in compliance with the License. 17760 You may obtain a copy of the License at 17761 17762 http://www.apache.org/licenses/LICENSE-2.0 17763 17764 Unless required by applicable law or agreed to in writing, software 17765 distributed under the License is distributed on an "AS IS" BASIS, 17766 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17767 See the License for the specific language governing permissions and 17768 limitations under the License. 
17769 */ 17770 17771 package predicates 17772 17773 import ( 17774 "fmt" 17775 17776 v1 "k8s.io/api/core/v1" 17777 17778 "volcano.sh/volcano/pkg/scheduler/api" 17779 ) 17780 17781 // checkNodeResourceIsProportional checks if a gpu:cpu:memory is Proportional 17782 func checkNodeResourceIsProportional(task *api.TaskInfo, node *api.NodeInfo, proportional map[v1.ResourceName]baseResource) (bool, error) <span class="cov8" title="1">{ 17783 for resourceName := range proportional </span><span class="cov8" title="1">{ 17784 if value, found := task.Resreq.ScalarResources[resourceName]; found && value > 0 </span><span class="cov8" title="1">{ 17785 return true, nil 17786 }</span> 17787 } 17788 <span class="cov8" title="1">for resourceName, resourceRate := range proportional </span><span class="cov8" title="1">{ 17789 if value, found := node.Idle.ScalarResources[resourceName]; found </span><span class="cov8" title="1">{ 17790 cpuReserved := value * resourceRate.CPU 17791 memoryReserved := value * resourceRate.Memory * 1000 * 1000 17792 r := node.Idle.Clone() 17793 r = r.Sub(task.Resreq) 17794 if r.MilliCPU < cpuReserved || r.Memory < memoryReserved </span><span class="cov8" title="1">{ 17795 return false, fmt.Errorf("proportional of resource %s check failed", resourceName) 17796 }</span> 17797 } 17798 } 17799 <span class="cov8" title="1">return true, nil</span> 17800 } 17801 </pre> 17802 17803 <pre class="file" id="file83" style="display: none">/* 17804 Copyright 2021 The Volcano Authors. 17805 17806 Licensed under the Apache License, Version 2.0 (the "License"); 17807 you may not use this file except in compliance with the License. 17808 You may obtain a copy of the License at 17809 17810 http://www.apache.org/licenses/LICENSE-2.0 17811 17812 Unless required by applicable law or agreed to in writing, software 17813 distributed under the License is distributed on an "AS IS" BASIS, 17814 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
17815 See the License for the specific language governing permissions and 17816 limitations under the License. 17817 */ 17818 17819 package tasktopology 17820 17821 import ( 17822 "k8s.io/apimachinery/pkg/types" 17823 "k8s.io/klog" 17824 17825 "volcano.sh/volcano/pkg/scheduler/api" 17826 ) 17827 17828 type reqAction int 17829 17830 const ( 17831 reqSub reqAction = iota 17832 reqAdd 17833 ) 17834 17835 // Bucket is struct used to classify tasks by affinity and anti-affinity 17836 type Bucket struct { 17837 index int 17838 tasks map[types.UID]*api.TaskInfo 17839 taskNameSet map[string]int 17840 17841 // reqScore is score of resource 17842 // now, we regard 1 CPU and 1 GPU and 1Gi memory as the same score. 17843 reqScore float64 17844 request *api.Resource 17845 17846 boundTask int 17847 node map[string]int 17848 } 17849 17850 // NewBucket create a new empty bucket 17851 func NewBucket() *Bucket <span class="cov0" title="0">{ 17852 return &Bucket{ 17853 index: 0, 17854 tasks: make(map[types.UID]*api.TaskInfo), 17855 taskNameSet: make(map[string]int), 17856 17857 reqScore: 0, 17858 request: api.EmptyResource(), 17859 17860 boundTask: 0, 17861 node: make(map[string]int), 17862 } 17863 }</span> 17864 17865 // CalcResReq calculates task resources request 17866 func (b *Bucket) CalcResReq(req *api.Resource, action reqAction) <span class="cov0" title="0">{ 17867 if req == nil </span><span class="cov0" title="0">{ 17868 return 17869 }</span> 17870 17871 <span class="cov0" title="0">cpu := req.MilliCPU 17872 // treat 1Mi the same as 1m cpu 1m gpu 17873 mem := req.Memory / 1024 / 1024 17874 score := cpu + mem 17875 for _, request := range req.ScalarResources </span><span class="cov0" title="0">{ 17876 score += request 17877 }</span> 17878 17879 <span class="cov0" title="0">switch action </span>{ 17880 case reqSub:<span class="cov0" title="0"> 17881 b.reqScore -= score 17882 b.request.Sub(req)</span> 17883 case reqAdd:<span class="cov0" title="0"> 17884 b.reqScore += score 
17885 b.request.Add(req)</span> 17886 default:<span class="cov0" title="0"> 17887 klog.V(3).Infof("Invalid action <%v> for resource <%v>", action, req)</span> 17888 } 17889 } 17890 17891 // AddTask adds task into bucket 17892 func (b *Bucket) AddTask(taskName string, task *api.TaskInfo) <span class="cov0" title="0">{ 17893 b.taskNameSet[taskName]++ 17894 if task.NodeName != "" </span><span class="cov0" title="0">{ 17895 b.node[task.NodeName]++ 17896 b.boundTask++ 17897 return 17898 }</span> 17899 17900 <span class="cov0" title="0">b.tasks[task.Pod.UID] = task 17901 b.CalcResReq(task.Resreq, reqAdd)</span> 17902 } 17903 17904 // TaskBound binds task to bucket 17905 func (b *Bucket) TaskBound(task *api.TaskInfo) <span class="cov0" title="0">{ 17906 b.node[task.NodeName]++ 17907 b.boundTask++ 17908 17909 delete(b.tasks, task.Pod.UID) 17910 b.CalcResReq(task.Resreq, reqSub) 17911 }</span> 17912 </pre> 17913 17914 <pre class="file" id="file84" style="display: none">/* 17915 Copyright 2021 The Volcano Authors. 17916 17917 Licensed under the Apache License, Version 2.0 (the "License"); 17918 you may not use this file except in compliance with the License. 17919 You may obtain a copy of the License at 17920 17921 http://www.apache.org/licenses/LICENSE-2.0 17922 17923 Unless required by applicable law or agreed to in writing, software 17924 distributed under the License is distributed on an "AS IS" BASIS, 17925 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17926 See the License for the specific language governing permissions and 17927 limitations under the License. 
17928 */ 17929 17930 package tasktopology 17931 17932 import ( 17933 "fmt" 17934 "math" 17935 "sort" 17936 "strings" 17937 17938 "k8s.io/apimachinery/pkg/types" 17939 "k8s.io/klog" 17940 17941 "volcano.sh/volcano/pkg/scheduler/api" 17942 ) 17943 17944 type topologyType int 17945 17946 const ( 17947 selfAntiAffinity topologyType = iota 17948 interAntiAffinity 17949 selfAffinity 17950 interAffinity 17951 ) 17952 17953 // map[topologyType]priority, the larger number means the higher priority 17954 var affinityPriority = map[topologyType]int{ 17955 selfAntiAffinity: 4, 17956 interAffinity: 3, 17957 selfAffinity: 2, 17958 interAntiAffinity: 1, 17959 } 17960 17961 // JobManager is struct used to save infos about affinity and buckets of a job 17962 type JobManager struct { 17963 jobID api.JobID 17964 17965 buckets []*Bucket 17966 podInBucket map[types.UID]int 17967 podInTask map[types.UID]string 17968 taskOverPod map[string]map[types.UID]struct{} 17969 17970 taskAffinityPriority map[string]int // [taskName] -> priority 17971 taskExistOrder map[string]int 17972 interAffinity map[string]map[string]struct{} // [taskName]->[taskName] 17973 selfAffinity map[string]struct{} 17974 interAntiAffinity map[string]map[string]struct{} // [taskName]->[taskName] 17975 selfAntiAffinity map[string]struct{} 17976 17977 bucketMaxSize int 17978 nodeTaskSet map[string]map[string]int // [nodeName]->[taskName] 17979 } 17980 17981 // NewJobManager creates a new job manager for job 17982 func NewJobManager(jobID api.JobID) *JobManager <span class="cov0" title="0">{ 17983 return &JobManager{ 17984 jobID: jobID, 17985 17986 buckets: make([]*Bucket, 0), 17987 podInBucket: make(map[types.UID]int), 17988 podInTask: make(map[types.UID]string), 17989 taskOverPod: make(map[string]map[types.UID]struct{}), 17990 17991 taskAffinityPriority: make(map[string]int), 17992 taskExistOrder: make(map[string]int), 17993 interAffinity: make(map[string]map[string]struct{}), 17994 interAntiAffinity: 
make(map[string]map[string]struct{}), 17995 selfAffinity: make(map[string]struct{}), 17996 selfAntiAffinity: make(map[string]struct{}), 17997 17998 bucketMaxSize: 0, 17999 nodeTaskSet: make(map[string]map[string]int), 18000 } 18001 }</span> 18002 18003 // MarkOutOfBucket indicates task is outside of any bucket 18004 func (jm *JobManager) MarkOutOfBucket(uid types.UID) <span class="cov0" title="0">{ 18005 jm.podInBucket[uid] = OutOfBucket 18006 }</span> 18007 18008 // MarkTaskHasTopology indicates task has topology settings 18009 func (jm *JobManager) MarkTaskHasTopology(taskName string, topoType topologyType) <span class="cov0" title="0">{ 18010 priority := affinityPriority[topoType] 18011 if priority > jm.taskAffinityPriority[taskName] </span><span class="cov0" title="0">{ 18012 jm.taskAffinityPriority[taskName] = priority 18013 }</span> 18014 } 18015 18016 // ApplyTaskTopology transforms taskTopology to matrix 18017 // affinity: [[a, b], [c]] 18018 // interAffinity: 18019 // a b c 18020 // a - x - 18021 // b x - - 18022 // c - - - 18023 // selfAffinity: 18024 // a b c 18025 // - - x 18026 func (jm *JobManager) ApplyTaskTopology(topo *TaskTopology) <span class="cov0" title="0">{ 18027 for _, aff := range topo.Affinity </span><span class="cov0" title="0">{ 18028 if len(aff) == 1 </span><span class="cov0" title="0">{ 18029 taskName := aff[0] 18030 jm.selfAffinity[taskName] = struct{}{} 18031 jm.MarkTaskHasTopology(taskName, selfAffinity) 18032 continue</span> 18033 } 18034 <span class="cov0" title="0">for index, src := range aff </span><span class="cov0" title="0">{ 18035 for _, dst := range aff[:index] </span><span class="cov0" title="0">{ 18036 addAffinity(jm.interAffinity, src, dst) 18037 addAffinity(jm.interAffinity, dst, src) 18038 }</span> 18039 <span class="cov0" title="0">jm.MarkTaskHasTopology(src, interAffinity)</span> 18040 } 18041 } 18042 18043 <span class="cov0" title="0">for _, aff := range topo.AntiAffinity </span><span class="cov0" title="0">{ 18044 
if len(aff) == 1 </span><span class="cov0" title="0">{ 18045 taskName := aff[0] 18046 jm.selfAntiAffinity[taskName] = struct{}{} 18047 jm.MarkTaskHasTopology(taskName, selfAntiAffinity) 18048 continue</span> 18049 } 18050 <span class="cov0" title="0">for index, src := range aff </span><span class="cov0" title="0">{ 18051 for _, dst := range aff[:index] </span><span class="cov0" title="0">{ 18052 addAffinity(jm.interAntiAffinity, src, dst) 18053 addAffinity(jm.interAntiAffinity, dst, src) 18054 }</span> 18055 <span class="cov0" title="0">jm.MarkTaskHasTopology(src, interAntiAffinity)</span> 18056 } 18057 } 18058 18059 <span class="cov0" title="0">length := len(topo.TaskOrder) 18060 for index, taskName := range topo.TaskOrder </span><span class="cov0" title="0">{ 18061 jm.taskExistOrder[taskName] = length - index 18062 }</span> 18063 } 18064 18065 // NewBucket creates a new bucket 18066 func (jm *JobManager) NewBucket() *Bucket <span class="cov0" title="0">{ 18067 bucket := NewBucket() 18068 bucket.index = len(jm.buckets) 18069 jm.buckets = append(jm.buckets, bucket) 18070 return bucket 18071 }</span> 18072 18073 // AddTaskToBucket adds task into bucket 18074 func (jm *JobManager) AddTaskToBucket(bucketIndex int, taskName string, task *api.TaskInfo) <span class="cov0" title="0">{ 18075 bucket := jm.buckets[bucketIndex] 18076 jm.podInBucket[task.Pod.UID] = bucketIndex 18077 bucket.AddTask(taskName, task) 18078 if size := len(bucket.tasks) + bucket.boundTask; size > jm.bucketMaxSize </span><span class="cov0" title="0">{ 18079 jm.bucketMaxSize = size 18080 }</span> 18081 } 18082 18083 // L compared with R, -1 for L < R, 0 for L == R, 1 for L > R 18084 func (jm *JobManager) taskAffinityOrder(L, R *api.TaskInfo) int <span class="cov0" title="0">{ 18085 LTaskName := jm.podInTask[L.Pod.UID] 18086 RTaskName := jm.podInTask[R.Pod.UID] 18087 18088 // in the same vk task, they are equal 18089 if LTaskName == RTaskName </span><span class="cov0" title="0">{ 18090 return 0 18091 
}</span> 18092 18093 // use user defined order firstly 18094 <span class="cov0" title="0">LOrder := jm.taskExistOrder[LTaskName] 18095 ROrder := jm.taskExistOrder[RTaskName] 18096 if LOrder != ROrder </span><span class="cov0" title="0">{ 18097 if LOrder > ROrder </span><span class="cov0" title="0">{ 18098 return 1 18099 }</span> 18100 <span class="cov0" title="0">return -1</span> 18101 } 18102 18103 <span class="cov0" title="0">LPriority := jm.taskAffinityPriority[LTaskName] 18104 RPriority := jm.taskAffinityPriority[RTaskName] 18105 if LPriority != RPriority </span><span class="cov0" title="0">{ 18106 if LPriority > RPriority </span><span class="cov0" title="0">{ 18107 return 1 18108 }</span> 18109 <span class="cov0" title="0">return -1</span> 18110 } 18111 18112 // all affinity setting of L and R are the same, they are equal 18113 <span class="cov0" title="0">return 0</span> 18114 } 18115 18116 func (jm *JobManager) buildTaskInfo(tasks map[api.TaskID]*api.TaskInfo) []*api.TaskInfo <span class="cov0" title="0">{ 18117 taskWithoutBucket := make([]*api.TaskInfo, 0, len(tasks)) 18118 for _, task := range tasks </span><span class="cov0" title="0">{ 18119 pod := task.Pod 18120 18121 taskName := getTaskName(task) 18122 if taskName == "" </span><span class="cov0" title="0">{ 18123 jm.MarkOutOfBucket(pod.UID) 18124 continue</span> 18125 } 18126 <span class="cov0" title="0">if _, hasTopology := jm.taskAffinityPriority[taskName]; !hasTopology </span><span class="cov0" title="0">{ 18127 jm.MarkOutOfBucket(pod.UID) 18128 continue</span> 18129 } 18130 18131 <span class="cov0" title="0">jm.podInTask[pod.UID] = taskName 18132 taskSet, ok := jm.taskOverPod[taskName] 18133 if !ok </span><span class="cov0" title="0">{ 18134 taskSet = make(map[types.UID]struct{}) 18135 jm.taskOverPod[taskName] = taskSet 18136 }</span> 18137 <span class="cov0" title="0">taskSet[pod.UID] = struct{}{} 18138 taskWithoutBucket = append(taskWithoutBucket, task)</span> 18139 } 18140 <span class="cov0" 
title="0">return taskWithoutBucket</span> 18141 } 18142 18143 func (jm *JobManager) checkTaskSetAffinity(taskName string, taskNameSet map[string]int, onlyAnti bool) int <span class="cov0" title="0">{ 18144 bucketPodAff := 0 18145 18146 if taskName == "" </span><span class="cov0" title="0">{ 18147 return bucketPodAff 18148 }</span> 18149 18150 <span class="cov0" title="0">for taskNameInBucket, count := range taskNameSet </span><span class="cov0" title="0">{ 18151 theSameTask := taskNameInBucket == taskName 18152 18153 if !onlyAnti </span><span class="cov0" title="0">{ 18154 affinity := false 18155 if theSameTask </span><span class="cov0" title="0">{ 18156 _, affinity = jm.selfAffinity[taskName] 18157 }</span> else<span class="cov0" title="0"> { 18158 _, affinity = jm.interAffinity[taskName][taskNameInBucket] 18159 }</span> 18160 <span class="cov0" title="0">if affinity </span><span class="cov0" title="0">{ 18161 bucketPodAff += count 18162 }</span> 18163 } 18164 18165 <span class="cov0" title="0">antiAffinity := false 18166 if theSameTask </span><span class="cov0" title="0">{ 18167 _, antiAffinity = jm.selfAntiAffinity[taskName] 18168 }</span> else<span class="cov0" title="0"> { 18169 _, antiAffinity = jm.interAntiAffinity[taskName][taskNameInBucket] 18170 }</span> 18171 <span class="cov0" title="0">if antiAffinity </span><span class="cov0" title="0">{ 18172 bucketPodAff -= count 18173 }</span> 18174 } 18175 18176 <span class="cov0" title="0">return bucketPodAff</span> 18177 } 18178 18179 func (jm *JobManager) buildBucket(taskWithOrder []*api.TaskInfo) <span class="cov0" title="0">{ 18180 nodeBucketMapping := make(map[string]*Bucket) 18181 18182 for _, task := range taskWithOrder </span><span class="cov0" title="0">{ 18183 klog.V(5).Infof("jobID %s task with order task %s/%s", jm.jobID, task.Namespace, task.Name) 18184 18185 var selectedBucket *Bucket 18186 maxAffinity := math.MinInt32 18187 18188 taskName := getTaskName(task) 18189 18190 if task.NodeName != "" 
</span><span class="cov0" title="0">{ 18191 // generate bucket by node 18192 maxAffinity = 0 18193 selectedBucket = nodeBucketMapping[task.NodeName] 18194 }</span> else<span class="cov0" title="0"> { 18195 for _, bucket := range jm.buckets </span><span class="cov0" title="0">{ 18196 bucketPodAff := jm.checkTaskSetAffinity(taskName, bucket.taskNameSet, false) 18197 18198 // choose the best fit affinity, or balance resource between bucket 18199 if bucketPodAff > maxAffinity </span><span class="cov0" title="0">{ 18200 maxAffinity = bucketPodAff 18201 selectedBucket = bucket 18202 }</span> else<span class="cov0" title="0"> if bucketPodAff == maxAffinity && selectedBucket != nil && 18203 bucket.reqScore < selectedBucket.reqScore </span><span class="cov0" title="0">{ 18204 selectedBucket = bucket 18205 }</span> 18206 } 18207 } 18208 18209 <span class="cov0" title="0">if maxAffinity < 0 || selectedBucket == nil </span><span class="cov0" title="0">{ 18210 selectedBucket = jm.NewBucket() 18211 if task.NodeName != "" </span><span class="cov0" title="0">{ 18212 nodeBucketMapping[task.NodeName] = selectedBucket 18213 }</span> 18214 } 18215 18216 <span class="cov0" title="0">jm.AddTaskToBucket(selectedBucket.index, taskName, task)</span> 18217 } 18218 } 18219 18220 // ConstructBucket builds bucket for tasks 18221 func (jm *JobManager) ConstructBucket(tasks map[api.TaskID]*api.TaskInfo) <span class="cov0" title="0">{ 18222 taskWithoutBucket := jm.buildTaskInfo(tasks) 18223 18224 o := TaskOrder{ 18225 tasks: taskWithoutBucket, 18226 18227 manager: jm, 18228 } 18229 sort.Sort(sort.Reverse(&o)) 18230 18231 jm.buildBucket(o.tasks) 18232 }</span> 18233 18234 // TaskBound binds task to bucket 18235 func (jm *JobManager) TaskBound(task *api.TaskInfo) <span class="cov0" title="0">{ 18236 if taskName := getTaskName(task); taskName != "" </span><span class="cov0" title="0">{ 18237 set, ok := jm.nodeTaskSet[task.NodeName] 18238 if !ok </span><span class="cov0" title="0">{ 18239 set = 
make(map[string]int) 18240 jm.nodeTaskSet[task.NodeName] = set 18241 }</span> 18242 <span class="cov0" title="0">set[taskName]++</span> 18243 } 18244 18245 <span class="cov0" title="0">bucket := jm.GetBucket(task) 18246 if bucket != nil </span><span class="cov0" title="0">{ 18247 bucket.TaskBound(task) 18248 }</span> 18249 } 18250 18251 // GetBucket get bucket inside which task has been 18252 func (jm *JobManager) GetBucket(task *api.TaskInfo) *Bucket <span class="cov0" title="0">{ 18253 index, ok := jm.podInBucket[task.Pod.UID] 18254 if !ok || index == OutOfBucket </span><span class="cov0" title="0">{ 18255 return nil 18256 }</span> 18257 18258 <span class="cov0" title="0">bucket := jm.buckets[index] 18259 return bucket</span> 18260 } 18261 18262 func (jm *JobManager) String() string <span class="cov0" title="0">{ 18263 // saa: selfAntiAffinity 18264 // iaa: interAntiAffinity 18265 // sa: selfAffinity 18266 // ia: interAffinity 18267 msg := []string{ 18268 fmt.Sprintf("%s - job %s max %d || saa: %v - iaa: %v - sa: %v - ia: %v || priority: %v - order: %v || ", 18269 PluginName, jm.jobID, jm.bucketMaxSize, 18270 jm.selfAntiAffinity, jm.interAntiAffinity, 18271 jm.selfAffinity, jm.interAffinity, 18272 jm.taskAffinityPriority, jm.taskExistOrder, 18273 ), 18274 } 18275 18276 for _, bucket := range jm.buckets </span><span class="cov0" title="0">{ 18277 bucketMsg := fmt.Sprintf("b:%d -- ", bucket.index) 18278 var info []string 18279 for _, task := range bucket.tasks </span><span class="cov0" title="0">{ 18280 info = append(info, task.Pod.Name) 18281 }</span> 18282 <span class="cov0" title="0">bucketMsg += strings.Join(info, ", ") 18283 bucketMsg += "|" 18284 18285 info = nil 18286 for nodeName, count := range bucket.node </span><span class="cov0" title="0">{ 18287 info = append(info, fmt.Sprintf("n%s-%d", nodeName, count)) 18288 }</span> 18289 <span class="cov0" title="0">bucketMsg += strings.Join(info, ", ") 18290 18291 msg = append(msg, "["+bucketMsg+"]")</span> 18292 
} 18293 <span class="cov0" title="0">return strings.Join(msg, " ")</span> 18294 } 18295 </pre> 18296 18297 <pre class="file" id="file85" style="display: none">/* 18298 Copyright 2021 The Volcano Authors. 18299 18300 Licensed under the Apache License, Version 2.0 (the "License"); 18301 you may not use this file except in compliance with the License. 18302 You may obtain a copy of the License at 18303 18304 http://www.apache.org/licenses/LICENSE-2.0 18305 18306 Unless required by applicable law or agreed to in writing, software 18307 distributed under the License is distributed on an "AS IS" BASIS, 18308 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18309 See the License for the specific language governing permissions and 18310 limitations under the License. 18311 */ 18312 18313 package tasktopology 18314 18315 import ( 18316 "fmt" 18317 "strings" 18318 "time" 18319 18320 "k8s.io/klog" 18321 "k8s.io/kubernetes/pkg/scheduler/framework/v1alpha1" 18322 18323 "volcano.sh/volcano/pkg/scheduler/api" 18324 "volcano.sh/volcano/pkg/scheduler/framework" 18325 ) 18326 18327 type taskTopologyPlugin struct { 18328 arguments framework.Arguments 18329 18330 weight int 18331 managers map[api.JobID]*JobManager 18332 } 18333 18334 // New function returns taskTopologyPlugin object 18335 func New(arguments framework.Arguments) framework.Plugin <span class="cov0" title="0">{ 18336 return &taskTopologyPlugin{ 18337 arguments: arguments, 18338 18339 weight: calculateWeight(arguments), 18340 managers: make(map[api.JobID]*JobManager), 18341 } 18342 }</span> 18343 18344 func (p *taskTopologyPlugin) Name() string <span class="cov0" title="0">{ 18345 return PluginName 18346 }</span> 18347 18348 // TaskOrderFn returns -1 to make l prior to r. 
18349 // 18350 // for example: 18351 // A: 18352 // | bucket1 | bucket2 | out of bucket 18353 // | a1 a3 | a2 | a4 18354 // B: 18355 // | bucket1 | out of bucket 18356 // | b1 b2 | b3 18357 // the right task order should be: 18358 // a1 a3 a2 b1 b2 a4 b3 18359 func (p *taskTopologyPlugin) TaskOrderFn(l interface{}, r interface{}) int <span class="cov0" title="0">{ 18360 lv, ok := l.(*api.TaskInfo) 18361 if !ok </span><span class="cov0" title="0">{ 18362 klog.Errorf("Object is not a taskinfo") 18363 }</span> 18364 <span class="cov0" title="0">rv, ok := r.(*api.TaskInfo) 18365 if !ok </span><span class="cov0" title="0">{ 18366 klog.Errorf("Object is not a taskinfo") 18367 }</span> 18368 18369 <span class="cov0" title="0">lvJobManager := p.managers[lv.Job] 18370 rvJobManager := p.managers[rv.Job] 18371 18372 var lvBucket, rvBucket *Bucket 18373 if lvJobManager != nil </span><span class="cov0" title="0">{ 18374 lvBucket = lvJobManager.GetBucket(lv) 18375 }</span> else<span class="cov0" title="0"> { 18376 klog.V(4).Infof("No job manager for job <ID: %s>, do not return task order.", lv.Job) 18377 return 0 18378 }</span> 18379 <span class="cov0" title="0">if rvJobManager != nil </span><span class="cov0" title="0">{ 18380 rvBucket = rvJobManager.GetBucket(rv) 18381 }</span> else<span class="cov0" title="0"> { 18382 klog.V(4).Infof("No job manager for job <ID: %s>, do not return task order.", rv.Job) 18383 return 0 18384 }</span> 18385 18386 // the one have bucket would always prior to another 18387 <span class="cov0" title="0">lvInBucket := lvBucket != nil 18388 rvInBucket := rvBucket != nil 18389 if lvInBucket != rvInBucket </span><span class="cov0" title="0">{ 18390 if lvInBucket </span><span class="cov0" title="0">{ 18391 return -1 18392 }</span> 18393 <span class="cov0" title="0">return 1</span> 18394 } 18395 18396 // comparison between job is not the duty of this plugin 18397 <span class="cov0" title="0">if lv.Job != rv.Job </span><span class="cov0" title="0">{ 18398 
return 0 18399 }</span> 18400 18401 // task out of bucket have no order 18402 <span class="cov0" title="0">if !lvInBucket && !rvInBucket </span><span class="cov0" title="0">{ 18403 return 0 18404 }</span> 18405 18406 // the big bucket should prior to small one 18407 <span class="cov0" title="0">lvHasTask := len(lvBucket.tasks) 18408 rvHasTask := len(rvBucket.tasks) 18409 if lvHasTask != rvHasTask </span><span class="cov0" title="0">{ 18410 if lvHasTask > rvHasTask </span><span class="cov0" title="0">{ 18411 return -1 18412 }</span> 18413 <span class="cov0" title="0">return 1</span> 18414 } 18415 18416 <span class="cov0" title="0">lvBucketIndex := lvBucket.index 18417 rvBucketIndex := rvBucket.index 18418 // in the same bucket, the affinityOrder is ok 18419 if lvBucketIndex == rvBucketIndex </span><span class="cov0" title="0">{ 18420 affinityOrder := lvJobManager.taskAffinityOrder(lv, rv) 18421 return -affinityOrder 18422 }</span> 18423 18424 // the old bucket should prior to young one 18425 <span class="cov0" title="0">if lvBucketIndex < rvBucketIndex </span><span class="cov0" title="0">{ 18426 return -1 18427 }</span> 18428 <span class="cov0" title="0">return 1</span> 18429 } 18430 18431 func (p *taskTopologyPlugin) calcBucketScore(task *api.TaskInfo, node *api.NodeInfo) (int, *JobManager, error) <span class="cov0" title="0">{ 18432 // task could never fits the node 18433 maxResource := node.Idle.Clone().Add(node.Releasing) 18434 if req := task.Resreq; req != nil && maxResource.LessPartly(req, api.Zero) </span><span class="cov0" title="0">{ 18435 return 0, nil, nil 18436 }</span> 18437 18438 <span class="cov0" title="0">jobManager, hasManager := p.managers[task.Job] 18439 if !hasManager </span><span class="cov0" title="0">{ 18440 return 0, nil, nil 18441 }</span> 18442 18443 <span class="cov0" title="0">bucket := jobManager.GetBucket(task) 18444 // task out of bucket 18445 if bucket == nil </span><span class="cov0" title="0">{ 18446 return 0, jobManager, nil 18447 
}</span> 18448 18449 // 1. bound task in bucket is the base score of this node 18450 <span class="cov0" title="0">score := bucket.node[node.Name] 18451 18452 // 2. task inter/self anti-affinity should be calculated 18453 if nodeTaskSet := jobManager.nodeTaskSet[node.Name]; nodeTaskSet != nil </span><span class="cov0" title="0">{ 18454 taskName := getTaskName(task) 18455 affinityScore := jobManager.checkTaskSetAffinity(taskName, nodeTaskSet, true) 18456 if affinityScore < 0 </span><span class="cov0" title="0">{ 18457 score += affinityScore 18458 }</span> 18459 } 18460 <span class="cov0" title="0">klog.V(4).Infof("task %s/%s, node %s, additional score %d, task %d", 18461 task.Namespace, task.Name, node.Name, score, len(bucket.tasks)) 18462 18463 // 3. the other tasks in bucket take into considering 18464 score += len(bucket.tasks) 18465 if bucket.request == nil || bucket.request.LessEqual(maxResource, api.Zero) </span><span class="cov0" title="0">{ 18466 return score, jobManager, nil 18467 }</span> 18468 18469 <span class="cov0" title="0">remains := bucket.request.Clone() 18470 // randomly (by map) take out task to make the bucket fits the node 18471 for bucketTaskID, bucketTask := range bucket.tasks </span><span class="cov0" title="0">{ 18472 // current task should kept in bucket 18473 if bucketTaskID == task.Pod.UID || bucketTask.Resreq == nil </span><span class="cov0" title="0">{ 18474 continue</span> 18475 } 18476 <span class="cov0" title="0">remains.Sub(bucketTask.Resreq) 18477 score-- 18478 if remains.LessEqual(maxResource, api.Zero) </span><span class="cov0" title="0">{ 18479 break</span> 18480 } 18481 } 18482 // here, the bucket remained request will always fit the maxResource 18483 <span class="cov0" title="0">return score, jobManager, nil</span> 18484 } 18485 18486 func (p *taskTopologyPlugin) NodeOrderFn(task *api.TaskInfo, node *api.NodeInfo) (float64, error) <span class="cov0" title="0">{ 18487 score, jobManager, err := p.calcBucketScore(task, node) 
18488 if err != nil </span><span class="cov0" title="0">{ 18489 return 0, err 18490 }</span> 18491 <span class="cov0" title="0">fScore := float64(score * p.weight) 18492 if jobManager != nil && jobManager.bucketMaxSize != 0 </span><span class="cov0" title="0">{ 18493 fScore = fScore * float64(v1alpha1.MaxNodeScore) / float64(jobManager.bucketMaxSize) 18494 }</span> 18495 <span class="cov0" title="0">klog.V(4).Infof("task %s/%s at node %s has bucket score %d, score %f", 18496 task.Namespace, task.Name, node.Name, score, fScore) 18497 return fScore, nil</span> 18498 } 18499 18500 func (p *taskTopologyPlugin) AllocateFunc(event *framework.Event) <span class="cov0" title="0">{ 18501 task := event.Task 18502 18503 jobManager, hasManager := p.managers[task.Job] 18504 if !hasManager </span><span class="cov0" title="0">{ 18505 return 18506 }</span> 18507 <span class="cov0" title="0">jobManager.TaskBound(task)</span> 18508 } 18509 18510 func (p *taskTopologyPlugin) initBucket(ssn *framework.Session) <span class="cov0" title="0">{ 18511 for jobID, job := range ssn.Jobs </span><span class="cov0" title="0">{ 18512 if noPendingTasks(job) </span><span class="cov0" title="0">{ 18513 klog.V(4).Infof("No pending tasks in job <%s/%s> by plugin %s.", 18514 job.Namespace, job.Name, PluginName) 18515 continue</span> 18516 } 18517 18518 <span class="cov0" title="0">jobTopology, err := readTopologyFromPgAnnotations(job) 18519 if err != nil </span><span class="cov0" title="0">{ 18520 klog.V(4).Infof("Failed to read task topology from job <%s/%s> annotations, error: %s.", 18521 job.Namespace, job.Name, err.Error()) 18522 continue</span> 18523 } 18524 <span class="cov0" title="0">if jobTopology == nil </span><span class="cov0" title="0">{ 18525 continue</span> 18526 } 18527 18528 <span class="cov0" title="0">manager := NewJobManager(jobID) 18529 manager.ApplyTaskTopology(jobTopology) 18530 manager.ConstructBucket(job.Tasks) 18531 18532 p.managers[job.UID] = manager</span> 18533 } 18534 } 
18535 18536 func affinityCheck(job *api.JobInfo, affinity [][]string) error <span class="cov8" title="1">{ 18537 if job == nil || affinity == nil </span><span class="cov0" title="0">{ 18538 return fmt.Errorf("empty input, job: %v, affinity: %v", job, affinity) 18539 }</span> 18540 18541 <span class="cov8" title="1">var taskNumber = len(job.Tasks) 18542 var taskRef = make(map[string]bool, taskNumber) 18543 for _, task := range job.Tasks </span><span class="cov8" title="1">{ 18544 tmpStrings := strings.Split(task.Name, "-") 18545 if _, exist := taskRef[tmpStrings[len(tmpStrings)-2]]; !exist </span><span class="cov8" title="1">{ 18546 taskRef[tmpStrings[len(tmpStrings)-2]] = true 18547 }</span> 18548 } 18549 18550 <span class="cov8" title="1">for _, aff := range affinity </span><span class="cov8" title="1">{ 18551 affTasks := make(map[string]bool, len(aff)) 18552 for _, task := range aff </span><span class="cov8" title="1">{ 18553 if len(task) == 0 </span><span class="cov8" title="1">{ 18554 continue</span> 18555 } 18556 <span class="cov8" title="1">if _, exist := taskRef[task]; !exist </span><span class="cov8" title="1">{ 18557 return fmt.Errorf("task %s do not exist in job <%s/%s>", task, job.Namespace, job.Name) 18558 }</span> 18559 <span class="cov8" title="1">if _, exist := affTasks[task]; exist </span><span class="cov8" title="1">{ 18560 return fmt.Errorf("task %s is duplicated in job <%s/%s>", task, job.Namespace, job.Name) 18561 }</span> 18562 <span class="cov8" title="1">affTasks[task] = true</span> 18563 } 18564 } 18565 18566 <span class="cov8" title="1">return nil</span> 18567 } 18568 18569 func splitAnnotations(job *api.JobInfo, annotation string) ([][]string, error) <span class="cov8" title="1">{ 18570 affinityStr := strings.Split(annotation, ";") 18571 if len(affinityStr) == 0 </span><span class="cov0" title="0">{ 18572 return nil, nil 18573 }</span> 18574 <span class="cov8" title="1">var affinity = make([][]string, len(affinityStr)) 18575 for i, str := 
range affinityStr </span><span class="cov8" title="1">{ 18576 affinity[i] = strings.Split(str, ",") 18577 }</span> 18578 <span class="cov8" title="1">if err := affinityCheck(job, affinity); err != nil </span><span class="cov8" title="1">{ 18579 klog.V(4).Infof("Job <%s/%s> affinity key invalid: %s.", 18580 job.Namespace, job.Name, err.Error()) 18581 return nil, err 18582 }</span> 18583 <span class="cov8" title="1">return affinity, nil</span> 18584 } 18585 18586 func readTopologyFromPgAnnotations(job *api.JobInfo) (*TaskTopology, error) <span class="cov8" title="1">{ 18587 jobAffinityStr, affinityExist := job.PodGroup.Annotations[JobAffinityAnnotations] 18588 jobAntiAffinityStr, antiAffinityExist := job.PodGroup.Annotations[JobAntiAffinityAnnotations] 18589 taskOrderStr, taskOrderExist := job.PodGroup.Annotations[TaskOrderAnnotations] 18590 18591 if !(affinityExist || antiAffinityExist || taskOrderExist) </span><span class="cov8" title="1">{ 18592 return nil, nil 18593 }</span> 18594 18595 <span class="cov8" title="1">var jobTopology = TaskTopology{ 18596 Affinity: nil, 18597 AntiAffinity: nil, 18598 TaskOrder: nil, 18599 } 18600 18601 if affinityExist </span><span class="cov8" title="1">{ 18602 affinities, err := splitAnnotations(job, jobAffinityStr) 18603 if err != nil </span><span class="cov8" title="1">{ 18604 klog.V(4).Infof("Job <%s/%s> affinity key invalid: %s.", 18605 job.Namespace, job.Name, err.Error()) 18606 return nil, err 18607 }</span> 18608 <span class="cov8" title="1">jobTopology.Affinity = affinities</span> 18609 } 18610 18611 <span class="cov8" title="1">if antiAffinityExist </span><span class="cov8" title="1">{ 18612 affinities, err := splitAnnotations(job, jobAntiAffinityStr) 18613 if err != nil </span><span class="cov8" title="1">{ 18614 klog.V(4).Infof("Job <%s/%s> anti affinity key invalid: %s.", 18615 job.Namespace, job.Name, err.Error()) 18616 return nil, err 18617 }</span> 18618 <span class="cov8" title="1">jobTopology.AntiAffinity = 
affinities</span> 18619 } 18620 18621 <span class="cov8" title="1">if taskOrderExist </span><span class="cov8" title="1">{ 18622 jobTopology.TaskOrder = strings.Split(taskOrderStr, ",") 18623 if err := affinityCheck(job, [][]string{jobTopology.TaskOrder}); err != nil </span><span class="cov8" title="1">{ 18624 klog.V(4).Infof("Job <%s/%s> task order key invalid: %s.", 18625 job.Namespace, job.Name, err.Error()) 18626 return nil, err 18627 }</span> 18628 } 18629 18630 <span class="cov8" title="1">return &jobTopology, nil</span> 18631 } 18632 18633 func (p *taskTopologyPlugin) OnSessionOpen(ssn *framework.Session) <span class="cov0" title="0">{ 18634 start := time.Now() 18635 klog.V(3).Infof("start to init task topology plugin, weight[%d], defined order %v", p.weight, affinityPriority) 18636 18637 p.initBucket(ssn) 18638 18639 ssn.AddTaskOrderFn(p.Name(), p.TaskOrderFn) 18640 18641 ssn.AddNodeOrderFn(p.Name(), p.NodeOrderFn) 18642 18643 ssn.AddEventHandler(&framework.EventHandler{ 18644 AllocateFunc: p.AllocateFunc, 18645 }) 18646 18647 klog.V(3).Infof("finished to init task topology plugin, using time %v", time.Since(start)) 18648 }</span> 18649 18650 func (p *taskTopologyPlugin) OnSessionClose(ssn *framework.Session) <span class="cov0" title="0">{ 18651 p.managers = nil 18652 }</span> 18653 </pre> 18654 18655 <pre class="file" id="file86" style="display: none">/* 18656 Copyright 2021 The Volcano Authors. 18657 18658 Licensed under the Apache License, Version 2.0 (the "License"); 18659 you may not use this file except in compliance with the License. 18660 You may obtain a copy of the License at 18661 18662 http://www.apache.org/licenses/LICENSE-2.0 18663 18664 Unless required by applicable law or agreed to in writing, software 18665 distributed under the License is distributed on an "AS IS" BASIS, 18666 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
18667 See the License for the specific language governing permissions and 18668 limitations under the License. 18669 */ 18670 18671 package tasktopology 18672 18673 import ( 18674 "volcano.sh/apis/pkg/apis/batch/v1alpha1" 18675 "volcano.sh/volcano/pkg/scheduler/api" 18676 "volcano.sh/volcano/pkg/scheduler/framework" 18677 ) 18678 18679 const ( 18680 // PluginName indicates name of volcano scheduler plugin 18681 PluginName = "task-topology" 18682 // PluginWeight is task-topology plugin weight in nodeOrderFn 18683 PluginWeight = "task-topology.weight" 18684 // JobAffinityKey is the key to read in task-topology arguments from job annotations 18685 JobAffinityKey = "volcano.sh/task-topology" 18686 // OutOfBucket indicates task is outside of any bucket 18687 OutOfBucket = -1 18688 18689 // JobAffinityAnnotations is the key to read in task-topology affinity arguments from podgroup annotations 18690 JobAffinityAnnotations = "volcano.sh/task-topology-affinity" 18691 // JobAntiAffinityAnnotations is the key to read in task-topology anti-affinity arguments from podgroup annotations 18692 JobAntiAffinityAnnotations = "volcano.sh/task-topology-anti-affinity" 18693 // TaskOrderAnnotations is the key to read in task-topology task order arguments from podgroup annotations 18694 TaskOrderAnnotations = "volcano.sh/task-topology-task-order" 18695 ) 18696 18697 // TaskTopology is struct used to save affinity infos of a job read from job plugin or annotations 18698 type TaskTopology struct { 18699 Affinity [][]string `json:"affinity,omitempty"` 18700 AntiAffinity [][]string `json:"antiAffinity,omitempty"` 18701 TaskOrder []string `json:"taskOrder,omitempty"` 18702 } 18703 18704 func calculateWeight(args framework.Arguments) int <span class="cov0" title="0">{ 18705 /* 18706 User Should give taskTopologyWeight in this format(task-topology.weight). 
18707 18708 actions: "enqueue, reclaim, allocate, backfill, preempt" 18709 tiers: 18710 - plugins: 18711 - name: task-topology 18712 arguments: 18713 task-topology.weight: 10 18714 */ 18715 // Values are initialized to 1. 18716 weight := 1 18717 18718 args.GetInt(&weight, PluginWeight) 18719 18720 return weight 18721 }</span> 18722 18723 func getTaskName(task *api.TaskInfo) string <span class="cov0" title="0">{ 18724 return task.Pod.Annotations[v1alpha1.TaskSpecKey] 18725 }</span> 18726 18727 func addAffinity(m map[string]map[string]struct{}, src, dst string) <span class="cov0" title="0">{ 18728 srcMap, ok := m[src] 18729 if !ok </span><span class="cov0" title="0">{ 18730 srcMap = make(map[string]struct{}) 18731 m[src] = srcMap 18732 }</span> 18733 <span class="cov0" title="0">srcMap[dst] = struct{}{}</span> 18734 } 18735 18736 func noPendingTasks(job *api.JobInfo) bool <span class="cov0" title="0">{ 18737 return len(job.TaskStatusIndex[api.Pending]) == 0 18738 }</span> 18739 18740 // TaskOrder is struct used to save task order 18741 type TaskOrder struct { 18742 tasks []*api.TaskInfo 18743 manager *JobManager 18744 } 18745 18746 func (p *TaskOrder) Len() int <span class="cov0" title="0">{ return len(p.tasks) }</span> 18747 18748 func (p *TaskOrder) Swap(l, r int) <span class="cov0" title="0">{ 18749 p.tasks[l], p.tasks[r] = p.tasks[r], p.tasks[l] 18750 }</span> 18751 18752 func (p *TaskOrder) Less(l, r int) bool <span class="cov0" title="0">{ 18753 L := p.tasks[l] 18754 R := p.tasks[r] 18755 18756 LHasNode := L.NodeName != "" 18757 RHasNode := R.NodeName != "" 18758 if LHasNode || RHasNode </span><span class="cov0" title="0">{ 18759 // the task bounded would have high priority 18760 if LHasNode != RHasNode </span><span class="cov0" title="0">{ 18761 return !LHasNode 18762 }</span> 18763 // all bound, any order is alright 18764 <span class="cov0" title="0">return L.NodeName > R.NodeName</span> 18765 } 18766 18767 <span class="cov0" title="0">result := 
p.manager.taskAffinityOrder(L, R) 18768 // they have the same taskAffinity order, any order is alright 18769 if result == 0 </span><span class="cov0" title="0">{ 18770 return L.Name > R.Name 18771 }</span> 18772 <span class="cov0" title="0">return result < 0</span> 18773 } 18774 </pre> 18775 18776 <pre class="file" id="file87" style="display: none">/* 18777 Copyright 2021 The Volcano Authors. 18778 18779 Licensed under the Apache License, Version 2.0 (the "License"); 18780 you may not use this file except in compliance with the License. 18781 You may obtain a copy of the License at 18782 18783 http://www.apache.org/licenses/LICENSE-2.0 18784 18785 Unless required by applicable law or agreed to in writing, software 18786 distributed under the License is distributed on an "AS IS" BASIS, 18787 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18788 See the License for the specific language governing permissions and 18789 limitations under the License. 18790 */ 18791 18792 package tdm 18793 18794 import ( 18795 "fmt" 18796 "strings" 18797 "time" 18798 18799 "k8s.io/apimachinery/pkg/util/intstr" 18800 "k8s.io/klog" 18801 "k8s.io/kubernetes/pkg/scheduler/framework/v1alpha1" 18802 18803 "volcano.sh/volcano/pkg/scheduler/api" 18804 "volcano.sh/volcano/pkg/scheduler/framework" 18805 tutil "volcano.sh/volcano/pkg/scheduler/plugins/util" 18806 "volcano.sh/volcano/pkg/scheduler/util" 18807 ) 18808 18809 const ( 18810 // PluginName indicates name of volcano scheduler plugin. 18811 PluginName = "tdm" 18812 // revocableZoneLayout revocable zone layout 18813 revocableZoneLayout = "15:04" 18814 revocableZoneLabelPrefix = "tdm.revocable-zone." 
18815 evictPeriodLabel = "tdm.evict.period" 18816 defaultPodEvictNum = 1 18817 ) 18818 18819 var lastEvictAt time.Time 18820 18821 /* 18822 actions: "enqueue, reclaim, allocate, preempt" 18823 tiers: 18824 - plugins: 18825 - name: tdm 18826 arguments: 18827 tdm.revocable-zone.rz1: 10:00-21:00 18828 tdm.revocable-zone.rz2: 12:00-14:00 18829 tdm.evict.period: 1m 18830 */ 18831 18832 type tdmPlugin struct { 18833 revocableZone map[string]string 18834 // evictPeriod 18835 // default 1m 18836 evictPeriod time.Duration 18837 } 18838 18839 // New function returns prioritizePlugin object 18840 func New(args framework.Arguments) framework.Plugin <span class="cov8" title="1">{ 18841 revocableZone := make(map[string]string) 18842 evictPeriod := time.Minute 18843 18844 for k, v := range args </span><span class="cov8" title="1">{ 18845 if strings.Contains(k, revocableZoneLabelPrefix) </span><span class="cov8" title="1">{ 18846 revocableZone[strings.Replace(k, revocableZoneLabelPrefix, "", 1)] = v 18847 }</span> 18848 } 18849 18850 <span class="cov8" title="1">if period, ok := args[evictPeriodLabel]; ok </span><span class="cov8" title="1">{ 18851 if d, err := time.ParseDuration(period); err == nil </span><span class="cov8" title="1">{ 18852 evictPeriod = d 18853 }</span> 18854 } 18855 18856 <span class="cov8" title="1">return &tdmPlugin{revocableZone, evictPeriod}</span> 18857 } 18858 18859 func (tp *tdmPlugin) Name() string <span class="cov8" title="1">{ 18860 return PluginName 18861 }</span> 18862 18863 func parseRevocableZone(rzRaw string) (start, end time.Time, err error) <span class="cov8" title="1">{ 18864 rzValues := strings.Split(strings.TrimSpace(rzRaw), "-") 18865 18866 if len(rzValues) != 2 </span><span class="cov8" title="1">{ 18867 err = fmt.Errorf("revocable zone %v format error", rzRaw) 18868 return 18869 }</span> 18870 18871 <span class="cov8" title="1">t1, err := time.Parse(revocableZoneLayout, rzValues[0]) 18872 if err != nil </span><span class="cov8" 
title="1">{ 18873 return 18874 }</span> 18875 18876 <span class="cov8" title="1">t2, err := time.Parse(revocableZoneLayout, rzValues[1]) 18877 if err != nil </span><span class="cov0" title="0">{ 18878 return 18879 }</span> 18880 18881 <span class="cov8" title="1">now := time.Now() 18882 18883 start = time.Date(now.Year(), now.Month(), now.Day(), t1.Hour(), t1.Minute(), 0, 0, now.Location()) 18884 if t1.After(t2) || t1.Equal(t2) </span><span class="cov8" title="1">{ 18885 end = time.Date(now.Year(), now.Month(), now.Day()+1, t2.Hour(), t2.Minute(), 0, 0, now.Location()) 18886 }</span> else<span class="cov8" title="1"> { 18887 end = time.Date(now.Year(), now.Month(), now.Day(), t2.Hour(), t2.Minute(), 0, 0, now.Location()) 18888 }</span> 18889 18890 <span class="cov8" title="1">return</span> 18891 } 18892 18893 func (tp *tdmPlugin) availableRevocableZone(rz string) error <span class="cov8" title="1">{ 18894 // rzRaw format 00:00-23:59 18895 rzRaw, ok := tp.revocableZone[rz] 18896 if !ok </span><span class="cov0" title="0">{ 18897 return fmt.Errorf("revocable zone %v not support", rz) 18898 }</span> 18899 18900 <span class="cov8" title="1">now := time.Now() 18901 18902 start, end, err := parseRevocableZone(rzRaw) 18903 if err != nil </span><span class="cov0" title="0">{ 18904 return err 18905 }</span> 18906 18907 <span class="cov8" title="1">if now.Unix() < start.Unix() || now.Unix() > end.Unix() </span><span class="cov8" title="1">{ 18908 return fmt.Errorf("current time beyond revocable zone %v:%v", rz, rzRaw) 18909 }</span> 18910 18911 <span class="cov8" title="1">return nil</span> 18912 } 18913 18914 func (tp *tdmPlugin) OnSessionOpen(ssn *framework.Session) <span class="cov8" title="1">{ 18915 klog.V(4).Infof("Enter tdm plugin ...") 18916 if klog.V(4) </span><span class="cov0" title="0">{ 18917 defer func() </span><span class="cov0" title="0">{ 18918 klog.V(4).Infof("Leaving tdm plugin.") 18919 }</span>() 18920 } 18921 18922 // tdm plugin just handle revocable 
node 18923 <span class="cov8" title="1">predicateFn := func(task *api.TaskInfo, node *api.NodeInfo) error </span><span class="cov8" title="1">{ 18924 if node.RevocableZone == "" </span><span class="cov8" title="1">{ 18925 return nil 18926 }</span> 18927 18928 <span class="cov8" title="1">if err := tp.availableRevocableZone(node.RevocableZone); err != nil </span><span class="cov8" title="1">{ 18929 return fmt.Errorf("plugin %s predicates %w", tp.Name(), err) 18930 }</span> 18931 18932 <span class="cov8" title="1">klog.V(4).Infof("TDM node %v revocable zone %v:%v is active", node.Name, node.RevocableZone, tp.revocableZone[node.RevocableZone]) 18933 18934 if len(task.RevocableZone) == 0 </span><span class="cov8" title="1">{ 18935 msg := fmt.Sprintf("task %s/%s is not allow to dispatch to revocable node %s", task.Namespace, task.Name, node.Name) 18936 return fmt.Errorf("plugin %s predicates %s", tp.Name(), msg) 18937 }</span> 18938 18939 <span class="cov8" title="1">klog.V(4).Infof("TDM filter for Task %s/%s on node %s pass.", task.Namespace, task.Name, node.Name) 18940 return nil</span> 18941 } 18942 18943 // tdm plugin just handle revocable node 18944 <span class="cov8" title="1">nodeOrderFn := func(task *api.TaskInfo, node *api.NodeInfo) (float64, error) </span><span class="cov8" title="1">{ 18945 score := 0.0 18946 18947 if node.RevocableZone == "" </span><span class="cov8" title="1">{ 18948 return score, nil 18949 }</span> 18950 18951 <span class="cov8" title="1">if err := tp.availableRevocableZone(node.RevocableZone); err != nil </span><span class="cov0" title="0">{ 18952 klog.V(4).Infof("TDM not available %s", err) 18953 return score, err 18954 }</span> 18955 18956 <span class="cov8" title="1">if len(task.RevocableZone) == 0 </span><span class="cov0" title="0">{ 18957 klog.V(4).Infof("TDM task %s/%s is not allow to dispatch to revocable node %s", task.Namespace, task.Name, node.Name) 18958 return score, nil 18959 }</span> 18960 18961 <span class="cov8" 
title="1">score = float64(v1alpha1.MaxNodeScore) 18962 18963 klog.V(4).Infof("TDM score for Task %s/%s on node %s is: %v", task.Namespace, task.Name, node.Name, score) 18964 return score, nil</span> 18965 } 18966 18967 <span class="cov8" title="1">preemptableFn := func(preemptor *api.TaskInfo, preemptees []*api.TaskInfo) ([]*api.TaskInfo, int) </span><span class="cov0" title="0">{ 18968 // for the preemptable or can use revocablezone workload, they can not preempt other tasks. 18969 if preemptor.Preemptable || len(preemptor.RevocableZone) > 0 </span><span class="cov0" title="0">{ 18970 klog.V(4).Infof("TDM task %s/%s is preemptable, do nothing skip", preemptor.Namespace, preemptor.Name) 18971 return nil, tutil.Reject 18972 }</span> 18973 18974 <span class="cov0" title="0">var victims []*api.TaskInfo 18975 tasksMap := make(map[api.JobID][]*api.TaskInfo) 18976 18977 // find preemptable tasks which appear on none revocable node 18978 for _, task := range preemptees </span><span class="cov0" title="0">{ 18979 if !task.Preemptable || task.Status != api.Running </span><span class="cov0" title="0">{ 18980 continue</span> 18981 } 18982 18983 <span class="cov0" title="0">node, ok := ssn.Nodes[task.NodeName] 18984 if !ok </span><span class="cov0" title="0">{ 18985 continue</span> 18986 } 18987 18988 <span class="cov0" title="0">if node.RevocableZone != "" </span><span class="cov0" title="0">{ 18989 continue</span> 18990 } 18991 18992 <span class="cov0" title="0">tasksMap[task.Job] = append(tasksMap[task.Job], task)</span> 18993 } 18994 18995 <span class="cov0" title="0">for jobID, preemptableTasks := range tasksMap </span><span class="cov0" title="0">{ 18996 if job, ok := ssn.Jobs[jobID]; ok </span><span class="cov0" title="0">{ 18997 victims = append(victims, tp.maxVictims(job, preemptableTasks)...) 
18998 }</span> 18999 } 19000 19001 <span class="cov0" title="0">klog.V(4).Infof("TDM victims are %+v", victims) 19002 19003 return victims, tutil.Permit</span> 19004 } 19005 19006 <span class="cov8" title="1">victimsFn := func() []*api.TaskInfo </span><span class="cov8" title="1">{ 19007 if lastEvictAt.Add(tp.evictPeriod).After(time.Now()) </span><span class="cov0" title="0">{ 19008 klog.V(4).Infof("TDM next evict time at %v", lastEvictAt) 19009 return nil 19010 }</span> 19011 19012 <span class="cov8" title="1">klog.V(4).Infof("TDM start to find victims") 19013 19014 // find preemptable task on timeout revocable zone node 19015 victims := make([]*api.TaskInfo, 0) 19016 for rz := range tp.revocableZone </span><span class="cov8" title="1">{ 19017 if err := tp.availableRevocableZone(rz); err != nil </span><span class="cov8" title="1">{ 19018 klog.V(4).Infof("TDM revocable zone %v disactive, %v", rz, err) 19019 // rz disactive, then evict preemptable tasks by job from the revocable node 19020 for jobID, preemtableTasks := range tp.revocableNodePreemptableTask(rz, ssn) </span><span class="cov8" title="1">{ 19021 if job, ok := ssn.Jobs[jobID]; ok </span><span class="cov8" title="1">{ 19022 victims = append(victims, tp.maxVictims(job, preemtableTasks)...) 19023 }</span> 19024 } 19025 } 19026 } 19027 19028 // need to consider concurrency? 
19029 <span class="cov8" title="1">lastEvictAt = time.Now() 19030 19031 klog.V(4).Infof("TDM got %v victims", len(victims)) 19032 19033 return victims</span> 19034 } 19035 19036 <span class="cov8" title="1">jobOrderFn := func(l, r interface{}) int </span><span class="cov0" title="0">{ 19037 lv := l.(*api.JobInfo) 19038 rv := r.(*api.JobInfo) 19039 19040 if lv.Preemptable == rv.Preemptable </span><span class="cov0" title="0">{ 19041 return 0 19042 }</span> 19043 19044 <span class="cov0" title="0">if !lv.Preemptable </span><span class="cov0" title="0">{ 19045 return -1 19046 }</span> 19047 19048 <span class="cov0" title="0">return 1</span> 19049 } 19050 19051 <span class="cov8" title="1">jobPipelinedFn := func(obj interface{}) int </span><span class="cov0" title="0">{ 19052 jobInfo := obj.(*api.JobInfo) 19053 occupied := jobInfo.WaitingTaskNum() + jobInfo.ReadyTaskNum() 19054 if occupied >= jobInfo.MinAvailable </span><span class="cov0" title="0">{ 19055 return tutil.Permit 19056 }</span> 19057 <span class="cov0" title="0">return tutil.Reject</span> 19058 } 19059 19060 <span class="cov8" title="1">jobStarvingFn := func(obj interface{}) bool </span><span class="cov0" title="0">{ 19061 jobInfo := obj.(*api.JobInfo) 19062 // allow none preemptable elastic job (deployment) preempt task 19063 if jobInfo.Preemptable </span><span class="cov0" title="0">{ 19064 return false 19065 }</span> 19066 <span class="cov0" title="0">return len(jobInfo.TaskStatusIndex[api.Pending]) > 0</span> 19067 } 19068 19069 <span class="cov8" title="1">ssn.AddPredicateFn(tp.Name(), predicateFn) 19070 ssn.AddNodeOrderFn(tp.Name(), nodeOrderFn) 19071 ssn.AddPreemptableFn(tp.Name(), preemptableFn) 19072 ssn.AddVictimTasksFns(tp.Name(), victimsFn) 19073 ssn.AddJobOrderFn(tp.Name(), jobOrderFn) 19074 ssn.AddJobPipelinedFn(tp.Name(), jobPipelinedFn) 19075 ssn.AddJobStarvingFns(tp.Name(), jobStarvingFn)</span> 19076 } 19077 19078 func (tp *tdmPlugin) maxVictims(job *api.JobInfo, victims []*api.TaskInfo) 
[]*api.TaskInfo <span class="cov8" title="1">{ 19079 maxPodEvictNum := tp.getMaxPodEvictNum(job) 19080 targetNum := util.GetMinInt(maxPodEvictNum, len(victims)) 19081 klog.V(3).Infof("Job <%s/%s> max evict:%v, potential victims number:%v, max victims number:%v", 19082 job.Namespace, job.Name, maxPodEvictNum, len(victims), targetNum) 19083 19084 return victims[:targetNum] 19085 }</span> 19086 19087 // get max pod evict number from job budget configure 19088 func (tp *tdmPlugin) getMaxPodEvictNum(job *api.JobInfo) int <span class="cov8" title="1">{ 19089 jobRunningTaskNum := len(job.TaskStatusIndex[api.Running]) 19090 if job.Budget.MaxUnavilable != "" </span><span class="cov8" title="1">{ 19091 maxUnavilable := tp.parseIntStr(job.Budget.MaxUnavilable, len(job.Tasks)) 19092 finalTaskNum := len(job.TaskStatusIndex[api.Succeeded]) + len(job.TaskStatusIndex[api.Failed]) 19093 realUnavilable := len(job.Tasks) - finalTaskNum - jobRunningTaskNum 19094 if realUnavilable >= maxUnavilable </span><span class="cov0" title="0">{ 19095 return 0 19096 }</span> 19097 <span class="cov8" title="1">return maxUnavilable - realUnavilable</span> 19098 } 19099 19100 <span class="cov8" title="1">if job.Budget.MinAvailable != "" </span><span class="cov8" title="1">{ 19101 minAvailable := tp.parseIntStr(job.Budget.MinAvailable, len(job.Tasks)) 19102 if jobRunningTaskNum >= minAvailable </span><span class="cov8" title="1">{ 19103 return jobRunningTaskNum - minAvailable 19104 }</span> 19105 } 19106 19107 <span class="cov0" title="0">return defaultPodEvictNum</span> 19108 } 19109 19110 func (tp *tdmPlugin) parseIntStr(input string, taskNum int) int <span class="cov8" title="1">{ 19111 resultValue := 0 19112 tmp := intstr.Parse(input) 19113 switch tmp.Type </span>{ 19114 case intstr.Int:<span class="cov8" title="1"> 19115 resultValue = tmp.IntValue()</span> 19116 case intstr.String:<span class="cov8" title="1"> 19117 if v, err := intstr.GetValueFromIntOrPercent(&tmp, taskNum, true); err == nil 
</span><span class="cov8" title="1">{ 19118 resultValue = v 19119 }</span> else<span class="cov0" title="0"> { 19120 klog.Warningf("TDM get percent value err: %v", err) 19121 }</span> 19122 } 19123 19124 <span class="cov8" title="1">return resultValue</span> 19125 } 19126 19127 func (tp *tdmPlugin) revocableNodePreemptableTask(rz string, ssn *framework.Session) map[api.JobID][]*api.TaskInfo <span class="cov8" title="1">{ 19128 tasksMap := make(map[api.JobID][]*api.TaskInfo) 19129 for _, node := range ssn.RevocableNodes </span><span class="cov8" title="1">{ 19130 if node.RevocableZone != rz </span><span class="cov0" title="0">{ 19131 continue</span> 19132 } 19133 19134 <span class="cov8" title="1">for _, task := range node.Tasks </span><span class="cov8" title="1">{ 19135 if task.Preemptable </span><span class="cov8" title="1">{ 19136 if task.Status == api.Running </span><span class="cov8" title="1">{ 19137 tasksMap[task.Job] = append(tasksMap[task.Job], task) 19138 }</span> 19139 } 19140 } 19141 } 19142 19143 <span class="cov8" title="1">return tasksMap</span> 19144 } 19145 19146 func (tp *tdmPlugin) OnSessionClose(ssn *framework.Session) {<span class="cov8" title="1">}</span> 19147 </pre> 19148 19149 <pre class="file" id="file88" style="display: none">/* 19150 Copyright 2017 The Kubernetes Authors. 19151 19152 Licensed under the Apache License, Version 2.0 (the "License"); 19153 you may not use this file except in compliance with the License. 19154 You may obtain a copy of the License at 19155 19156 http://www.apache.org/licenses/LICENSE-2.0 19157 19158 Unless required by applicable law or agreed to in writing, software 19159 distributed under the License is distributed on an "AS IS" BASIS, 19160 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 19161 See the License for the specific language governing permissions and 19162 limitations under the License. 
19163 */ 19164 19165 package scheduler 19166 19167 import ( 19168 "fmt" 19169 "path/filepath" 19170 "sync" 19171 "time" 19172 19173 "github.com/fsnotify/fsnotify" 19174 "k8s.io/apimachinery/pkg/util/wait" 19175 "k8s.io/client-go/rest" 19176 "k8s.io/klog" 19177 19178 "volcano.sh/volcano/pkg/filewatcher" 19179 schedcache "volcano.sh/volcano/pkg/scheduler/cache" 19180 "volcano.sh/volcano/pkg/scheduler/conf" 19181 "volcano.sh/volcano/pkg/scheduler/framework" 19182 "volcano.sh/volcano/pkg/scheduler/metrics" 19183 ) 19184 19185 // Scheduler watches for new unscheduled pods for volcano. It attempts to find 19186 // nodes that they fit on and writes bindings back to the api server. 19187 type Scheduler struct { 19188 cache schedcache.Cache 19189 schedulerConf string 19190 fileWatcher filewatcher.FileWatcher 19191 schedulePeriod time.Duration 19192 once sync.Once 19193 19194 mutex sync.Mutex 19195 actions []framework.Action 19196 plugins []conf.Tier 19197 configurations []conf.Configuration 19198 } 19199 19200 // NewScheduler returns a scheduler 19201 func NewScheduler( 19202 config *rest.Config, 19203 schedulerName string, 19204 schedulerConf string, 19205 period time.Duration, 19206 defaultQueue string, 19207 nodeSelectors []string, 19208 ) (*Scheduler, error) <span class="cov0" title="0">{ 19209 var watcher filewatcher.FileWatcher 19210 if schedulerConf != "" </span><span class="cov0" title="0">{ 19211 var err error 19212 path := filepath.Dir(schedulerConf) 19213 watcher, err = filewatcher.NewFileWatcher(path) 19214 if err != nil </span><span class="cov0" title="0">{ 19215 return nil, fmt.Errorf("failed creating filewatcher for %s: %v", schedulerConf, err) 19216 }</span> 19217 } 19218 19219 <span class="cov0" title="0">scheduler := &Scheduler{ 19220 schedulerConf: schedulerConf, 19221 fileWatcher: watcher, 19222 cache: schedcache.New(config, schedulerName, defaultQueue, nodeSelectors), 19223 schedulePeriod: period, 19224 } 19225 19226 return scheduler, nil</span> 19227 } 
19228 19229 // Run runs the Scheduler 19230 func (pc *Scheduler) Run(stopCh <-chan struct{}) <span class="cov0" title="0">{ 19231 pc.loadSchedulerConf() 19232 go pc.watchSchedulerConf(stopCh) 19233 // Start cache for policy. 19234 pc.cache.Run(stopCh) 19235 pc.cache.WaitForCacheSync(stopCh) 19236 klog.V(2).Infof("scheduler completes Initialization and start to run") 19237 go wait.Until(pc.runOnce, pc.schedulePeriod, stopCh) 19238 }</span> 19239 19240 func (pc *Scheduler) runOnce() <span class="cov0" title="0">{ 19241 klog.V(4).Infof("Start scheduling ...") 19242 scheduleStartTime := time.Now() 19243 defer klog.V(4).Infof("End scheduling ...") 19244 19245 pc.mutex.Lock() 19246 actions := pc.actions 19247 plugins := pc.plugins 19248 configurations := pc.configurations 19249 pc.mutex.Unlock() 19250 19251 ssn := framework.OpenSession(pc.cache, plugins, configurations) 19252 defer framework.CloseSession(ssn) 19253 19254 for _, action := range actions </span><span class="cov0" title="0">{ 19255 actionStartTime := time.Now() 19256 action.Execute(ssn) 19257 metrics.UpdateActionDuration(action.Name(), metrics.Duration(actionStartTime)) 19258 }</span> 19259 <span class="cov0" title="0">metrics.UpdateE2eDuration(metrics.Duration(scheduleStartTime))</span> 19260 } 19261 19262 func (pc *Scheduler) loadSchedulerConf() <span class="cov0" title="0">{ 19263 var err error 19264 pc.once.Do(func() </span><span class="cov0" title="0">{ 19265 pc.actions, pc.plugins, pc.configurations, err = unmarshalSchedulerConf(defaultSchedulerConf) 19266 if err != nil </span><span class="cov0" title="0">{ 19267 klog.Errorf("unmarshal scheduler config %s failed: %v", defaultSchedulerConf, err) 19268 panic("invalid default configuration")</span> 19269 } 19270 }) 19271 19272 <span class="cov0" title="0">var config string 19273 if len(pc.schedulerConf) != 0 </span><span class="cov0" title="0">{ 19274 if config, err = readSchedulerConf(pc.schedulerConf); err != nil </span><span class="cov0" title="0">{ 
19275 klog.Errorf("Failed to read scheduler configuration '%s', using previous configuration: %v", 19276 pc.schedulerConf, err) 19277 return 19278 }</span> 19279 } 19280 19281 <span class="cov0" title="0">actions, plugins, configurations, err := unmarshalSchedulerConf(config) 19282 if err != nil </span><span class="cov0" title="0">{ 19283 klog.Errorf("scheduler config %s is invalid: %v", config, err) 19284 return 19285 }</span> 19286 19287 <span class="cov0" title="0">pc.mutex.Lock() 19288 // If it is valid, use the new configuration 19289 pc.actions = actions 19290 pc.plugins = plugins 19291 pc.configurations = configurations 19292 pc.mutex.Unlock()</span> 19293 } 19294 19295 func (pc *Scheduler) watchSchedulerConf(stopCh <-chan struct{}) <span class="cov0" title="0">{ 19296 if pc.fileWatcher == nil </span><span class="cov0" title="0">{ 19297 return 19298 }</span> 19299 <span class="cov0" title="0">eventCh := pc.fileWatcher.Events() 19300 errCh := pc.fileWatcher.Errors() 19301 for </span><span class="cov0" title="0">{ 19302 select </span>{ 19303 case event, ok := <-eventCh:<span class="cov0" title="0"> 19304 if !ok </span><span class="cov0" title="0">{ 19305 return 19306 }</span> 19307 <span class="cov0" title="0">klog.V(4).Infof("watch %s event: %v", pc.schedulerConf, event) 19308 if event.Op&fsnotify.Write == fsnotify.Write || event.Op&fsnotify.Create == fsnotify.Create </span><span class="cov0" title="0">{ 19309 pc.loadSchedulerConf() 19310 }</span> 19311 case err, ok := <-errCh:<span class="cov0" title="0"> 19312 if !ok </span><span class="cov0" title="0">{ 19313 return 19314 }</span> 19315 <span class="cov0" title="0">klog.Infof("watch %s error: %v", pc.schedulerConf, err)</span> 19316 case <-stopCh:<span class="cov0" title="0"> 19317 return</span> 19318 } 19319 } 19320 } 19321 </pre> 19322 19323 <pre class="file" id="file89" style="display: none">/* 19324 Copyright 2018 The Kubernetes Authors. 
19325 19326 Licensed under the Apache License, Version 2.0 (the "License"); 19327 you may not use this file except in compliance with the License. 19328 You may obtain a copy of the License at 19329 19330 http://www.apache.org/licenses/LICENSE-2.0 19331 19332 Unless required by applicable law or agreed to in writing, software 19333 distributed under the License is distributed on an "AS IS" BASIS, 19334 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 19335 See the License for the specific language governing permissions and 19336 limitations under the License. 19337 */ 19338 19339 package scheduler 19340 19341 import ( 19342 "fmt" 19343 "io/ioutil" 19344 "strings" 19345 19346 "gopkg.in/yaml.v2" 19347 19348 "volcano.sh/volcano/pkg/scheduler/conf" 19349 "volcano.sh/volcano/pkg/scheduler/framework" 19350 "volcano.sh/volcano/pkg/scheduler/plugins" 19351 ) 19352 19353 var defaultSchedulerConf = ` 19354 actions: "enqueue, allocate, backfill" 19355 tiers: 19356 - plugins: 19357 - name: priority 19358 - name: gang 19359 - name: conformance 19360 - plugins: 19361 - name: overcommit 19362 - name: drf 19363 - name: predicates 19364 - name: proportion 19365 - name: nodeorder 19366 ` 19367 19368 func unmarshalSchedulerConf(confStr string) ([]framework.Action, []conf.Tier, []conf.Configuration, error) <span class="cov8" title="1">{ 19369 var actions []framework.Action 19370 19371 schedulerConf := &conf.SchedulerConfiguration{} 19372 19373 if err := yaml.Unmarshal([]byte(confStr), schedulerConf); err != nil </span><span class="cov0" title="0">{ 19374 return nil, nil, nil, err 19375 }</span> 19376 // Set default settings for each plugin if not set 19377 <span class="cov8" title="1">for i, tier := range schedulerConf.Tiers </span><span class="cov8" title="1">{ 19378 // drf with hierarchy enabled 19379 hdrf := false 19380 // proportion enabled 19381 proportion := false 19382 for j := range tier.Plugins </span><span class="cov8" title="1">{ 19383 if 
tier.Plugins[j].Name == "drf" && 19384 tier.Plugins[j].EnabledHierarchy != nil && 19385 *tier.Plugins[j].EnabledHierarchy </span><span class="cov0" title="0">{ 19386 hdrf = true 19387 }</span> 19388 <span class="cov8" title="1">if tier.Plugins[j].Name == "proportion" </span><span class="cov8" title="1">{ 19389 proportion = true 19390 }</span> 19391 <span class="cov8" title="1">plugins.ApplyPluginConfDefaults(&schedulerConf.Tiers[i].Plugins[j])</span> 19392 } 19393 <span class="cov8" title="1">if hdrf && proportion </span><span class="cov0" title="0">{ 19394 return nil, nil, nil, fmt.Errorf("proportion and drf with hierarchy enabled conflicts") 19395 }</span> 19396 } 19397 19398 <span class="cov8" title="1">actionNames := strings.Split(schedulerConf.Actions, ",") 19399 for _, actionName := range actionNames </span><span class="cov8" title="1">{ 19400 if action, found := framework.GetAction(strings.TrimSpace(actionName)); found </span><span class="cov8" title="1">{ 19401 actions = append(actions, action) 19402 }</span> else<span class="cov0" title="0"> { 19403 return nil, nil, nil, fmt.Errorf("failed to find Action %s, ignore it", actionName) 19404 }</span> 19405 } 19406 19407 <span class="cov8" title="1">return actions, schedulerConf.Tiers, schedulerConf.Configurations, nil</span> 19408 } 19409 19410 func readSchedulerConf(confPath string) (string, error) <span class="cov0" title="0">{ 19411 dat, err := ioutil.ReadFile(confPath) 19412 if err != nil </span><span class="cov0" title="0">{ 19413 return "", err 19414 }</span> 19415 <span class="cov0" title="0">return string(dat), nil</span> 19416 } 19417 </pre> 19418 19419 <pre class="file" id="file90" style="display: none">package util 19420 19421 import ( 19422 "context" 19423 "fmt" 19424 "sync" 19425 "sync/atomic" 19426 19427 "k8s.io/client-go/util/workqueue" 19428 "k8s.io/klog" 19429 "volcano.sh/volcano/pkg/scheduler/api" 19430 ) 19431 19432 type PredicateHelper interface { 19433 PredicateNodes(task *api.TaskInfo, 
nodes []*api.NodeInfo, fn api.PredicateFn) ([]*api.NodeInfo, *api.FitErrors) 19434 } 19435 19436 type predicateHelper struct { 19437 taskPredicateErrorCache map[string]map[string]error 19438 } 19439 19440 // PredicateNodes returns the specified number of nodes that fit a task 19441 func (ph *predicateHelper) PredicateNodes(task *api.TaskInfo, nodes []*api.NodeInfo, fn api.PredicateFn) ([]*api.NodeInfo, *api.FitErrors) <span class="cov0" title="0">{ 19442 var errorLock sync.RWMutex 19443 fe := api.NewFitErrors() 19444 19445 allNodes := len(nodes) 19446 if allNodes == 0 </span><span class="cov0" title="0">{ 19447 return make([]*api.NodeInfo, 0), fe 19448 }</span> 19449 <span class="cov0" title="0">numNodesToFind := CalculateNumOfFeasibleNodesToFind(int32(allNodes)) 19450 19451 //allocate enough space to avoid growing it 19452 predicateNodes := make([]*api.NodeInfo, numNodesToFind) 19453 19454 numFoundNodes := int32(0) 19455 processedNodes := int32(0) 19456 19457 taskGroupid := taskGroupID(task) 19458 nodeErrorCache, taskFailedBefore := ph.taskPredicateErrorCache[taskGroupid] 19459 if nodeErrorCache == nil </span><span class="cov0" title="0">{ 19460 nodeErrorCache = map[string]error{} 19461 }</span> 19462 19463 //create a context with cancellation 19464 <span class="cov0" title="0">ctx, cancel := context.WithCancel(context.Background()) 19465 19466 checkNode := func(index int) </span><span class="cov0" title="0">{ 19467 // Check the nodes starting from where is left off in the previous scheduling cycle, 19468 // to make sure all nodes have the same chance of being examined across pods. 19469 node := nodes[(lastProcessedNodeIndex+index)%allNodes] 19470 atomic.AddInt32(&processedNodes, 1) 19471 klog.V(4).Infof("Considering Task <%v/%v> on node <%v>: <%v> vs. <%v>", 19472 task.Namespace, task.Name, node.Name, task.Resreq, node.Idle) 19473 19474 // Check if the task had "predicate" failure before. 19475 // And then check if the task failed to predict on this node before. 
19476 if taskFailedBefore </span><span class="cov0" title="0">{ 19477 errorLock.RLock() 19478 errC, ok := nodeErrorCache[node.Name] 19479 errorLock.RUnlock() 19480 19481 if ok </span><span class="cov0" title="0">{ 19482 errorLock.Lock() 19483 fe.SetNodeError(node.Name, errC) 19484 errorLock.Unlock() 19485 return 19486 }</span> 19487 } 19488 19489 // TODO (k82cn): Enable eCache for performance improvement. 19490 <span class="cov0" title="0">if err := fn(task, node); err != nil </span><span class="cov0" title="0">{ 19491 klog.V(3).Infof("Predicates failed for task <%s/%s> on node <%s>: %v", 19492 task.Namespace, task.Name, node.Name, err) 19493 errorLock.Lock() 19494 nodeErrorCache[node.Name] = err 19495 ph.taskPredicateErrorCache[taskGroupid] = nodeErrorCache 19496 fe.SetNodeError(node.Name, err) 19497 errorLock.Unlock() 19498 return 19499 }</span> 19500 19501 //check if the number of found nodes is more than the numNodesTofind 19502 <span class="cov0" title="0">length := atomic.AddInt32(&numFoundNodes, 1) 19503 if length > numNodesToFind </span><span class="cov0" title="0">{ 19504 cancel() 19505 atomic.AddInt32(&numFoundNodes, -1) 19506 }</span> else<span class="cov0" title="0"> { 19507 predicateNodes[length-1] = node 19508 }</span> 19509 } 19510 19511 //workqueue.ParallelizeUntil(context.TODO(), 16, len(nodes), checkNode) 19512 <span class="cov0" title="0">workqueue.ParallelizeUntil(ctx, 16, allNodes, checkNode) 19513 19514 //processedNodes := int(numFoundNodes) + len(filteredNodesStatuses) + len(failedPredicateMap) 19515 lastProcessedNodeIndex = (lastProcessedNodeIndex + int(processedNodes)) % allNodes 19516 predicateNodes = predicateNodes[:numFoundNodes] 19517 return predicateNodes, fe</span> 19518 } 19519 19520 func taskGroupID(task *api.TaskInfo) string <span class="cov0" title="0">{ 19521 return fmt.Sprintf("%s/%s", task.Job, task.GetTaskSpecKey()) 19522 }</span> 19523 19524 func NewPredicateHelper() PredicateHelper <span class="cov0" title="0">{ 19525 return 
&predicateHelper{taskPredicateErrorCache: map[string]map[string]error{}} 19526 }</span> 19527 </pre> 19528 19529 <pre class="file" id="file91" style="display: none">/* 19530 Copyright 2017 The Kubernetes Authors. 19531 19532 Licensed under the Apache License, Version 2.0 (the "License"); 19533 you may not use this file except in compliance with the License. 19534 You may obtain a copy of the License at 19535 19536 http://www.apache.org/licenses/LICENSE-2.0 19537 19538 Unless required by applicable law or agreed to in writing, software 19539 distributed under the License is distributed on an "AS IS" BASIS, 19540 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 19541 See the License for the specific language governing permissions and 19542 limitations under the License. 19543 */ 19544 19545 package util 19546 19547 import ( 19548 "container/heap" 19549 19550 "volcano.sh/volcano/pkg/scheduler/api" 19551 ) 19552 19553 //PriorityQueue implements a scheduling queue. 19554 type PriorityQueue struct { 19555 queue priorityQueue 19556 } 19557 19558 type priorityQueue struct { 19559 items []interface{} 19560 lessFn api.LessFn 19561 } 19562 19563 // NewPriorityQueue returns a PriorityQueue 19564 func NewPriorityQueue(lessFn api.LessFn) *PriorityQueue <span class="cov0" title="0">{ 19565 return &PriorityQueue{ 19566 queue: priorityQueue{ 19567 items: make([]interface{}, 0), 19568 lessFn: lessFn, 19569 }, 19570 } 19571 }</span> 19572 19573 // Push pushes element in the priority Queue 19574 func (q *PriorityQueue) Push(it interface{}) <span class="cov0" title="0">{ 19575 heap.Push(&q.queue, it) 19576 }</span> 19577 19578 // Pop pops element in the priority Queue 19579 func (q *PriorityQueue) Pop() interface{} <span class="cov0" title="0">{ 19580 if q.Len() == 0 </span><span class="cov0" title="0">{ 19581 return nil 19582 }</span> 19583 19584 <span class="cov0" title="0">return heap.Pop(&q.queue)</span> 19585 } 19586 19587 // Empty check if queue is empty 
19588 func (q *PriorityQueue) Empty() bool <span class="cov0" title="0">{ 19589 return q.queue.Len() == 0 19590 }</span> 19591 19592 // Len returns Len of the priority queue 19593 func (q *PriorityQueue) Len() int <span class="cov0" title="0">{ 19594 return q.queue.Len() 19595 }</span> 19596 19597 func (pq *priorityQueue) Len() int <span class="cov0" title="0">{ return len(pq.items) }</span> 19598 19599 func (pq *priorityQueue) Less(i, j int) bool <span class="cov0" title="0">{ 19600 if pq.lessFn == nil </span><span class="cov0" title="0">{ 19601 return i < j 19602 }</span> 19603 19604 // We want Pop to give us the highest, not lowest, priority so we use greater than here. 19605 <span class="cov0" title="0">return pq.lessFn(pq.items[i], pq.items[j])</span> 19606 } 19607 19608 func (pq priorityQueue) Swap(i, j int) <span class="cov0" title="0">{ 19609 pq.items[i], pq.items[j] = pq.items[j], pq.items[i] 19610 }</span> 19611 19612 func (pq *priorityQueue) Push(x interface{}) <span class="cov0" title="0">{ 19613 (*pq).items = append((*pq).items, x) 19614 }</span> 19615 19616 func (pq *priorityQueue) Pop() interface{} <span class="cov0" title="0">{ 19617 old := (*pq).items 19618 n := len(old) 19619 item := old[n-1] 19620 (*pq).items = old[0 : n-1] 19621 return item 19622 }</span> 19623 </pre> 19624 19625 <pre class="file" id="file92" style="display: none">/* 19626 Copyright 2019 The Kubernetes Authors. 19627 19628 Licensed under the Apache License, Version 2.0 (the "License"); 19629 you may not use this file except in compliance with the License. 19630 You may obtain a copy of the License at 19631 19632 http://www.apache.org/licenses/LICENSE-2.0 19633 19634 Unless required by applicable law or agreed to in writing, software 19635 distributed under the License is distributed on an "AS IS" BASIS, 19636 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
19637 See the License for the specific language governing permissions and 19638 limitations under the License. 19639 */ 19640 19641 package util 19642 19643 import ( 19644 "context" 19645 "fmt" 19646 "math" 19647 "math/rand" 19648 "sort" 19649 "sync" 19650 19651 "k8s.io/client-go/util/workqueue" 19652 "k8s.io/klog" 19653 k8sframework "k8s.io/kubernetes/pkg/scheduler/framework/v1alpha1" 19654 19655 "volcano.sh/volcano/cmd/scheduler/app/options" 19656 "volcano.sh/volcano/pkg/scheduler/api" 19657 ) 19658 19659 const baselinePercentageOfNodesToFind = 50 19660 19661 var lastProcessedNodeIndex int 19662 19663 // Reservation is used to record target job and locked nodes 19664 var Reservation *ResourceReservation 19665 19666 func init() <span class="cov8" title="1">{ 19667 Reservation = NewResourceReservation() 19668 }</span> 19669 19670 // CalculateNumOfFeasibleNodesToFind returns the number of feasible nodes that once found, 19671 // the scheduler stops its search for more feasible nodes. 19672 func CalculateNumOfFeasibleNodesToFind(numAllNodes int32) (numNodes int32) <span class="cov0" title="0">{ 19673 opts := options.ServerOpts 19674 if numAllNodes <= opts.MinNodesToFind || opts.PercentageOfNodesToFind >= 100 </span><span class="cov0" title="0">{ 19675 return numAllNodes 19676 }</span> 19677 19678 <span class="cov0" title="0">adaptivePercentage := opts.PercentageOfNodesToFind 19679 if adaptivePercentage <= 0 </span><span class="cov0" title="0">{ 19680 adaptivePercentage = baselinePercentageOfNodesToFind - numAllNodes/125 19681 if adaptivePercentage < opts.MinPercentageOfNodesToFind </span><span class="cov0" title="0">{ 19682 adaptivePercentage = opts.MinPercentageOfNodesToFind 19683 }</span> 19684 } 19685 19686 <span class="cov0" title="0">numNodes = numAllNodes * adaptivePercentage / 100 19687 if numNodes < opts.MinNodesToFind </span><span class="cov0" title="0">{ 19688 numNodes = opts.MinNodesToFind 19689 }</span> 19690 <span class="cov0" title="0">return 
numNodes</span> 19691 } 19692 19693 // PrioritizeNodes returns a map whose key is node's score and value are corresponding nodes 19694 func PrioritizeNodes(task *api.TaskInfo, nodes []*api.NodeInfo, batchFn api.BatchNodeOrderFn, mapFn api.NodeOrderMapFn, reduceFn api.NodeOrderReduceFn) map[float64][]*api.NodeInfo <span class="cov0" title="0">{ 19695 pluginNodeScoreMap := map[string]k8sframework.NodeScoreList{} 19696 nodeOrderScoreMap := map[string]float64{} 19697 nodeScores := map[float64][]*api.NodeInfo{} 19698 var workerLock sync.Mutex 19699 scoreNode := func(index int) </span><span class="cov0" title="0">{ 19700 node := nodes[index] 19701 mapScores, orderScore, err := mapFn(task, node) 19702 if err != nil </span><span class="cov0" title="0">{ 19703 klog.Errorf("Error in Calculating Priority for the node:%v", err) 19704 return 19705 }</span> 19706 19707 <span class="cov0" title="0">workerLock.Lock() 19708 for plugin, score := range mapScores </span><span class="cov0" title="0">{ 19709 nodeScoreMap, ok := pluginNodeScoreMap[plugin] 19710 if !ok </span><span class="cov0" title="0">{ 19711 nodeScoreMap = k8sframework.NodeScoreList{} 19712 }</span> 19713 <span class="cov0" title="0">hp := k8sframework.NodeScore{} 19714 hp.Name = node.Name 19715 hp.Score = int64(math.Floor(score)) 19716 pluginNodeScoreMap[plugin] = append(nodeScoreMap, hp)</span> 19717 } 19718 <span class="cov0" title="0">nodeOrderScoreMap[node.Name] = orderScore 19719 workerLock.Unlock()</span> 19720 } 19721 <span class="cov0" title="0">workqueue.ParallelizeUntil(context.TODO(), 16, len(nodes), scoreNode) 19722 reduceScores, err := reduceFn(task, pluginNodeScoreMap) 19723 if err != nil </span><span class="cov0" title="0">{ 19724 klog.Errorf("Error in Calculating Priority for the node:%v", err) 19725 return nodeScores 19726 }</span> 19727 19728 <span class="cov0" title="0">batchNodeScore, err := batchFn(task, nodes) 19729 if err != nil </span><span class="cov0" title="0">{ 19730 klog.Errorf("Error in 
Calculating batch Priority for the node, err %v", err) 19731 return nodeScores 19732 }</span> 19733 19734 <span class="cov0" title="0">for _, node := range nodes </span><span class="cov0" title="0">{ 19735 if score, found := reduceScores[node.Name]; found </span><span class="cov0" title="0">{ 19736 if orderScore, ok := nodeOrderScoreMap[node.Name]; ok </span><span class="cov0" title="0">{ 19737 score += orderScore 19738 }</span> 19739 <span class="cov0" title="0">if batchScore, ok := batchNodeScore[node.Name]; ok </span><span class="cov0" title="0">{ 19740 score += batchScore 19741 }</span> 19742 <span class="cov0" title="0">nodeScores[score] = append(nodeScores[score], node)</span> 19743 } else<span class="cov0" title="0"> { 19744 // If no plugin is applied to this node, the default is 0.0 19745 score = 0.0 19746 if orderScore, ok := nodeOrderScoreMap[node.Name]; ok </span><span class="cov0" title="0">{ 19747 score += orderScore 19748 }</span> 19749 <span class="cov0" title="0">if batchScore, ok := batchNodeScore[node.Name]; ok </span><span class="cov0" title="0">{ 19750 score += batchScore 19751 }</span> 19752 <span class="cov0" title="0">nodeScores[score] = append(nodeScores[score], node)</span> 19753 } 19754 } 19755 <span class="cov0" title="0">return nodeScores</span> 19756 } 19757 19758 // SortNodes returns nodes by order of score 19759 func SortNodes(nodeScores map[float64][]*api.NodeInfo) []*api.NodeInfo <span class="cov0" title="0">{ 19760 var nodesInorder []*api.NodeInfo 19761 var keys []float64 19762 for key := range nodeScores </span><span class="cov0" title="0">{ 19763 keys = append(keys, key) 19764 }</span> 19765 <span class="cov0" title="0">sort.Sort(sort.Reverse(sort.Float64Slice(keys))) 19766 for _, key := range keys </span><span class="cov0" title="0">{ 19767 nodes := nodeScores[key] 19768 nodesInorder = append(nodesInorder, nodes...) 
19769 }</span> 19770 <span class="cov0" title="0">return nodesInorder</span> 19771 } 19772 19773 // SelectBestNode returns best node whose score is highest, pick one randomly if there are many nodes with same score. 19774 func SelectBestNode(nodeScores map[float64][]*api.NodeInfo) *api.NodeInfo <span class="cov8" title="1">{ 19775 var bestNodes []*api.NodeInfo 19776 maxScore := -1.0 19777 for score, nodes := range nodeScores </span><span class="cov8" title="1">{ 19778 if score > maxScore </span><span class="cov8" title="1">{ 19779 maxScore = score 19780 bestNodes = nodes 19781 }</span> 19782 } 19783 19784 <span class="cov8" title="1">if len(bestNodes) == 0 </span><span class="cov8" title="1">{ 19785 return nil 19786 }</span> 19787 19788 <span class="cov8" title="1">return bestNodes[rand.Intn(len(bestNodes))]</span> 19789 } 19790 19791 // GetNodeList returns values of the map 'nodes' 19792 func GetNodeList(nodes map[string]*api.NodeInfo, nodeList []string) []*api.NodeInfo <span class="cov0" title="0">{ 19793 result := make([]*api.NodeInfo, 0, len(nodeList)) 19794 for _, nodename := range nodeList </span><span class="cov0" title="0">{ 19795 if ni, ok := nodes[nodename]; ok </span><span class="cov0" title="0">{ 19796 result = append(result, ni) 19797 }</span> 19798 } 19799 <span class="cov0" title="0">return result</span> 19800 } 19801 19802 // ValidateVictims returns an error if the resources of the victims can't satisfy the preemptor 19803 func ValidateVictims(preemptor *api.TaskInfo, node *api.NodeInfo, victims []*api.TaskInfo) error <span class="cov0" title="0">{ 19804 if len(victims) == 0 </span><span class="cov0" title="0">{ 19805 return fmt.Errorf("no victims") 19806 }</span> 19807 <span class="cov0" title="0">futureIdle := node.FutureIdle() 19808 for _, victim := range victims </span><span class="cov0" title="0">{ 19809 futureIdle.Add(victim.Resreq) 19810 }</span> 19811 // Every resource of the preemptor needs to be less or equal than corresponding 19812 // 
idle resource after preemption. 19813 <span class="cov0" title="0">if !preemptor.InitResreq.LessEqual(futureIdle, api.Zero) </span><span class="cov0" title="0">{ 19814 return fmt.Errorf("not enough resources: requested <%v>, but future idle <%v>", 19815 preemptor.InitResreq, futureIdle) 19816 }</span> 19817 <span class="cov0" title="0">return nil</span> 19818 } 19819 19820 // ResourceReservation is struct used for resource reservation 19821 type ResourceReservation struct { 19822 TargetJob *api.JobInfo 19823 LockedNodes map[string]*api.NodeInfo 19824 } 19825 19826 // NewResourceReservation is used to create global instance 19827 func NewResourceReservation() *ResourceReservation <span class="cov8" title="1">{ 19828 return &ResourceReservation{ 19829 TargetJob: nil, 19830 LockedNodes: map[string]*api.NodeInfo{}, 19831 } 19832 }</span> 19833 19834 // GetMinInt return minimum int from vals 19835 func GetMinInt(vals ...int) int <span class="cov8" title="1">{ 19836 if len(vals) == 0 </span><span class="cov8" title="1">{ 19837 return 0 19838 }</span> 19839 19840 <span class="cov8" title="1">min := vals[0] 19841 for _, val := range vals </span><span class="cov8" title="1">{ 19842 if val <= min </span><span class="cov8" title="1">{ 19843 min = val 19844 }</span> 19845 } 19846 <span class="cov8" title="1">return min</span> 19847 } 19848 </pre> 19849 19850 <pre class="file" id="file93" style="display: none">/* 19851 Copyright 2019 The Kubernetes Authors. 19852 19853 Licensed under the Apache License, Version 2.0 (the "License"); 19854 you may not use this file except in compliance with the License. 19855 You may obtain a copy of the License at 19856 19857 http://www.apache.org/licenses/LICENSE-2.0 19858 19859 Unless required by applicable law or agreed to in writing, software 19860 distributed under the License is distributed on an "AS IS" BASIS, 19861 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
19862 See the License for the specific language governing permissions and 19863 limitations under the License. 19864 */ 19865 19866 package util 19867 19868 import ( 19869 "fmt" 19870 "sync" 19871 19872 v1 "k8s.io/api/core/v1" 19873 "k8s.io/apimachinery/pkg/api/resource" 19874 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 19875 "k8s.io/apimachinery/pkg/types" 19876 "k8s.io/client-go/kubernetes" 19877 volumescheduling "k8s.io/kubernetes/pkg/controller/volume/scheduling" 19878 19879 schedulingv2 "volcano.sh/apis/pkg/apis/scheduling/v1beta1" 19880 "volcano.sh/volcano/pkg/scheduler/api" 19881 ) 19882 19883 // BuildResourceList builts resource list object 19884 func BuildResourceList(cpu string, memory string) v1.ResourceList <span class="cov0" title="0">{ 19885 return v1.ResourceList{ 19886 v1.ResourceCPU: resource.MustParse(cpu), 19887 v1.ResourceMemory: resource.MustParse(memory), 19888 api.GPUResourceName: resource.MustParse("0"), 19889 } 19890 }</span> 19891 19892 // BuildResourceListWithGPU builts resource list with GPU 19893 func BuildResourceListWithGPU(cpu string, memory string, GPU string) v1.ResourceList <span class="cov0" title="0">{ 19894 return v1.ResourceList{ 19895 v1.ResourceCPU: resource.MustParse(cpu), 19896 v1.ResourceMemory: resource.MustParse(memory), 19897 api.GPUResourceName: resource.MustParse(GPU), 19898 } 19899 }</span> 19900 19901 // BuildNode builts node object 19902 func BuildNode(name string, alloc v1.ResourceList, labels map[string]string) *v1.Node <span class="cov0" title="0">{ 19903 return &v1.Node{ 19904 ObjectMeta: metav1.ObjectMeta{ 19905 Name: name, 19906 Labels: labels, 19907 Annotations: map[string]string{}, 19908 }, 19909 Status: v1.NodeStatus{ 19910 Capacity: alloc, 19911 Allocatable: alloc, 19912 }, 19913 } 19914 }</span> 19915 19916 // BuildPod builts Pod object 19917 func BuildPod(namespace, name, nodename string, p v1.PodPhase, req v1.ResourceList, groupName string, labels map[string]string, selector map[string]string) 
*v1.Pod <span class="cov0" title="0">{ 19918 return &v1.Pod{ 19919 ObjectMeta: metav1.ObjectMeta{ 19920 UID: types.UID(fmt.Sprintf("%v-%v", namespace, name)), 19921 Name: name, 19922 Namespace: namespace, 19923 Labels: labels, 19924 Annotations: map[string]string{ 19925 schedulingv2.KubeGroupNameAnnotationKey: groupName, 19926 }, 19927 }, 19928 Status: v1.PodStatus{ 19929 Phase: p, 19930 }, 19931 Spec: v1.PodSpec{ 19932 NodeName: nodename, 19933 NodeSelector: selector, 19934 Containers: []v1.Container{ 19935 { 19936 Resources: v1.ResourceRequirements{ 19937 Requests: req, 19938 }, 19939 }, 19940 }, 19941 }, 19942 } 19943 }</span> 19944 19945 // FakeBinder is used as fake binder 19946 type FakeBinder struct { 19947 Binds map[string]string 19948 Channel chan string 19949 } 19950 19951 // Bind used by fake binder struct to bind pods 19952 func (fb *FakeBinder) Bind(kubeClient *kubernetes.Clientset, tasks []*api.TaskInfo) (error, []*api.TaskInfo) <span class="cov0" title="0">{ 19953 for _, p := range tasks </span><span class="cov0" title="0">{ 19954 key := fmt.Sprintf("%v/%v", p.Namespace, p.Name) 19955 fb.Binds[key] = p.NodeName 19956 }</span> 19957 19958 <span class="cov0" title="0">return nil, nil</span> 19959 } 19960 19961 // FakeEvictor is used as fake evictor 19962 type FakeEvictor struct { 19963 sync.Mutex 19964 evicts []string 19965 Channel chan string 19966 } 19967 19968 // Evicts returns copy of evicted pods. 19969 func (fe *FakeEvictor) Evicts() []string <span class="cov0" title="0">{ 19970 fe.Lock() 19971 defer fe.Unlock() 19972 return append([]string{}, fe.evicts...) 
19973 }</span> 19974 19975 // Evict is used by fake evictor to evict pods 19976 func (fe *FakeEvictor) Evict(p *v1.Pod, reason string) error <span class="cov0" title="0">{ 19977 fe.Lock() 19978 defer fe.Unlock() 19979 19980 fmt.Println("PodName: ", p.Name) 19981 key := fmt.Sprintf("%v/%v", p.Namespace, p.Name) 19982 fe.evicts = append(fe.evicts, key) 19983 19984 fe.Channel <- key 19985 19986 return nil 19987 }</span> 19988 19989 // FakeStatusUpdater is used for fake status update 19990 type FakeStatusUpdater struct { 19991 } 19992 19993 // UpdatePodCondition is a empty function 19994 func (ftsu *FakeStatusUpdater) UpdatePodCondition(pod *v1.Pod, podCondition *v1.PodCondition) (*v1.Pod, error) <span class="cov0" title="0">{ 19995 // do nothing here 19996 return nil, nil 19997 }</span> 19998 19999 // UpdatePodGroup is a empty function 20000 func (ftsu *FakeStatusUpdater) UpdatePodGroup(pg *api.PodGroup) (*api.PodGroup, error) <span class="cov0" title="0">{ 20001 // do nothing here 20002 return nil, nil 20003 }</span> 20004 20005 // FakeVolumeBinder is used as fake volume binder 20006 type FakeVolumeBinder struct { 20007 } 20008 20009 // AllocateVolumes is a empty function 20010 func (fvb *FakeVolumeBinder) AllocateVolumes(task *api.TaskInfo, hostname string, podVolumes *volumescheduling.PodVolumes) error <span class="cov0" title="0">{ 20011 return nil 20012 }</span> 20013 20014 // BindVolumes is a empty function 20015 func (fvb *FakeVolumeBinder) BindVolumes(task *api.TaskInfo, podVolumes *volumescheduling.PodVolumes) error <span class="cov0" title="0">{ 20016 return nil 20017 }</span> 20018 20019 // GetPodVolumes is a empty function 20020 func (fvb *FakeVolumeBinder) GetPodVolumes(task *api.TaskInfo, node *v1.Node) (*volumescheduling.PodVolumes, error) <span class="cov0" title="0">{ 20021 return nil, nil 20022 }</span> 20023 </pre> 20024 20025 <pre class="file" id="file94" style="display: none">/* 20026 Copyright 2018 The Volcano Authors. 
20027 20028 Licensed under the Apache License, Version 2.0 (the "License"); 20029 you may not use this file except in compliance with the License. 20030 You may obtain a copy of the License at 20031 20032 http://www.apache.org/licenses/LICENSE-2.0 20033 20034 Unless required by applicable law or agreed to in writing, software 20035 distributed under the License is distributed on an "AS IS" BASIS, 20036 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 20037 See the License for the specific language governing permissions and 20038 limitations under the License. 20039 */ 20040 20041 package mutate 20042 20043 import ( 20044 "encoding/json" 20045 "fmt" 20046 "strconv" 20047 20048 "k8s.io/api/admission/v1beta1" 20049 whv1beta1 "k8s.io/api/admissionregistration/v1beta1" 20050 v1 "k8s.io/api/core/v1" 20051 "k8s.io/klog" 20052 20053 "volcano.sh/apis/pkg/apis/batch/v1alpha1" 20054 "volcano.sh/volcano/pkg/webhooks/router" 20055 "volcano.sh/volcano/pkg/webhooks/schema" 20056 "volcano.sh/volcano/pkg/webhooks/util" 20057 ) 20058 20059 const ( 20060 // DefaultQueue constant stores the name of the queue as "default" 20061 DefaultQueue = "default" 20062 // DefaultMaxRetry is the default number of retries. 
20063 DefaultMaxRetry = 3 20064 20065 defaultSchedulerName = "volcano" 20066 20067 defaultMaxRetry int32 = 3 20068 ) 20069 20070 func init() <span class="cov8" title="1">{ 20071 router.RegisterAdmission(service) 20072 }</span> 20073 20074 var service = &router.AdmissionService{ 20075 Path: "/jobs/mutate", 20076 Func: Jobs, 20077 20078 MutatingConfig: &whv1beta1.MutatingWebhookConfiguration{ 20079 Webhooks: []whv1beta1.MutatingWebhook{{ 20080 Name: "mutatejob.volcano.sh", 20081 Rules: []whv1beta1.RuleWithOperations{ 20082 { 20083 Operations: []whv1beta1.OperationType{whv1beta1.Create}, 20084 Rule: whv1beta1.Rule{ 20085 APIGroups: []string{"batch.volcano.sh"}, 20086 APIVersions: []string{"v1alpha1"}, 20087 Resources: []string{"jobs"}, 20088 }, 20089 }, 20090 }, 20091 }}, 20092 }, 20093 } 20094 20095 type patchOperation struct { 20096 Op string `json:"op"` 20097 Path string `json:"path"` 20098 Value interface{} `json:"value,omitempty"` 20099 } 20100 20101 // Jobs mutate jobs. 20102 func Jobs(ar v1beta1.AdmissionReview) *v1beta1.AdmissionResponse <span class="cov0" title="0">{ 20103 klog.V(3).Infof("mutating jobs") 20104 20105 job, err := schema.DecodeJob(ar.Request.Object, ar.Request.Resource) 20106 if err != nil </span><span class="cov0" title="0">{ 20107 return util.ToAdmissionResponse(err) 20108 }</span> 20109 20110 <span class="cov0" title="0">var patchBytes []byte 20111 switch ar.Request.Operation </span>{ 20112 case v1beta1.Create:<span class="cov0" title="0"> 20113 patchBytes, _ = createPatch(job)</span> 20114 default:<span class="cov0" title="0"> 20115 err = fmt.Errorf("expect operation to be 'CREATE' ") 20116 return util.ToAdmissionResponse(err)</span> 20117 } 20118 20119 <span class="cov0" title="0">klog.V(3).Infof("AdmissionResponse: patch=%v", string(patchBytes)) 20120 reviewResponse := v1beta1.AdmissionResponse{ 20121 Allowed: true, 20122 Patch: patchBytes, 20123 } 20124 pt := v1beta1.PatchTypeJSONPatch 20125 reviewResponse.PatchType = &pt 20126 20127 
return &reviewResponse</span> 20128 } 20129 20130 func createPatch(job *v1alpha1.Job) ([]byte, error) <span class="cov0" title="0">{ 20131 var patch []patchOperation 20132 pathQueue := patchDefaultQueue(job) 20133 if pathQueue != nil </span><span class="cov0" title="0">{ 20134 patch = append(patch, *pathQueue) 20135 }</span> 20136 <span class="cov0" title="0">pathScheduler := patchDefaultScheduler(job) 20137 if pathScheduler != nil </span><span class="cov0" title="0">{ 20138 patch = append(patch, *pathScheduler) 20139 }</span> 20140 <span class="cov0" title="0">pathMaxRetry := patchDefaultMaxRetry(job) 20141 if pathMaxRetry != nil </span><span class="cov0" title="0">{ 20142 patch = append(patch, *pathMaxRetry) 20143 }</span> 20144 <span class="cov0" title="0">pathSpec := mutateSpec(job.Spec.Tasks, "/spec/tasks") 20145 if pathSpec != nil </span><span class="cov0" title="0">{ 20146 patch = append(patch, *pathSpec) 20147 }</span> 20148 <span class="cov0" title="0">pathMinAvailable := patchDefaultMinAvailable(job) 20149 if pathMinAvailable != nil </span><span class="cov0" title="0">{ 20150 patch = append(patch, *pathMinAvailable) 20151 }</span> 20152 // Add default plugins for some distributed-framework plugin cases 20153 <span class="cov0" title="0">patchPlugins := patchDefaultPlugins(job) 20154 if patchPlugins != nil </span><span class="cov0" title="0">{ 20155 patch = append(patch, *patchPlugins) 20156 }</span> 20157 <span class="cov0" title="0">return json.Marshal(patch)</span> 20158 } 20159 20160 func patchDefaultQueue(job *v1alpha1.Job) *patchOperation <span class="cov0" title="0">{ 20161 //Add default queue if not specified. 
20162 if job.Spec.Queue == "" </span><span class="cov0" title="0">{ 20163 return &patchOperation{Op: "add", Path: "/spec/queue", Value: DefaultQueue} 20164 }</span> 20165 <span class="cov0" title="0">return nil</span> 20166 } 20167 20168 func patchDefaultScheduler(job *v1alpha1.Job) *patchOperation <span class="cov0" title="0">{ 20169 // Add default scheduler name if not specified. 20170 if job.Spec.SchedulerName == "" </span><span class="cov0" title="0">{ 20171 return &patchOperation{Op: "add", Path: "/spec/schedulerName", Value: defaultSchedulerName} 20172 }</span> 20173 <span class="cov0" title="0">return nil</span> 20174 } 20175 20176 func patchDefaultMaxRetry(job *v1alpha1.Job) *patchOperation <span class="cov0" title="0">{ 20177 // Add default maxRetry if maxRetry is zero. 20178 if job.Spec.MaxRetry == 0 </span><span class="cov0" title="0">{ 20179 return &patchOperation{Op: "add", Path: "/spec/maxRetry", Value: DefaultMaxRetry} 20180 }</span> 20181 <span class="cov0" title="0">return nil</span> 20182 } 20183 20184 func patchDefaultMinAvailable(job *v1alpha1.Job) *patchOperation <span class="cov0" title="0">{ 20185 // Add default minAvailable if minAvailable is zero. 
20186 if job.Spec.MinAvailable == 0 </span><span class="cov0" title="0">{ 20187 var jobMinAvailable int32 20188 for _, task := range job.Spec.Tasks </span><span class="cov0" title="0">{ 20189 if task.MinAvailable != nil </span><span class="cov0" title="0">{ 20190 jobMinAvailable += *task.MinAvailable 20191 }</span> else<span class="cov0" title="0"> { 20192 jobMinAvailable += task.Replicas 20193 }</span> 20194 } 20195 20196 <span class="cov0" title="0">return &patchOperation{Op: "add", Path: "/spec/minAvailable", Value: jobMinAvailable}</span> 20197 } 20198 <span class="cov0" title="0">return nil</span> 20199 } 20200 20201 func mutateSpec(tasks []v1alpha1.TaskSpec, basePath string) *patchOperation <span class="cov8" title="1">{ 20202 patched := false 20203 for index := range tasks </span><span class="cov8" title="1">{ 20204 // add default task name 20205 taskName := tasks[index].Name 20206 if len(taskName) == 0 </span><span class="cov8" title="1">{ 20207 patched = true 20208 tasks[index].Name = v1alpha1.DefaultTaskSpec + strconv.Itoa(index) 20209 }</span> 20210 20211 <span class="cov8" title="1">if tasks[index].Template.Spec.HostNetwork && tasks[index].Template.Spec.DNSPolicy == "" </span><span class="cov0" title="0">{ 20212 patched = true 20213 tasks[index].Template.Spec.DNSPolicy = v1.DNSClusterFirstWithHostNet 20214 }</span> 20215 20216 <span class="cov8" title="1">if tasks[index].MinAvailable == nil </span><span class="cov8" title="1">{ 20217 patched = true 20218 minAvailable := tasks[index].Replicas 20219 tasks[index].MinAvailable = &minAvailable 20220 }</span> 20221 20222 <span class="cov8" title="1">if tasks[index].MaxRetry == 0 </span><span class="cov8" title="1">{ 20223 patched = true 20224 tasks[index].MaxRetry = defaultMaxRetry 20225 }</span> 20226 } 20227 <span class="cov8" title="1">if !patched </span><span class="cov0" title="0">{ 20228 return nil 20229 }</span> 20230 <span class="cov8" title="1">return &patchOperation{ 20231 Op: "replace", 20232 Path: 
basePath, 20233 Value: tasks, 20234 }</span> 20235 } 20236 20237 func patchDefaultPlugins(job *v1alpha1.Job) *patchOperation <span class="cov0" title="0">{ 20238 if job.Spec.Plugins == nil </span><span class="cov0" title="0">{ 20239 return nil 20240 }</span> 20241 <span class="cov0" title="0">plugins := map[string][]string{} 20242 for k, v := range job.Spec.Plugins </span><span class="cov0" title="0">{ 20243 plugins[k] = v 20244 }</span> 20245 20246 // Because the tensorflow-plugin depends on svc-plugin. 20247 // If the svc-plugin is not defined, we should add it. 20248 <span class="cov0" title="0">if _, ok := job.Spec.Plugins["tensorflow"]; ok </span><span class="cov0" title="0">{ 20249 if _, ok := plugins["svc"]; !ok </span><span class="cov0" title="0">{ 20250 plugins["svc"] = []string{} 20251 }</span> 20252 } 20253 20254 <span class="cov0" title="0">return &patchOperation{ 20255 Op: "replace", 20256 Path: "/spec/plugins", 20257 Value: plugins, 20258 }</span> 20259 } 20260 </pre> 20261 20262 <pre class="file" id="file95" style="display: none">/* 20263 Copyright 2018 The Volcano Authors. 20264 20265 Licensed under the Apache License, Version 2.0 (the "License"); 20266 you may not use this file except in compliance with the License. 20267 You may obtain a copy of the License at 20268 20269 http://www.apache.org/licenses/LICENSE-2.0 20270 20271 Unless required by applicable law or agreed to in writing, software 20272 distributed under the License is distributed on an "AS IS" BASIS, 20273 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 20274 See the License for the specific language governing permissions and 20275 limitations under the License. 
20276 */ 20277 20278 package validate 20279 20280 import ( 20281 "context" 20282 "fmt" 20283 "strings" 20284 20285 "k8s.io/api/admission/v1beta1" 20286 whv1beta1 "k8s.io/api/admissionregistration/v1beta1" 20287 v1 "k8s.io/api/core/v1" 20288 apiequality "k8s.io/apimachinery/pkg/api/equality" 20289 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 20290 "k8s.io/apimachinery/pkg/util/validation" 20291 "k8s.io/apimachinery/pkg/util/validation/field" 20292 "k8s.io/klog" 20293 k8score "k8s.io/kubernetes/pkg/apis/core" 20294 k8scorev1 "k8s.io/kubernetes/pkg/apis/core/v1" 20295 v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos" 20296 k8scorevalid "k8s.io/kubernetes/pkg/apis/core/validation" 20297 20298 "volcano.sh/apis/pkg/apis/batch/v1alpha1" 20299 schedulingv1beta1 "volcano.sh/apis/pkg/apis/scheduling/v1beta1" 20300 jobhelpers "volcano.sh/volcano/pkg/controllers/job/helpers" 20301 "volcano.sh/volcano/pkg/controllers/job/plugins" 20302 "volcano.sh/volcano/pkg/webhooks/router" 20303 "volcano.sh/volcano/pkg/webhooks/schema" 20304 "volcano.sh/volcano/pkg/webhooks/util" 20305 ) 20306 20307 func init() <span class="cov8" title="1">{ 20308 router.RegisterAdmission(service) 20309 }</span> 20310 20311 var service = &router.AdmissionService{ 20312 Path: "/jobs/validate", 20313 Func: AdmitJobs, 20314 20315 Config: config, 20316 20317 ValidatingConfig: &whv1beta1.ValidatingWebhookConfiguration{ 20318 Webhooks: []whv1beta1.ValidatingWebhook{{ 20319 Name: "validatejob.volcano.sh", 20320 Rules: []whv1beta1.RuleWithOperations{ 20321 { 20322 Operations: []whv1beta1.OperationType{whv1beta1.Create, whv1beta1.Update}, 20323 Rule: whv1beta1.Rule{ 20324 APIGroups: []string{"batch.volcano.sh"}, 20325 APIVersions: []string{"v1alpha1"}, 20326 Resources: []string{"jobs"}, 20327 }, 20328 }, 20329 }, 20330 }}, 20331 }, 20332 } 20333 20334 var config = &router.AdmissionServiceConfig{} 20335 20336 // AdmitJobs is to admit jobs and return response. 
20337 func AdmitJobs(ar v1beta1.AdmissionReview) *v1beta1.AdmissionResponse <span class="cov0" title="0">{ 20338 klog.V(3).Infof("admitting jobs -- %s", ar.Request.Operation) 20339 20340 job, err := schema.DecodeJob(ar.Request.Object, ar.Request.Resource) 20341 if err != nil </span><span class="cov0" title="0">{ 20342 return util.ToAdmissionResponse(err) 20343 }</span> 20344 <span class="cov0" title="0">var msg string 20345 reviewResponse := v1beta1.AdmissionResponse{} 20346 reviewResponse.Allowed = true 20347 20348 switch ar.Request.Operation </span>{ 20349 case v1beta1.Create:<span class="cov0" title="0"> 20350 msg = validateJobCreate(job, &reviewResponse)</span> 20351 case v1beta1.Update:<span class="cov0" title="0"> 20352 oldJob, err := schema.DecodeJob(ar.Request.OldObject, ar.Request.Resource) 20353 if err != nil </span><span class="cov0" title="0">{ 20354 return util.ToAdmissionResponse(err) 20355 }</span> 20356 <span class="cov0" title="0">err = validateJobUpdate(oldJob, job) 20357 if err != nil </span><span class="cov0" title="0">{ 20358 return util.ToAdmissionResponse(err) 20359 }</span> 20360 default:<span class="cov0" title="0"> 20361 err := fmt.Errorf("expect operation to be 'CREATE' or 'UPDATE'") 20362 return util.ToAdmissionResponse(err)</span> 20363 } 20364 20365 <span class="cov0" title="0">if !reviewResponse.Allowed </span><span class="cov0" title="0">{ 20366 reviewResponse.Result = &metav1.Status{Message: strings.TrimSpace(msg)} 20367 }</span> 20368 <span class="cov0" title="0">return &reviewResponse</span> 20369 } 20370 20371 func validateJobCreate(job *v1alpha1.Job, reviewResponse *v1beta1.AdmissionResponse) string <span class="cov8" title="1">{ 20372 var msg string 20373 taskNames := map[string]string{} 20374 var totalReplicas int32 20375 20376 if job.Spec.MinAvailable < 0 </span><span class="cov8" title="1">{ 20377 reviewResponse.Allowed = false 20378 return "job 'minAvailable' must be >= 0." 
20379 }</span> 20380 20381 <span class="cov8" title="1">if job.Spec.MaxRetry < 0 </span><span class="cov8" title="1">{ 20382 reviewResponse.Allowed = false 20383 return "'maxRetry' cannot be less than zero." 20384 }</span> 20385 20386 <span class="cov8" title="1">if job.Spec.TTLSecondsAfterFinished != nil && *job.Spec.TTLSecondsAfterFinished < 0 </span><span class="cov8" title="1">{ 20387 reviewResponse.Allowed = false 20388 return "'ttlSecondsAfterFinished' cannot be less than zero." 20389 }</span> 20390 20391 <span class="cov8" title="1">if len(job.Spec.Tasks) == 0 </span><span class="cov8" title="1">{ 20392 reviewResponse.Allowed = false 20393 return "No task specified in job spec" 20394 }</span> 20395 20396 <span class="cov8" title="1">hasDependenciesBetweenTasks := false 20397 for index, task := range job.Spec.Tasks </span><span class="cov8" title="1">{ 20398 if task.DependsOn != nil </span><span class="cov8" title="1">{ 20399 hasDependenciesBetweenTasks = true 20400 }</span> 20401 20402 <span class="cov8" title="1">if task.Replicas < 0 </span><span class="cov8" title="1">{ 20403 msg += fmt.Sprintf(" 'replicas' < 0 in task: %s;", task.Name) 20404 }</span> 20405 20406 <span class="cov8" title="1">if task.MinAvailable != nil && *task.MinAvailable > task.Replicas </span><span class="cov0" title="0">{ 20407 msg += fmt.Sprintf(" 'minAvailable' is greater than 'replicas' in task: %s, job: %s", task.Name, job.Name) 20408 }</span> 20409 20410 // count replicas 20411 <span class="cov8" title="1">totalReplicas += task.Replicas 20412 20413 // validate task name 20414 if errMsgs := validation.IsDNS1123Label(task.Name); len(errMsgs) > 0 </span><span class="cov8" title="1">{ 20415 msg += fmt.Sprintf(" %v;", errMsgs) 20416 }</span> 20417 20418 // duplicate task name 20419 <span class="cov8" title="1">if _, found := taskNames[task.Name]; found </span><span class="cov8" title="1">{ 20420 msg += fmt.Sprintf(" duplicated task name %s;", task.Name) 20421 break</span> 20422 } 
else<span class="cov8" title="1"> { 20423 taskNames[task.Name] = task.Name 20424 }</span> 20425 20426 <span class="cov8" title="1">if err := validatePolicies(task.Policies, field.NewPath("spec.tasks.policies")); err != nil </span><span class="cov8" title="1">{ 20427 msg += err.Error() + fmt.Sprintf(" valid events are %v, valid actions are %v", 20428 getValidEvents(), getValidActions()) 20429 }</span> 20430 <span class="cov8" title="1">podName := jobhelpers.MakePodName(job.Name, task.Name, index) 20431 msg += validateK8sPodNameLength(podName) 20432 msg += validateTaskTemplate(task, job, index)</span> 20433 } 20434 20435 <span class="cov8" title="1">msg += validateJobName(job) 20436 20437 if totalReplicas < job.Spec.MinAvailable </span><span class="cov8" title="1">{ 20438 msg += "job 'minAvailable' should not be greater than total replicas in tasks;" 20439 }</span> 20440 20441 <span class="cov8" title="1">if err := validatePolicies(job.Spec.Policies, field.NewPath("spec.policies")); err != nil </span><span class="cov8" title="1">{ 20442 msg = msg + err.Error() + fmt.Sprintf(" valid events are %v, valid actions are %v;", 20443 getValidEvents(), getValidActions()) 20444 }</span> 20445 20446 // invalid job plugins 20447 <span class="cov8" title="1">if len(job.Spec.Plugins) != 0 </span><span class="cov8" title="1">{ 20448 for name := range job.Spec.Plugins </span><span class="cov8" title="1">{ 20449 if _, found := plugins.GetPluginBuilder(name); !found </span><span class="cov8" title="1">{ 20450 msg += fmt.Sprintf(" unable to find job plugin: %s", name) 20451 }</span> 20452 } 20453 } 20454 20455 <span class="cov8" title="1">if err := validateIO(job.Spec.Volumes); err != nil </span><span class="cov8" title="1">{ 20456 msg += err.Error() 20457 }</span> 20458 20459 <span class="cov8" title="1">queue, err := config.VolcanoClient.SchedulingV1beta1().Queues().Get(context.TODO(), job.Spec.Queue, metav1.GetOptions{}) 20460 if err != nil </span><span class="cov8" title="1">{ 
20461 msg += fmt.Sprintf(" unable to find job queue: %v", err) 20462 }</span> else<span class="cov8" title="1"> if queue.Status.State != schedulingv1beta1.QueueStateOpen </span><span class="cov0" title="0">{ 20463 msg += fmt.Sprintf("can only submit job to queue with state `Open`, "+ 20464 "queue `%s` status is `%s`", queue.Name, queue.Status.State) 20465 }</span> 20466 20467 <span class="cov8" title="1">if hasDependenciesBetweenTasks </span><span class="cov8" title="1">{ 20468 _, isDag := topoSort(job) 20469 if !isDag </span><span class="cov8" title="1">{ 20470 msg += fmt.Sprintf("job has dependencies between tasks, but doesn't form a directed acyclic graph(DAG)") 20471 }</span> 20472 } 20473 20474 <span class="cov8" title="1">if msg != "" </span><span class="cov8" title="1">{ 20475 reviewResponse.Allowed = false 20476 }</span> 20477 20478 <span class="cov8" title="1">return msg</span> 20479 } 20480 20481 func validateJobUpdate(old, new *v1alpha1.Job) error <span class="cov8" title="1">{ 20482 var totalReplicas int32 20483 for _, task := range new.Spec.Tasks </span><span class="cov8" title="1">{ 20484 if task.Replicas < 0 </span><span class="cov0" title="0">{ 20485 return fmt.Errorf("'replicas' must be >= 0 in task: %s", task.Name) 20486 }</span> 20487 20488 <span class="cov8" title="1">if task.MinAvailable != nil && *task.MinAvailable > task.Replicas </span><span class="cov0" title="0">{ 20489 return fmt.Errorf("'minAvailable' must be <= 'replicas' in task: %s;", task.Name) 20490 }</span> 20491 // count replicas 20492 <span class="cov8" title="1">totalReplicas += task.Replicas</span> 20493 } 20494 <span class="cov8" title="1">if new.Spec.MinAvailable > totalReplicas </span><span class="cov8" title="1">{ 20495 return fmt.Errorf("job 'minAvailable' must not be greater than total replicas") 20496 }</span> 20497 <span class="cov8" title="1">if new.Spec.MinAvailable < 0 </span><span class="cov8" title="1">{ 20498 return fmt.Errorf("job 'minAvailable' must be >= 0") 
20499 }</span> 20500 20501 <span class="cov8" title="1">if len(old.Spec.Tasks) != len(new.Spec.Tasks) </span><span class="cov8" title="1">{ 20502 return fmt.Errorf("job updates may not add or remove tasks") 20503 }</span> 20504 // other fields under spec are not allowed to mutate 20505 <span class="cov8" title="1">new.Spec.MinAvailable = old.Spec.MinAvailable 20506 new.Spec.PriorityClassName = old.Spec.PriorityClassName 20507 for i := range new.Spec.Tasks </span><span class="cov8" title="1">{ 20508 new.Spec.Tasks[i].Replicas = old.Spec.Tasks[i].Replicas 20509 new.Spec.Tasks[i].MinAvailable = old.Spec.Tasks[i].MinAvailable 20510 }</span> 20511 20512 // job controller will update the pvc name if not provided 20513 <span class="cov8" title="1">for i := range new.Spec.Volumes </span><span class="cov0" title="0">{ 20514 if new.Spec.Volumes[i].VolumeClaim != nil </span><span class="cov0" title="0">{ 20515 new.Spec.Volumes[i].VolumeClaimName = "" 20516 }</span> 20517 } 20518 <span class="cov8" title="1">for i := range old.Spec.Volumes </span><span class="cov0" title="0">{ 20519 if old.Spec.Volumes[i].VolumeClaim != nil </span><span class="cov0" title="0">{ 20520 old.Spec.Volumes[i].VolumeClaimName = "" 20521 }</span> 20522 } 20523 20524 <span class="cov8" title="1">if !apiequality.Semantic.DeepEqual(new.Spec, old.Spec) </span><span class="cov8" title="1">{ 20525 return fmt.Errorf("job updates may not change fields other than `minAvailable`, `tasks[*].replicas under spec`") 20526 }</span> 20527 20528 <span class="cov8" title="1">return nil</span> 20529 } 20530 20531 func validateTaskTemplate(task v1alpha1.TaskSpec, job *v1alpha1.Job, index int) string <span class="cov8" title="1">{ 20532 var v1PodTemplate v1.PodTemplate 20533 v1PodTemplate.Template = *task.Template.DeepCopy() 20534 k8scorev1.SetObjectDefaults_PodTemplate(&v1PodTemplate) 20535 20536 var coreTemplateSpec k8score.PodTemplateSpec 20537 
k8scorev1.Convert_v1_PodTemplateSpec_To_core_PodTemplateSpec(&v1PodTemplate.Template, &coreTemplateSpec, nil) 20538 20539 // Skip verify container SecurityContex.Privileged as it depends on 20540 // the kube-apiserver `allow-privileged` flag. 20541 for i, container := range coreTemplateSpec.Spec.Containers </span><span class="cov8" title="1">{ 20542 if container.SecurityContext != nil && container.SecurityContext.Privileged != nil </span><span class="cov8" title="1">{ 20543 coreTemplateSpec.Spec.Containers[i].SecurityContext.Privileged = nil 20544 }</span> 20545 } 20546 20547 <span class="cov8" title="1">corePodTemplate := k8score.PodTemplate{ 20548 ObjectMeta: metav1.ObjectMeta{ 20549 Name: task.Name, 20550 Namespace: job.Namespace, 20551 }, 20552 Template: coreTemplateSpec, 20553 } 20554 20555 if allErrs := k8scorevalid.ValidatePodTemplate(&corePodTemplate); len(allErrs) > 0 </span><span class="cov8" title="1">{ 20556 msg := fmt.Sprintf("spec.task[%d].", index) 20557 for index := range allErrs </span><span class="cov8" title="1">{ 20558 msg += allErrs[index].Error() + ". 
" 20559 }</span> 20560 <span class="cov8" title="1">return msg</span> 20561 } 20562 20563 <span class="cov8" title="1">msg := validateTaskTopoPolicy(task, index) 20564 if msg != "" </span><span class="cov0" title="0">{ 20565 return msg 20566 }</span> 20567 20568 <span class="cov8" title="1">return ""</span> 20569 } 20570 20571 func validateK8sPodNameLength(podName string) string <span class="cov8" title="1">{ 20572 if errMsgs := validation.IsQualifiedName(podName); len(errMsgs) > 0 </span><span class="cov0" title="0">{ 20573 return fmt.Sprintf("create pod with name %s validate failed %v;", podName, errMsgs) 20574 }</span> 20575 <span class="cov8" title="1">return ""</span> 20576 } 20577 20578 func validateJobName(job *v1alpha1.Job) string <span class="cov8" title="1">{ 20579 if errMsgs := validation.IsQualifiedName(job.Name); len(errMsgs) > 0 </span><span class="cov0" title="0">{ 20580 return fmt.Sprintf("create job with name %s validate failed %v", job.Name, errMsgs) 20581 }</span> 20582 <span class="cov8" title="1">return ""</span> 20583 } 20584 20585 func validateTaskTopoPolicy(task v1alpha1.TaskSpec, index int) string <span class="cov8" title="1">{ 20586 if task.TopologyPolicy == "" || task.TopologyPolicy == v1alpha1.None </span><span class="cov8" title="1">{ 20587 return "" 20588 }</span> 20589 20590 <span class="cov8" title="1">template := task.Template.DeepCopy() 20591 20592 for id, container := range template.Spec.Containers </span><span class="cov8" title="1">{ 20593 if len(container.Resources.Requests) == 0 </span><span class="cov8" title="1">{ 20594 template.Spec.Containers[id].Resources.Requests = container.Resources.Limits.DeepCopy() 20595 }</span> 20596 } 20597 20598 <span class="cov8" title="1">for id, container := range template.Spec.InitContainers </span><span class="cov0" title="0">{ 20599 if len(container.Resources.Requests) == 0 </span><span class="cov0" title="0">{ 20600 template.Spec.InitContainers[id].Resources.Requests = 
container.Resources.Limits.DeepCopy() 20601 }</span> 20602 } 20603 20604 <span class="cov8" title="1">pod := &v1.Pod{ 20605 Spec: template.Spec, 20606 } 20607 20608 if v1qos.GetPodQOS(pod) != v1.PodQOSGuaranteed </span><span class="cov8" title="1">{ 20609 return fmt.Sprintf("spec.task[%d] isn't Guaranteed pod, kind=%v", index, v1qos.GetPodQOS(pod)) 20610 }</span> 20611 20612 <span class="cov8" title="1">for id, container := range append(template.Spec.Containers, template.Spec.InitContainers...) </span><span class="cov8" title="1">{ 20613 requestNum := guaranteedCPUs(container) 20614 if requestNum == 0 </span><span class="cov8" title="1">{ 20615 return fmt.Sprintf("the cpu request isn't an integer in spec.task[%d] container[%d].", 20616 index, id) 20617 }</span> 20618 } 20619 20620 <span class="cov8" title="1">return ""</span> 20621 } 20622 20623 func guaranteedCPUs(container v1.Container) int <span class="cov8" title="1">{ 20624 cpuQuantity := container.Resources.Requests[v1.ResourceCPU] 20625 if cpuQuantity.Value()*1000 != cpuQuantity.MilliValue() </span><span class="cov8" title="1">{ 20626 return 0 20627 }</span> 20628 20629 <span class="cov8" title="1">return int(cpuQuantity.Value())</span> 20630 } 20631 </pre> 20632 20633 <pre class="file" id="file96" style="display: none">/* 20634 Copyright 2018 The Volcano Authors. 20635 20636 Licensed under the Apache License, Version 2.0 (the "License"); 20637 you may not use this file except in compliance with the License. 20638 You may obtain a copy of the License at 20639 20640 http://www.apache.org/licenses/LICENSE-2.0 20641 20642 Unless required by applicable law or agreed to in writing, software 20643 distributed under the License is distributed on an "AS IS" BASIS, 20644 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 20645 See the License for the specific language governing permissions and 20646 limitations under the License. 
20647 */ 20648 20649 package validate 20650 20651 import ( 20652 "fmt" 20653 20654 "github.com/hashicorp/go-multierror" 20655 "k8s.io/apimachinery/pkg/util/validation/field" 20656 "k8s.io/kubernetes/pkg/apis/core/validation" 20657 20658 batchv1alpha1 "volcano.sh/apis/pkg/apis/batch/v1alpha1" 20659 busv1alpha1 "volcano.sh/apis/pkg/apis/bus/v1alpha1" 20660 ) 20661 20662 // policyEventMap defines all policy events and whether to allow external use. 20663 var policyEventMap = map[busv1alpha1.Event]bool{ 20664 busv1alpha1.AnyEvent: true, 20665 busv1alpha1.PodFailedEvent: true, 20666 busv1alpha1.PodEvictedEvent: true, 20667 busv1alpha1.JobUnknownEvent: true, 20668 busv1alpha1.TaskCompletedEvent: true, 20669 busv1alpha1.TaskFailedEvent: true, 20670 busv1alpha1.OutOfSyncEvent: false, 20671 busv1alpha1.CommandIssuedEvent: false, 20672 busv1alpha1.JobUpdatedEvent: true, 20673 } 20674 20675 // policyActionMap defines all policy actions and whether to allow external use. 20676 var policyActionMap = map[busv1alpha1.Action]bool{ 20677 busv1alpha1.AbortJobAction: true, 20678 busv1alpha1.RestartJobAction: true, 20679 busv1alpha1.RestartTaskAction: true, 20680 busv1alpha1.TerminateJobAction: true, 20681 busv1alpha1.CompleteJobAction: true, 20682 busv1alpha1.ResumeJobAction: true, 20683 busv1alpha1.SyncJobAction: false, 20684 busv1alpha1.EnqueueAction: false, 20685 busv1alpha1.SyncQueueAction: false, 20686 busv1alpha1.OpenQueueAction: false, 20687 busv1alpha1.CloseQueueAction: false, 20688 } 20689 20690 func validatePolicies(policies []batchv1alpha1.LifecyclePolicy, fldPath *field.Path) error <span class="cov8" title="1">{ 20691 var err error 20692 policyEvents := map[busv1alpha1.Event]struct{}{} 20693 exitCodes := map[int32]struct{}{} 20694 20695 for _, policy := range policies </span><span class="cov8" title="1">{ 20696 if (policy.Event != "" || len(policy.Events) != 0) && policy.ExitCode != nil </span><span class="cov8" title="1">{ 20697 err = multierror.Append(err, 
fmt.Errorf("must not specify event and exitCode simultaneously")) 20698 break</span> 20699 } 20700 20701 <span class="cov8" title="1">if policy.Event == "" && len(policy.Events) == 0 && policy.ExitCode == nil </span><span class="cov8" title="1">{ 20702 err = multierror.Append(err, fmt.Errorf("either event and exitCode should be specified")) 20703 break</span> 20704 } 20705 20706 <span class="cov8" title="1">if len(policy.Event) != 0 || len(policy.Events) != 0 </span><span class="cov8" title="1">{ 20707 bFlag := false 20708 policyEventsList := getEventList(policy) 20709 for _, event := range policyEventsList </span><span class="cov8" title="1">{ 20710 if allow, ok := policyEventMap[event]; !ok || !allow </span><span class="cov8" title="1">{ 20711 err = multierror.Append(err, field.Invalid(fldPath, event, "invalid policy event")) 20712 bFlag = true 20713 break</span> 20714 } 20715 20716 <span class="cov8" title="1">if allow, ok := policyActionMap[policy.Action]; !ok || !allow </span><span class="cov8" title="1">{ 20717 err = multierror.Append(err, field.Invalid(fldPath, policy.Action, "invalid policy action")) 20718 bFlag = true 20719 break</span> 20720 } 20721 <span class="cov8" title="1">if _, found := policyEvents[event]; found </span><span class="cov8" title="1">{ 20722 err = multierror.Append(err, fmt.Errorf("duplicate event %v across different policy", event)) 20723 bFlag = true 20724 break</span> 20725 } else<span class="cov8" title="1"> { 20726 policyEvents[event] = struct{}{} 20727 }</span> 20728 } 20729 <span class="cov8" title="1">if bFlag </span><span class="cov8" title="1">{ 20730 break</span> 20731 } 20732 } else<span class="cov8" title="1"> { 20733 if *policy.ExitCode == 0 </span><span class="cov8" title="1">{ 20734 err = multierror.Append(err, fmt.Errorf("0 is not a valid error code")) 20735 break</span> 20736 } 20737 <span class="cov8" title="1">if _, found := exitCodes[*policy.ExitCode]; found </span><span class="cov8" title="1">{ 20738 err = 
multierror.Append(err, fmt.Errorf("duplicate exitCode %v", *policy.ExitCode)) 20739 break</span> 20740 } else<span class="cov8" title="1"> { 20741 exitCodes[*policy.ExitCode] = struct{}{} 20742 }</span> 20743 } 20744 } 20745 20746 <span class="cov8" title="1">if _, found := policyEvents[busv1alpha1.AnyEvent]; found && len(policyEvents) > 1 </span><span class="cov8" title="1">{ 20747 err = multierror.Append(err, fmt.Errorf("if there's * here, no other policy should be here")) 20748 }</span> 20749 20750 <span class="cov8" title="1">return err</span> 20751 } 20752 20753 func getEventList(policy batchv1alpha1.LifecyclePolicy) []busv1alpha1.Event <span class="cov8" title="1">{ 20754 policyEventsList := policy.Events 20755 if len(policy.Event) > 0 </span><span class="cov8" title="1">{ 20756 policyEventsList = append(policyEventsList, policy.Event) 20757 }</span> 20758 <span class="cov8" title="1">uniquePolicyEventlist := removeDuplicates(policyEventsList) 20759 return uniquePolicyEventlist</span> 20760 } 20761 20762 func removeDuplicates(eventList []busv1alpha1.Event) []busv1alpha1.Event <span class="cov8" title="1">{ 20763 keys := make(map[busv1alpha1.Event]bool) 20764 list := []busv1alpha1.Event{} 20765 for _, val := range eventList </span><span class="cov8" title="1">{ 20766 if _, value := keys[val]; !value </span><span class="cov8" title="1">{ 20767 keys[val] = true 20768 list = append(list, val) 20769 }</span> 20770 } 20771 <span class="cov8" title="1">return list</span> 20772 } 20773 20774 func getValidEvents() []busv1alpha1.Event <span class="cov8" title="1">{ 20775 var events []busv1alpha1.Event 20776 for e, allow := range policyEventMap </span><span class="cov8" title="1">{ 20777 if allow </span><span class="cov8" title="1">{ 20778 events = append(events, e) 20779 }</span> 20780 } 20781 20782 <span class="cov8" title="1">return events</span> 20783 } 20784 20785 func getValidActions() []busv1alpha1.Action <span class="cov8" title="1">{ 20786 var actions 
[]busv1alpha1.Action 20787 for a, allow := range policyActionMap </span><span class="cov8" title="1">{ 20788 if allow </span><span class="cov8" title="1">{ 20789 actions = append(actions, a) 20790 }</span> 20791 } 20792 20793 <span class="cov8" title="1">return actions</span> 20794 } 20795 20796 // validateIO validates IO configuration. 20797 func validateIO(volumes []batchv1alpha1.VolumeSpec) error <span class="cov8" title="1">{ 20798 volumeMap := map[string]bool{} 20799 for _, volume := range volumes </span><span class="cov8" title="1">{ 20800 if len(volume.MountPath) == 0 </span><span class="cov8" title="1">{ 20801 return fmt.Errorf(" mountPath is required;") 20802 }</span> 20803 <span class="cov8" title="1">if _, found := volumeMap[volume.MountPath]; found </span><span class="cov8" title="1">{ 20804 return fmt.Errorf(" duplicated mountPath: %s;", volume.MountPath) 20805 }</span> 20806 <span class="cov8" title="1">if volume.VolumeClaim == nil && volume.VolumeClaimName == "" </span><span class="cov8" title="1">{ 20807 return fmt.Errorf(" either VolumeClaim or VolumeClaimName must be specified;") 20808 }</span> 20809 <span class="cov8" title="1">if len(volume.VolumeClaimName) != 0 </span><span class="cov8" title="1">{ 20810 if volume.VolumeClaim != nil </span><span class="cov0" title="0">{ 20811 return fmt.Errorf("conflict: If you want to use an existing PVC, just specify VolumeClaimName." 
+ 20812 "If you want to create a new PVC, you do not need to specify VolumeClaimName") 20813 }</span> 20814 <span class="cov8" title="1">if errMsgs := validation.ValidatePersistentVolumeName(volume.VolumeClaimName, false); len(errMsgs) > 0 </span><span class="cov0" title="0">{ 20815 return fmt.Errorf("invalid VolumeClaimName %s : %v", volume.VolumeClaimName, errMsgs) 20816 }</span> 20817 } 20818 20819 <span class="cov8" title="1">volumeMap[volume.MountPath] = true</span> 20820 } 20821 <span class="cov8" title="1">return nil</span> 20822 } 20823 20824 // topoSort uses topo sort to sort job tasks based on dependsOn field 20825 // it will return an array contains all sorted task names and a bool which indicates whether it's a valid dag 20826 func topoSort(job *batchv1alpha1.Job) ([]string, bool) <span class="cov8" title="1">{ 20827 graph, inDegree, taskList := makeGraph(job) 20828 var taskStack []string 20829 for task, degree := range inDegree </span><span class="cov8" title="1">{ 20830 if degree == 0 </span><span class="cov8" title="1">{ 20831 taskStack = append(taskStack, task) 20832 }</span> 20833 } 20834 20835 <span class="cov8" title="1">sortedTasks := make([]string, 0) 20836 for len(taskStack) > 0 </span><span class="cov8" title="1">{ 20837 length := len(taskStack) 20838 var out string 20839 out, taskStack = taskStack[length-1], taskStack[:length-1] 20840 sortedTasks = append(sortedTasks, out) 20841 for in, connected := range graph[out] </span><span class="cov8" title="1">{ 20842 if connected </span><span class="cov8" title="1">{ 20843 graph[out][in] = false 20844 inDegree[in]-- 20845 if inDegree[in] == 0 </span><span class="cov8" title="1">{ 20846 taskStack = append(taskStack, in) 20847 }</span> 20848 } 20849 } 20850 } 20851 20852 <span class="cov8" title="1">isDag := len(sortedTasks) == len(taskList) 20853 if !isDag </span><span class="cov8" title="1">{ 20854 return nil, false 20855 }</span> 20856 20857 <span class="cov8" title="1">return sortedTasks, 
isDag</span> 20858 } 20859 20860 func makeGraph(job *batchv1alpha1.Job) (map[string]map[string]bool, map[string]int, []string) <span class="cov8" title="1">{ 20861 graph := make(map[string]map[string]bool) 20862 inDegree := make(map[string]int) 20863 taskList := make([]string, 0) 20864 20865 for _, task := range job.Spec.Tasks </span><span class="cov8" title="1">{ 20866 taskList = append(taskList, task.Name) 20867 inDegree[task.Name] = 0 20868 if task.DependsOn != nil </span><span class="cov8" title="1">{ 20869 for _, dependOnTask := range task.DependsOn.Name </span><span class="cov8" title="1">{ 20870 if graph[dependOnTask] == nil </span><span class="cov8" title="1">{ 20871 graph[dependOnTask] = make(map[string]bool) 20872 }</span> 20873 20874 <span class="cov8" title="1">graph[dependOnTask][task.Name] = true 20875 inDegree[task.Name]++</span> 20876 } 20877 } 20878 } 20879 20880 <span class="cov8" title="1">return graph, inDegree, taskList</span> 20881 } 20882 </pre> 20883 20884 <pre class="file" id="file97" style="display: none">/* 20885 Copyright 2021 The Volcano Authors. 20886 20887 Licensed under the Apache License, Version 2.0 (the "License"); 20888 you may not use this file except in compliance with the License. 20889 You may obtain a copy of the License at 20890 20891 http://www.apache.org/licenses/LICENSE-2.0 20892 20893 Unless required by applicable law or agreed to in writing, software 20894 distributed under the License is distributed on an "AS IS" BASIS, 20895 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 20896 See the License for the specific language governing permissions and 20897 limitations under the License. 
20898 */ 20899 20900 package mutate 20901 20902 import ( 20903 "github.com/imdario/mergo" 20904 "gopkg.in/yaml.v2" 20905 v1 "k8s.io/api/core/v1" 20906 "k8s.io/klog" 20907 20908 wkconfig "volcano.sh/volcano/pkg/webhooks/config" 20909 ) 20910 20911 type annotationResGroup struct{} 20912 20913 const ( 20914 // defaultAnnotationKey: default annotation key 20915 defaultAnnotationKey = "volcano.sh/resource-group" 20916 ) 20917 20918 // NewAnnotationResGroup create a new structure 20919 func NewAnnotationResGroup() ResGroup <span class="cov8" title="1">{ 20920 return &annotationResGroup{} 20921 }</span> 20922 20923 // getAnnotation get annotations from the resource group 20924 func getAnnotation(resGroupConfig wkconfig.ResGroupConfig) map[string]string <span class="cov8" title="1">{ 20925 annotations := make(map[string]string) 20926 for _, val := range resGroupConfig.Object.Value </span><span class="cov8" title="1">{ 20927 tmp := make(map[string]string) 20928 err := yaml.Unmarshal([]byte(val), &tmp) 20929 if err != nil </span><span class="cov0" title="0">{ 20930 continue</span> 20931 } 20932 20933 <span class="cov8" title="1">if err := mergo.Merge(&annotations, &tmp); err != nil </span><span class="cov0" title="0">{ 20934 klog.Errorf("annotations merge failed, err=%v", err) 20935 continue</span> 20936 } 20937 } 20938 20939 <span class="cov8" title="1">return annotations</span> 20940 } 20941 20942 // IsBelongResGroup adjust whether pod is belong to the resource group 20943 func (resGroup *annotationResGroup) IsBelongResGroup(pod *v1.Pod, resGroupConfig wkconfig.ResGroupConfig) bool <span class="cov8" title="1">{ 20944 if resGroupConfig.Object.Key != "" && resGroupConfig.Object.Key != "annotation" </span><span class="cov0" title="0">{ 20945 return false 20946 }</span> 20947 20948 <span class="cov8" title="1">annotations := getAnnotation(resGroupConfig) 20949 klog.V(3).Infof("annotations : %v", annotations) 20950 for key, annotation := range annotations </span><span 
class="cov8" title="1">{ 20951 if pod.Annotations[key] == annotation </span><span class="cov8" title="1">{ 20952 return true 20953 }</span> 20954 } 20955 20956 <span class="cov8" title="1">if resGroupConfig.Object.Key == "" && pod.Annotations[defaultAnnotationKey] == resGroupConfig.ResourceGroup </span><span class="cov8" title="1">{ 20957 return true 20958 }</span> 20959 20960 <span class="cov8" title="1">return false</span> 20961 } 20962 </pre> 20963 20964 <pre class="file" id="file98" style="display: none">/* 20965 Copyright 2021 The Volcano Authors. 20966 20967 Licensed under the Apache License, Version 2.0 (the "License"); 20968 you may not use this file except in compliance with the License. 20969 You may obtain a copy of the License at 20970 20971 http://www.apache.org/licenses/LICENSE-2.0 20972 20973 Unless required by applicable law or agreed to in writing, software 20974 distributed under the License is distributed on an "AS IS" BASIS, 20975 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 20976 See the License for the specific language governing permissions and 20977 limitations under the License. 
20978 */ 20979 20980 package mutate 20981 20982 import ( 20983 v1 "k8s.io/api/core/v1" 20984 20985 wkconfig "volcano.sh/volcano/pkg/webhooks/config" 20986 ) 20987 20988 // ResGroup interface for resource group 20989 type ResGroup interface { 20990 IsBelongResGroup(pod *v1.Pod, resGroupConfig wkconfig.ResGroupConfig) bool 20991 } 20992 20993 // GetResGroup return the interface besed on resourceGroup.Object.Key 20994 func GetResGroup(resourceGroup wkconfig.ResGroupConfig) ResGroup <span class="cov8" title="1">{ 20995 switch resourceGroup.Object.Key </span>{ 20996 case "namespace":<span class="cov8" title="1"> 20997 return NewNamespaceResGroup()</span> 20998 case "annotation":<span class="cov8" title="1"> 20999 return NewAnnotationResGroup()</span> 21000 } 21001 <span class="cov8" title="1">return NewAnnotationResGroup()</span> 21002 } 21003 </pre> 21004 21005 <pre class="file" id="file99" style="display: none">/* 21006 Copyright 2021 The Volcano Authors. 21007 21008 Licensed under the Apache License, Version 2.0 (the "License"); 21009 you may not use this file except in compliance with the License. 21010 You may obtain a copy of the License at 21011 21012 http://www.apache.org/licenses/LICENSE-2.0 21013 21014 Unless required by applicable law or agreed to in writing, software 21015 distributed under the License is distributed on an "AS IS" BASIS, 21016 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 21017 See the License for the specific language governing permissions and 21018 limitations under the License. 
21019 */ 21020 21021 package mutate 21022 21023 import ( 21024 "encoding/json" 21025 "fmt" 21026 21027 "k8s.io/api/admission/v1beta1" 21028 whv1beta1 "k8s.io/api/admissionregistration/v1beta1" 21029 v1 "k8s.io/api/core/v1" 21030 "k8s.io/klog" 21031 21032 wkconfig "volcano.sh/volcano/pkg/webhooks/config" 21033 "volcano.sh/volcano/pkg/webhooks/router" 21034 "volcano.sh/volcano/pkg/webhooks/schema" 21035 "volcano.sh/volcano/pkg/webhooks/util" 21036 ) 21037 21038 // patchOperation define the patch operation structure 21039 type patchOperation struct { 21040 Op string `json:"op"` 21041 Path string `json:"path"` 21042 Value interface{} `json:"value,omitempty"` 21043 } 21044 21045 // init register mutate pod 21046 func init() <span class="cov8" title="1">{ 21047 router.RegisterAdmission(service) 21048 }</span> 21049 21050 var service = &router.AdmissionService{ 21051 Path: "/pods/mutate", 21052 Func: Pods, 21053 Config: config, 21054 MutatingConfig: &whv1beta1.MutatingWebhookConfiguration{ 21055 Webhooks: []whv1beta1.MutatingWebhook{{ 21056 Name: "mutatepod.volcano.sh", 21057 Rules: []whv1beta1.RuleWithOperations{ 21058 { 21059 Operations: []whv1beta1.OperationType{whv1beta1.Create}, 21060 Rule: whv1beta1.Rule{ 21061 APIGroups: []string{""}, 21062 APIVersions: []string{"v1"}, 21063 Resources: []string{"pods"}, 21064 }, 21065 }, 21066 }, 21067 }}, 21068 }, 21069 } 21070 21071 var config = &router.AdmissionServiceConfig{} 21072 21073 // Pods mutate pods. 
21074 func Pods(ar v1beta1.AdmissionReview) *v1beta1.AdmissionResponse <span class="cov0" title="0">{ 21075 klog.V(3).Infof("mutating pods -- %s", ar.Request.Operation) 21076 pod, err := schema.DecodePod(ar.Request.Object, ar.Request.Resource) 21077 if err != nil </span><span class="cov0" title="0">{ 21078 return util.ToAdmissionResponse(err) 21079 }</span> 21080 21081 <span class="cov0" title="0">if pod.Namespace == "" </span><span class="cov0" title="0">{ 21082 pod.Namespace = ar.Request.Namespace 21083 }</span> 21084 21085 <span class="cov0" title="0">var patchBytes []byte 21086 switch ar.Request.Operation </span>{ 21087 case v1beta1.Create:<span class="cov0" title="0"> 21088 patchBytes, _ = createPatch(pod)</span> 21089 default:<span class="cov0" title="0"> 21090 err = fmt.Errorf("expect operation to be 'CREATE' ") 21091 return util.ToAdmissionResponse(err)</span> 21092 } 21093 21094 <span class="cov0" title="0">reviewResponse := v1beta1.AdmissionResponse{ 21095 Allowed: true, 21096 Patch: patchBytes, 21097 } 21098 pt := v1beta1.PatchTypeJSONPatch 21099 reviewResponse.PatchType = &pt 21100 21101 return &reviewResponse</span> 21102 } 21103 21104 // createPatch patch pod 21105 func createPatch(pod *v1.Pod) ([]byte, error) <span class="cov8" title="1">{ 21106 if config.ConfigData == nil </span><span class="cov0" title="0">{ 21107 klog.V(5).Infof("admission configuration is empty.") 21108 return nil, nil 21109 }</span> 21110 21111 <span class="cov8" title="1">var patch []patchOperation 21112 config.ConfigData.Lock() 21113 defer config.ConfigData.Unlock() 21114 21115 for _, resourceGroup := range config.ConfigData.ResGroupsConfig </span><span class="cov8" title="1">{ 21116 klog.V(3).Infof("resourceGroup %s", resourceGroup.ResourceGroup) 21117 group := GetResGroup(resourceGroup) 21118 if !group.IsBelongResGroup(pod, resourceGroup) </span><span class="cov8" title="1">{ 21119 continue</span> 21120 } 21121 21122 <span class="cov8" title="1">patchLabel := 
patchLabels(pod, resourceGroup) 21123 if patchLabel != nil </span><span class="cov8" title="1">{ 21124 patch = append(patch, *patchLabel) 21125 }</span> 21126 21127 <span class="cov8" title="1">patchToleration := patchTaintToleration(pod, resourceGroup) 21128 if patchToleration != nil </span><span class="cov8" title="1">{ 21129 patch = append(patch, *patchToleration) 21130 }</span> 21131 <span class="cov8" title="1">patchScheduler := patchSchedulerName(resourceGroup) 21132 if patchScheduler != nil </span><span class="cov8" title="1">{ 21133 patch = append(patch, *patchScheduler) 21134 }</span> 21135 21136 <span class="cov8" title="1">klog.V(5).Infof("pod patch %v", patch) 21137 return json.Marshal(patch)</span> 21138 } 21139 21140 <span class="cov8" title="1">return json.Marshal(patch)</span> 21141 } 21142 21143 // patchLabels patch label 21144 func patchLabels(pod *v1.Pod, resGroupConfig wkconfig.ResGroupConfig) *patchOperation <span class="cov8" title="1">{ 21145 if len(resGroupConfig.Labels) == 0 </span><span class="cov0" title="0">{ 21146 return nil 21147 }</span> 21148 21149 <span class="cov8" title="1">nodeSelector := make(map[string]string) 21150 for key, label := range pod.Spec.NodeSelector </span><span class="cov0" title="0">{ 21151 nodeSelector[key] = label 21152 }</span> 21153 21154 <span class="cov8" title="1">for key, label := range resGroupConfig.Labels </span><span class="cov8" title="1">{ 21155 nodeSelector[key] = label 21156 }</span> 21157 21158 <span class="cov8" title="1">return &patchOperation{Op: "add", Path: "/spec/nodeSelector", Value: nodeSelector}</span> 21159 } 21160 21161 // patchTaintToleration patch taint toleration 21162 func patchTaintToleration(pod *v1.Pod, resGroupConfig wkconfig.ResGroupConfig) *patchOperation <span class="cov8" title="1">{ 21163 if len(resGroupConfig.Tolerations) == 0 </span><span class="cov8" title="1">{ 21164 return nil 21165 }</span> 21166 21167 <span class="cov8" title="1">var dst []v1.Toleration 21168 dst = 
append(dst, pod.Spec.Tolerations...) 21169 dst = append(dst, resGroupConfig.Tolerations...) 21170 21171 return &patchOperation{Op: "add", Path: "/spec/tolerations", Value: dst}</span> 21172 } 21173 21174 // patchSchedulerName patch scheduler 21175 func patchSchedulerName(resGroupConfig wkconfig.ResGroupConfig) *patchOperation <span class="cov8" title="1">{ 21176 if resGroupConfig.SchedulerName == "" </span><span class="cov0" title="0">{ 21177 return nil 21178 }</span> 21179 21180 <span class="cov8" title="1">return &patchOperation{Op: "add", Path: "/spec/schedulerName", Value: resGroupConfig.SchedulerName}</span> 21181 } 21182 </pre> 21183 21184 <pre class="file" id="file100" style="display: none">/* 21185 Copyright 2021 The Volcano Authors. 21186 21187 Licensed under the Apache License, Version 2.0 (the "License"); 21188 you may not use this file except in compliance with the License. 21189 You may obtain a copy of the License at 21190 21191 http://www.apache.org/licenses/LICENSE-2.0 21192 21193 Unless required by applicable law or agreed to in writing, software 21194 distributed under the License is distributed on an "AS IS" BASIS, 21195 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 21196 See the License for the specific language governing permissions and 21197 limitations under the License. 
21198 */ 21199 21200 package mutate 21201 21202 import ( 21203 v1 "k8s.io/api/core/v1" 21204 21205 wkconfig "volcano.sh/volcano/pkg/webhooks/config" 21206 ) 21207 21208 type namespaceResGroup struct{} 21209 21210 // NewNamespaceResGroup create a new structure 21211 func NewNamespaceResGroup() ResGroup <span class="cov8" title="1">{ 21212 return &namespaceResGroup{} 21213 }</span> 21214 21215 // IsBelongResGroup adjust whether pod is belong to the resource group 21216 func (resGroup *namespaceResGroup) IsBelongResGroup(pod *v1.Pod, resGroupConfig wkconfig.ResGroupConfig) bool <span class="cov8" title="1">{ 21217 if resGroupConfig.Object.Key != "namespace" </span><span class="cov0" title="0">{ 21218 return false 21219 }</span> 21220 21221 <span class="cov8" title="1">for _, val := range resGroupConfig.Object.Value </span><span class="cov8" title="1">{ 21222 if pod.Namespace == val </span><span class="cov8" title="1">{ 21223 return true 21224 }</span> 21225 } 21226 21227 <span class="cov8" title="1">return false</span> 21228 } 21229 </pre> 21230 21231 <pre class="file" id="file101" style="display: none">/* 21232 Copyright 2019 The Volcano Authors. 21233 21234 Licensed under the Apache License, Version 2.0 (the "License"); 21235 you may not use this file except in compliance with the License. 21236 You may obtain a copy of the License at 21237 21238 http://www.apache.org/licenses/LICENSE-2.0 21239 21240 Unless required by applicable law or agreed to in writing, software 21241 distributed under the License is distributed on an "AS IS" BASIS, 21242 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 21243 See the License for the specific language governing permissions and 21244 limitations under the License. 
21245 */ 21246 21247 package validate 21248 21249 import ( 21250 "context" 21251 "fmt" 21252 "strconv" 21253 "strings" 21254 21255 "k8s.io/api/admission/v1beta1" 21256 whv1beta1 "k8s.io/api/admissionregistration/v1beta1" 21257 v1 "k8s.io/api/core/v1" 21258 apierrors "k8s.io/apimachinery/pkg/api/errors" 21259 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 21260 "k8s.io/apimachinery/pkg/util/intstr" 21261 "k8s.io/klog" 21262 21263 "volcano.sh/apis/pkg/apis/helpers" 21264 vcv1beta1 "volcano.sh/apis/pkg/apis/scheduling/v1beta1" 21265 "volcano.sh/volcano/pkg/webhooks/router" 21266 "volcano.sh/volcano/pkg/webhooks/schema" 21267 "volcano.sh/volcano/pkg/webhooks/util" 21268 ) 21269 21270 func init() <span class="cov8" title="1">{ 21271 router.RegisterAdmission(service) 21272 }</span> 21273 21274 var service = &router.AdmissionService{ 21275 Path: "/pods/validate", 21276 Func: AdmitPods, 21277 21278 Config: config, 21279 21280 ValidatingConfig: &whv1beta1.ValidatingWebhookConfiguration{ 21281 Webhooks: []whv1beta1.ValidatingWebhook{{ 21282 Name: "validatepod.volcano.sh", 21283 Rules: []whv1beta1.RuleWithOperations{ 21284 { 21285 Operations: []whv1beta1.OperationType{whv1beta1.Create}, 21286 Rule: whv1beta1.Rule{ 21287 APIGroups: []string{""}, 21288 APIVersions: []string{"v1"}, 21289 Resources: []string{"pods"}, 21290 }, 21291 }, 21292 }, 21293 }}, 21294 }, 21295 } 21296 21297 var config = &router.AdmissionServiceConfig{} 21298 21299 // AdmitPods is to admit pods and return response. 
21300 func AdmitPods(ar v1beta1.AdmissionReview) *v1beta1.AdmissionResponse <span class="cov0" title="0">{ 21301 klog.V(3).Infof("admitting pods -- %s", ar.Request.Operation) 21302 21303 pod, err := schema.DecodePod(ar.Request.Object, ar.Request.Resource) 21304 if err != nil </span><span class="cov0" title="0">{ 21305 return util.ToAdmissionResponse(err) 21306 }</span> 21307 21308 <span class="cov0" title="0">var msg string 21309 reviewResponse := v1beta1.AdmissionResponse{} 21310 reviewResponse.Allowed = true 21311 21312 switch ar.Request.Operation </span>{ 21313 case v1beta1.Create:<span class="cov0" title="0"> 21314 msg = validatePod(pod, &reviewResponse)</span> 21315 default:<span class="cov0" title="0"> 21316 err := fmt.Errorf("expect operation to be 'CREATE'") 21317 return util.ToAdmissionResponse(err)</span> 21318 } 21319 21320 <span class="cov0" title="0">if !reviewResponse.Allowed </span><span class="cov0" title="0">{ 21321 reviewResponse.Result = &metav1.Status{Message: strings.TrimSpace(msg)} 21322 }</span> 21323 <span class="cov0" title="0">return &reviewResponse</span> 21324 } 21325 21326 /* 21327 allow pods to create when 21328 1. schedulerName of pod isn't volcano 21329 2. pod has Podgroup whose phase isn't Pending 21330 3. normal pods whose schedulerName is volcano don't have podgroup. 21331 4. 
check pod budget annotations configure 21332 */ 21333 func validatePod(pod *v1.Pod, reviewResponse *v1beta1.AdmissionResponse) string <span class="cov8" title="1">{ 21334 if pod.Spec.SchedulerName != config.SchedulerName </span><span class="cov8" title="1">{ 21335 return "" 21336 }</span> 21337 21338 <span class="cov8" title="1">pgName := "" 21339 msg := "" 21340 21341 // vc-job, SN == volcano 21342 if pod.Annotations != nil </span><span class="cov8" title="1">{ 21343 pgName = pod.Annotations[vcv1beta1.KubeGroupNameAnnotationKey] 21344 }</span> 21345 <span class="cov8" title="1">if pgName != "" </span><span class="cov8" title="1">{ 21346 if err := checkPGPhase(pod, pgName, true); err != nil </span><span class="cov8" title="1">{ 21347 msg = err.Error() 21348 reviewResponse.Allowed = false 21349 }</span> 21350 <span class="cov8" title="1">return msg</span> 21351 } 21352 21353 // normal pod, SN == volcano 21354 <span class="cov8" title="1">pgName = helpers.GeneratePodgroupName(pod) 21355 if err := checkPGPhase(pod, pgName, false); err != nil </span><span class="cov8" title="1">{ 21356 msg = err.Error() 21357 reviewResponse.Allowed = false 21358 }</span> 21359 21360 // check pod annotatations 21361 <span class="cov8" title="1">if err := validateAnnotation(pod); err != nil </span><span class="cov0" title="0">{ 21362 msg = err.Error() 21363 reviewResponse.Allowed = false 21364 }</span> 21365 21366 <span class="cov8" title="1">return msg</span> 21367 } 21368 21369 func checkPGPhase(pod *v1.Pod, pgName string, isVCJob bool) error <span class="cov8" title="1">{ 21370 pg, err := config.VolcanoClient.SchedulingV1beta1().PodGroups(pod.Namespace).Get(context.TODO(), pgName, metav1.GetOptions{}) 21371 if err != nil </span><span class="cov8" title="1">{ 21372 if isVCJob || (!isVCJob && !apierrors.IsNotFound(err)) </span><span class="cov8" title="1">{ 21373 return fmt.Errorf("failed to get PodGroup for pod <%s/%s>: %v", pod.Namespace, pod.Name, err) 21374 }</span> 21375 <span 
class="cov0" title="0">return nil</span> 21376 } 21377 <span class="cov8" title="1">if pg.Status.Phase != vcv1beta1.PodGroupPending </span><span class="cov0" title="0">{ 21378 return nil 21379 }</span> 21380 <span class="cov8" title="1">return fmt.Errorf("failed to create pod <%s/%s> as the podgroup phase is Pending", 21381 pod.Namespace, pod.Name)</span> 21382 } 21383 21384 func validateAnnotation(pod *v1.Pod) error <span class="cov8" title="1">{ 21385 num := 0 21386 if len(pod.Annotations) > 0 </span><span class="cov0" title="0">{ 21387 keys := []string{ 21388 vcv1beta1.JDBMinAvailable, 21389 vcv1beta1.JDBMaxUnavailable, 21390 } 21391 for _, key := range keys </span><span class="cov0" title="0">{ 21392 if value, found := pod.Annotations[key]; found </span><span class="cov0" title="0">{ 21393 num++ 21394 if err := validateIntPercentageStr(key, value); err != nil </span><span class="cov0" title="0">{ 21395 recordEvent(err) 21396 return err 21397 }</span> 21398 } 21399 } 21400 <span class="cov0" title="0">if num > 1 </span><span class="cov0" title="0">{ 21401 return fmt.Errorf("not allow configure multiple annotations <%v> at same time", keys) 21402 }</span> 21403 } 21404 <span class="cov8" title="1">return nil</span> 21405 } 21406 21407 func recordEvent(err error) <span class="cov0" title="0">{ 21408 config.Recorder.Eventf(nil, v1.EventTypeWarning, "Admit", "Create pod failed due to %v", err) 21409 }</span> 21410 21411 func validateIntPercentageStr(key, value string) error <span class="cov0" title="0">{ 21412 tmp := intstr.Parse(value) 21413 switch tmp.Type </span>{ 21414 case intstr.Int:<span class="cov0" title="0"> 21415 if tmp.IntValue() <= 0 </span><span class="cov0" title="0">{ 21416 return fmt.Errorf("invalid value <%q> for %v, it must be a positive integer", value, key) 21417 }</span> 21418 <span class="cov0" title="0">return nil</span> 21419 case intstr.String:<span class="cov0" title="0"> 21420 s := strings.Replace(tmp.StrVal, "%", "", -1) 21421 v, err := 
strconv.Atoi(s) 21422 if err != nil </span><span class="cov0" title="0">{ 21423 return fmt.Errorf("invalid value %v for %v", err, key) 21424 }</span> 21425 <span class="cov0" title="0">if v <= 0 || v >= 100 </span><span class="cov0" title="0">{ 21426 return fmt.Errorf("invalid value <%q> for %v, it must be a valid percentage which between 1%% ~ 99%%", tmp.StrVal, key) 21427 }</span> 21428 <span class="cov0" title="0">return nil</span> 21429 } 21430 <span class="cov0" title="0">return fmt.Errorf("invalid type: neither int nor percentage for %v", key)</span> 21431 } 21432 </pre> 21433 21434 <pre class="file" id="file102" style="display: none">/* 21435 Copyright 2018 The Volcano Authors. 21436 21437 Licensed under the Apache License, Version 2.0 (the "License"); 21438 you may not use this file except in compliance with the License. 21439 You may obtain a copy of the License at 21440 21441 http://www.apache.org/licenses/LICENSE-2.0 21442 21443 Unless required by applicable law or agreed to in writing, software 21444 distributed under the License is distributed on an "AS IS" BASIS, 21445 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 21446 See the License for the specific language governing permissions and 21447 limitations under the License. 
21448 */ 21449 21450 package mutate 21451 21452 import ( 21453 "encoding/json" 21454 "fmt" 21455 "strings" 21456 21457 "k8s.io/api/admission/v1beta1" 21458 whv1beta1 "k8s.io/api/admissionregistration/v1beta1" 21459 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 21460 "k8s.io/klog" 21461 21462 schedulingv1beta1 "volcano.sh/apis/pkg/apis/scheduling/v1beta1" 21463 "volcano.sh/volcano/pkg/webhooks/router" 21464 "volcano.sh/volcano/pkg/webhooks/schema" 21465 "volcano.sh/volcano/pkg/webhooks/util" 21466 ) 21467 21468 func init() <span class="cov8" title="1">{ 21469 router.RegisterAdmission(service) 21470 }</span> 21471 21472 var service = &router.AdmissionService{ 21473 Path: "/queues/mutate", 21474 Func: Queues, 21475 21476 MutatingConfig: &whv1beta1.MutatingWebhookConfiguration{ 21477 Webhooks: []whv1beta1.MutatingWebhook{{ 21478 Name: "mutatequeue.volcano.sh", 21479 Rules: []whv1beta1.RuleWithOperations{ 21480 { 21481 Operations: []whv1beta1.OperationType{whv1beta1.Create}, 21482 Rule: whv1beta1.Rule{ 21483 APIGroups: []string{schedulingv1beta1.SchemeGroupVersion.Group}, 21484 APIVersions: []string{schedulingv1beta1.SchemeGroupVersion.Version}, 21485 Resources: []string{"queues"}, 21486 }, 21487 }, 21488 }, 21489 }}, 21490 }, 21491 } 21492 21493 type patchOperation struct { 21494 Op string `json:"op"` 21495 Path string `json:"path"` 21496 Value interface{} `json:"value,omitempty"` 21497 } 21498 21499 // Queues mutate queues. 
21500 func Queues(ar v1beta1.AdmissionReview) *v1beta1.AdmissionResponse <span class="cov8" title="1">{ 21501 klog.V(3).Infof("Mutating %s queue %s.", ar.Request.Operation, ar.Request.Name) 21502 21503 queue, err := schema.DecodeQueue(ar.Request.Object, ar.Request.Resource) 21504 if err != nil </span><span class="cov0" title="0">{ 21505 return util.ToAdmissionResponse(err) 21506 }</span> 21507 21508 <span class="cov8" title="1">var patchBytes []byte 21509 switch ar.Request.Operation </span>{ 21510 case v1beta1.Create:<span class="cov8" title="1"> 21511 patchBytes, err = createQueuePatch(queue)</span> 21512 default:<span class="cov8" title="1"> 21513 return util.ToAdmissionResponse(fmt.Errorf("invalid operation `%s`, "+ 21514 "expect operation to be `CREATE`", ar.Request.Operation))</span> 21515 } 21516 21517 <span class="cov8" title="1">if err != nil </span><span class="cov0" title="0">{ 21518 return &v1beta1.AdmissionResponse{ 21519 Allowed: false, 21520 Result: &metav1.Status{Message: err.Error()}, 21521 } 21522 }</span> 21523 21524 <span class="cov8" title="1">pt := v1beta1.PatchTypeJSONPatch 21525 return &v1beta1.AdmissionResponse{ 21526 Allowed: true, 21527 Patch: patchBytes, 21528 PatchType: &pt, 21529 }</span> 21530 } 21531 21532 func createQueuePatch(queue *schedulingv1beta1.Queue) ([]byte, error) <span class="cov8" title="1">{ 21533 var patch []patchOperation 21534 21535 // add root node if the root node not specified 21536 hierarchy := queue.Annotations[schedulingv1beta1.KubeHierarchyAnnotationKey] 21537 hierarchicalWeights := queue.Annotations[schedulingv1beta1.KubeHierarchyWeightAnnotationKey] 21538 21539 if hierarchy != "" && hierarchicalWeights != "" && !strings.HasPrefix(hierarchy, "root") </span><span class="cov8" title="1">{ 21540 // based on https://tools.ietf.org/html/rfc6901#section-3 21541 // escape "/" with "~1" 21542 patch = append(patch, patchOperation{ 21543 Op: "add", 21544 Path: fmt.Sprintf("/metadata/annotations/%s", 
strings.ReplaceAll(schedulingv1beta1.KubeHierarchyAnnotationKey, "/", "~1")), 21545 Value: fmt.Sprintf("root/%s", hierarchy), 21546 }) 21547 patch = append(patch, patchOperation{ 21548 Op: "add", 21549 Path: fmt.Sprintf("/metadata/annotations/%s", strings.ReplaceAll(schedulingv1beta1.KubeHierarchyWeightAnnotationKey, "/", "~1")), 21550 Value: fmt.Sprintf("1/%s", hierarchicalWeights), 21551 }) 21552 }</span> 21553 21554 <span class="cov8" title="1">trueValue := true 21555 if queue.Spec.Reclaimable == nil </span><span class="cov8" title="1">{ 21556 patch = append(patch, patchOperation{ 21557 Op: "add", 21558 Path: "/spec/reclaimable", 21559 Value: &trueValue, 21560 }) 21561 }</span> 21562 21563 <span class="cov8" title="1">defaultWeight := 1 21564 if queue.Spec.Weight == 0 </span><span class="cov0" title="0">{ 21565 patch = append(patch, patchOperation{ 21566 Op: "add", 21567 Path: "/spec/weight", 21568 Value: &defaultWeight, 21569 }) 21570 }</span> 21571 21572 <span class="cov8" title="1">return json.Marshal(patch)</span> 21573 } 21574 </pre> 21575 21576 <pre class="file" id="file103" style="display: none">/* 21577 Copyright 2018 The Volcano Authors. 21578 21579 Licensed under the Apache License, Version 2.0 (the "License"); 21580 you may not use this file except in compliance with the License. 21581 You may obtain a copy of the License at 21582 21583 http://www.apache.org/licenses/LICENSE-2.0 21584 21585 Unless required by applicable law or agreed to in writing, software 21586 distributed under the License is distributed on an "AS IS" BASIS, 21587 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 21588 See the License for the specific language governing permissions and 21589 limitations under the License. 
21590 */ 21591 21592 package validate 21593 21594 import ( 21595 "context" 21596 "fmt" 21597 "strconv" 21598 "strings" 21599 21600 "k8s.io/api/admission/v1beta1" 21601 whv1beta1 "k8s.io/api/admissionregistration/v1beta1" 21602 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 21603 "k8s.io/apimachinery/pkg/util/validation/field" 21604 "k8s.io/klog" 21605 21606 schedulingv1beta1 "volcano.sh/apis/pkg/apis/scheduling/v1beta1" 21607 "volcano.sh/volcano/pkg/webhooks/router" 21608 "volcano.sh/volcano/pkg/webhooks/schema" 21609 "volcano.sh/volcano/pkg/webhooks/util" 21610 ) 21611 21612 func init() <span class="cov8" title="1">{ 21613 router.RegisterAdmission(service) 21614 }</span> 21615 21616 var service = &router.AdmissionService{ 21617 Path: "/queues/validate", 21618 Func: AdmitQueues, 21619 21620 Config: config, 21621 21622 ValidatingConfig: &whv1beta1.ValidatingWebhookConfiguration{ 21623 Webhooks: []whv1beta1.ValidatingWebhook{{ 21624 Name: "validatequeue.volcano.sh", 21625 Rules: []whv1beta1.RuleWithOperations{ 21626 { 21627 Operations: []whv1beta1.OperationType{whv1beta1.Create, whv1beta1.Update, whv1beta1.Delete}, 21628 Rule: whv1beta1.Rule{ 21629 APIGroups: []string{schedulingv1beta1.SchemeGroupVersion.Group}, 21630 APIVersions: []string{schedulingv1beta1.SchemeGroupVersion.Version}, 21631 Resources: []string{"queues"}, 21632 }, 21633 }, 21634 }, 21635 }}, 21636 }, 21637 } 21638 21639 var config = &router.AdmissionServiceConfig{} 21640 21641 // AdmitQueues is to admit queues and return response. 
21642 func AdmitQueues(ar v1beta1.AdmissionReview) *v1beta1.AdmissionResponse <span class="cov8" title="1">{ 21643 klog.V(3).Infof("Admitting %s queue %s.", ar.Request.Operation, ar.Request.Name) 21644 21645 queue, err := schema.DecodeQueue(ar.Request.Object, ar.Request.Resource) 21646 if err != nil </span><span class="cov0" title="0">{ 21647 return util.ToAdmissionResponse(err) 21648 }</span> 21649 21650 <span class="cov8" title="1">switch ar.Request.Operation </span>{ 21651 case v1beta1.Create, v1beta1.Update:<span class="cov8" title="1"> 21652 err = validateQueue(queue)</span> 21653 case v1beta1.Delete:<span class="cov8" title="1"> 21654 err = validateQueueDeleting(ar.Request.Name)</span> 21655 default:<span class="cov8" title="1"> 21656 return util.ToAdmissionResponse(fmt.Errorf("invalid operation `%s`, "+ 21657 "expect operation to be `CREATE`, `UPDATE` or `DELETE`", ar.Request.Operation))</span> 21658 } 21659 21660 <span class="cov8" title="1">if err != nil </span><span class="cov8" title="1">{ 21661 return &v1beta1.AdmissionResponse{ 21662 Allowed: false, 21663 Result: &metav1.Status{Message: err.Error()}, 21664 } 21665 }</span> 21666 21667 <span class="cov8" title="1">return &v1beta1.AdmissionResponse{ 21668 Allowed: true, 21669 }</span> 21670 } 21671 21672 func validateQueue(queue *schedulingv1beta1.Queue) error <span class="cov8" title="1">{ 21673 errs := field.ErrorList{} 21674 resourcePath := field.NewPath("requestBody") 21675 21676 errs = append(errs, validateStateOfQueue(queue.Status.State, resourcePath.Child("spec").Child("state"))...) 21677 errs = append(errs, validateWeightOfQueue(queue.Spec.Weight, resourcePath.Child("spec").Child("weight"))...) 21678 errs = append(errs, validateHierarchicalAttributes(queue, resourcePath.Child("metadata").Child("annotations"))...) 
21679 21680 if len(errs) > 0 </span><span class="cov8" title="1">{ 21681 return errs.ToAggregate() 21682 }</span> 21683 21684 <span class="cov8" title="1">return nil</span> 21685 } 21686 func validateHierarchicalAttributes(queue *schedulingv1beta1.Queue, fldPath *field.Path) field.ErrorList <span class="cov8" title="1">{ 21687 errs := field.ErrorList{} 21688 hierarchy := queue.Annotations[schedulingv1beta1.KubeHierarchyAnnotationKey] 21689 hierarchicalWeights := queue.Annotations[schedulingv1beta1.KubeHierarchyWeightAnnotationKey] 21690 if hierarchy != "" || hierarchicalWeights != "" </span><span class="cov8" title="1">{ 21691 paths := strings.Split(hierarchy, "/") 21692 weights := strings.Split(hierarchicalWeights, "/") 21693 // path length must be the same with weights length 21694 if len(paths) != len(weights) </span><span class="cov8" title="1">{ 21695 return append(errs, field.Invalid(fldPath, hierarchy, 21696 fmt.Sprintf("%s must have the same length with %s", 21697 schedulingv1beta1.KubeHierarchyAnnotationKey, 21698 schedulingv1beta1.KubeHierarchyWeightAnnotationKey, 21699 ))) 21700 }</span> 21701 21702 // check weights format 21703 <span class="cov8" title="1">for _, weight := range weights </span><span class="cov8" title="1">{ 21704 weightFloat, err := strconv.ParseFloat(weight, 64) 21705 if err != nil </span><span class="cov8" title="1">{ 21706 return append(errs, field.Invalid(fldPath, hierarchicalWeights, 21707 fmt.Sprintf("%s in the %s is invalid number: %v", 21708 weight, hierarchicalWeights, err, 21709 ))) 21710 }</span> 21711 <span class="cov8" title="1">if weightFloat <= 0 </span><span class="cov8" title="1">{ 21712 return append(errs, field.Invalid(fldPath, hierarchicalWeights, 21713 fmt.Sprintf("%s in the %s must be larger than 0", 21714 weight, hierarchicalWeights, 21715 ))) 21716 }</span> 21717 } 21718 21719 // The node is not allowed to be in the sub path of a node. 
21720 // For example, a queue with "root/sci" conflicts with a queue with "root/sci/dev" 21721 <span class="cov8" title="1">queueList, err := config.VolcanoClient.SchedulingV1beta1().Queues().List(context.TODO(), metav1.ListOptions{}) 21722 if err != nil </span><span class="cov0" title="0">{ 21723 return append(errs, field.Invalid(fldPath, hierarchy, 21724 fmt.Sprintf("checking %s, list queues failed: %v", 21725 schedulingv1beta1.KubeHierarchyAnnotationKey, 21726 err, 21727 ))) 21728 }</span> 21729 <span class="cov8" title="1">for _, queueInTree := range queueList.Items </span><span class="cov8" title="1">{ 21730 hierarchyInTree := queueInTree.Annotations[schedulingv1beta1.KubeHierarchyAnnotationKey] 21731 if hierarchyInTree != "" && queue.Name != queueInTree.Name && 21732 strings.HasPrefix(hierarchyInTree, hierarchy) </span><span class="cov8" title="1">{ 21733 return append(errs, field.Invalid(fldPath, hierarchy, 21734 fmt.Sprintf("%s is not allowed to be in the sub path of %s of queue %s", 21735 hierarchy, hierarchyInTree, queueInTree.Name))) 21736 }</span> 21737 } 21738 } 21739 <span class="cov8" title="1">return errs</span> 21740 } 21741 21742 func validateStateOfQueue(value schedulingv1beta1.QueueState, fldPath *field.Path) field.ErrorList <span class="cov8" title="1">{ 21743 errs := field.ErrorList{} 21744 21745 if len(value) == 0 </span><span class="cov8" title="1">{ 21746 return errs 21747 }</span> 21748 21749 <span class="cov8" title="1">validQueueStates := []schedulingv1beta1.QueueState{ 21750 schedulingv1beta1.QueueStateOpen, 21751 schedulingv1beta1.QueueStateClosed, 21752 } 21753 21754 for _, validQueue := range validQueueStates </span><span class="cov8" title="1">{ 21755 if value == validQueue </span><span class="cov8" title="1">{ 21756 return errs 21757 }</span> 21758 } 21759 21760 <span class="cov8" title="1">return append(errs, field.Invalid(fldPath, value, fmt.Sprintf("queue state must be in %v", validQueueStates)))</span> 21761 } 21762 21763 func 
validateWeightOfQueue(value int32, fldPath *field.Path) field.ErrorList <span class="cov8" title="1">{ 21764 errs := field.ErrorList{} 21765 if value > 0 </span><span class="cov8" title="1">{ 21766 return errs 21767 }</span> 21768 <span class="cov8" title="1">return append(errs, field.Invalid(fldPath, value, "queue weight must be a positive integer"))</span> 21769 } 21770 21771 func validateQueueDeleting(queue string) error <span class="cov8" title="1">{ 21772 if queue == "default" </span><span class="cov8" title="1">{ 21773 return fmt.Errorf("`%s` queue can not be deleted", "default") 21774 }</span> 21775 21776 <span class="cov8" title="1">q, err := config.VolcanoClient.SchedulingV1beta1().Queues().Get(context.TODO(), queue, metav1.GetOptions{}) 21777 if err != nil </span><span class="cov0" title="0">{ 21778 return err 21779 }</span> 21780 21781 <span class="cov8" title="1">if q.Status.State != schedulingv1beta1.QueueStateClosed </span><span class="cov8" title="1">{ 21782 return fmt.Errorf("only queue with state `%s` can be deleted, queue `%s` state is `%s`", 21783 schedulingv1beta1.QueueStateClosed, q.Name, q.Status.State) 21784 }</span> 21785 21786 <span class="cov8" title="1">return nil</span> 21787 } 21788 </pre> 21789 21790 </div> 21791 </body> 21792 <script> 21793 (function() { 21794 var files = document.getElementById('files'); 21795 var visible; 21796 files.addEventListener('change', onChange, false); 21797 function select(part) { 21798 if (visible) 21799 visible.style.display = 'none'; 21800 visible = document.getElementById(part); 21801 if (!visible) 21802 return; 21803 files.value = part; 21804 visible.style.display = 'block'; 21805 location.hash = part; 21806 } 21807 function onChange() { 21808 select(files.value); 21809 window.scrollTo(0, 0); 21810 } 21811 if (location.hash != "") { 21812 select(location.hash.substr(1)); 21813 } 21814 if (!visible) { 21815 select("file0"); 21816 } 21817 })(); 21818 </script> 21819 </html>