volcano.sh/volcano@v1.9.0/pkg/scheduler/framework/statement.go (about) 1 /* 2 Copyright 2018 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package framework 18 19 import ( 20 "fmt" 21 22 "k8s.io/klog/v2" 23 24 "volcano.sh/volcano/pkg/scheduler/api" 25 "volcano.sh/volcano/pkg/scheduler/metrics" 26 ) 27 28 // Operation type 29 type Operation int8 30 31 const ( 32 // Evict op 33 Evict = iota 34 // Pipeline op 35 Pipeline 36 // Allocate op 37 Allocate 38 ) 39 40 type operation struct { 41 name Operation 42 task *api.TaskInfo 43 reason string 44 } 45 46 // Statement structure 47 type Statement struct { 48 operations []operation 49 ssn *Session 50 } 51 52 // NewStatement returns new statement object 53 func NewStatement(ssn *Session) *Statement { 54 return &Statement{ 55 ssn: ssn, 56 } 57 } 58 59 // Evict the pod 60 func (s *Statement) Evict(reclaimee *api.TaskInfo, reason string) error { 61 // Update status in session 62 if job, found := s.ssn.Jobs[reclaimee.Job]; found { 63 if err := job.UpdateTaskStatus(reclaimee, api.Releasing); err != nil { 64 klog.Errorf("Failed to update task <%v/%v> status to %v when evicting in Session <%v>: %v", 65 reclaimee.Namespace, reclaimee.Name, api.Releasing, s.ssn.UID, err) 66 } 67 } else { 68 klog.Errorf("Failed to find Job <%s> in Session <%s> index when evicting.", 69 reclaimee.Job, s.ssn.UID) 70 } 71 72 // Update task in node. 73 if node, found := s.ssn.Nodes[reclaimee.NodeName]; found { 74 err := node.UpdateTask(reclaimee) 75 if err != nil { 76 klog.Errorf("Failed to update task <%v/%v> in node %v for: %s", 77 reclaimee.Namespace, reclaimee.Name, reclaimee.NodeName, err.Error()) 78 return err 79 } 80 } 81 82 for _, eh := range s.ssn.eventHandlers { 83 if eh.DeallocateFunc != nil { 84 eh.DeallocateFunc(&Event{ 85 Task: reclaimee, 86 }) 87 } 88 } 89 90 s.operations = append(s.operations, operation{ 91 name: Evict, 92 task: reclaimee, 93 reason: reason, 94 }) 95 96 return nil 97 } 98 99 func (s *Statement) evict(reclaimee *api.TaskInfo, reason string) error { 100 if err := s.ssn.cache.Evict(reclaimee, reason); err != nil { 101 if e := s.unevict(reclaimee); e != nil { 102 klog.Errorf("Faled to unevict task <%v/%v>: %v.", reclaimee.Namespace, reclaimee.Name, e) 103 } 104 return err 105 } 106 107 return nil 108 } 109 110 func (s *Statement) unevict(reclaimee *api.TaskInfo) error { 111 // Update status in session 112 job, found := s.ssn.Jobs[reclaimee.Job] 113 if found { 114 if err := job.UpdateTaskStatus(reclaimee, api.Running); err != nil { 115 klog.Errorf("Failed to update task <%v/%v> status to %v when unevicting in Session <%v>: %v", 116 reclaimee.Namespace, reclaimee.Name, api.Running, s.ssn.UID, err) 117 } 118 } else { 119 klog.Errorf("Failed to find Job <%s> in Session <%s> index when unevicting.", 120 reclaimee.Job, s.ssn.UID) 121 } 122 123 // Update task in node. 124 if node, found := s.ssn.Nodes[reclaimee.NodeName]; found { 125 err := node.UpdateTask(reclaimee) 126 if err != nil { 127 klog.Errorf("Failed to update task <%v/%v> in node %v for: %s", 128 reclaimee.Namespace, reclaimee.Name, reclaimee.NodeName, err.Error()) 129 return err 130 } 131 } 132 133 for _, eh := range s.ssn.eventHandlers { 134 if eh.AllocateFunc != nil { 135 eh.AllocateFunc(&Event{ 136 Task: reclaimee, 137 }) 138 } 139 } 140 141 return nil 142 } 143 144 // Pipeline the task for the node 145 func (s *Statement) Pipeline(task *api.TaskInfo, hostname string) error { 146 job, found := s.ssn.Jobs[task.Job] 147 if found { 148 if err := job.UpdateTaskStatus(task, api.Pipelined); err != nil { 149 klog.Errorf("Failed to update task <%v/%v> status to %v when pipeline in Session <%v>: %v", 150 task.Namespace, task.Name, api.Pipelined, s.ssn.UID, err) 151 } 152 } else { 153 klog.Errorf("Failed to find Job <%s> in Session <%s> index when pipeline.", 154 task.Job, s.ssn.UID) 155 } 156 157 task.NodeName = hostname 158 159 if node, found := s.ssn.Nodes[hostname]; found { 160 if err := node.AddTask(task); err != nil { 161 klog.Errorf("Failed to add task <%v/%v> to node <%v> when pipeline in Session <%v>: %v", 162 task.Namespace, task.Name, hostname, s.ssn.UID, err) 163 } 164 klog.V(3).Infof("After pipelined Task <%v/%v> to Node <%v>: idle <%v>, used <%v>, releasing <%v>", 165 task.Namespace, task.Name, node.Name, node.Idle, node.Used, node.Releasing) 166 } else { 167 klog.Errorf("Failed to find Node <%s> in Session <%s> index when pipeline.", 168 hostname, s.ssn.UID) 169 } 170 171 for _, eh := range s.ssn.eventHandlers { 172 if eh.AllocateFunc != nil { 173 eh.AllocateFunc(&Event{ 174 Task: task, 175 }) 176 } 177 } 178 179 s.operations = append(s.operations, operation{ 180 name: Pipeline, 181 task: task, 182 }) 183 184 return nil 185 } 186 187 func (s *Statement) pipeline(task *api.TaskInfo) { 188 } 189 190 func (s *Statement) UnPipeline(task *api.TaskInfo) error { 191 job, found := s.ssn.Jobs[task.Job] 192 if found { 193 if err := job.UpdateTaskStatus(task, api.Pending); err != nil { 194 klog.Errorf("Failed to update task <%v/%v> status to %v when unpipeline in Session <%v>: %v", 195 task.Namespace, task.Name, api.Pending, s.ssn.UID, err) 196 } 197 } else { 198 klog.Errorf("Failed to find Job <%s> in Session <%s> index when unpipeline.", task.Job, s.ssn.UID) 199 } 200 201 if node, found := s.ssn.Nodes[task.NodeName]; found { 202 if err := node.RemoveTask(task); err != nil { 203 klog.Errorf("Failed to remove task <%v/%v> to node <%v> when unpipeline in Session <%v>: %v", 204 task.Namespace, task.Name, task.NodeName, s.ssn.UID, err) 205 } 206 klog.V(3).Infof("After unpipelined Task <%v/%v> to Node <%v>: idle <%v>, used <%v>, releasing <%v>", 207 task.Namespace, task.Name, node.Name, node.Idle, node.Used, node.Releasing) 208 } else { 209 klog.Errorf("Failed to find Node <%s> in Session <%s> index when unpipeline.", 210 task.NodeName, s.ssn.UID) 211 } 212 213 for _, eh := range s.ssn.eventHandlers { 214 if eh.DeallocateFunc != nil { 215 eh.DeallocateFunc(&Event{ 216 Task: task, 217 }) 218 } 219 } 220 task.NodeName = "" 221 222 return nil 223 } 224 225 // Allocate the task to node 226 func (s *Statement) Allocate(task *api.TaskInfo, nodeInfo *api.NodeInfo) (err error) { 227 podVolumes, err := s.ssn.cache.GetPodVolumes(task, nodeInfo.Node) 228 if err != nil { 229 return err 230 } 231 232 hostname := nodeInfo.Name 233 if err := s.ssn.cache.AllocateVolumes(task, hostname, podVolumes); err != nil { 234 return err 235 } 236 defer func() { 237 if err != nil { 238 s.ssn.cache.RevertVolumes(task, podVolumes) 239 } 240 }() 241 242 task.Pod.Spec.NodeName = hostname 243 task.PodVolumes = podVolumes 244 245 // Only update status in session 246 job, found := s.ssn.Jobs[task.Job] 247 if found { 248 if err := job.UpdateTaskStatus(task, api.Allocated); err != nil { 249 klog.Errorf("Failed to update task <%v/%v> status to %v when allocating in Session <%v>: %v", 250 task.Namespace, task.Name, api.Allocated, s.ssn.UID, err) 251 return err 252 } 253 } else { 254 klog.Errorf("Failed to find Job <%s> in Session <%s> index when allocating.", 255 task.Job, s.ssn.UID) 256 return fmt.Errorf("failed to find job %s", task.Job) 257 } 258 259 task.NodeName = hostname 260 if node, found := s.ssn.Nodes[hostname]; found { 261 if err := node.AddTask(task); err != nil { 262 klog.Errorf("Failed to add task <%v/%v> to node <%v> when allocating in Session <%v>: %v", 263 task.Namespace, task.Name, hostname, s.ssn.UID, err) 264 return err 265 } 266 klog.V(3).Infof("After allocated Task <%v/%v> to Node <%v>: idle <%v>, used <%v>, releasing <%v>", 267 task.Namespace, task.Name, node.Name, node.Idle, node.Used, node.Releasing) 268 } else { 269 klog.Errorf("Failed to find Node <%s> in Session <%s> index when allocating.", 270 hostname, s.ssn.UID) 271 return fmt.Errorf("failed to find node %s", hostname) 272 } 273 274 // Callbacks 275 for _, eh := range s.ssn.eventHandlers { 276 if eh.AllocateFunc != nil { 277 eh.AllocateFunc(&Event{ 278 Task: task, 279 }) 280 } 281 } 282 283 // Update status in session 284 klog.V(3).Info("Allocating operations ...") 285 s.operations = append(s.operations, operation{ 286 name: Allocate, 287 task: task, 288 }) 289 290 return nil 291 } 292 293 func (s *Statement) allocate(task *api.TaskInfo) error { 294 if err := s.ssn.cache.AddBindTask(task); err != nil { 295 return err 296 } 297 298 if job, found := s.ssn.Jobs[task.Job]; found { 299 if err := job.UpdateTaskStatus(task, api.Binding); err != nil { 300 klog.Errorf("Failed to update task <%v/%v> status to %v when binding in Session <%v>: %v", 301 task.Namespace, task.Name, api.Binding, s.ssn.UID, err) 302 return err 303 } 304 } else { 305 klog.Errorf("Failed to find Job <%s> in Session <%s> index when binding.", 306 task.Job, s.ssn.UID) 307 return fmt.Errorf("failed to find job %s", task.Job) 308 } 309 310 metrics.UpdateTaskScheduleDuration(metrics.Duration(task.Pod.CreationTimestamp.Time)) 311 return nil 312 } 313 314 // unallocate the pod for task 315 func (s *Statement) unallocate(task *api.TaskInfo) error { 316 s.ssn.cache.RevertVolumes(task, task.PodVolumes) 317 318 // Update status in session 319 job, found := s.ssn.Jobs[task.Job] 320 if found { 321 if err := job.UpdateTaskStatus(task, api.Pending); err != nil { 322 klog.Errorf("Failed to update task <%v/%v> status to %v when unallocating in Session <%v>: %v", 323 task.Namespace, task.Name, api.Pending, s.ssn.UID, err) 324 } 325 } else { 326 klog.Errorf("Failed to find Job <%s> in Session <%s> index when unallocating.", 327 task.Job, s.ssn.UID) 328 } 329 330 if node, found := s.ssn.Nodes[task.NodeName]; found { 331 klog.V(3).Infof("Remove Task <%v> on node <%v>", task.Name, task.NodeName) 332 err := node.RemoveTask(task) 333 if err != nil { 334 klog.Errorf("Failed to remove Task <%v> on node <%v> when unallocating: %s", task.Name, task.NodeName, err.Error()) 335 } 336 } 337 338 for _, eh := range s.ssn.eventHandlers { 339 if eh.DeallocateFunc != nil { 340 eh.DeallocateFunc(&Event{ 341 Task: task, 342 }) 343 } 344 } 345 task.NodeName = "" 346 347 return nil 348 } 349 350 // Discard operation for evict, pipeline and allocate 351 func (s *Statement) Discard() { 352 klog.V(3).Info("Discarding operations ...") 353 for i := len(s.operations) - 1; i >= 0; i-- { 354 op := s.operations[i] 355 op.task.GenerateLastTxContext() 356 switch op.name { 357 case Evict: 358 err := s.unevict(op.task) 359 if err != nil { 360 klog.Errorf("Failed to unevict task: %s", err.Error()) 361 } 362 case Pipeline: 363 err := s.UnPipeline(op.task) 364 if err != nil { 365 klog.Errorf("Failed to unpipeline task: %s", err.Error()) 366 } 367 case Allocate: 368 err := s.unallocate(op.task) 369 if err != nil { 370 klog.Errorf("Failed to unallocate task: %s", err.Error()) 371 } 372 } 373 } 374 } 375 376 // Commit operation for evict and pipeline 377 func (s *Statement) Commit() { 378 klog.V(3).Info("Committing operations ...") 379 for _, op := range s.operations { 380 op.task.ClearLastTxContext() 381 switch op.name { 382 case Evict: 383 err := s.evict(op.task, op.reason) 384 if err != nil { 385 klog.Errorf("Failed to evict task: %s", err.Error()) 386 } 387 case Pipeline: 388 s.pipeline(op.task) 389 case Allocate: 390 err := s.allocate(op.task) 391 if err != nil { 392 if e := s.unallocate(op.task); e != nil { 393 klog.Errorf("Failed to unallocate task <%v/%v>: %v.", op.task.Namespace, op.task.Name, e) 394 } 395 klog.Errorf("Failed to allocate task <%v/%v>: %v.", op.task.Namespace, op.task.Name, err) 396 } 397 } 398 } 399 }