github.com/maier/nomad@v0.4.1-0.20161110003312-a9e3d0b8549d/nomad/fsm.go

package nomad

import (
	"fmt"
	"io"
	"log"
	"time"

	"github.com/armon/go-metrics"
	"github.com/hashicorp/nomad/nomad/state"
	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/hashicorp/nomad/scheduler"
	"github.com/hashicorp/raft"
	"github.com/ugorji/go/codec"
)

const (
	// timeTableGranularity is the granularity of index to time tracking
	timeTableGranularity = 5 * time.Minute

	// timeTableLimit is the maximum limit of our tracking
	timeTableLimit = 72 * time.Hour
)

// SnapshotType is prefixed to a record in the FSM snapshot
// so that we can determine the type for restore
type SnapshotType byte

const (
	NodeSnapshot SnapshotType = iota
	JobSnapshot
	IndexSnapshot
	EvalSnapshot
	AllocSnapshot
	TimeTableSnapshot
	PeriodicLaunchSnapshot
	JobSummarySnapshot
	VaultAccessorSnapshot
)

// nomadFSM implements a finite state machine that is used
// along with Raft to provide strong consistency. We implement
// this outside the Server to avoid exposing this outside the package.
type nomadFSM struct {
	evalBroker         *EvalBroker
	blockedEvals       *BlockedEvals
	periodicDispatcher *PeriodicDispatch
	logOutput          io.Writer
	logger             *log.Logger
	state              *state.StateStore
	timetable          *TimeTable
}

// nomadSnapshot is used to provide a snapshot of the current
// state in a way that can be accessed concurrently with operations
// that may modify the live state.
type nomadSnapshot struct {
	snap      *state.StateSnapshot
	timetable *TimeTable
}

// snapshotHeader is the first entry in our snapshot
type snapshotHeader struct {
}

// NewFSM is used to construct a new FSM with a blank state
func NewFSM(evalBroker *EvalBroker, periodic *PeriodicDispatch,
	blocked *BlockedEvals, logOutput io.Writer) (*nomadFSM, error) {
	// Create a state store
	state, err := state.NewStateStore(logOutput)
	if err != nil {
		return nil, err
	}

	fsm := &nomadFSM{
		evalBroker:         evalBroker,
		periodicDispatcher: periodic,
		blockedEvals:       blocked,
		logOutput:          logOutput,
		logger:             log.New(logOutput, "", log.LstdFlags),
		state:              state,
		timetable:          NewTimeTable(timeTableGranularity, timeTableLimit),
	}
	return fsm, nil
}

// Close is used to cleanup resources associated with the FSM
func (n *nomadFSM) Close() error {
	return nil
}

// State is used to return a handle to the current state
func (n *nomadFSM) State() *state.StateStore {
	return n.state
}

// TimeTable returns the time table of transactions
func (n *nomadFSM) TimeTable() *TimeTable {
	return n.timetable
}

// Apply is invoked by Raft once a log entry is committed. It dispatches the
// request to the handler for its message type and returns that handler's
// result.
func (n *nomadFSM) Apply(log *raft.Log) interface{} {
	buf := log.Data
	msgType := structs.MessageType(buf[0])

	// Witness this write
	n.timetable.Witness(log.Index, time.Now().UTC())

	// Check if this message type should be ignored when unknown. This is
	// used so that new commands can be added with developer control if older
	// versions can safely ignore the command, or if they should crash.
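	// For example, a newer server could submit a hypothetical future command as
	// (futureRequestType | structs.IgnoreUnknownTypeFlag); once the flag is
	// masked off below, a server that does not recognize the type logs a
	// warning and returns nil instead of panicking.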
	ignoreUnknown := false
	if msgType&structs.IgnoreUnknownTypeFlag == structs.IgnoreUnknownTypeFlag {
		msgType &= ^structs.IgnoreUnknownTypeFlag
		ignoreUnknown = true
	}

	switch msgType {
	case structs.NodeRegisterRequestType:
		return n.applyUpsertNode(buf[1:], log.Index)
	case structs.NodeDeregisterRequestType:
		return n.applyDeregisterNode(buf[1:], log.Index)
	case structs.NodeUpdateStatusRequestType:
		return n.applyStatusUpdate(buf[1:], log.Index)
	case structs.NodeUpdateDrainRequestType:
		return n.applyDrainUpdate(buf[1:], log.Index)
	case structs.JobRegisterRequestType:
		return n.applyUpsertJob(buf[1:], log.Index)
	case structs.JobDeregisterRequestType:
		return n.applyDeregisterJob(buf[1:], log.Index)
	case structs.EvalUpdateRequestType:
		return n.applyUpdateEval(buf[1:], log.Index)
	case structs.EvalDeleteRequestType:
		return n.applyDeleteEval(buf[1:], log.Index)
	case structs.AllocUpdateRequestType:
		return n.applyAllocUpdate(buf[1:], log.Index)
	case structs.AllocClientUpdateRequestType:
		return n.applyAllocClientUpdate(buf[1:], log.Index)
	case structs.ReconcileJobSummariesRequestType:
		return n.applyReconcileSummaries(buf[1:], log.Index)
	case structs.VaultAccessorRegisterRequestType:
		return n.applyUpsertVaultAccessor(buf[1:], log.Index)
	case structs.VaultAccessorDegisterRequestType:
		return n.applyDeregisterVaultAccessor(buf[1:], log.Index)
	default:
		if ignoreUnknown {
			n.logger.Printf("[WARN] nomad.fsm: ignoring unknown message type (%d), upgrade to newer version", msgType)
			return nil
		} else {
			panic(fmt.Errorf("failed to apply request: %#v", buf))
		}
	}
}

func (n *nomadFSM) applyUpsertNode(buf []byte, index uint64) interface{} {
	defer metrics.MeasureSince([]string{"nomad", "fsm", "register_node"}, time.Now())
	var req structs.NodeRegisterRequest
	if err := structs.Decode(buf, &req); err != nil {
		panic(fmt.Errorf("failed to decode request: %v", err))
	}

	if err := n.state.UpsertNode(index, req.Node); err != nil {
		n.logger.Printf("[ERR] nomad.fsm: UpsertNode failed: %v", err)
		return err
	}

	// Unblock evals for the node's computed node class if it is in a ready
	// state.
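	// Unblocking by the node's computed class lets evaluations that were
	// blocked waiting for capacity of this class of node be retried.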
	if req.Node.Status == structs.NodeStatusReady {
		n.blockedEvals.Unblock(req.Node.ComputedClass, index)
	}

	return nil
}

func (n *nomadFSM) applyDeregisterNode(buf []byte, index uint64) interface{} {
	defer metrics.MeasureSince([]string{"nomad", "fsm", "deregister_node"}, time.Now())
	var req structs.NodeDeregisterRequest
	if err := structs.Decode(buf, &req); err != nil {
		panic(fmt.Errorf("failed to decode request: %v", err))
	}

	if err := n.state.DeleteNode(index, req.NodeID); err != nil {
		n.logger.Printf("[ERR] nomad.fsm: DeleteNode failed: %v", err)
		return err
	}
	return nil
}

func (n *nomadFSM) applyStatusUpdate(buf []byte, index uint64) interface{} {
	defer metrics.MeasureSince([]string{"nomad", "fsm", "node_status_update"}, time.Now())
	var req structs.NodeUpdateStatusRequest
	if err := structs.Decode(buf, &req); err != nil {
		panic(fmt.Errorf("failed to decode request: %v", err))
	}

	if err := n.state.UpdateNodeStatus(index, req.NodeID, req.Status); err != nil {
		n.logger.Printf("[ERR] nomad.fsm: UpdateNodeStatus failed: %v", err)
		return err
	}

	// Unblock evals for the node's computed node class if it is in a ready
	// state.
	if req.Status == structs.NodeStatusReady {
		node, err := n.state.NodeByID(req.NodeID)
		if err != nil {
			n.logger.Printf("[ERR] nomad.fsm: looking up node %q failed: %v", req.NodeID, err)
			return err
		}
		n.blockedEvals.Unblock(node.ComputedClass, index)
	}

	return nil
}

func (n *nomadFSM) applyDrainUpdate(buf []byte, index uint64) interface{} {
	defer metrics.MeasureSince([]string{"nomad", "fsm", "node_drain_update"}, time.Now())
	var req structs.NodeUpdateDrainRequest
	if err := structs.Decode(buf, &req); err != nil {
		panic(fmt.Errorf("failed to decode request: %v", err))
	}

	if err := n.state.UpdateNodeDrain(index, req.NodeID, req.Drain); err != nil {
		n.logger.Printf("[ERR] nomad.fsm: UpdateNodeDrain failed: %v", err)
		return err
	}
	return nil
}

func (n *nomadFSM) applyUpsertJob(buf []byte, index uint64) interface{} {
	defer metrics.MeasureSince([]string{"nomad", "fsm", "register_job"}, time.Now())
	var req structs.JobRegisterRequest
	if err := structs.Decode(buf, &req); err != nil {
		panic(fmt.Errorf("failed to decode request: %v", err))
	}

	// COMPAT: Remove in 0.6
	// Empty maps and slices should be treated as nil to avoid
	// unintended destructive updates in the scheduler since we use
	// reflect.DeepEqual. Starting Nomad 0.4.1, job submission sanitizes
	// the incoming job.
	req.Job.Canonicalize()

	if err := n.state.UpsertJob(index, req.Job); err != nil {
		n.logger.Printf("[ERR] nomad.fsm: UpsertJob failed: %v", err)
		return err
	}

	// We always add the job to the periodic dispatcher because there is the
	// possibility that the periodic spec was removed and then we should stop
	// tracking it.
	if err := n.periodicDispatcher.Add(req.Job); err != nil {
		n.logger.Printf("[ERR] nomad.fsm: periodicDispatcher.Add failed: %v", err)
		return err
	}

	// If it is periodic, record the time it was inserted. This is necessary for
	// recovering during leader election. It is possible that from the time it
	// is added to when it was supposed to launch, leader election occurs and the
	// job was not launched. In this case, we use the insertion time to
	// determine if a launch was missed.
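	// Only the first registration records this synthetic entry (prevLaunch is
	// nil below); later registrations leave any existing launch record alone.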
	if req.Job.IsPeriodic() {
		prevLaunch, err := n.state.PeriodicLaunchByID(req.Job.ID)
		if err != nil {
			n.logger.Printf("[ERR] nomad.fsm: PeriodicLaunchByID failed: %v", err)
			return err
		}

		// Record the insertion time as a launch. We overload the launch table
		// such that the first entry is the insertion time.
		if prevLaunch == nil {
			launch := &structs.PeriodicLaunch{ID: req.Job.ID, Launch: time.Now()}
			if err := n.state.UpsertPeriodicLaunch(index, launch); err != nil {
				n.logger.Printf("[ERR] nomad.fsm: UpsertPeriodicLaunch failed: %v", err)
				return err
			}
		}
	}

	// Check if the parent job is periodic and mark the launch time.
	parentID := req.Job.ParentID
	if parentID != "" {
		parent, err := n.state.JobByID(parentID)
		if err != nil {
			n.logger.Printf("[ERR] nomad.fsm: JobByID(%v) lookup for parent failed: %v", parentID, err)
			return err
		} else if parent == nil {
			// The parent has been deregistered.
			return nil
		}

		if parent.IsPeriodic() {
			t, err := n.periodicDispatcher.LaunchTime(req.Job.ID)
			if err != nil {
				n.logger.Printf("[ERR] nomad.fsm: LaunchTime(%v) failed: %v", req.Job.ID, err)
				return err
			}

			launch := &structs.PeriodicLaunch{ID: parentID, Launch: t}
			if err := n.state.UpsertPeriodicLaunch(index, launch); err != nil {
				n.logger.Printf("[ERR] nomad.fsm: UpsertPeriodicLaunch failed: %v", err)
				return err
			}
		}
	}

	return nil
}

func (n *nomadFSM) applyDeregisterJob(buf []byte, index uint64) interface{} {
	defer metrics.MeasureSince([]string{"nomad", "fsm", "deregister_job"}, time.Now())
	var req structs.JobDeregisterRequest
	if err := structs.Decode(buf, &req); err != nil {
		panic(fmt.Errorf("failed to decode request: %v", err))
	}

	if err := n.state.DeleteJob(index, req.JobID); err != nil {
		n.logger.Printf("[ERR] nomad.fsm: DeleteJob failed: %v", err)
		return err
	}

	if err := n.periodicDispatcher.Remove(req.JobID); err != nil {
		n.logger.Printf("[ERR] nomad.fsm: periodicDispatcher.Remove failed: %v", err)
		return err
	}

	// We always delete from the periodic launch table because it is possible that
	// the job was updated to be non-periodic, thus checking if it is periodic
	// doesn't ensure we clean it up properly.
	n.state.DeletePeriodicLaunch(index, req.JobID)

	return nil
}

func (n *nomadFSM) applyUpdateEval(buf []byte, index uint64) interface{} {
	defer metrics.MeasureSince([]string{"nomad", "fsm", "update_eval"}, time.Now())
	var req structs.EvalUpdateRequest
	if err := structs.Decode(buf, &req); err != nil {
		panic(fmt.Errorf("failed to decode request: %v", err))
	}

	if err := n.state.UpsertEvals(index, req.Evals); err != nil {
		n.logger.Printf("[ERR] nomad.fsm: UpsertEvals failed: %v", err)
		return err
	}

	for _, eval := range req.Evals {
		if eval.ShouldEnqueue() {
			n.evalBroker.Enqueue(eval)
		} else if eval.ShouldBlock() {
			n.blockedEvals.Block(eval)
		}
	}
	return nil
}

func (n *nomadFSM) applyDeleteEval(buf []byte, index uint64) interface{} {
	defer metrics.MeasureSince([]string{"nomad", "fsm", "delete_eval"}, time.Now())
	var req structs.EvalDeleteRequest
	if err := structs.Decode(buf, &req); err != nil {
		panic(fmt.Errorf("failed to decode request: %v", err))
	}

	if err := n.state.DeleteEval(index, req.Evals, req.Allocs); err != nil {
		n.logger.Printf("[ERR] nomad.fsm: DeleteEval failed: %v", err)
		return err
	}
	return nil
}

func (n *nomadFSM) applyAllocUpdate(buf []byte, index uint64) interface{} {
	defer metrics.MeasureSince([]string{"nomad", "fsm", "alloc_update"}, time.Now())
	var req structs.AllocUpdateRequest
	if err := structs.Decode(buf, &req); err != nil {
		panic(fmt.Errorf("failed to decode request: %v", err))
	}

	// Attach the job to all the allocations. It is pulled out in the
	// payload to avoid the redundancy of encoding, but should be denormalized
	// prior to being inserted into MemDB.
	if j := req.Job; j != nil {
		for _, alloc := range req.Alloc {
			if alloc.Job == nil && !alloc.TerminalStatus() {
				alloc.Job = j
			}
		}
	}

	// Calculate the total resources of allocations. It is pulled out in the
	// payload to avoid encoding something that can be computed, but should be
	// denormalized prior to being inserted into MemDB.
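	// For example (illustrative figures): an allocation whose two tasks reserve
	// 256 MB of memory each, plus 300 MB of shared disk, ends up with
	// alloc.Resources holding 512 MB of memory and the shared DiskMB.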
	for _, alloc := range req.Alloc {
		if alloc.Resources != nil {
			// COMPAT 0.4.1 -> 0.5
			// Set the shared resources for allocations which don't have them
			if alloc.SharedResources == nil {
				alloc.SharedResources = &structs.Resources{
					DiskMB: alloc.Resources.DiskMB,
				}
			}

			continue
		}

		alloc.Resources = new(structs.Resources)
		for _, task := range alloc.TaskResources {
			alloc.Resources.Add(task)
		}

		// Add the shared resources
		alloc.Resources.Add(alloc.SharedResources)
	}

	if err := n.state.UpsertAllocs(index, req.Alloc); err != nil {
		n.logger.Printf("[ERR] nomad.fsm: UpsertAllocs failed: %v", err)
		return err
	}
	return nil
}

func (n *nomadFSM) applyAllocClientUpdate(buf []byte, index uint64) interface{} {
	defer metrics.MeasureSince([]string{"nomad", "fsm", "alloc_client_update"}, time.Now())
	var req structs.AllocUpdateRequest
	if err := structs.Decode(buf, &req); err != nil {
		panic(fmt.Errorf("failed to decode request: %v", err))
	}
	if len(req.Alloc) == 0 {
		return nil
	}

	// Update the allocs with the job ID and task group name
	for _, alloc := range req.Alloc {
		if existing, _ := n.state.AllocByID(alloc.ID); existing != nil {
			alloc.JobID = existing.JobID
			alloc.TaskGroup = existing.TaskGroup
		}
	}

	// Update all the client allocations
	if err := n.state.UpdateAllocsFromClient(index, req.Alloc); err != nil {
		n.logger.Printf("[ERR] nomad.fsm: UpdateAllocFromClient failed: %v", err)
		return err
	}

	// Unblock evals for the node's computed node class if the client has
	// finished running an allocation.
	for _, alloc := range req.Alloc {
		if alloc.ClientStatus == structs.AllocClientStatusComplete ||
			alloc.ClientStatus == structs.AllocClientStatusFailed {
			nodeID := alloc.NodeID
			node, err := n.state.NodeByID(nodeID)
			if err != nil || node == nil {
				n.logger.Printf("[ERR] nomad.fsm: looking up node %q failed: %v", nodeID, err)
				return err
			}
			n.blockedEvals.Unblock(node.ComputedClass, index)
		}
	}

	return nil
}

// applyReconcileSummaries reconciles summaries for all the jobs
func (n *nomadFSM) applyReconcileSummaries(buf []byte, index uint64) interface{} {
	if err := n.state.ReconcileJobSummaries(index); err != nil {
		return err
	}
	return n.reconcileQueuedAllocations(index)
}

// applyUpsertVaultAccessor stores the Vault accessors for a given allocation
// and task
func (n *nomadFSM) applyUpsertVaultAccessor(buf []byte, index uint64) interface{} {
	defer metrics.MeasureSince([]string{"nomad", "fsm", "upsert_vault_accessor"}, time.Now())
	var req structs.VaultAccessorsRequest
	if err := structs.Decode(buf, &req); err != nil {
		panic(fmt.Errorf("failed to decode request: %v", err))
	}

	if err := n.state.UpsertVaultAccessor(index, req.Accessors); err != nil {
		n.logger.Printf("[ERR] nomad.fsm: UpsertVaultAccessor failed: %v", err)
		return err
	}

	return nil
}

// applyDeregisterVaultAccessor deregisters a set of Vault accessors
func (n *nomadFSM) applyDeregisterVaultAccessor(buf []byte, index uint64) interface{} {
	defer metrics.MeasureSince([]string{"nomad", "fsm", "deregister_vault_accessor"}, time.Now())
	var req structs.VaultAccessorsRequest
	if err := structs.Decode(buf, &req); err != nil {
		panic(fmt.Errorf("failed to decode request: %v", err))
	}

	if err := n.state.DeleteVaultAccessors(index, req.Accessors); err != nil {
		n.logger.Printf("[ERR] nomad.fsm: DeregisterVaultAccessor failed: %v", err)
		return err
	}

	return nil
}

// Snapshot is used to create a point-in-time snapshot of the FSM state that
// can be persisted by Raft.
func (n *nomadFSM) Snapshot() (raft.FSMSnapshot, error) {
	// Create a new snapshot
	snap, err := n.state.Snapshot()
	if err != nil {
		return nil, err
	}

	ns := &nomadSnapshot{
		snap:      snap,
		timetable: n.timetable,
	}
	return ns, nil
}

// Restore replaces the FSM state with the contents of a snapshot.
func (n *nomadFSM) Restore(old io.ReadCloser) error {
	defer old.Close()

	// Create a new state store
	newState, err := state.NewStateStore(n.logOutput)
	if err != nil {
		return err
	}
	n.state = newState

	// Start the state restore
	restore, err := newState.Restore()
	if err != nil {
		return err
	}
	defer restore.Abort()

	// Create a decoder
	dec := codec.NewDecoder(old, structs.MsgpackHandle)

	// Read in the header
	var header snapshotHeader
	if err := dec.Decode(&header); err != nil {
		return err
	}

	// Populate the new state
	msgType := make([]byte, 1)
	for {
		// Read the message type
		_, err := old.Read(msgType)
		if err == io.EOF {
			break
		} else if err != nil {
			return err
		}

		// Decode
		switch SnapshotType(msgType[0]) {
		case TimeTableSnapshot:
			if err := n.timetable.Deserialize(dec); err != nil {
				return fmt.Errorf("time table deserialize failed: %v", err)
			}

		case NodeSnapshot:
			node := new(structs.Node)
			if err := dec.Decode(node); err != nil {
				return err
			}
			if err := restore.NodeRestore(node); err != nil {
				return err
			}

		case JobSnapshot:
			job := new(structs.Job)
			if err := dec.Decode(job); err != nil {
				return err
			}

			// COMPAT: Remove in 0.5
			// Empty maps and slices should be treated as nil to avoid
			// unintended destructive updates in the scheduler since we use
			// reflect.DeepEqual. Starting Nomad 0.4.1, job submission sanitizes
			// the incoming job.
			job.Canonicalize()

			if err := restore.JobRestore(job); err != nil {
				return err
			}

		case EvalSnapshot:
			eval := new(structs.Evaluation)
			if err := dec.Decode(eval); err != nil {
				return err
			}
			if err := restore.EvalRestore(eval); err != nil {
				return err
			}

		case AllocSnapshot:
			alloc := new(structs.Allocation)
			if err := dec.Decode(alloc); err != nil {
				return err
			}
			if err := restore.AllocRestore(alloc); err != nil {
				return err
			}

		case IndexSnapshot:
			idx := new(state.IndexEntry)
			if err := dec.Decode(idx); err != nil {
				return err
			}
			if err := restore.IndexRestore(idx); err != nil {
				return err
			}

		case PeriodicLaunchSnapshot:
			launch := new(structs.PeriodicLaunch)
			if err := dec.Decode(launch); err != nil {
				return err
			}
			if err := restore.PeriodicLaunchRestore(launch); err != nil {
				return err
			}

		case JobSummarySnapshot:
			summary := new(structs.JobSummary)
			if err := dec.Decode(summary); err != nil {
				return err
			}
			if err := restore.JobSummaryRestore(summary); err != nil {
				return err
			}

		case VaultAccessorSnapshot:
			accessor := new(structs.VaultAccessor)
			if err := dec.Decode(accessor); err != nil {
				return err
			}
			if err := restore.VaultAccessorRestore(accessor); err != nil {
				return err
			}

		default:
			return fmt.Errorf("Unrecognized snapshot type: %v", msgType)
		}
	}

	restore.Commit()

	// Create Job Summaries
	// COMPAT 0.4 -> 0.4.1
	// We can remove this in 0.5. This exists so that the server creates job
	// summaries if they were not present previously. When users upgrade to 0.5
	// from 0.4.1, the snapshot will contain job summaries so it will be safe to
	// remove this block.
	index, err := n.state.Index("job_summary")
	if err != nil {
		return fmt.Errorf("couldn't fetch index of job summary table: %v", err)
	}

	// If the index is 0 that means there is no job summary in the snapshot so
	// we will have to create them
	if index == 0 {
		// query the latest index
		latestIndex, err := n.state.LatestIndex()
		if err != nil {
			return fmt.Errorf("unable to query latest index: %v", err)
		}
		if err := n.state.ReconcileJobSummaries(latestIndex); err != nil {
			return fmt.Errorf("error reconciling summaries: %v", err)
		}
	}

	return nil
}

// reconcileQueuedAllocations re-calculates the queued allocations for every
// job for which we created a Job Summary during the snapshot restore
func (n *nomadFSM) reconcileQueuedAllocations(index uint64) error {
	// Get all the jobs
	iter, err := n.state.Jobs()
	if err != nil {
		return err
	}

	snap, err := n.state.Snapshot()
	if err != nil {
		return fmt.Errorf("unable to create snapshot: %v", err)
	}

	// Invoke the scheduler for every job so that we can populate the number
	// of queued allocations for every job
	for {
		rawJob := iter.Next()
		if rawJob == nil {
			break
		}
		job := rawJob.(*structs.Job)
		planner := &scheduler.Harness{
			State: &snap.StateStore,
		}
		// Create an eval and mark it as requiring annotations and insert that as well
		eval := &structs.Evaluation{
			ID:             structs.GenerateUUID(),
			Priority:       job.Priority,
			Type:           job.Type,
			TriggeredBy:    structs.EvalTriggerJobRegister,
			JobID:          job.ID,
			JobModifyIndex: job.JobModifyIndex + 1,
			Status:         structs.EvalStatusPending,
			AnnotatePlan:   true,
		}

		// Create the scheduler and run it
		sched, err := scheduler.NewScheduler(eval.Type, n.logger, snap, planner)
		if err != nil {
			return err
		}

		if err := sched.Process(eval); err != nil {
			return err
		}

		// Get the job summary from the fsm state store
		summary, err := n.state.JobSummaryByID(job.ID)
		if err != nil {
			return err
		}

		// Add the allocations the scheduler has made to queued since these
		// allocations are never getting placed until the scheduler is invoked
		// with a real planner
		if l := len(planner.Plans); l != 1 {
			return fmt.Errorf("unexpected number of plans during restore %d. Please file an issue including the logs", l)
		}
		for _, allocations := range planner.Plans[0].NodeAllocation {
			for _, allocation := range allocations {
				tgSummary, ok := summary.Summary[allocation.TaskGroup]
				if !ok {
					return fmt.Errorf("task group %q not found while updating queued count", allocation.TaskGroup)
				}
				tgSummary.Queued += 1
				summary.Summary[allocation.TaskGroup] = tgSummary
			}
		}

		// Add the queued allocations attached to the evaluation to the queued
		// counter of the job summary
		if l := len(planner.Evals); l != 1 {
			return fmt.Errorf("unexpected number of evals during restore %d. Please file an issue including the logs", l)
		}
		for tg, queued := range planner.Evals[0].QueuedAllocations {
			tgSummary, ok := summary.Summary[tg]
			if !ok {
				return fmt.Errorf("task group %q not found while updating queued count", tg)
			}
			tgSummary.Queued += queued
			summary.Summary[tg] = tgSummary
		}

		if err := n.state.UpsertJobSummary(index, summary); err != nil {
			return err
		}
	}
	return nil
}

// Persist writes the snapshot to the given sink, prefixing each record with
// its SnapshotType so Restore can route it.
func (s *nomadSnapshot) Persist(sink raft.SnapshotSink) error {
	defer metrics.MeasureSince([]string{"nomad", "fsm", "persist"}, time.Now())
	// Create an encoder for the sink
	encoder := codec.NewEncoder(sink, structs.MsgpackHandle)

	// Write the header
	header := snapshotHeader{}
	if err := encoder.Encode(&header); err != nil {
		sink.Cancel()
		return err
	}

	// Write the time table
	sink.Write([]byte{byte(TimeTableSnapshot)})
	if err := s.timetable.Serialize(encoder); err != nil {
		sink.Cancel()
		return err
	}

	// Write all the data out
	if err := s.persistIndexes(sink, encoder); err != nil {
		sink.Cancel()
		return err
	}
	if err := s.persistNodes(sink, encoder); err != nil {
		sink.Cancel()
		return err
	}
	if err := s.persistJobs(sink, encoder); err != nil {
		sink.Cancel()
		return err
	}
	if err := s.persistEvals(sink, encoder); err != nil {
		sink.Cancel()
		return err
	}
	if err := s.persistAllocs(sink, encoder); err != nil {
		sink.Cancel()
		return err
	}
	if err := s.persistPeriodicLaunches(sink, encoder); err != nil {
		sink.Cancel()
		return err
	}
	if err := s.persistJobSummaries(sink, encoder); err != nil {
		sink.Cancel()
		return err
	}
	if err := s.persistVaultAccessors(sink, encoder); err != nil {
		sink.Cancel()
		return err
	}
	return nil
}

func (s *nomadSnapshot) persistIndexes(sink raft.SnapshotSink,
	encoder *codec.Encoder) error {
	// Get all the indexes
	iter, err := s.snap.Indexes()
	if err != nil {
		return err
	}

	for {
		// Get the next item
		raw := iter.Next()
		if raw == nil {
			break
		}

		// Prepare the request struct
		idx := raw.(*state.IndexEntry)

		// Write out the index entry
		sink.Write([]byte{byte(IndexSnapshot)})
		if err := encoder.Encode(idx); err != nil {
			return err
		}
	}
	return nil
}

func (s *nomadSnapshot) persistNodes(sink raft.SnapshotSink,
	encoder *codec.Encoder) error {
	// Get all the nodes
	nodes, err := s.snap.Nodes()
	if err != nil {
		return err
	}

	for {
		// Get the next item
		raw := nodes.Next()
		if raw == nil {
			break
		}

		// Prepare the request struct
		node := raw.(*structs.Node)

		// Write out a node registration
		sink.Write([]byte{byte(NodeSnapshot)})
		if err := encoder.Encode(node); err != nil {
			return err
		}
	}
	return nil
}

func (s *nomadSnapshot) persistJobs(sink raft.SnapshotSink,
	encoder *codec.Encoder) error {
	// Get all the jobs
	jobs, err := s.snap.Jobs()
	if err != nil {
		return err
	}

	for {
		// Get the next item
		raw := jobs.Next()
		if raw == nil {
			break
		}

		// Prepare the request struct
		job := raw.(*structs.Job)

		// Write out a job registration
		sink.Write([]byte{byte(JobSnapshot)})
		if err := encoder.Encode(job); err != nil {
			return err
		}
	}
	return nil
}

func (s *nomadSnapshot) persistEvals(sink raft.SnapshotSink,
	encoder *codec.Encoder) error {
	// Get all the evaluations
	evals, err := s.snap.Evals()
	if err != nil {
		return err
	}

	for {
		// Get the next item
		raw := evals.Next()
		if raw == nil {
			break
		}

		// Prepare the request struct
		eval := raw.(*structs.Evaluation)

		// Write out the evaluation
		sink.Write([]byte{byte(EvalSnapshot)})
		if err := encoder.Encode(eval); err != nil {
			return err
		}
	}
	return nil
}

func (s *nomadSnapshot) persistAllocs(sink raft.SnapshotSink,
	encoder *codec.Encoder) error {
	// Get all the allocations
	allocs, err := s.snap.Allocs()
	if err != nil {
		return err
	}

	for {
		// Get the next item
		raw := allocs.Next()
		if raw == nil {
			break
		}

		// Prepare the request struct
		alloc := raw.(*structs.Allocation)

		// Write out the allocation
		sink.Write([]byte{byte(AllocSnapshot)})
		if err := encoder.Encode(alloc); err != nil {
			return err
		}
	}
	return nil
}

func (s *nomadSnapshot) persistPeriodicLaunches(sink raft.SnapshotSink,
	encoder *codec.Encoder) error {
	// Get all the periodic launches
	launches, err := s.snap.PeriodicLaunches()
	if err != nil {
		return err
	}

	for {
		// Get the next item
		raw := launches.Next()
		if raw == nil {
			break
		}

		// Prepare the request struct
		launch := raw.(*structs.PeriodicLaunch)

		// Write out the periodic launch
		sink.Write([]byte{byte(PeriodicLaunchSnapshot)})
		if err := encoder.Encode(launch); err != nil {
			return err
		}
	}
	return nil
}

func (s *nomadSnapshot) persistJobSummaries(sink raft.SnapshotSink,
	encoder *codec.Encoder) error {

	summaries, err := s.snap.JobSummaries()
	if err != nil {
		return err
	}

	for {
		raw := summaries.Next()
		if raw == nil {
			break
		}

		jobSummary := raw.(structs.JobSummary)

		sink.Write([]byte{byte(JobSummarySnapshot)})
		if err := encoder.Encode(jobSummary); err != nil {
			return err
		}
	}
	return nil
}

func (s *nomadSnapshot) persistVaultAccessors(sink raft.SnapshotSink,
	encoder *codec.Encoder) error {

	accessors, err := s.snap.VaultAccessors()
	if err != nil {
		return err
	}

	for {
		raw := accessors.Next()
		if raw == nil {
			break
		}

		accessor := raw.(*structs.VaultAccessor)

		sink.Write([]byte{byte(VaultAccessorSnapshot)})
		if err := encoder.Encode(accessor); err != nil {
			return err
		}
	}
	return nil
}

// Release is a no-op, as we just need to GC the pointer
// to the state store snapshot. There is nothing to explicitly
// cleanup.
func (s *nomadSnapshot) Release() {}
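
// Wiring note (illustrative; the actual server setup lives elsewhere in this
// package): the FSM is constructed with NewFSM and handed to hashicorp/raft
// as its raft.FSM implementation, roughly:
//
//	fsm, err := NewFSM(evalBroker, periodicDispatcher, blockedEvals, logOutput)
//	if err != nil {
//		return nil, err
//	}
//	// fsm is then passed to raft.NewRaft along with the log, stable, and
//	// snapshot stores.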