github.com/ncodes/nomad@v0.5.7-0.20170403112158-97adf4a74fb3/nomad/fsm.go (about) 1 package nomad 2 3 import ( 4 "fmt" 5 "io" 6 "log" 7 "reflect" 8 "sync" 9 "time" 10 11 "github.com/armon/go-metrics" 12 memdb "github.com/hashicorp/go-memdb" 13 "github.com/ncodes/nomad/nomad/state" 14 "github.com/ncodes/nomad/nomad/structs" 15 "github.com/ncodes/nomad/scheduler" 16 "github.com/hashicorp/raft" 17 "github.com/ugorji/go/codec" 18 ) 19 20 const ( 21 // timeTableGranularity is the granularity of index to time tracking 22 timeTableGranularity = 5 * time.Minute 23 24 // timeTableLimit is the maximum limit of our tracking 25 timeTableLimit = 72 * time.Hour 26 ) 27 28 // SnapshotType is prefixed to a record in the FSM snapshot 29 // so that we can determine the type for restore 30 type SnapshotType byte 31 32 const ( 33 NodeSnapshot SnapshotType = iota 34 JobSnapshot 35 IndexSnapshot 36 EvalSnapshot 37 AllocSnapshot 38 TimeTableSnapshot 39 PeriodicLaunchSnapshot 40 JobSummarySnapshot 41 VaultAccessorSnapshot 42 ) 43 44 // nomadFSM implements a finite state machine that is used 45 // along with Raft to provide strong consistency. We implement 46 // this outside the Server to avoid exposing this outside the package. 47 type nomadFSM struct { 48 evalBroker *EvalBroker 49 blockedEvals *BlockedEvals 50 periodicDispatcher *PeriodicDispatch 51 logOutput io.Writer 52 logger *log.Logger 53 state *state.StateStore 54 timetable *TimeTable 55 56 // stateLock is only used to protect outside callers to State() from 57 // racing with Restore(), which is called by Raft (it puts in a totally 58 // new state store). Everything internal here is synchronized by the 59 // Raft side, so doesn't need to lock this. 60 stateLock sync.RWMutex 61 } 62 63 // nomadSnapshot is used to provide a snapshot of the current 64 // state in a way that can be accessed concurrently with operations 65 // that may modify the live state. 66 type nomadSnapshot struct { 67 snap *state.StateSnapshot 68 timetable *TimeTable 69 } 70 71 // snapshotHeader is the first entry in our snapshot 72 type snapshotHeader struct { 73 } 74 75 // NewFSMPath is used to construct a new FSM with a blank state 76 func NewFSM(evalBroker *EvalBroker, periodic *PeriodicDispatch, 77 blocked *BlockedEvals, logOutput io.Writer) (*nomadFSM, error) { 78 // Create a state store 79 state, err := state.NewStateStore(logOutput) 80 if err != nil { 81 return nil, err 82 } 83 84 fsm := &nomadFSM{ 85 evalBroker: evalBroker, 86 periodicDispatcher: periodic, 87 blockedEvals: blocked, 88 logOutput: logOutput, 89 logger: log.New(logOutput, "", log.LstdFlags), 90 state: state, 91 timetable: NewTimeTable(timeTableGranularity, timeTableLimit), 92 } 93 return fsm, nil 94 } 95 96 // Close is used to cleanup resources associated with the FSM 97 func (n *nomadFSM) Close() error { 98 return nil 99 } 100 101 // State is used to return a handle to the current state 102 func (n *nomadFSM) State() *state.StateStore { 103 n.stateLock.RLock() 104 defer n.stateLock.RUnlock() 105 return n.state 106 } 107 108 // TimeTable returns the time table of transactions 109 func (n *nomadFSM) TimeTable() *TimeTable { 110 return n.timetable 111 } 112 113 func (n *nomadFSM) Apply(log *raft.Log) interface{} { 114 buf := log.Data 115 msgType := structs.MessageType(buf[0]) 116 117 // Witness this write 118 n.timetable.Witness(log.Index, time.Now().UTC()) 119 120 // Check if this message type should be ignored when unknown. This is 121 // used so that new commands can be added with developer control if older 122 // versions can safely ignore the command, or if they should crash. 123 ignoreUnknown := false 124 if msgType&structs.IgnoreUnknownTypeFlag == structs.IgnoreUnknownTypeFlag { 125 msgType &= ^structs.IgnoreUnknownTypeFlag 126 ignoreUnknown = true 127 } 128 129 switch msgType { 130 case structs.NodeRegisterRequestType: 131 return n.applyUpsertNode(buf[1:], log.Index) 132 case structs.NodeDeregisterRequestType: 133 return n.applyDeregisterNode(buf[1:], log.Index) 134 case structs.NodeUpdateStatusRequestType: 135 return n.applyStatusUpdate(buf[1:], log.Index) 136 case structs.NodeUpdateDrainRequestType: 137 return n.applyDrainUpdate(buf[1:], log.Index) 138 case structs.JobRegisterRequestType: 139 return n.applyUpsertJob(buf[1:], log.Index) 140 case structs.JobDeregisterRequestType: 141 return n.applyDeregisterJob(buf[1:], log.Index) 142 case structs.EvalUpdateRequestType: 143 return n.applyUpdateEval(buf[1:], log.Index) 144 case structs.EvalDeleteRequestType: 145 return n.applyDeleteEval(buf[1:], log.Index) 146 case structs.AllocUpdateRequestType: 147 return n.applyAllocUpdate(buf[1:], log.Index) 148 case structs.AllocClientUpdateRequestType: 149 return n.applyAllocClientUpdate(buf[1:], log.Index) 150 case structs.ReconcileJobSummariesRequestType: 151 return n.applyReconcileSummaries(buf[1:], log.Index) 152 case structs.VaultAccessorRegisterRequestType: 153 return n.applyUpsertVaultAccessor(buf[1:], log.Index) 154 case structs.VaultAccessorDegisterRequestType: 155 return n.applyDeregisterVaultAccessor(buf[1:], log.Index) 156 default: 157 if ignoreUnknown { 158 n.logger.Printf("[WARN] nomad.fsm: ignoring unknown message type (%d), upgrade to newer version", msgType) 159 return nil 160 } else { 161 panic(fmt.Errorf("failed to apply request: %#v", buf)) 162 } 163 } 164 } 165 166 func (n *nomadFSM) applyUpsertNode(buf []byte, index uint64) interface{} { 167 defer metrics.MeasureSince([]string{"nomad", "fsm", "register_node"}, time.Now()) 168 var req structs.NodeRegisterRequest 169 if err := structs.Decode(buf, &req); err != nil { 170 panic(fmt.Errorf("failed to decode request: %v", err)) 171 } 172 173 if err := n.state.UpsertNode(index, req.Node); err != nil { 174 n.logger.Printf("[ERR] nomad.fsm: UpsertNode failed: %v", err) 175 return err 176 } 177 178 // Unblock evals for the nodes computed node class if it is in a ready 179 // state. 180 if req.Node.Status == structs.NodeStatusReady { 181 n.blockedEvals.Unblock(req.Node.ComputedClass, index) 182 } 183 184 return nil 185 } 186 187 func (n *nomadFSM) applyDeregisterNode(buf []byte, index uint64) interface{} { 188 defer metrics.MeasureSince([]string{"nomad", "fsm", "deregister_node"}, time.Now()) 189 var req structs.NodeDeregisterRequest 190 if err := structs.Decode(buf, &req); err != nil { 191 panic(fmt.Errorf("failed to decode request: %v", err)) 192 } 193 194 if err := n.state.DeleteNode(index, req.NodeID); err != nil { 195 n.logger.Printf("[ERR] nomad.fsm: DeleteNode failed: %v", err) 196 return err 197 } 198 return nil 199 } 200 201 func (n *nomadFSM) applyStatusUpdate(buf []byte, index uint64) interface{} { 202 defer metrics.MeasureSince([]string{"nomad", "fsm", "node_status_update"}, time.Now()) 203 var req structs.NodeUpdateStatusRequest 204 if err := structs.Decode(buf, &req); err != nil { 205 panic(fmt.Errorf("failed to decode request: %v", err)) 206 } 207 208 if err := n.state.UpdateNodeStatus(index, req.NodeID, req.Status); err != nil { 209 n.logger.Printf("[ERR] nomad.fsm: UpdateNodeStatus failed: %v", err) 210 return err 211 } 212 213 // Unblock evals for the nodes computed node class if it is in a ready 214 // state. 215 if req.Status == structs.NodeStatusReady { 216 ws := memdb.NewWatchSet() 217 node, err := n.state.NodeByID(ws, req.NodeID) 218 if err != nil { 219 n.logger.Printf("[ERR] nomad.fsm: looking up node %q failed: %v", req.NodeID, err) 220 return err 221 222 } 223 n.blockedEvals.Unblock(node.ComputedClass, index) 224 } 225 226 return nil 227 } 228 229 func (n *nomadFSM) applyDrainUpdate(buf []byte, index uint64) interface{} { 230 defer metrics.MeasureSince([]string{"nomad", "fsm", "node_drain_update"}, time.Now()) 231 var req structs.NodeUpdateDrainRequest 232 if err := structs.Decode(buf, &req); err != nil { 233 panic(fmt.Errorf("failed to decode request: %v", err)) 234 } 235 236 if err := n.state.UpdateNodeDrain(index, req.NodeID, req.Drain); err != nil { 237 n.logger.Printf("[ERR] nomad.fsm: UpdateNodeDrain failed: %v", err) 238 return err 239 } 240 return nil 241 } 242 243 func (n *nomadFSM) applyUpsertJob(buf []byte, index uint64) interface{} { 244 defer metrics.MeasureSince([]string{"nomad", "fsm", "register_job"}, time.Now()) 245 var req structs.JobRegisterRequest 246 if err := structs.Decode(buf, &req); err != nil { 247 panic(fmt.Errorf("failed to decode request: %v", err)) 248 } 249 250 // COMPAT: Remove in 0.6 251 // Empty maps and slices should be treated as nil to avoid 252 // un-intended destructive updates in scheduler since we use 253 // reflect.DeepEqual. Starting Nomad 0.4.1, job submission sanatizes 254 // the incoming job. 255 req.Job.Canonicalize() 256 257 if err := n.state.UpsertJob(index, req.Job); err != nil { 258 n.logger.Printf("[ERR] nomad.fsm: UpsertJob failed: %v", err) 259 return err 260 } 261 262 // We always add the job to the periodic dispatcher because there is the 263 // possibility that the periodic spec was removed and then we should stop 264 // tracking it. 265 if err := n.periodicDispatcher.Add(req.Job); err != nil { 266 n.logger.Printf("[ERR] nomad.fsm: periodicDispatcher.Add failed: %v", err) 267 return err 268 } 269 270 // Create a watch set 271 ws := memdb.NewWatchSet() 272 273 // If it is periodic, record the time it was inserted. This is necessary for 274 // recovering during leader election. It is possible that from the time it 275 // is added to when it was suppose to launch, leader election occurs and the 276 // job was not launched. In this case, we use the insertion time to 277 // determine if a launch was missed. 278 if req.Job.IsPeriodic() { 279 prevLaunch, err := n.state.PeriodicLaunchByID(ws, req.Job.ID) 280 if err != nil { 281 n.logger.Printf("[ERR] nomad.fsm: PeriodicLaunchByID failed: %v", err) 282 return err 283 } 284 285 // Record the insertion time as a launch. We overload the launch table 286 // such that the first entry is the insertion time. 287 if prevLaunch == nil { 288 launch := &structs.PeriodicLaunch{ID: req.Job.ID, Launch: time.Now()} 289 if err := n.state.UpsertPeriodicLaunch(index, launch); err != nil { 290 n.logger.Printf("[ERR] nomad.fsm: UpsertPeriodicLaunch failed: %v", err) 291 return err 292 } 293 } 294 } 295 296 // Check if the parent job is periodic and mark the launch time. 297 parentID := req.Job.ParentID 298 if parentID != "" { 299 parent, err := n.state.JobByID(ws, parentID) 300 if err != nil { 301 n.logger.Printf("[ERR] nomad.fsm: JobByID(%v) lookup for parent failed: %v", parentID, err) 302 return err 303 } else if parent == nil { 304 // The parent has been deregistered. 305 return nil 306 } 307 308 if parent.IsPeriodic() && !parent.IsParameterized() { 309 t, err := n.periodicDispatcher.LaunchTime(req.Job.ID) 310 if err != nil { 311 n.logger.Printf("[ERR] nomad.fsm: LaunchTime(%v) failed: %v", req.Job.ID, err) 312 return err 313 } 314 315 launch := &structs.PeriodicLaunch{ID: parentID, Launch: t} 316 if err := n.state.UpsertPeriodicLaunch(index, launch); err != nil { 317 n.logger.Printf("[ERR] nomad.fsm: UpsertPeriodicLaunch failed: %v", err) 318 return err 319 } 320 } 321 } 322 323 return nil 324 } 325 326 func (n *nomadFSM) applyDeregisterJob(buf []byte, index uint64) interface{} { 327 defer metrics.MeasureSince([]string{"nomad", "fsm", "deregister_job"}, time.Now()) 328 var req structs.JobDeregisterRequest 329 if err := structs.Decode(buf, &req); err != nil { 330 panic(fmt.Errorf("failed to decode request: %v", err)) 331 } 332 333 if err := n.state.DeleteJob(index, req.JobID); err != nil { 334 n.logger.Printf("[ERR] nomad.fsm: DeleteJob failed: %v", err) 335 return err 336 } 337 338 if err := n.periodicDispatcher.Remove(req.JobID); err != nil { 339 n.logger.Printf("[ERR] nomad.fsm: periodicDispatcher.Remove failed: %v", err) 340 return err 341 } 342 343 // We always delete from the periodic launch table because it is possible that 344 // the job was updated to be non-perioidic, thus checking if it is periodic 345 // doesn't ensure we clean it up properly. 346 n.state.DeletePeriodicLaunch(index, req.JobID) 347 348 return nil 349 } 350 351 func (n *nomadFSM) applyUpdateEval(buf []byte, index uint64) interface{} { 352 defer metrics.MeasureSince([]string{"nomad", "fsm", "update_eval"}, time.Now()) 353 var req structs.EvalUpdateRequest 354 if err := structs.Decode(buf, &req); err != nil { 355 panic(fmt.Errorf("failed to decode request: %v", err)) 356 } 357 358 if err := n.state.UpsertEvals(index, req.Evals); err != nil { 359 n.logger.Printf("[ERR] nomad.fsm: UpsertEvals failed: %v", err) 360 return err 361 } 362 363 for _, eval := range req.Evals { 364 if eval.ShouldEnqueue() { 365 n.evalBroker.Enqueue(eval) 366 } else if eval.ShouldBlock() { 367 n.blockedEvals.Block(eval) 368 } else if eval.Status == structs.EvalStatusComplete && 369 len(eval.FailedTGAllocs) == 0 { 370 // If we have a successful evaluation for a node, untrack any 371 // blocked evaluation 372 n.blockedEvals.Untrack(eval.JobID) 373 } 374 } 375 return nil 376 } 377 378 func (n *nomadFSM) applyDeleteEval(buf []byte, index uint64) interface{} { 379 defer metrics.MeasureSince([]string{"nomad", "fsm", "delete_eval"}, time.Now()) 380 var req structs.EvalDeleteRequest 381 if err := structs.Decode(buf, &req); err != nil { 382 panic(fmt.Errorf("failed to decode request: %v", err)) 383 } 384 385 if err := n.state.DeleteEval(index, req.Evals, req.Allocs); err != nil { 386 n.logger.Printf("[ERR] nomad.fsm: DeleteEval failed: %v", err) 387 return err 388 } 389 return nil 390 } 391 392 func (n *nomadFSM) applyAllocUpdate(buf []byte, index uint64) interface{} { 393 defer metrics.MeasureSince([]string{"nomad", "fsm", "alloc_update"}, time.Now()) 394 var req structs.AllocUpdateRequest 395 if err := structs.Decode(buf, &req); err != nil { 396 panic(fmt.Errorf("failed to decode request: %v", err)) 397 } 398 399 // Attach the job to all the allocations. It is pulled out in the 400 // payload to avoid the redundancy of encoding, but should be denormalized 401 // prior to being inserted into MemDB. 402 if j := req.Job; j != nil { 403 for _, alloc := range req.Alloc { 404 if alloc.Job == nil && !alloc.TerminalStatus() { 405 alloc.Job = j 406 } 407 } 408 } 409 410 // Calculate the total resources of allocations. It is pulled out in the 411 // payload to avoid encoding something that can be computed, but should be 412 // denormalized prior to being inserted into MemDB. 413 for _, alloc := range req.Alloc { 414 if alloc.Resources != nil { 415 // COMPAT 0.4.1 -> 0.5 416 // Set the shared resources for allocations which don't have them 417 if alloc.SharedResources == nil { 418 alloc.SharedResources = &structs.Resources{ 419 DiskMB: alloc.Resources.DiskMB, 420 } 421 } 422 423 continue 424 } 425 426 alloc.Resources = new(structs.Resources) 427 for _, task := range alloc.TaskResources { 428 alloc.Resources.Add(task) 429 } 430 431 // Add the shared resources 432 alloc.Resources.Add(alloc.SharedResources) 433 } 434 435 if err := n.state.UpsertAllocs(index, req.Alloc); err != nil { 436 n.logger.Printf("[ERR] nomad.fsm: UpsertAllocs failed: %v", err) 437 return err 438 } 439 return nil 440 } 441 442 func (n *nomadFSM) applyAllocClientUpdate(buf []byte, index uint64) interface{} { 443 defer metrics.MeasureSince([]string{"nomad", "fsm", "alloc_client_update"}, time.Now()) 444 var req structs.AllocUpdateRequest 445 if err := structs.Decode(buf, &req); err != nil { 446 panic(fmt.Errorf("failed to decode request: %v", err)) 447 } 448 if len(req.Alloc) == 0 { 449 return nil 450 } 451 452 // Create a watch set 453 ws := memdb.NewWatchSet() 454 455 // Updating the allocs with the job id and task group name 456 for _, alloc := range req.Alloc { 457 if existing, _ := n.state.AllocByID(ws, alloc.ID); existing != nil { 458 alloc.JobID = existing.JobID 459 alloc.TaskGroup = existing.TaskGroup 460 } 461 } 462 463 // Update all the client allocations 464 if err := n.state.UpdateAllocsFromClient(index, req.Alloc); err != nil { 465 n.logger.Printf("[ERR] nomad.fsm: UpdateAllocFromClient failed: %v", err) 466 return err 467 } 468 469 // Unblock evals for the nodes computed node class if the client has 470 // finished running an allocation. 471 for _, alloc := range req.Alloc { 472 if alloc.ClientStatus == structs.AllocClientStatusComplete || 473 alloc.ClientStatus == structs.AllocClientStatusFailed { 474 nodeID := alloc.NodeID 475 node, err := n.state.NodeByID(ws, nodeID) 476 if err != nil || node == nil { 477 n.logger.Printf("[ERR] nomad.fsm: looking up node %q failed: %v", nodeID, err) 478 return err 479 480 } 481 n.blockedEvals.Unblock(node.ComputedClass, index) 482 } 483 } 484 485 return nil 486 } 487 488 // applyReconcileSummaries reconciles summaries for all the jobs 489 func (n *nomadFSM) applyReconcileSummaries(buf []byte, index uint64) interface{} { 490 if err := n.state.ReconcileJobSummaries(index); err != nil { 491 return err 492 } 493 return n.reconcileQueuedAllocations(index) 494 } 495 496 // applyUpsertVaultAccessor stores the Vault accessors for a given allocation 497 // and task 498 func (n *nomadFSM) applyUpsertVaultAccessor(buf []byte, index uint64) interface{} { 499 defer metrics.MeasureSince([]string{"nomad", "fsm", "upsert_vault_accessor"}, time.Now()) 500 var req structs.VaultAccessorsRequest 501 if err := structs.Decode(buf, &req); err != nil { 502 panic(fmt.Errorf("failed to decode request: %v", err)) 503 } 504 505 if err := n.state.UpsertVaultAccessor(index, req.Accessors); err != nil { 506 n.logger.Printf("[ERR] nomad.fsm: UpsertVaultAccessor failed: %v", err) 507 return err 508 } 509 510 return nil 511 } 512 513 // applyDeregisterVaultAccessor deregisters a set of Vault accessors 514 func (n *nomadFSM) applyDeregisterVaultAccessor(buf []byte, index uint64) interface{} { 515 defer metrics.MeasureSince([]string{"nomad", "fsm", "deregister_vault_accessor"}, time.Now()) 516 var req structs.VaultAccessorsRequest 517 if err := structs.Decode(buf, &req); err != nil { 518 panic(fmt.Errorf("failed to decode request: %v", err)) 519 } 520 521 if err := n.state.DeleteVaultAccessors(index, req.Accessors); err != nil { 522 n.logger.Printf("[ERR] nomad.fsm: DeregisterVaultAccessor failed: %v", err) 523 return err 524 } 525 526 return nil 527 } 528 529 func (n *nomadFSM) Snapshot() (raft.FSMSnapshot, error) { 530 // Create a new snapshot 531 snap, err := n.state.Snapshot() 532 if err != nil { 533 return nil, err 534 } 535 536 ns := &nomadSnapshot{ 537 snap: snap, 538 timetable: n.timetable, 539 } 540 return ns, nil 541 } 542 543 func (n *nomadFSM) Restore(old io.ReadCloser) error { 544 defer old.Close() 545 546 // Create a new state store 547 newState, err := state.NewStateStore(n.logOutput) 548 if err != nil { 549 return err 550 } 551 552 // Start the state restore 553 restore, err := newState.Restore() 554 if err != nil { 555 return err 556 } 557 defer restore.Abort() 558 559 // Create a decoder 560 dec := codec.NewDecoder(old, structs.MsgpackHandle) 561 562 // Read in the header 563 var header snapshotHeader 564 if err := dec.Decode(&header); err != nil { 565 return err 566 } 567 568 // Populate the new state 569 msgType := make([]byte, 1) 570 for { 571 // Read the message type 572 _, err := old.Read(msgType) 573 if err == io.EOF { 574 break 575 } else if err != nil { 576 return err 577 } 578 579 // Decode 580 switch SnapshotType(msgType[0]) { 581 case TimeTableSnapshot: 582 if err := n.timetable.Deserialize(dec); err != nil { 583 return fmt.Errorf("time table deserialize failed: %v", err) 584 } 585 586 case NodeSnapshot: 587 node := new(structs.Node) 588 if err := dec.Decode(node); err != nil { 589 return err 590 } 591 if err := restore.NodeRestore(node); err != nil { 592 return err 593 } 594 595 case JobSnapshot: 596 job := new(structs.Job) 597 if err := dec.Decode(job); err != nil { 598 return err 599 } 600 601 // COMPAT: Remove in 0.5 602 // Empty maps and slices should be treated as nil to avoid 603 // un-intended destructive updates in scheduler since we use 604 // reflect.DeepEqual. Starting Nomad 0.4.1, job submission sanatizes 605 // the incoming job. 606 job.Canonicalize() 607 608 if err := restore.JobRestore(job); err != nil { 609 return err 610 } 611 612 case EvalSnapshot: 613 eval := new(structs.Evaluation) 614 if err := dec.Decode(eval); err != nil { 615 return err 616 } 617 if err := restore.EvalRestore(eval); err != nil { 618 return err 619 } 620 621 case AllocSnapshot: 622 alloc := new(structs.Allocation) 623 if err := dec.Decode(alloc); err != nil { 624 return err 625 } 626 if err := restore.AllocRestore(alloc); err != nil { 627 return err 628 } 629 630 case IndexSnapshot: 631 idx := new(state.IndexEntry) 632 if err := dec.Decode(idx); err != nil { 633 return err 634 } 635 if err := restore.IndexRestore(idx); err != nil { 636 return err 637 } 638 639 case PeriodicLaunchSnapshot: 640 launch := new(structs.PeriodicLaunch) 641 if err := dec.Decode(launch); err != nil { 642 return err 643 } 644 if err := restore.PeriodicLaunchRestore(launch); err != nil { 645 return err 646 } 647 648 case JobSummarySnapshot: 649 summary := new(structs.JobSummary) 650 if err := dec.Decode(summary); err != nil { 651 return err 652 } 653 if err := restore.JobSummaryRestore(summary); err != nil { 654 return err 655 } 656 657 case VaultAccessorSnapshot: 658 accessor := new(structs.VaultAccessor) 659 if err := dec.Decode(accessor); err != nil { 660 return err 661 } 662 if err := restore.VaultAccessorRestore(accessor); err != nil { 663 return err 664 } 665 666 default: 667 return fmt.Errorf("Unrecognized snapshot type: %v", msgType) 668 } 669 } 670 671 restore.Commit() 672 673 // Create Job Summaries 674 // COMPAT 0.4 -> 0.4.1 675 // We can remove this in 0.5. This exists so that the server creates job 676 // summaries if they were not present previously. When users upgrade to 0.5 677 // from 0.4.1, the snapshot will contain job summaries so it will be safe to 678 // remove this block. 679 index, err := newState.Index("job_summary") 680 if err != nil { 681 return fmt.Errorf("couldn't fetch index of job summary table: %v", err) 682 } 683 684 // If the index is 0 that means there is no job summary in the snapshot so 685 // we will have to create them 686 if index == 0 { 687 // query the latest index 688 latestIndex, err := newState.LatestIndex() 689 if err != nil { 690 return fmt.Errorf("unable to query latest index: %v", index) 691 } 692 if err := newState.ReconcileJobSummaries(latestIndex); err != nil { 693 return fmt.Errorf("error reconciling summaries: %v", err) 694 } 695 } 696 697 // External code might be calling State(), so we need to synchronize 698 // here to make sure we swap in the new state store atomically. 699 n.stateLock.Lock() 700 stateOld := n.state 701 n.state = newState 702 n.stateLock.Unlock() 703 704 // Signal that the old state store has been abandoned. This is required 705 // because we don't operate on it any more, we just throw it away, so 706 // blocking queries won't see any changes and need to be woken up. 707 stateOld.Abandon() 708 709 return nil 710 } 711 712 // reconcileSummaries re-calculates the queued allocations for every job that we 713 // created a Job Summary during the snap shot restore 714 func (n *nomadFSM) reconcileQueuedAllocations(index uint64) error { 715 // Get all the jobs 716 ws := memdb.NewWatchSet() 717 iter, err := n.state.Jobs(ws) 718 if err != nil { 719 return err 720 } 721 722 snap, err := n.state.Snapshot() 723 if err != nil { 724 return fmt.Errorf("unable to create snapshot: %v", err) 725 } 726 727 // Invoking the scheduler for every job so that we can populate the number 728 // of queued allocations for every job 729 for { 730 rawJob := iter.Next() 731 if rawJob == nil { 732 break 733 } 734 job := rawJob.(*structs.Job) 735 planner := &scheduler.Harness{ 736 State: &snap.StateStore, 737 } 738 // Create an eval and mark it as requiring annotations and insert that as well 739 eval := &structs.Evaluation{ 740 ID: structs.GenerateUUID(), 741 Priority: job.Priority, 742 Type: job.Type, 743 TriggeredBy: structs.EvalTriggerJobRegister, 744 JobID: job.ID, 745 JobModifyIndex: job.JobModifyIndex + 1, 746 Status: structs.EvalStatusPending, 747 AnnotatePlan: true, 748 } 749 750 // Create the scheduler and run it 751 sched, err := scheduler.NewScheduler(eval.Type, n.logger, snap, planner) 752 if err != nil { 753 return err 754 } 755 756 if err := sched.Process(eval); err != nil { 757 return err 758 } 759 760 // Get the job summary from the fsm state store 761 originalSummary, err := n.state.JobSummaryByID(ws, job.ID) 762 if err != nil { 763 return err 764 } 765 summary := originalSummary.Copy() 766 767 // Add the allocations scheduler has made to queued since these 768 // allocations are never getting placed until the scheduler is invoked 769 // with a real planner 770 if l := len(planner.Plans); l != 1 { 771 return fmt.Errorf("unexpected number of plans during restore %d. Please file an issue including the logs", l) 772 } 773 for _, allocations := range planner.Plans[0].NodeAllocation { 774 for _, allocation := range allocations { 775 tgSummary, ok := summary.Summary[allocation.TaskGroup] 776 if !ok { 777 return fmt.Errorf("task group %q not found while updating queued count", allocation.TaskGroup) 778 } 779 tgSummary.Queued += 1 780 summary.Summary[allocation.TaskGroup] = tgSummary 781 } 782 } 783 784 // Add the queued allocations attached to the evaluation to the queued 785 // counter of the job summary 786 if l := len(planner.Evals); l != 1 { 787 return fmt.Errorf("unexpected number of evals during restore %d. Please file an issue including the logs", l) 788 } 789 for tg, queued := range planner.Evals[0].QueuedAllocations { 790 tgSummary, ok := summary.Summary[tg] 791 if !ok { 792 return fmt.Errorf("task group %q not found while updating queued count", tg) 793 } 794 795 // We add instead of setting here because we want to take into 796 // consideration what the scheduler with a mock planner thinks it 797 // placed. Those should be counted as queued as well 798 tgSummary.Queued += queued 799 summary.Summary[tg] = tgSummary 800 } 801 802 if !reflect.DeepEqual(summary, originalSummary) { 803 summary.ModifyIndex = index 804 if err := n.state.UpsertJobSummary(index, summary); err != nil { 805 return err 806 } 807 } 808 } 809 return nil 810 } 811 812 func (s *nomadSnapshot) Persist(sink raft.SnapshotSink) error { 813 defer metrics.MeasureSince([]string{"nomad", "fsm", "persist"}, time.Now()) 814 // Register the nodes 815 encoder := codec.NewEncoder(sink, structs.MsgpackHandle) 816 817 // Write the header 818 header := snapshotHeader{} 819 if err := encoder.Encode(&header); err != nil { 820 sink.Cancel() 821 return err 822 } 823 824 // Write the time table 825 sink.Write([]byte{byte(TimeTableSnapshot)}) 826 if err := s.timetable.Serialize(encoder); err != nil { 827 sink.Cancel() 828 return err 829 } 830 831 // Write all the data out 832 if err := s.persistIndexes(sink, encoder); err != nil { 833 sink.Cancel() 834 return err 835 } 836 if err := s.persistNodes(sink, encoder); err != nil { 837 sink.Cancel() 838 return err 839 } 840 if err := s.persistJobs(sink, encoder); err != nil { 841 sink.Cancel() 842 return err 843 } 844 if err := s.persistEvals(sink, encoder); err != nil { 845 sink.Cancel() 846 return err 847 } 848 if err := s.persistAllocs(sink, encoder); err != nil { 849 sink.Cancel() 850 return err 851 } 852 if err := s.persistPeriodicLaunches(sink, encoder); err != nil { 853 sink.Cancel() 854 return err 855 } 856 if err := s.persistJobSummaries(sink, encoder); err != nil { 857 sink.Cancel() 858 return err 859 } 860 if err := s.persistVaultAccessors(sink, encoder); err != nil { 861 sink.Cancel() 862 return err 863 } 864 return nil 865 } 866 867 func (s *nomadSnapshot) persistIndexes(sink raft.SnapshotSink, 868 encoder *codec.Encoder) error { 869 // Get all the indexes 870 iter, err := s.snap.Indexes() 871 if err != nil { 872 return err 873 } 874 875 for { 876 // Get the next item 877 raw := iter.Next() 878 if raw == nil { 879 break 880 } 881 882 // Prepare the request struct 883 idx := raw.(*state.IndexEntry) 884 885 // Write out a node registration 886 sink.Write([]byte{byte(IndexSnapshot)}) 887 if err := encoder.Encode(idx); err != nil { 888 return err 889 } 890 } 891 return nil 892 } 893 894 func (s *nomadSnapshot) persistNodes(sink raft.SnapshotSink, 895 encoder *codec.Encoder) error { 896 // Get all the nodes 897 ws := memdb.NewWatchSet() 898 nodes, err := s.snap.Nodes(ws) 899 if err != nil { 900 return err 901 } 902 903 for { 904 // Get the next item 905 raw := nodes.Next() 906 if raw == nil { 907 break 908 } 909 910 // Prepare the request struct 911 node := raw.(*structs.Node) 912 913 // Write out a node registration 914 sink.Write([]byte{byte(NodeSnapshot)}) 915 if err := encoder.Encode(node); err != nil { 916 return err 917 } 918 } 919 return nil 920 } 921 922 func (s *nomadSnapshot) persistJobs(sink raft.SnapshotSink, 923 encoder *codec.Encoder) error { 924 // Get all the jobs 925 ws := memdb.NewWatchSet() 926 jobs, err := s.snap.Jobs(ws) 927 if err != nil { 928 return err 929 } 930 931 for { 932 // Get the next item 933 raw := jobs.Next() 934 if raw == nil { 935 break 936 } 937 938 // Prepare the request struct 939 job := raw.(*structs.Job) 940 941 // Write out a job registration 942 sink.Write([]byte{byte(JobSnapshot)}) 943 if err := encoder.Encode(job); err != nil { 944 return err 945 } 946 } 947 return nil 948 } 949 950 func (s *nomadSnapshot) persistEvals(sink raft.SnapshotSink, 951 encoder *codec.Encoder) error { 952 // Get all the evaluations 953 ws := memdb.NewWatchSet() 954 evals, err := s.snap.Evals(ws) 955 if err != nil { 956 return err 957 } 958 959 for { 960 // Get the next item 961 raw := evals.Next() 962 if raw == nil { 963 break 964 } 965 966 // Prepare the request struct 967 eval := raw.(*structs.Evaluation) 968 969 // Write out the evaluation 970 sink.Write([]byte{byte(EvalSnapshot)}) 971 if err := encoder.Encode(eval); err != nil { 972 return err 973 } 974 } 975 return nil 976 } 977 978 func (s *nomadSnapshot) persistAllocs(sink raft.SnapshotSink, 979 encoder *codec.Encoder) error { 980 // Get all the allocations 981 ws := memdb.NewWatchSet() 982 allocs, err := s.snap.Allocs(ws) 983 if err != nil { 984 return err 985 } 986 987 for { 988 // Get the next item 989 raw := allocs.Next() 990 if raw == nil { 991 break 992 } 993 994 // Prepare the request struct 995 alloc := raw.(*structs.Allocation) 996 997 // Write out the evaluation 998 sink.Write([]byte{byte(AllocSnapshot)}) 999 if err := encoder.Encode(alloc); err != nil { 1000 return err 1001 } 1002 } 1003 return nil 1004 } 1005 1006 func (s *nomadSnapshot) persistPeriodicLaunches(sink raft.SnapshotSink, 1007 encoder *codec.Encoder) error { 1008 // Get all the jobs 1009 ws := memdb.NewWatchSet() 1010 launches, err := s.snap.PeriodicLaunches(ws) 1011 if err != nil { 1012 return err 1013 } 1014 1015 for { 1016 // Get the next item 1017 raw := launches.Next() 1018 if raw == nil { 1019 break 1020 } 1021 1022 // Prepare the request struct 1023 launch := raw.(*structs.PeriodicLaunch) 1024 1025 // Write out a job registration 1026 sink.Write([]byte{byte(PeriodicLaunchSnapshot)}) 1027 if err := encoder.Encode(launch); err != nil { 1028 return err 1029 } 1030 } 1031 return nil 1032 } 1033 1034 func (s *nomadSnapshot) persistJobSummaries(sink raft.SnapshotSink, 1035 encoder *codec.Encoder) error { 1036 1037 ws := memdb.NewWatchSet() 1038 summaries, err := s.snap.JobSummaries(ws) 1039 if err != nil { 1040 return err 1041 } 1042 1043 for { 1044 raw := summaries.Next() 1045 if raw == nil { 1046 break 1047 } 1048 1049 jobSummary := raw.(*structs.JobSummary) 1050 1051 sink.Write([]byte{byte(JobSummarySnapshot)}) 1052 if err := encoder.Encode(jobSummary); err != nil { 1053 return err 1054 } 1055 } 1056 return nil 1057 } 1058 1059 func (s *nomadSnapshot) persistVaultAccessors(sink raft.SnapshotSink, 1060 encoder *codec.Encoder) error { 1061 1062 ws := memdb.NewWatchSet() 1063 accessors, err := s.snap.VaultAccessors(ws) 1064 if err != nil { 1065 return err 1066 } 1067 1068 for { 1069 raw := accessors.Next() 1070 if raw == nil { 1071 break 1072 } 1073 1074 accessor := raw.(*structs.VaultAccessor) 1075 1076 sink.Write([]byte{byte(VaultAccessorSnapshot)}) 1077 if err := encoder.Encode(accessor); err != nil { 1078 return err 1079 } 1080 } 1081 return nil 1082 } 1083 1084 // Release is a no-op, as we just need to GC the pointer 1085 // to the state store snapshot. There is nothing to explicitly 1086 // cleanup. 1087 func (s *nomadSnapshot) Release() {}