package nomad

import (
	"fmt"
	"io"
	"log"
	"reflect"
	"sync"
	"time"

	"github.com/armon/go-metrics"
	memdb "github.com/hashicorp/go-memdb"
	"github.com/hashicorp/nomad/nomad/state"
	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/hashicorp/nomad/scheduler"
	"github.com/hashicorp/raft"
	"github.com/ugorji/go/codec"
)

const (
	// timeTableGranularity is the granularity of index to time tracking
	timeTableGranularity = 5 * time.Minute

	// timeTableLimit is the maximum limit of our tracking
	timeTableLimit = 72 * time.Hour
)

// SnapshotType is prefixed to a record in the FSM snapshot
// so that we can determine the type for restore
type SnapshotType byte

const (
	// NOTE: these ordinal values are written into persisted snapshots, so
	// new snapshot types must only ever be appended to the end of this
	// list; reordering or inserting would misinterpret old snapshots.
	NodeSnapshot SnapshotType = iota
	JobSnapshot
	IndexSnapshot
	EvalSnapshot
	AllocSnapshot
	TimeTableSnapshot
	PeriodicLaunchSnapshot
	JobSummarySnapshot
	VaultAccessorSnapshot
	JobVersionSnapshot
	DeploymentSnapshot
)

// nomadFSM implements a finite state machine that is used
// along with Raft to provide strong consistency. We implement
// this outside the Server to avoid exposing this outside the package.
type nomadFSM struct {
	// evalBroker receives evaluations that become runnable after an
	// EvalUpdate is applied.
	evalBroker *EvalBroker

	// blockedEvals tracks evaluations that are blocked waiting for
	// capacity; the FSM unblocks them as nodes/allocations change state.
	blockedEvals *BlockedEvals

	// periodicDispatcher tracks periodic jobs registered through the FSM.
	periodicDispatcher *PeriodicDispatch

	logOutput io.Writer
	logger    *log.Logger
	state     *state.StateStore

	// timetable maps raft indexes to wall-clock time, witnessed on every
	// Apply and persisted/restored with snapshots.
	timetable *TimeTable

	// stateLock is only used to protect outside callers to State() from
	// racing with Restore(), which is called by Raft (it puts in a totally
	// new state store). Everything internal here is synchronized by the
	// Raft side, so doesn't need to lock this.
	stateLock sync.RWMutex
}
68 type nomadSnapshot struct { 69 snap *state.StateSnapshot 70 timetable *TimeTable 71 } 72 73 // snapshotHeader is the first entry in our snapshot 74 type snapshotHeader struct { 75 } 76 77 // NewFSMPath is used to construct a new FSM with a blank state 78 func NewFSM(evalBroker *EvalBroker, periodic *PeriodicDispatch, 79 blocked *BlockedEvals, logOutput io.Writer) (*nomadFSM, error) { 80 // Create a state store 81 state, err := state.NewStateStore(logOutput) 82 if err != nil { 83 return nil, err 84 } 85 86 fsm := &nomadFSM{ 87 evalBroker: evalBroker, 88 periodicDispatcher: periodic, 89 blockedEvals: blocked, 90 logOutput: logOutput, 91 logger: log.New(logOutput, "", log.LstdFlags), 92 state: state, 93 timetable: NewTimeTable(timeTableGranularity, timeTableLimit), 94 } 95 return fsm, nil 96 } 97 98 // Close is used to cleanup resources associated with the FSM 99 func (n *nomadFSM) Close() error { 100 return nil 101 } 102 103 // State is used to return a handle to the current state 104 func (n *nomadFSM) State() *state.StateStore { 105 n.stateLock.RLock() 106 defer n.stateLock.RUnlock() 107 return n.state 108 } 109 110 // TimeTable returns the time table of transactions 111 func (n *nomadFSM) TimeTable() *TimeTable { 112 return n.timetable 113 } 114 115 func (n *nomadFSM) Apply(log *raft.Log) interface{} { 116 buf := log.Data 117 msgType := structs.MessageType(buf[0]) 118 119 // Witness this write 120 n.timetable.Witness(log.Index, time.Now().UTC()) 121 122 // Check if this message type should be ignored when unknown. This is 123 // used so that new commands can be added with developer control if older 124 // versions can safely ignore the command, or if they should crash. 
125 ignoreUnknown := false 126 if msgType&structs.IgnoreUnknownTypeFlag == structs.IgnoreUnknownTypeFlag { 127 msgType &= ^structs.IgnoreUnknownTypeFlag 128 ignoreUnknown = true 129 } 130 131 switch msgType { 132 case structs.NodeRegisterRequestType: 133 return n.applyUpsertNode(buf[1:], log.Index) 134 case structs.NodeDeregisterRequestType: 135 return n.applyDeregisterNode(buf[1:], log.Index) 136 case structs.NodeUpdateStatusRequestType: 137 return n.applyStatusUpdate(buf[1:], log.Index) 138 case structs.NodeUpdateDrainRequestType: 139 return n.applyDrainUpdate(buf[1:], log.Index) 140 case structs.JobRegisterRequestType: 141 return n.applyUpsertJob(buf[1:], log.Index) 142 case structs.JobDeregisterRequestType: 143 return n.applyDeregisterJob(buf[1:], log.Index) 144 case structs.EvalUpdateRequestType: 145 return n.applyUpdateEval(buf[1:], log.Index) 146 case structs.EvalDeleteRequestType: 147 return n.applyDeleteEval(buf[1:], log.Index) 148 case structs.AllocUpdateRequestType: 149 return n.applyAllocUpdate(buf[1:], log.Index) 150 case structs.AllocClientUpdateRequestType: 151 return n.applyAllocClientUpdate(buf[1:], log.Index) 152 case structs.ReconcileJobSummariesRequestType: 153 return n.applyReconcileSummaries(buf[1:], log.Index) 154 case structs.VaultAccessorRegisterRequestType: 155 return n.applyUpsertVaultAccessor(buf[1:], log.Index) 156 case structs.VaultAccessorDegisterRequestType: 157 return n.applyDeregisterVaultAccessor(buf[1:], log.Index) 158 case structs.ApplyPlanResultsRequestType: 159 return n.applyPlanResults(buf[1:], log.Index) 160 default: 161 if ignoreUnknown { 162 n.logger.Printf("[WARN] nomad.fsm: ignoring unknown message type (%d), upgrade to newer version", msgType) 163 return nil 164 } else { 165 panic(fmt.Errorf("failed to apply request: %#v", buf)) 166 } 167 } 168 } 169 170 func (n *nomadFSM) applyUpsertNode(buf []byte, index uint64) interface{} { 171 defer metrics.MeasureSince([]string{"nomad", "fsm", "register_node"}, time.Now()) 172 var 
req structs.NodeRegisterRequest 173 if err := structs.Decode(buf, &req); err != nil { 174 panic(fmt.Errorf("failed to decode request: %v", err)) 175 } 176 177 if err := n.state.UpsertNode(index, req.Node); err != nil { 178 n.logger.Printf("[ERR] nomad.fsm: UpsertNode failed: %v", err) 179 return err 180 } 181 182 // Unblock evals for the nodes computed node class if it is in a ready 183 // state. 184 if req.Node.Status == structs.NodeStatusReady { 185 n.blockedEvals.Unblock(req.Node.ComputedClass, index) 186 } 187 188 return nil 189 } 190 191 func (n *nomadFSM) applyDeregisterNode(buf []byte, index uint64) interface{} { 192 defer metrics.MeasureSince([]string{"nomad", "fsm", "deregister_node"}, time.Now()) 193 var req structs.NodeDeregisterRequest 194 if err := structs.Decode(buf, &req); err != nil { 195 panic(fmt.Errorf("failed to decode request: %v", err)) 196 } 197 198 if err := n.state.DeleteNode(index, req.NodeID); err != nil { 199 n.logger.Printf("[ERR] nomad.fsm: DeleteNode failed: %v", err) 200 return err 201 } 202 return nil 203 } 204 205 func (n *nomadFSM) applyStatusUpdate(buf []byte, index uint64) interface{} { 206 defer metrics.MeasureSince([]string{"nomad", "fsm", "node_status_update"}, time.Now()) 207 var req structs.NodeUpdateStatusRequest 208 if err := structs.Decode(buf, &req); err != nil { 209 panic(fmt.Errorf("failed to decode request: %v", err)) 210 } 211 212 if err := n.state.UpdateNodeStatus(index, req.NodeID, req.Status); err != nil { 213 n.logger.Printf("[ERR] nomad.fsm: UpdateNodeStatus failed: %v", err) 214 return err 215 } 216 217 // Unblock evals for the nodes computed node class if it is in a ready 218 // state. 
219 if req.Status == structs.NodeStatusReady { 220 ws := memdb.NewWatchSet() 221 node, err := n.state.NodeByID(ws, req.NodeID) 222 if err != nil { 223 n.logger.Printf("[ERR] nomad.fsm: looking up node %q failed: %v", req.NodeID, err) 224 return err 225 226 } 227 n.blockedEvals.Unblock(node.ComputedClass, index) 228 } 229 230 return nil 231 } 232 233 func (n *nomadFSM) applyDrainUpdate(buf []byte, index uint64) interface{} { 234 defer metrics.MeasureSince([]string{"nomad", "fsm", "node_drain_update"}, time.Now()) 235 var req structs.NodeUpdateDrainRequest 236 if err := structs.Decode(buf, &req); err != nil { 237 panic(fmt.Errorf("failed to decode request: %v", err)) 238 } 239 240 if err := n.state.UpdateNodeDrain(index, req.NodeID, req.Drain); err != nil { 241 n.logger.Printf("[ERR] nomad.fsm: UpdateNodeDrain failed: %v", err) 242 return err 243 } 244 return nil 245 } 246 247 func (n *nomadFSM) applyUpsertJob(buf []byte, index uint64) interface{} { 248 defer metrics.MeasureSince([]string{"nomad", "fsm", "register_job"}, time.Now()) 249 var req structs.JobRegisterRequest 250 if err := structs.Decode(buf, &req); err != nil { 251 panic(fmt.Errorf("failed to decode request: %v", err)) 252 } 253 254 // COMPAT: Remove in 0.6 255 // Empty maps and slices should be treated as nil to avoid 256 // un-intended destructive updates in scheduler since we use 257 // reflect.DeepEqual. Starting Nomad 0.4.1, job submission sanatizes 258 // the incoming job. 259 req.Job.Canonicalize() 260 261 if err := n.state.UpsertJob(index, req.Job); err != nil { 262 n.logger.Printf("[ERR] nomad.fsm: UpsertJob failed: %v", err) 263 return err 264 } 265 266 // We always add the job to the periodic dispatcher because there is the 267 // possibility that the periodic spec was removed and then we should stop 268 // tracking it. 
269 if err := n.periodicDispatcher.Add(req.Job); err != nil { 270 n.logger.Printf("[ERR] nomad.fsm: periodicDispatcher.Add failed: %v", err) 271 return err 272 } 273 274 // Create a watch set 275 ws := memdb.NewWatchSet() 276 277 // If it is periodic, record the time it was inserted. This is necessary for 278 // recovering during leader election. It is possible that from the time it 279 // is added to when it was suppose to launch, leader election occurs and the 280 // job was not launched. In this case, we use the insertion time to 281 // determine if a launch was missed. 282 if req.Job.IsPeriodic() { 283 prevLaunch, err := n.state.PeriodicLaunchByID(ws, req.Job.ID) 284 if err != nil { 285 n.logger.Printf("[ERR] nomad.fsm: PeriodicLaunchByID failed: %v", err) 286 return err 287 } 288 289 // Record the insertion time as a launch. We overload the launch table 290 // such that the first entry is the insertion time. 291 if prevLaunch == nil { 292 launch := &structs.PeriodicLaunch{ID: req.Job.ID, Launch: time.Now()} 293 if err := n.state.UpsertPeriodicLaunch(index, launch); err != nil { 294 n.logger.Printf("[ERR] nomad.fsm: UpsertPeriodicLaunch failed: %v", err) 295 return err 296 } 297 } 298 } 299 300 // Check if the parent job is periodic and mark the launch time. 301 parentID := req.Job.ParentID 302 if parentID != "" { 303 parent, err := n.state.JobByID(ws, parentID) 304 if err != nil { 305 n.logger.Printf("[ERR] nomad.fsm: JobByID(%v) lookup for parent failed: %v", parentID, err) 306 return err 307 } else if parent == nil { 308 // The parent has been deregistered. 
309 return nil 310 } 311 312 if parent.IsPeriodic() && !parent.IsParameterized() { 313 t, err := n.periodicDispatcher.LaunchTime(req.Job.ID) 314 if err != nil { 315 n.logger.Printf("[ERR] nomad.fsm: LaunchTime(%v) failed: %v", req.Job.ID, err) 316 return err 317 } 318 319 launch := &structs.PeriodicLaunch{ID: parentID, Launch: t} 320 if err := n.state.UpsertPeriodicLaunch(index, launch); err != nil { 321 n.logger.Printf("[ERR] nomad.fsm: UpsertPeriodicLaunch failed: %v", err) 322 return err 323 } 324 } 325 } 326 327 return nil 328 } 329 330 func (n *nomadFSM) applyDeregisterJob(buf []byte, index uint64) interface{} { 331 defer metrics.MeasureSince([]string{"nomad", "fsm", "deregister_job"}, time.Now()) 332 var req structs.JobDeregisterRequest 333 if err := structs.Decode(buf, &req); err != nil { 334 panic(fmt.Errorf("failed to decode request: %v", err)) 335 } 336 337 // If it is periodic remove it from the dispatcher 338 if err := n.periodicDispatcher.Remove(req.JobID); err != nil { 339 n.logger.Printf("[ERR] nomad.fsm: periodicDispatcher.Remove failed: %v", err) 340 return err 341 } 342 343 if req.Purge { 344 if err := n.state.DeleteJob(index, req.JobID); err != nil { 345 n.logger.Printf("[ERR] nomad.fsm: DeleteJob failed: %v", err) 346 return err 347 } 348 349 // We always delete from the periodic launch table because it is possible that 350 // the job was updated to be non-perioidic, thus checking if it is periodic 351 // doesn't ensure we clean it up properly. 352 n.state.DeletePeriodicLaunch(index, req.JobID) 353 } else { 354 // Get the current job and mark it as stopped and re-insert it. 
355 ws := memdb.NewWatchSet() 356 current, err := n.state.JobByID(ws, req.JobID) 357 if err != nil { 358 n.logger.Printf("[ERR] nomad.fsm: JobByID lookup failed: %v", err) 359 return err 360 } 361 362 if current == nil { 363 return fmt.Errorf("job %q doesn't exist to be deregistered", req.JobID) 364 } 365 366 stopped := current.Copy() 367 stopped.Stop = true 368 369 if err := n.state.UpsertJob(index, stopped); err != nil { 370 n.logger.Printf("[ERR] nomad.fsm: UpsertJob failed: %v", err) 371 return err 372 } 373 } 374 375 return nil 376 } 377 378 func (n *nomadFSM) applyUpdateEval(buf []byte, index uint64) interface{} { 379 defer metrics.MeasureSince([]string{"nomad", "fsm", "update_eval"}, time.Now()) 380 var req structs.EvalUpdateRequest 381 if err := structs.Decode(buf, &req); err != nil { 382 panic(fmt.Errorf("failed to decode request: %v", err)) 383 } 384 385 if err := n.state.UpsertEvals(index, req.Evals); err != nil { 386 n.logger.Printf("[ERR] nomad.fsm: UpsertEvals failed: %v", err) 387 return err 388 } 389 390 for _, eval := range req.Evals { 391 if eval.ShouldEnqueue() { 392 n.evalBroker.Enqueue(eval) 393 } else if eval.ShouldBlock() { 394 n.blockedEvals.Block(eval) 395 } else if eval.Status == structs.EvalStatusComplete && 396 len(eval.FailedTGAllocs) == 0 { 397 // If we have a successful evaluation for a node, untrack any 398 // blocked evaluation 399 n.blockedEvals.Untrack(eval.JobID) 400 } 401 } 402 return nil 403 } 404 405 func (n *nomadFSM) applyDeleteEval(buf []byte, index uint64) interface{} { 406 defer metrics.MeasureSince([]string{"nomad", "fsm", "delete_eval"}, time.Now()) 407 var req structs.EvalDeleteRequest 408 if err := structs.Decode(buf, &req); err != nil { 409 panic(fmt.Errorf("failed to decode request: %v", err)) 410 } 411 412 if err := n.state.DeleteEval(index, req.Evals, req.Allocs); err != nil { 413 n.logger.Printf("[ERR] nomad.fsm: DeleteEval failed: %v", err) 414 return err 415 } 416 return nil 417 } 418 419 func (n *nomadFSM) 
applyAllocUpdate(buf []byte, index uint64) interface{} { 420 defer metrics.MeasureSince([]string{"nomad", "fsm", "alloc_update"}, time.Now()) 421 var req structs.AllocUpdateRequest 422 if err := structs.Decode(buf, &req); err != nil { 423 panic(fmt.Errorf("failed to decode request: %v", err)) 424 } 425 426 // Attach the job to all the allocations. It is pulled out in the 427 // payload to avoid the redundancy of encoding, but should be denormalized 428 // prior to being inserted into MemDB. 429 structs.DenormalizeAllocationJobs(req.Job, req.Alloc) 430 431 // Calculate the total resources of allocations. It is pulled out in the 432 // payload to avoid encoding something that can be computed, but should be 433 // denormalized prior to being inserted into MemDB. 434 for _, alloc := range req.Alloc { 435 if alloc.Resources != nil { 436 // COMPAT 0.4.1 -> 0.5 437 // Set the shared resources for allocations which don't have them 438 if alloc.SharedResources == nil { 439 alloc.SharedResources = &structs.Resources{ 440 DiskMB: alloc.Resources.DiskMB, 441 } 442 } 443 444 continue 445 } 446 447 alloc.Resources = new(structs.Resources) 448 for _, task := range alloc.TaskResources { 449 alloc.Resources.Add(task) 450 } 451 452 // Add the shared resources 453 alloc.Resources.Add(alloc.SharedResources) 454 } 455 456 if err := n.state.UpsertAllocs(index, req.Alloc); err != nil { 457 n.logger.Printf("[ERR] nomad.fsm: UpsertAllocs failed: %v", err) 458 return err 459 } 460 return nil 461 } 462 463 func (n *nomadFSM) applyAllocClientUpdate(buf []byte, index uint64) interface{} { 464 defer metrics.MeasureSince([]string{"nomad", "fsm", "alloc_client_update"}, time.Now()) 465 var req structs.AllocUpdateRequest 466 if err := structs.Decode(buf, &req); err != nil { 467 panic(fmt.Errorf("failed to decode request: %v", err)) 468 } 469 if len(req.Alloc) == 0 { 470 return nil 471 } 472 473 // Create a watch set 474 ws := memdb.NewWatchSet() 475 476 // Updating the allocs with the job id and 
task group name 477 for _, alloc := range req.Alloc { 478 if existing, _ := n.state.AllocByID(ws, alloc.ID); existing != nil { 479 alloc.JobID = existing.JobID 480 alloc.TaskGroup = existing.TaskGroup 481 } 482 } 483 484 // Update all the client allocations 485 if err := n.state.UpdateAllocsFromClient(index, req.Alloc); err != nil { 486 n.logger.Printf("[ERR] nomad.fsm: UpdateAllocFromClient failed: %v", err) 487 return err 488 } 489 490 // Unblock evals for the nodes computed node class if the client has 491 // finished running an allocation. 492 for _, alloc := range req.Alloc { 493 if alloc.ClientStatus == structs.AllocClientStatusComplete || 494 alloc.ClientStatus == structs.AllocClientStatusFailed { 495 nodeID := alloc.NodeID 496 node, err := n.state.NodeByID(ws, nodeID) 497 if err != nil || node == nil { 498 n.logger.Printf("[ERR] nomad.fsm: looking up node %q failed: %v", nodeID, err) 499 return err 500 501 } 502 n.blockedEvals.Unblock(node.ComputedClass, index) 503 } 504 } 505 506 return nil 507 } 508 509 // applyReconcileSummaries reconciles summaries for all the jobs 510 func (n *nomadFSM) applyReconcileSummaries(buf []byte, index uint64) interface{} { 511 if err := n.state.ReconcileJobSummaries(index); err != nil { 512 return err 513 } 514 return n.reconcileQueuedAllocations(index) 515 } 516 517 // applyUpsertVaultAccessor stores the Vault accessors for a given allocation 518 // and task 519 func (n *nomadFSM) applyUpsertVaultAccessor(buf []byte, index uint64) interface{} { 520 defer metrics.MeasureSince([]string{"nomad", "fsm", "upsert_vault_accessor"}, time.Now()) 521 var req structs.VaultAccessorsRequest 522 if err := structs.Decode(buf, &req); err != nil { 523 panic(fmt.Errorf("failed to decode request: %v", err)) 524 } 525 526 if err := n.state.UpsertVaultAccessor(index, req.Accessors); err != nil { 527 n.logger.Printf("[ERR] nomad.fsm: UpsertVaultAccessor failed: %v", err) 528 return err 529 } 530 531 return nil 532 } 533 534 // 
applyDeregisterVaultAccessor deregisters a set of Vault accessors 535 func (n *nomadFSM) applyDeregisterVaultAccessor(buf []byte, index uint64) interface{} { 536 defer metrics.MeasureSince([]string{"nomad", "fsm", "deregister_vault_accessor"}, time.Now()) 537 var req structs.VaultAccessorsRequest 538 if err := structs.Decode(buf, &req); err != nil { 539 panic(fmt.Errorf("failed to decode request: %v", err)) 540 } 541 542 if err := n.state.DeleteVaultAccessors(index, req.Accessors); err != nil { 543 n.logger.Printf("[ERR] nomad.fsm: DeregisterVaultAccessor failed: %v", err) 544 return err 545 } 546 547 return nil 548 } 549 550 // applyPlanApply applies the results of a plan application 551 func (n *nomadFSM) applyPlanResults(buf []byte, index uint64) interface{} { 552 defer metrics.MeasureSince([]string{"nomad", "fsm", "apply_plan_results"}, time.Now()) 553 var req structs.ApplyPlanResultsRequest 554 if err := structs.Decode(buf, &req); err != nil { 555 panic(fmt.Errorf("failed to decode request: %v", err)) 556 } 557 558 if err := n.state.UpsertPlanResults(index, &req); err != nil { 559 n.logger.Printf("[ERR] nomad.fsm: ApplyPlan failed: %v", err) 560 return err 561 } 562 563 return nil 564 } 565 566 func (n *nomadFSM) Snapshot() (raft.FSMSnapshot, error) { 567 // Create a new snapshot 568 snap, err := n.state.Snapshot() 569 if err != nil { 570 return nil, err 571 } 572 573 ns := &nomadSnapshot{ 574 snap: snap, 575 timetable: n.timetable, 576 } 577 return ns, nil 578 } 579 580 func (n *nomadFSM) Restore(old io.ReadCloser) error { 581 defer old.Close() 582 583 // Create a new state store 584 newState, err := state.NewStateStore(n.logOutput) 585 if err != nil { 586 return err 587 } 588 589 // Start the state restore 590 restore, err := newState.Restore() 591 if err != nil { 592 return err 593 } 594 defer restore.Abort() 595 596 // Create a decoder 597 dec := codec.NewDecoder(old, structs.MsgpackHandle) 598 599 // Read in the header 600 var header snapshotHeader 601 if 
err := dec.Decode(&header); err != nil { 602 return err 603 } 604 605 // Populate the new state 606 msgType := make([]byte, 1) 607 for { 608 // Read the message type 609 _, err := old.Read(msgType) 610 if err == io.EOF { 611 break 612 } else if err != nil { 613 return err 614 } 615 616 // Decode 617 switch SnapshotType(msgType[0]) { 618 case TimeTableSnapshot: 619 if err := n.timetable.Deserialize(dec); err != nil { 620 return fmt.Errorf("time table deserialize failed: %v", err) 621 } 622 623 case NodeSnapshot: 624 node := new(structs.Node) 625 if err := dec.Decode(node); err != nil { 626 return err 627 } 628 if err := restore.NodeRestore(node); err != nil { 629 return err 630 } 631 632 case JobSnapshot: 633 job := new(structs.Job) 634 if err := dec.Decode(job); err != nil { 635 return err 636 } 637 638 // COMPAT: Remove in 0.5 639 // Empty maps and slices should be treated as nil to avoid 640 // un-intended destructive updates in scheduler since we use 641 // reflect.DeepEqual. Starting Nomad 0.4.1, job submission sanatizes 642 // the incoming job. 
643 job.Canonicalize() 644 645 if err := restore.JobRestore(job); err != nil { 646 return err 647 } 648 649 case EvalSnapshot: 650 eval := new(structs.Evaluation) 651 if err := dec.Decode(eval); err != nil { 652 return err 653 } 654 if err := restore.EvalRestore(eval); err != nil { 655 return err 656 } 657 658 case AllocSnapshot: 659 alloc := new(structs.Allocation) 660 if err := dec.Decode(alloc); err != nil { 661 return err 662 } 663 if err := restore.AllocRestore(alloc); err != nil { 664 return err 665 } 666 667 case IndexSnapshot: 668 idx := new(state.IndexEntry) 669 if err := dec.Decode(idx); err != nil { 670 return err 671 } 672 if err := restore.IndexRestore(idx); err != nil { 673 return err 674 } 675 676 case PeriodicLaunchSnapshot: 677 launch := new(structs.PeriodicLaunch) 678 if err := dec.Decode(launch); err != nil { 679 return err 680 } 681 if err := restore.PeriodicLaunchRestore(launch); err != nil { 682 return err 683 } 684 685 case JobSummarySnapshot: 686 summary := new(structs.JobSummary) 687 if err := dec.Decode(summary); err != nil { 688 return err 689 } 690 if err := restore.JobSummaryRestore(summary); err != nil { 691 return err 692 } 693 694 case VaultAccessorSnapshot: 695 accessor := new(structs.VaultAccessor) 696 if err := dec.Decode(accessor); err != nil { 697 return err 698 } 699 if err := restore.VaultAccessorRestore(accessor); err != nil { 700 return err 701 } 702 703 case JobVersionSnapshot: 704 version := new(structs.Job) 705 if err := dec.Decode(version); err != nil { 706 return err 707 } 708 if err := restore.JobVersionRestore(version); err != nil { 709 return err 710 } 711 712 case DeploymentSnapshot: 713 deployment := new(structs.Deployment) 714 if err := dec.Decode(deployment); err != nil { 715 return err 716 } 717 if err := restore.DeploymentRestore(deployment); err != nil { 718 return err 719 } 720 721 default: 722 return fmt.Errorf("Unrecognized snapshot type: %v", msgType) 723 } 724 } 725 726 restore.Commit() 727 728 // Create 
Job Summaries 729 // COMPAT 0.4 -> 0.4.1 730 // We can remove this in 0.5. This exists so that the server creates job 731 // summaries if they were not present previously. When users upgrade to 0.5 732 // from 0.4.1, the snapshot will contain job summaries so it will be safe to 733 // remove this block. 734 index, err := newState.Index("job_summary") 735 if err != nil { 736 return fmt.Errorf("couldn't fetch index of job summary table: %v", err) 737 } 738 739 // If the index is 0 that means there is no job summary in the snapshot so 740 // we will have to create them 741 if index == 0 { 742 // query the latest index 743 latestIndex, err := newState.LatestIndex() 744 if err != nil { 745 return fmt.Errorf("unable to query latest index: %v", index) 746 } 747 if err := newState.ReconcileJobSummaries(latestIndex); err != nil { 748 return fmt.Errorf("error reconciling summaries: %v", err) 749 } 750 } 751 752 // External code might be calling State(), so we need to synchronize 753 // here to make sure we swap in the new state store atomically. 754 n.stateLock.Lock() 755 stateOld := n.state 756 n.state = newState 757 n.stateLock.Unlock() 758 759 // Signal that the old state store has been abandoned. This is required 760 // because we don't operate on it any more, we just throw it away, so 761 // blocking queries won't see any changes and need to be woken up. 
762 stateOld.Abandon() 763 764 return nil 765 } 766 767 // reconcileSummaries re-calculates the queued allocations for every job that we 768 // created a Job Summary during the snap shot restore 769 func (n *nomadFSM) reconcileQueuedAllocations(index uint64) error { 770 // Get all the jobs 771 ws := memdb.NewWatchSet() 772 iter, err := n.state.Jobs(ws) 773 if err != nil { 774 return err 775 } 776 777 snap, err := n.state.Snapshot() 778 if err != nil { 779 return fmt.Errorf("unable to create snapshot: %v", err) 780 } 781 782 // Invoking the scheduler for every job so that we can populate the number 783 // of queued allocations for every job 784 for { 785 rawJob := iter.Next() 786 if rawJob == nil { 787 break 788 } 789 job := rawJob.(*structs.Job) 790 planner := &scheduler.Harness{ 791 State: &snap.StateStore, 792 } 793 // Create an eval and mark it as requiring annotations and insert that as well 794 eval := &structs.Evaluation{ 795 ID: structs.GenerateUUID(), 796 Priority: job.Priority, 797 Type: job.Type, 798 TriggeredBy: structs.EvalTriggerJobRegister, 799 JobID: job.ID, 800 JobModifyIndex: job.JobModifyIndex + 1, 801 Status: structs.EvalStatusPending, 802 AnnotatePlan: true, 803 } 804 805 // Create the scheduler and run it 806 sched, err := scheduler.NewScheduler(eval.Type, n.logger, snap, planner) 807 if err != nil { 808 return err 809 } 810 811 if err := sched.Process(eval); err != nil { 812 return err 813 } 814 815 // Get the job summary from the fsm state store 816 originalSummary, err := n.state.JobSummaryByID(ws, job.ID) 817 if err != nil { 818 return err 819 } 820 summary := originalSummary.Copy() 821 822 // Add the allocations scheduler has made to queued since these 823 // allocations are never getting placed until the scheduler is invoked 824 // with a real planner 825 if l := len(planner.Plans); l != 1 { 826 return fmt.Errorf("unexpected number of plans during restore %d. 
Please file an issue including the logs", l) 827 } 828 for _, allocations := range planner.Plans[0].NodeAllocation { 829 for _, allocation := range allocations { 830 tgSummary, ok := summary.Summary[allocation.TaskGroup] 831 if !ok { 832 return fmt.Errorf("task group %q not found while updating queued count", allocation.TaskGroup) 833 } 834 tgSummary.Queued += 1 835 summary.Summary[allocation.TaskGroup] = tgSummary 836 } 837 } 838 839 // Add the queued allocations attached to the evaluation to the queued 840 // counter of the job summary 841 if l := len(planner.Evals); l != 1 { 842 return fmt.Errorf("unexpected number of evals during restore %d. Please file an issue including the logs", l) 843 } 844 for tg, queued := range planner.Evals[0].QueuedAllocations { 845 tgSummary, ok := summary.Summary[tg] 846 if !ok { 847 return fmt.Errorf("task group %q not found while updating queued count", tg) 848 } 849 850 // We add instead of setting here because we want to take into 851 // consideration what the scheduler with a mock planner thinks it 852 // placed. 
Those should be counted as queued as well 853 tgSummary.Queued += queued 854 summary.Summary[tg] = tgSummary 855 } 856 857 if !reflect.DeepEqual(summary, originalSummary) { 858 summary.ModifyIndex = index 859 if err := n.state.UpsertJobSummary(index, summary); err != nil { 860 return err 861 } 862 } 863 } 864 return nil 865 } 866 867 func (s *nomadSnapshot) Persist(sink raft.SnapshotSink) error { 868 defer metrics.MeasureSince([]string{"nomad", "fsm", "persist"}, time.Now()) 869 // Register the nodes 870 encoder := codec.NewEncoder(sink, structs.MsgpackHandle) 871 872 // Write the header 873 header := snapshotHeader{} 874 if err := encoder.Encode(&header); err != nil { 875 sink.Cancel() 876 return err 877 } 878 879 // Write the time table 880 sink.Write([]byte{byte(TimeTableSnapshot)}) 881 if err := s.timetable.Serialize(encoder); err != nil { 882 sink.Cancel() 883 return err 884 } 885 886 // Write all the data out 887 if err := s.persistIndexes(sink, encoder); err != nil { 888 sink.Cancel() 889 return err 890 } 891 if err := s.persistNodes(sink, encoder); err != nil { 892 sink.Cancel() 893 return err 894 } 895 if err := s.persistJobs(sink, encoder); err != nil { 896 sink.Cancel() 897 return err 898 } 899 if err := s.persistEvals(sink, encoder); err != nil { 900 sink.Cancel() 901 return err 902 } 903 if err := s.persistAllocs(sink, encoder); err != nil { 904 sink.Cancel() 905 return err 906 } 907 if err := s.persistPeriodicLaunches(sink, encoder); err != nil { 908 sink.Cancel() 909 return err 910 } 911 if err := s.persistJobSummaries(sink, encoder); err != nil { 912 sink.Cancel() 913 return err 914 } 915 if err := s.persistVaultAccessors(sink, encoder); err != nil { 916 sink.Cancel() 917 return err 918 } 919 if err := s.persistJobVersions(sink, encoder); err != nil { 920 sink.Cancel() 921 return err 922 } 923 if err := s.persistDeployments(sink, encoder); err != nil { 924 sink.Cancel() 925 return err 926 } 927 return nil 928 } 929 930 func (s *nomadSnapshot) 
persistIndexes(sink raft.SnapshotSink, 931 encoder *codec.Encoder) error { 932 // Get all the indexes 933 iter, err := s.snap.Indexes() 934 if err != nil { 935 return err 936 } 937 938 for { 939 // Get the next item 940 raw := iter.Next() 941 if raw == nil { 942 break 943 } 944 945 // Prepare the request struct 946 idx := raw.(*state.IndexEntry) 947 948 // Write out a node registration 949 sink.Write([]byte{byte(IndexSnapshot)}) 950 if err := encoder.Encode(idx); err != nil { 951 return err 952 } 953 } 954 return nil 955 } 956 957 func (s *nomadSnapshot) persistNodes(sink raft.SnapshotSink, 958 encoder *codec.Encoder) error { 959 // Get all the nodes 960 ws := memdb.NewWatchSet() 961 nodes, err := s.snap.Nodes(ws) 962 if err != nil { 963 return err 964 } 965 966 for { 967 // Get the next item 968 raw := nodes.Next() 969 if raw == nil { 970 break 971 } 972 973 // Prepare the request struct 974 node := raw.(*structs.Node) 975 976 // Write out a node registration 977 sink.Write([]byte{byte(NodeSnapshot)}) 978 if err := encoder.Encode(node); err != nil { 979 return err 980 } 981 } 982 return nil 983 } 984 985 func (s *nomadSnapshot) persistJobs(sink raft.SnapshotSink, 986 encoder *codec.Encoder) error { 987 // Get all the jobs 988 ws := memdb.NewWatchSet() 989 jobs, err := s.snap.Jobs(ws) 990 if err != nil { 991 return err 992 } 993 994 for { 995 // Get the next item 996 raw := jobs.Next() 997 if raw == nil { 998 break 999 } 1000 1001 // Prepare the request struct 1002 job := raw.(*structs.Job) 1003 1004 // Write out a job registration 1005 sink.Write([]byte{byte(JobSnapshot)}) 1006 if err := encoder.Encode(job); err != nil { 1007 return err 1008 } 1009 } 1010 return nil 1011 } 1012 1013 func (s *nomadSnapshot) persistEvals(sink raft.SnapshotSink, 1014 encoder *codec.Encoder) error { 1015 // Get all the evaluations 1016 ws := memdb.NewWatchSet() 1017 evals, err := s.snap.Evals(ws) 1018 if err != nil { 1019 return err 1020 } 1021 1022 for { 1023 // Get the next item 
1024 raw := evals.Next() 1025 if raw == nil { 1026 break 1027 } 1028 1029 // Prepare the request struct 1030 eval := raw.(*structs.Evaluation) 1031 1032 // Write out the evaluation 1033 sink.Write([]byte{byte(EvalSnapshot)}) 1034 if err := encoder.Encode(eval); err != nil { 1035 return err 1036 } 1037 } 1038 return nil 1039 } 1040 1041 func (s *nomadSnapshot) persistAllocs(sink raft.SnapshotSink, 1042 encoder *codec.Encoder) error { 1043 // Get all the allocations 1044 ws := memdb.NewWatchSet() 1045 allocs, err := s.snap.Allocs(ws) 1046 if err != nil { 1047 return err 1048 } 1049 1050 for { 1051 // Get the next item 1052 raw := allocs.Next() 1053 if raw == nil { 1054 break 1055 } 1056 1057 // Prepare the request struct 1058 alloc := raw.(*structs.Allocation) 1059 1060 // Write out the evaluation 1061 sink.Write([]byte{byte(AllocSnapshot)}) 1062 if err := encoder.Encode(alloc); err != nil { 1063 return err 1064 } 1065 } 1066 return nil 1067 } 1068 1069 func (s *nomadSnapshot) persistPeriodicLaunches(sink raft.SnapshotSink, 1070 encoder *codec.Encoder) error { 1071 // Get all the jobs 1072 ws := memdb.NewWatchSet() 1073 launches, err := s.snap.PeriodicLaunches(ws) 1074 if err != nil { 1075 return err 1076 } 1077 1078 for { 1079 // Get the next item 1080 raw := launches.Next() 1081 if raw == nil { 1082 break 1083 } 1084 1085 // Prepare the request struct 1086 launch := raw.(*structs.PeriodicLaunch) 1087 1088 // Write out a job registration 1089 sink.Write([]byte{byte(PeriodicLaunchSnapshot)}) 1090 if err := encoder.Encode(launch); err != nil { 1091 return err 1092 } 1093 } 1094 return nil 1095 } 1096 1097 func (s *nomadSnapshot) persistJobSummaries(sink raft.SnapshotSink, 1098 encoder *codec.Encoder) error { 1099 1100 ws := memdb.NewWatchSet() 1101 summaries, err := s.snap.JobSummaries(ws) 1102 if err != nil { 1103 return err 1104 } 1105 1106 for { 1107 raw := summaries.Next() 1108 if raw == nil { 1109 break 1110 } 1111 1112 jobSummary := raw.(*structs.JobSummary) 
1113 1114 sink.Write([]byte{byte(JobSummarySnapshot)}) 1115 if err := encoder.Encode(jobSummary); err != nil { 1116 return err 1117 } 1118 } 1119 return nil 1120 } 1121 1122 func (s *nomadSnapshot) persistVaultAccessors(sink raft.SnapshotSink, 1123 encoder *codec.Encoder) error { 1124 1125 ws := memdb.NewWatchSet() 1126 accessors, err := s.snap.VaultAccessors(ws) 1127 if err != nil { 1128 return err 1129 } 1130 1131 for { 1132 raw := accessors.Next() 1133 if raw == nil { 1134 break 1135 } 1136 1137 accessor := raw.(*structs.VaultAccessor) 1138 1139 sink.Write([]byte{byte(VaultAccessorSnapshot)}) 1140 if err := encoder.Encode(accessor); err != nil { 1141 return err 1142 } 1143 } 1144 return nil 1145 } 1146 1147 func (s *nomadSnapshot) persistJobVersions(sink raft.SnapshotSink, 1148 encoder *codec.Encoder) error { 1149 // Get all the jobs 1150 ws := memdb.NewWatchSet() 1151 versions, err := s.snap.JobVersions(ws) 1152 if err != nil { 1153 return err 1154 } 1155 1156 for { 1157 // Get the next item 1158 raw := versions.Next() 1159 if raw == nil { 1160 break 1161 } 1162 1163 // Prepare the request struct 1164 job := raw.(*structs.Job) 1165 1166 // Write out a job registration 1167 sink.Write([]byte{byte(JobVersionSnapshot)}) 1168 if err := encoder.Encode(job); err != nil { 1169 return err 1170 } 1171 } 1172 return nil 1173 } 1174 1175 func (s *nomadSnapshot) persistDeployments(sink raft.SnapshotSink, 1176 encoder *codec.Encoder) error { 1177 // Get all the jobs 1178 ws := memdb.NewWatchSet() 1179 deployments, err := s.snap.Deployments(ws) 1180 if err != nil { 1181 return err 1182 } 1183 1184 for { 1185 // Get the next item 1186 raw := deployments.Next() 1187 if raw == nil { 1188 break 1189 } 1190 1191 // Prepare the request struct 1192 deployment := raw.(*structs.Deployment) 1193 1194 // Write out a job registration 1195 sink.Write([]byte{byte(DeploymentSnapshot)}) 1196 if err := encoder.Encode(deployment); err != nil { 1197 return err 1198 } 1199 } 1200 return nil 
1201 } 1202 1203 // Release is a no-op, as we just need to GC the pointer 1204 // to the state store snapshot. There is nothing to explicitly 1205 // cleanup. 1206 func (s *nomadSnapshot) Release() {}