github.com/ryanslade/nomad@v0.2.4-0.20160128061903-fc95782f2089/nomad/fsm.go (about) 1 package nomad 2 3 import ( 4 "fmt" 5 "io" 6 "log" 7 "time" 8 9 "github.com/armon/go-metrics" 10 "github.com/hashicorp/go-msgpack/codec" 11 "github.com/hashicorp/nomad/nomad/state" 12 "github.com/hashicorp/nomad/nomad/structs" 13 "github.com/hashicorp/raft" 14 ) 15 16 const ( 17 // timeTableGranularity is the granularity of index to time tracking 18 timeTableGranularity = 5 * time.Minute 19 20 // timeTableLimit is the maximum limit of our tracking 21 timeTableLimit = 72 * time.Hour 22 ) 23 24 // SnapshotType is prefixed to a record in the FSM snapshot 25 // so that we can determine the type for restore 26 type SnapshotType byte 27 28 const ( 29 NodeSnapshot SnapshotType = iota 30 JobSnapshot 31 IndexSnapshot 32 EvalSnapshot 33 AllocSnapshot 34 TimeTableSnapshot 35 PeriodicLaunchSnapshot 36 ) 37 38 // nomadFSM implements a finite state machine that is used 39 // along with Raft to provide strong consistency. We implement 40 // this outside the Server to avoid exposing this outside the package. 41 type nomadFSM struct { 42 evalBroker *EvalBroker 43 periodicDispatcher *PeriodicDispatch 44 logOutput io.Writer 45 logger *log.Logger 46 state *state.StateStore 47 timetable *TimeTable 48 } 49 50 // nomadSnapshot is used to provide a snapshot of the current 51 // state in a way that can be accessed concurrently with operations 52 // that may modify the live state. 53 type nomadSnapshot struct { 54 snap *state.StateSnapshot 55 timetable *TimeTable 56 } 57 58 // snapshotHeader is the first entry in our snapshot 59 type snapshotHeader struct { 60 } 61 62 // NewFSMPath is used to construct a new FSM with a blank state 63 func NewFSM(evalBroker *EvalBroker, periodic *PeriodicDispatch, logOutput io.Writer) (*nomadFSM, error) { 64 // Create a state store 65 state, err := state.NewStateStore(logOutput) 66 if err != nil { 67 return nil, err 68 } 69 70 fsm := &nomadFSM{ 71 evalBroker: evalBroker, 72 periodicDispatcher: periodic, 73 logOutput: logOutput, 74 logger: log.New(logOutput, "", log.LstdFlags), 75 state: state, 76 timetable: NewTimeTable(timeTableGranularity, timeTableLimit), 77 } 78 return fsm, nil 79 } 80 81 // Close is used to cleanup resources associated with the FSM 82 func (n *nomadFSM) Close() error { 83 return nil 84 } 85 86 // State is used to return a handle to the current state 87 func (n *nomadFSM) State() *state.StateStore { 88 return n.state 89 } 90 91 // TimeTable returns the time table of transactions 92 func (n *nomadFSM) TimeTable() *TimeTable { 93 return n.timetable 94 } 95 96 func (n *nomadFSM) Apply(log *raft.Log) interface{} { 97 buf := log.Data 98 msgType := structs.MessageType(buf[0]) 99 100 // Witness this write 101 n.timetable.Witness(log.Index, time.Now().UTC()) 102 103 // Check if this message type should be ignored when unknown. This is 104 // used so that new commands can be added with developer control if older 105 // versions can safely ignore the command, or if they should crash. 106 ignoreUnknown := false 107 if msgType&structs.IgnoreUnknownTypeFlag == structs.IgnoreUnknownTypeFlag { 108 msgType &= ^structs.IgnoreUnknownTypeFlag 109 ignoreUnknown = true 110 } 111 112 switch msgType { 113 case structs.NodeRegisterRequestType: 114 return n.applyUpsertNode(buf[1:], log.Index) 115 case structs.NodeDeregisterRequestType: 116 return n.applyDeregisterNode(buf[1:], log.Index) 117 case structs.NodeUpdateStatusRequestType: 118 return n.applyStatusUpdate(buf[1:], log.Index) 119 case structs.NodeUpdateDrainRequestType: 120 return n.applyDrainUpdate(buf[1:], log.Index) 121 case structs.JobRegisterRequestType: 122 return n.applyUpsertJob(buf[1:], log.Index) 123 case structs.JobDeregisterRequestType: 124 return n.applyDeregisterJob(buf[1:], log.Index) 125 case structs.EvalUpdateRequestType: 126 return n.applyUpdateEval(buf[1:], log.Index) 127 case structs.EvalDeleteRequestType: 128 return n.applyDeleteEval(buf[1:], log.Index) 129 case structs.AllocUpdateRequestType: 130 return n.applyAllocUpdate(buf[1:], log.Index) 131 case structs.AllocClientUpdateRequestType: 132 return n.applyAllocClientUpdate(buf[1:], log.Index) 133 default: 134 if ignoreUnknown { 135 n.logger.Printf("[WARN] nomad.fsm: ignoring unknown message type (%d), upgrade to newer version", msgType) 136 return nil 137 } else { 138 panic(fmt.Errorf("failed to apply request: %#v", buf)) 139 } 140 } 141 } 142 143 func (n *nomadFSM) applyUpsertNode(buf []byte, index uint64) interface{} { 144 defer metrics.MeasureSince([]string{"nomad", "fsm", "register_node"}, time.Now()) 145 var req structs.NodeRegisterRequest 146 if err := structs.Decode(buf, &req); err != nil { 147 panic(fmt.Errorf("failed to decode request: %v", err)) 148 } 149 150 if err := n.state.UpsertNode(index, req.Node); err != nil { 151 n.logger.Printf("[ERR] nomad.fsm: UpsertNode failed: %v", err) 152 return err 153 } 154 return nil 155 } 156 157 func (n *nomadFSM) applyDeregisterNode(buf []byte, index uint64) interface{} { 158 defer metrics.MeasureSince([]string{"nomad", "fsm", "deregister_node"}, time.Now()) 159 var req structs.NodeDeregisterRequest 160 if err := structs.Decode(buf, &req); err != nil { 161 panic(fmt.Errorf("failed to decode request: %v", err)) 162 } 163 164 if err := n.state.DeleteNode(index, req.NodeID); err != nil { 165 n.logger.Printf("[ERR] nomad.fsm: DeleteNode failed: %v", err) 166 return err 167 } 168 return nil 169 } 170 171 func (n *nomadFSM) applyStatusUpdate(buf []byte, index uint64) interface{} { 172 defer metrics.MeasureSince([]string{"nomad", "fsm", "node_status_update"}, time.Now()) 173 var req structs.NodeUpdateStatusRequest 174 if err := structs.Decode(buf, &req); err != nil { 175 panic(fmt.Errorf("failed to decode request: %v", err)) 176 } 177 178 if err := n.state.UpdateNodeStatus(index, req.NodeID, req.Status); err != nil { 179 n.logger.Printf("[ERR] nomad.fsm: UpdateNodeStatus failed: %v", err) 180 return err 181 } 182 return nil 183 } 184 185 func (n *nomadFSM) applyDrainUpdate(buf []byte, index uint64) interface{} { 186 defer metrics.MeasureSince([]string{"nomad", "fsm", "node_drain_update"}, time.Now()) 187 var req structs.NodeUpdateDrainRequest 188 if err := structs.Decode(buf, &req); err != nil { 189 panic(fmt.Errorf("failed to decode request: %v", err)) 190 } 191 192 if err := n.state.UpdateNodeDrain(index, req.NodeID, req.Drain); err != nil { 193 n.logger.Printf("[ERR] nomad.fsm: UpdateNodeDrain failed: %v", err) 194 return err 195 } 196 return nil 197 } 198 199 func (n *nomadFSM) applyUpsertJob(buf []byte, index uint64) interface{} { 200 defer metrics.MeasureSince([]string{"nomad", "fsm", "register_job"}, time.Now()) 201 var req structs.JobRegisterRequest 202 if err := structs.Decode(buf, &req); err != nil { 203 panic(fmt.Errorf("failed to decode request: %v", err)) 204 } 205 206 if err := n.state.UpsertJob(index, req.Job); err != nil { 207 n.logger.Printf("[ERR] nomad.fsm: UpsertJob failed: %v", err) 208 return err 209 } 210 211 // We always add the job to the periodic dispatcher because there is the 212 // possibility that the periodic spec was removed and then we should stop 213 // tracking it. 214 if err := n.periodicDispatcher.Add(req.Job); err != nil { 215 n.logger.Printf("[ERR] nomad.fsm: periodicDispatcher.Add failed: %v", err) 216 return err 217 } 218 219 // If it is periodic, record the time it was inserted. This is necessary for 220 // recovering during leader election. It is possible that from the time it 221 // is added to when it was suppose to launch, leader election occurs and the 222 // job was not launched. In this case, we use the insertion time to 223 // determine if a launch was missed. 224 if req.Job.IsPeriodic() { 225 prevLaunch, err := n.state.PeriodicLaunchByID(req.Job.ID) 226 if err != nil { 227 n.logger.Printf("[ERR] nomad.fsm: PeriodicLaunchByID failed: %v", err) 228 return err 229 } 230 231 // Record the insertion time as a launch. We overload the launch table 232 // such that the first entry is the insertion time. 233 if prevLaunch == nil { 234 launch := &structs.PeriodicLaunch{ID: req.Job.ID, Launch: time.Now()} 235 if err := n.state.UpsertPeriodicLaunch(index, launch); err != nil { 236 n.logger.Printf("[ERR] nomad.fsm: UpsertPeriodicLaunch failed: %v", err) 237 return err 238 } 239 } 240 } 241 242 // Check if the parent job is periodic and mark the launch time. 243 parentID := req.Job.ParentID 244 if parentID != "" { 245 parent, err := n.state.JobByID(parentID) 246 if err != nil { 247 n.logger.Printf("[ERR] nomad.fsm: JobByID(%v) lookup for parent failed: %v", parentID, err) 248 return err 249 } else if parent == nil { 250 // The parent has been deregistered. 251 return nil 252 } 253 254 if parent.IsPeriodic() { 255 t, err := n.periodicDispatcher.LaunchTime(req.Job.ID) 256 if err != nil { 257 n.logger.Printf("[ERR] nomad.fsm: LaunchTime(%v) failed: %v", req.Job.ID, err) 258 return err 259 } 260 261 launch := &structs.PeriodicLaunch{ID: parentID, Launch: t} 262 if err := n.state.UpsertPeriodicLaunch(index, launch); err != nil { 263 n.logger.Printf("[ERR] nomad.fsm: UpsertPeriodicLaunch failed: %v", err) 264 return err 265 } 266 } 267 } 268 269 return nil 270 } 271 272 func (n *nomadFSM) applyDeregisterJob(buf []byte, index uint64) interface{} { 273 defer metrics.MeasureSince([]string{"nomad", "fsm", "deregister_job"}, time.Now()) 274 var req structs.JobDeregisterRequest 275 if err := structs.Decode(buf, &req); err != nil { 276 panic(fmt.Errorf("failed to decode request: %v", err)) 277 } 278 279 if err := n.state.DeleteJob(index, req.JobID); err != nil { 280 n.logger.Printf("[ERR] nomad.fsm: DeleteJob failed: %v", err) 281 return err 282 } 283 284 if err := n.periodicDispatcher.Remove(req.JobID); err != nil { 285 n.logger.Printf("[ERR] nomad.fsm: periodicDispatcher.Remove failed: %v", err) 286 return err 287 } 288 289 // We always delete from the periodic launch table because it is possible that 290 // the job was updated to be non-perioidic, thus checking if it is periodic 291 // doesn't ensure we clean it up properly. 292 n.state.DeletePeriodicLaunch(index, req.JobID) 293 294 return nil 295 } 296 297 func (n *nomadFSM) applyUpdateEval(buf []byte, index uint64) interface{} { 298 defer metrics.MeasureSince([]string{"nomad", "fsm", "update_eval"}, time.Now()) 299 var req structs.EvalUpdateRequest 300 if err := structs.Decode(buf, &req); err != nil { 301 panic(fmt.Errorf("failed to decode request: %v", err)) 302 } 303 304 if err := n.state.UpsertEvals(index, req.Evals); err != nil { 305 n.logger.Printf("[ERR] nomad.fsm: UpsertEvals failed: %v", err) 306 return err 307 } 308 309 for _, eval := range req.Evals { 310 if eval.ShouldEnqueue() { 311 if err := n.evalBroker.Enqueue(eval); err != nil { 312 n.logger.Printf("[ERR] nomad.fsm: failed to enqueue evaluation %s: %v", eval.ID, err) 313 return err 314 } 315 } 316 } 317 return nil 318 } 319 320 func (n *nomadFSM) applyDeleteEval(buf []byte, index uint64) interface{} { 321 defer metrics.MeasureSince([]string{"nomad", "fsm", "delete_eval"}, time.Now()) 322 var req structs.EvalDeleteRequest 323 if err := structs.Decode(buf, &req); err != nil { 324 panic(fmt.Errorf("failed to decode request: %v", err)) 325 } 326 327 if err := n.state.DeleteEval(index, req.Evals, req.Allocs); err != nil { 328 n.logger.Printf("[ERR] nomad.fsm: DeleteEval failed: %v", err) 329 return err 330 } 331 return nil 332 } 333 334 func (n *nomadFSM) applyAllocUpdate(buf []byte, index uint64) interface{} { 335 defer metrics.MeasureSince([]string{"nomad", "fsm", "alloc_update"}, time.Now()) 336 var req structs.AllocUpdateRequest 337 if err := structs.Decode(buf, &req); err != nil { 338 panic(fmt.Errorf("failed to decode request: %v", err)) 339 } 340 341 if err := n.state.UpsertAllocs(index, req.Alloc); err != nil { 342 n.logger.Printf("[ERR] nomad.fsm: UpsertAllocs failed: %v", err) 343 return err 344 } 345 return nil 346 } 347 348 func (n *nomadFSM) applyAllocClientUpdate(buf []byte, index uint64) interface{} { 349 defer metrics.MeasureSince([]string{"nomad", "fsm", "alloc_client_update"}, time.Now()) 350 var req structs.AllocUpdateRequest 351 if err := structs.Decode(buf, &req); err != nil { 352 panic(fmt.Errorf("failed to decode request: %v", err)) 353 } 354 if len(req.Alloc) == 0 { 355 return nil 356 } 357 358 if err := n.state.UpdateAllocFromClient(index, req.Alloc[0]); err != nil { 359 n.logger.Printf("[ERR] nomad.fsm: UpdateAllocFromClient failed: %v", err) 360 return err 361 } 362 return nil 363 } 364 365 func (n *nomadFSM) Snapshot() (raft.FSMSnapshot, error) { 366 // Create a new snapshot 367 snap, err := n.state.Snapshot() 368 if err != nil { 369 return nil, err 370 } 371 372 ns := &nomadSnapshot{ 373 snap: snap, 374 timetable: n.timetable, 375 } 376 return ns, nil 377 } 378 379 func (n *nomadFSM) Restore(old io.ReadCloser) error { 380 defer old.Close() 381 382 // Create a new state store 383 newState, err := state.NewStateStore(n.logOutput) 384 if err != nil { 385 return err 386 } 387 n.state = newState 388 389 // Start the state restore 390 restore, err := newState.Restore() 391 if err != nil { 392 return err 393 } 394 defer restore.Abort() 395 396 // Create a decoder 397 dec := codec.NewDecoder(old, structs.MsgpackHandle) 398 399 // Read in the header 400 var header snapshotHeader 401 if err := dec.Decode(&header); err != nil { 402 return err 403 } 404 405 // Populate the new state 406 msgType := make([]byte, 1) 407 for { 408 // Read the message type 409 _, err := old.Read(msgType) 410 if err == io.EOF { 411 break 412 } else if err != nil { 413 return err 414 } 415 416 // Decode 417 switch SnapshotType(msgType[0]) { 418 case TimeTableSnapshot: 419 if err := n.timetable.Deserialize(dec); err != nil { 420 return fmt.Errorf("time table deserialize failed: %v", err) 421 } 422 423 case NodeSnapshot: 424 node := new(structs.Node) 425 if err := dec.Decode(node); err != nil { 426 return err 427 } 428 if err := restore.NodeRestore(node); err != nil { 429 return err 430 } 431 432 case JobSnapshot: 433 job := new(structs.Job) 434 if err := dec.Decode(job); err != nil { 435 return err 436 } 437 if err := restore.JobRestore(job); err != nil { 438 return err 439 } 440 441 case EvalSnapshot: 442 eval := new(structs.Evaluation) 443 if err := dec.Decode(eval); err != nil { 444 return err 445 } 446 if err := restore.EvalRestore(eval); err != nil { 447 return err 448 } 449 450 case AllocSnapshot: 451 alloc := new(structs.Allocation) 452 if err := dec.Decode(alloc); err != nil { 453 return err 454 } 455 if err := restore.AllocRestore(alloc); err != nil { 456 return err 457 } 458 459 case IndexSnapshot: 460 idx := new(state.IndexEntry) 461 if err := dec.Decode(idx); err != nil { 462 return err 463 } 464 if err := restore.IndexRestore(idx); err != nil { 465 return err 466 } 467 468 case PeriodicLaunchSnapshot: 469 launch := new(structs.PeriodicLaunch) 470 if err := dec.Decode(launch); err != nil { 471 return err 472 } 473 if err := restore.PeriodicLaunchRestore(launch); err != nil { 474 return err 475 } 476 477 default: 478 return fmt.Errorf("Unrecognized snapshot type: %v", msgType) 479 } 480 } 481 482 // Commit the state restore 483 restore.Commit() 484 return nil 485 } 486 487 func (s *nomadSnapshot) Persist(sink raft.SnapshotSink) error { 488 defer metrics.MeasureSince([]string{"nomad", "fsm", "persist"}, time.Now()) 489 // Register the nodes 490 encoder := codec.NewEncoder(sink, structs.MsgpackHandle) 491 492 // Write the header 493 header := snapshotHeader{} 494 if err := encoder.Encode(&header); err != nil { 495 sink.Cancel() 496 return err 497 } 498 499 // Write the time table 500 sink.Write([]byte{byte(TimeTableSnapshot)}) 501 if err := s.timetable.Serialize(encoder); err != nil { 502 sink.Cancel() 503 return err 504 } 505 506 // Write all the data out 507 if err := s.persistIndexes(sink, encoder); err != nil { 508 sink.Cancel() 509 return err 510 } 511 if err := s.persistNodes(sink, encoder); err != nil { 512 sink.Cancel() 513 return err 514 } 515 if err := s.persistJobs(sink, encoder); err != nil { 516 sink.Cancel() 517 return err 518 } 519 if err := s.persistEvals(sink, encoder); err != nil { 520 sink.Cancel() 521 return err 522 } 523 if err := s.persistAllocs(sink, encoder); err != nil { 524 sink.Cancel() 525 return err 526 } 527 if err := s.persistPeriodicLaunches(sink, encoder); err != nil { 528 sink.Cancel() 529 return err 530 } 531 return nil 532 } 533 534 func (s *nomadSnapshot) persistIndexes(sink raft.SnapshotSink, 535 encoder *codec.Encoder) error { 536 // Get all the indexes 537 iter, err := s.snap.Indexes() 538 if err != nil { 539 return err 540 } 541 542 for { 543 // Get the next item 544 raw := iter.Next() 545 if raw == nil { 546 break 547 } 548 549 // Prepare the request struct 550 idx := raw.(*state.IndexEntry) 551 552 // Write out a node registration 553 sink.Write([]byte{byte(IndexSnapshot)}) 554 if err := encoder.Encode(idx); err != nil { 555 return err 556 } 557 } 558 return nil 559 } 560 561 func (s *nomadSnapshot) persistNodes(sink raft.SnapshotSink, 562 encoder *codec.Encoder) error { 563 // Get all the nodes 564 nodes, err := s.snap.Nodes() 565 if err != nil { 566 return err 567 } 568 569 for { 570 // Get the next item 571 raw := nodes.Next() 572 if raw == nil { 573 break 574 } 575 576 // Prepare the request struct 577 node := raw.(*structs.Node) 578 579 // Write out a node registration 580 sink.Write([]byte{byte(NodeSnapshot)}) 581 if err := encoder.Encode(node); err != nil { 582 return err 583 } 584 } 585 return nil 586 } 587 588 func (s *nomadSnapshot) persistJobs(sink raft.SnapshotSink, 589 encoder *codec.Encoder) error { 590 // Get all the jobs 591 jobs, err := s.snap.Jobs() 592 if err != nil { 593 return err 594 } 595 596 for { 597 // Get the next item 598 raw := jobs.Next() 599 if raw == nil { 600 break 601 } 602 603 // Prepare the request struct 604 job := raw.(*structs.Job) 605 606 // Write out a job registration 607 sink.Write([]byte{byte(JobSnapshot)}) 608 if err := encoder.Encode(job); err != nil { 609 return err 610 } 611 } 612 return nil 613 } 614 615 func (s *nomadSnapshot) persistEvals(sink raft.SnapshotSink, 616 encoder *codec.Encoder) error { 617 // Get all the evaluations 618 evals, err := s.snap.Evals() 619 if err != nil { 620 return err 621 } 622 623 for { 624 // Get the next item 625 raw := evals.Next() 626 if raw == nil { 627 break 628 } 629 630 // Prepare the request struct 631 eval := raw.(*structs.Evaluation) 632 633 // Write out the evaluation 634 sink.Write([]byte{byte(EvalSnapshot)}) 635 if err := encoder.Encode(eval); err != nil { 636 return err 637 } 638 } 639 return nil 640 } 641 642 func (s *nomadSnapshot) persistAllocs(sink raft.SnapshotSink, 643 encoder *codec.Encoder) error { 644 // Get all the allocations 645 allocs, err := s.snap.Allocs() 646 if err != nil { 647 return err 648 } 649 650 for { 651 // Get the next item 652 raw := allocs.Next() 653 if raw == nil { 654 break 655 } 656 657 // Prepare the request struct 658 alloc := raw.(*structs.Allocation) 659 660 // Write out the evaluation 661 sink.Write([]byte{byte(AllocSnapshot)}) 662 if err := encoder.Encode(alloc); err != nil { 663 return err 664 } 665 } 666 return nil 667 } 668 669 func (s *nomadSnapshot) persistPeriodicLaunches(sink raft.SnapshotSink, 670 encoder *codec.Encoder) error { 671 // Get all the jobs 672 launches, err := s.snap.PeriodicLaunches() 673 if err != nil { 674 return err 675 } 676 677 for { 678 // Get the next item 679 raw := launches.Next() 680 if raw == nil { 681 break 682 } 683 684 // Prepare the request struct 685 launch := raw.(*structs.PeriodicLaunch) 686 687 // Write out a job registration 688 sink.Write([]byte{byte(PeriodicLaunchSnapshot)}) 689 if err := encoder.Encode(launch); err != nil { 690 return err 691 } 692 } 693 return nil 694 } 695 696 // Release is a no-op, as we just need to GC the pointer 697 // to the state store snapshot. There is nothing to explicitly 698 // cleanup. 699 func (s *nomadSnapshot) Release() {}