github.com/ncodes/nomad@v0.5.7-0.20170403112158-97adf4a74fb3/nomad/fsm.go

     1  package nomad
     2  
     3  import (
     4  	"fmt"
     5  	"io"
     6  	"log"
     7  	"reflect"
     8  	"sync"
     9  	"time"
    10  
     11  	"github.com/armon/go-metrics"
     12  	memdb "github.com/hashicorp/go-memdb"
     13  	"github.com/hashicorp/raft"
     14  	"github.com/ncodes/nomad/nomad/state"
     15  	"github.com/ncodes/nomad/nomad/structs"
     16  	"github.com/ncodes/nomad/scheduler"
     17  	"github.com/ugorji/go/codec"
    18  )
    19  
    20  const (
    21  	// timeTableGranularity is the granularity of index to time tracking
    22  	timeTableGranularity = 5 * time.Minute
    23  
    24  	// timeTableLimit is the maximum limit of our tracking
    25  	timeTableLimit = 72 * time.Hour
    26  )
    27  
    28  // SnapshotType is prefixed to a record in the FSM snapshot
    29  // so that we can determine the type for restore
    30  type SnapshotType byte
    31  
    32  const (
    33  	NodeSnapshot SnapshotType = iota
    34  	JobSnapshot
    35  	IndexSnapshot
    36  	EvalSnapshot
    37  	AllocSnapshot
    38  	TimeTableSnapshot
    39  	PeriodicLaunchSnapshot
    40  	JobSummarySnapshot
    41  	VaultAccessorSnapshot
    42  )
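
        // A hedged sketch of the snapshot stream framing that Persist writes and
        // Restore reads below (descriptive only, inferred from the code in this
        // file rather than from a separate format spec):
        //
        //	[msgpack(snapshotHeader)]
        //	repeated until EOF:
        //	  [one SnapshotType byte][msgpack-encoded record of that type]
        //
        // Restore reads the single type byte straight from the reader and then
        // lets the shared msgpack decoder consume the record body behind it.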
    43  
    44  // nomadFSM implements a finite state machine that is used
    45  // along with Raft to provide strong consistency. We implement
    46  // this outside the Server to avoid exposing this outside the package.
    47  type nomadFSM struct {
    48  	evalBroker         *EvalBroker
    49  	blockedEvals       *BlockedEvals
    50  	periodicDispatcher *PeriodicDispatch
    51  	logOutput          io.Writer
    52  	logger             *log.Logger
    53  	state              *state.StateStore
    54  	timetable          *TimeTable
    55  
    56  	// stateLock is only used to protect outside callers to State() from
    57  	// racing with Restore(), which is called by Raft (it puts in a totally
    58  	// new state store). Everything internal here is synchronized by the
     59  	// Raft side, so it doesn't need this lock.
    60  	stateLock sync.RWMutex
    61  }
    62  
    63  // nomadSnapshot is used to provide a snapshot of the current
    64  // state in a way that can be accessed concurrently with operations
    65  // that may modify the live state.
    66  type nomadSnapshot struct {
    67  	snap      *state.StateSnapshot
    68  	timetable *TimeTable
    69  }
    70  
    71  // snapshotHeader is the first entry in our snapshot
    72  type snapshotHeader struct {
    73  }
    74  
     75  // NewFSM is used to construct a new FSM with a blank state
    76  func NewFSM(evalBroker *EvalBroker, periodic *PeriodicDispatch,
    77  	blocked *BlockedEvals, logOutput io.Writer) (*nomadFSM, error) {
    78  	// Create a state store
    79  	state, err := state.NewStateStore(logOutput)
    80  	if err != nil {
    81  		return nil, err
    82  	}
    83  
    84  	fsm := &nomadFSM{
    85  		evalBroker:         evalBroker,
    86  		periodicDispatcher: periodic,
    87  		blockedEvals:       blocked,
    88  		logOutput:          logOutput,
    89  		logger:             log.New(logOutput, "", log.LstdFlags),
    90  		state:              state,
    91  		timetable:          NewTimeTable(timeTableGranularity, timeTableLimit),
    92  	}
    93  	return fsm, nil
    94  }
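
        // A minimal usage sketch (hedged: evalBroker, periodicDispatcher and
        // blockedEvals are assumed to be constructed elsewhere during server
        // setup, and the FSM is ultimately handed to raft.NewRaft):
        //
        //	fsm, err := NewFSM(evalBroker, periodicDispatcher, blockedEvals, os.Stderr)
        //	if err != nil {
        //		return err
        //	}
        //	store := fsm.State() // read-only handle used by queries and planning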
    95  
    96  // Close is used to cleanup resources associated with the FSM
    97  func (n *nomadFSM) Close() error {
    98  	return nil
    99  }
   100  
   101  // State is used to return a handle to the current state
   102  func (n *nomadFSM) State() *state.StateStore {
   103  	n.stateLock.RLock()
   104  	defer n.stateLock.RUnlock()
   105  	return n.state
   106  }
   107  
   108  // TimeTable returns the time table of transactions
   109  func (n *nomadFSM) TimeTable() *TimeTable {
   110  	return n.timetable
   111  }
   112  
   113  func (n *nomadFSM) Apply(log *raft.Log) interface{} {
   114  	buf := log.Data
   115  	msgType := structs.MessageType(buf[0])
   116  
   117  	// Witness this write
   118  	n.timetable.Witness(log.Index, time.Now().UTC())
   119  
    120  	// Check if this message type should be ignored when unknown. This
    121  	// gives developers control over whether older versions can safely
    122  	// ignore a new command or whether they should crash.
   123  	ignoreUnknown := false
   124  	if msgType&structs.IgnoreUnknownTypeFlag == structs.IgnoreUnknownTypeFlag {
   125  		msgType &= ^structs.IgnoreUnknownTypeFlag
   126  		ignoreUnknown = true
   127  	}
   128  
   129  	switch msgType {
   130  	case structs.NodeRegisterRequestType:
   131  		return n.applyUpsertNode(buf[1:], log.Index)
   132  	case structs.NodeDeregisterRequestType:
   133  		return n.applyDeregisterNode(buf[1:], log.Index)
   134  	case structs.NodeUpdateStatusRequestType:
   135  		return n.applyStatusUpdate(buf[1:], log.Index)
   136  	case structs.NodeUpdateDrainRequestType:
   137  		return n.applyDrainUpdate(buf[1:], log.Index)
   138  	case structs.JobRegisterRequestType:
   139  		return n.applyUpsertJob(buf[1:], log.Index)
   140  	case structs.JobDeregisterRequestType:
   141  		return n.applyDeregisterJob(buf[1:], log.Index)
   142  	case structs.EvalUpdateRequestType:
   143  		return n.applyUpdateEval(buf[1:], log.Index)
   144  	case structs.EvalDeleteRequestType:
   145  		return n.applyDeleteEval(buf[1:], log.Index)
   146  	case structs.AllocUpdateRequestType:
   147  		return n.applyAllocUpdate(buf[1:], log.Index)
   148  	case structs.AllocClientUpdateRequestType:
   149  		return n.applyAllocClientUpdate(buf[1:], log.Index)
   150  	case structs.ReconcileJobSummariesRequestType:
   151  		return n.applyReconcileSummaries(buf[1:], log.Index)
   152  	case structs.VaultAccessorRegisterRequestType:
   153  		return n.applyUpsertVaultAccessor(buf[1:], log.Index)
   154  	case structs.VaultAccessorDegisterRequestType:
   155  		return n.applyDeregisterVaultAccessor(buf[1:], log.Index)
   156  	default:
   157  		if ignoreUnknown {
   158  			n.logger.Printf("[WARN] nomad.fsm: ignoring unknown message type (%d), upgrade to newer version", msgType)
   159  			return nil
   160  		} else {
   161  			panic(fmt.Errorf("failed to apply request: %#v", buf))
   162  		}
   163  	}
   164  }
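
        // How a command reaches Apply, as a hedged sketch: the leader encodes the
        // request with a one-byte message type prefix before submitting it to Raft.
        // structs.Encode is assumed to perform that prefixing; SomeFutureRequestType
        // is a hypothetical command used only to illustrate the ignore-unknown flag:
        //
        //	buf, err := structs.Encode(structs.NodeRegisterRequestType, &req)
        //	// buf[0] == byte(structs.NodeRegisterRequestType); buf[1:] is the payload
        //
        //	// A new command that older servers may skip instead of panicking on:
        //	buf, err = structs.Encode(SomeFutureRequestType|structs.IgnoreUnknownTypeFlag, &req)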
   165  
   166  func (n *nomadFSM) applyUpsertNode(buf []byte, index uint64) interface{} {
   167  	defer metrics.MeasureSince([]string{"nomad", "fsm", "register_node"}, time.Now())
   168  	var req structs.NodeRegisterRequest
   169  	if err := structs.Decode(buf, &req); err != nil {
   170  		panic(fmt.Errorf("failed to decode request: %v", err))
   171  	}
   172  
   173  	if err := n.state.UpsertNode(index, req.Node); err != nil {
   174  		n.logger.Printf("[ERR] nomad.fsm: UpsertNode failed: %v", err)
   175  		return err
   176  	}
   177  
    178  	// Unblock evals for the node's computed node class if it is in a ready
    179  	// state.
   180  	if req.Node.Status == structs.NodeStatusReady {
   181  		n.blockedEvals.Unblock(req.Node.ComputedClass, index)
   182  	}
   183  
   184  	return nil
   185  }
   186  
   187  func (n *nomadFSM) applyDeregisterNode(buf []byte, index uint64) interface{} {
   188  	defer metrics.MeasureSince([]string{"nomad", "fsm", "deregister_node"}, time.Now())
   189  	var req structs.NodeDeregisterRequest
   190  	if err := structs.Decode(buf, &req); err != nil {
   191  		panic(fmt.Errorf("failed to decode request: %v", err))
   192  	}
   193  
   194  	if err := n.state.DeleteNode(index, req.NodeID); err != nil {
   195  		n.logger.Printf("[ERR] nomad.fsm: DeleteNode failed: %v", err)
   196  		return err
   197  	}
   198  	return nil
   199  }
   200  
   201  func (n *nomadFSM) applyStatusUpdate(buf []byte, index uint64) interface{} {
   202  	defer metrics.MeasureSince([]string{"nomad", "fsm", "node_status_update"}, time.Now())
   203  	var req structs.NodeUpdateStatusRequest
   204  	if err := structs.Decode(buf, &req); err != nil {
   205  		panic(fmt.Errorf("failed to decode request: %v", err))
   206  	}
   207  
   208  	if err := n.state.UpdateNodeStatus(index, req.NodeID, req.Status); err != nil {
   209  		n.logger.Printf("[ERR] nomad.fsm: UpdateNodeStatus failed: %v", err)
   210  		return err
   211  	}
   212  
    213  	// Unblock evals for the node's computed node class if it is in a ready
    214  	// state.
   215  	if req.Status == structs.NodeStatusReady {
   216  		ws := memdb.NewWatchSet()
   217  		node, err := n.state.NodeByID(ws, req.NodeID)
   218  		if err != nil {
   219  			n.logger.Printf("[ERR] nomad.fsm: looking up node %q failed: %v", req.NodeID, err)
   220  			return err
   221  
   222  		}
   223  		n.blockedEvals.Unblock(node.ComputedClass, index)
   224  	}
   225  
   226  	return nil
   227  }
   228  
   229  func (n *nomadFSM) applyDrainUpdate(buf []byte, index uint64) interface{} {
   230  	defer metrics.MeasureSince([]string{"nomad", "fsm", "node_drain_update"}, time.Now())
   231  	var req structs.NodeUpdateDrainRequest
   232  	if err := structs.Decode(buf, &req); err != nil {
   233  		panic(fmt.Errorf("failed to decode request: %v", err))
   234  	}
   235  
   236  	if err := n.state.UpdateNodeDrain(index, req.NodeID, req.Drain); err != nil {
   237  		n.logger.Printf("[ERR] nomad.fsm: UpdateNodeDrain failed: %v", err)
   238  		return err
   239  	}
   240  	return nil
   241  }
   242  
   243  func (n *nomadFSM) applyUpsertJob(buf []byte, index uint64) interface{} {
   244  	defer metrics.MeasureSince([]string{"nomad", "fsm", "register_job"}, time.Now())
   245  	var req structs.JobRegisterRequest
   246  	if err := structs.Decode(buf, &req); err != nil {
   247  		panic(fmt.Errorf("failed to decode request: %v", err))
   248  	}
   249  
   250  	// COMPAT: Remove in 0.6
    251  	// Empty maps and slices should be treated as nil to avoid
    252  	// unintended destructive updates in the scheduler since we use
    253  	// reflect.DeepEqual. Starting with Nomad 0.4.1, job submission sanitizes
    254  	// the incoming job.
   255  	req.Job.Canonicalize()
   256  
   257  	if err := n.state.UpsertJob(index, req.Job); err != nil {
   258  		n.logger.Printf("[ERR] nomad.fsm: UpsertJob failed: %v", err)
   259  		return err
   260  	}
   261  
    262  	// We always add the job to the periodic dispatcher because the
    263  	// periodic spec may have been removed, in which case we should stop
    264  	// tracking it.
   265  	if err := n.periodicDispatcher.Add(req.Job); err != nil {
   266  		n.logger.Printf("[ERR] nomad.fsm: periodicDispatcher.Add failed: %v", err)
   267  		return err
   268  	}
   269  
   270  	// Create a watch set
   271  	ws := memdb.NewWatchSet()
   272  
   273  	// If it is periodic, record the time it was inserted. This is necessary for
   274  	// recovering during leader election. It is possible that from the time it
    275  	// is added to when it was supposed to launch, leader election occurs and the
   276  	// job was not launched. In this case, we use the insertion time to
   277  	// determine if a launch was missed.
   278  	if req.Job.IsPeriodic() {
   279  		prevLaunch, err := n.state.PeriodicLaunchByID(ws, req.Job.ID)
   280  		if err != nil {
   281  			n.logger.Printf("[ERR] nomad.fsm: PeriodicLaunchByID failed: %v", err)
   282  			return err
   283  		}
   284  
   285  		// Record the insertion time as a launch. We overload the launch table
   286  		// such that the first entry is the insertion time.
   287  		if prevLaunch == nil {
   288  			launch := &structs.PeriodicLaunch{ID: req.Job.ID, Launch: time.Now()}
   289  			if err := n.state.UpsertPeriodicLaunch(index, launch); err != nil {
   290  				n.logger.Printf("[ERR] nomad.fsm: UpsertPeriodicLaunch failed: %v", err)
   291  				return err
   292  			}
   293  		}
   294  	}
   295  
   296  	// Check if the parent job is periodic and mark the launch time.
   297  	parentID := req.Job.ParentID
   298  	if parentID != "" {
   299  		parent, err := n.state.JobByID(ws, parentID)
   300  		if err != nil {
   301  			n.logger.Printf("[ERR] nomad.fsm: JobByID(%v) lookup for parent failed: %v", parentID, err)
   302  			return err
   303  		} else if parent == nil {
   304  			// The parent has been deregistered.
   305  			return nil
   306  		}
   307  
   308  		if parent.IsPeriodic() && !parent.IsParameterized() {
   309  			t, err := n.periodicDispatcher.LaunchTime(req.Job.ID)
   310  			if err != nil {
   311  				n.logger.Printf("[ERR] nomad.fsm: LaunchTime(%v) failed: %v", req.Job.ID, err)
   312  				return err
   313  			}
   314  
   315  			launch := &structs.PeriodicLaunch{ID: parentID, Launch: t}
   316  			if err := n.state.UpsertPeriodicLaunch(index, launch); err != nil {
   317  				n.logger.Printf("[ERR] nomad.fsm: UpsertPeriodicLaunch failed: %v", err)
   318  				return err
   319  			}
   320  		}
   321  	}
   322  
   323  	return nil
   324  }
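
        // A hedged sketch of how the recorded insertion time can be used when a new
        // leader recovers periodic jobs (store is a *state.StateStore handle, ws a
        // memdb.WatchSet, and PeriodicConfig.Next is assumed to return the next
        // launch time after the given time):
        //
        //	launch, _ := store.PeriodicLaunchByID(ws, job.ID)
        //	if job.Periodic.Next(launch.Launch).Before(time.Now()) {
        //		// A launch was missed while there was no leader; dispatch one now.
        //	}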
   325  
   326  func (n *nomadFSM) applyDeregisterJob(buf []byte, index uint64) interface{} {
   327  	defer metrics.MeasureSince([]string{"nomad", "fsm", "deregister_job"}, time.Now())
   328  	var req structs.JobDeregisterRequest
   329  	if err := structs.Decode(buf, &req); err != nil {
   330  		panic(fmt.Errorf("failed to decode request: %v", err))
   331  	}
   332  
   333  	if err := n.state.DeleteJob(index, req.JobID); err != nil {
   334  		n.logger.Printf("[ERR] nomad.fsm: DeleteJob failed: %v", err)
   335  		return err
   336  	}
   337  
   338  	if err := n.periodicDispatcher.Remove(req.JobID); err != nil {
   339  		n.logger.Printf("[ERR] nomad.fsm: periodicDispatcher.Remove failed: %v", err)
   340  		return err
   341  	}
   342  
   343  	// We always delete from the periodic launch table because it is possible that
    344  	// the job was updated to be non-periodic, thus checking if it is periodic
   345  	// doesn't ensure we clean it up properly.
    346  	if err := n.state.DeletePeriodicLaunch(index, req.JobID); err != nil {
        		n.logger.Printf("[ERR] nomad.fsm: DeletePeriodicLaunch failed: %v", err)
        		return err
        	}
   347  
   348  	return nil
   349  }
   350  
   351  func (n *nomadFSM) applyUpdateEval(buf []byte, index uint64) interface{} {
   352  	defer metrics.MeasureSince([]string{"nomad", "fsm", "update_eval"}, time.Now())
   353  	var req structs.EvalUpdateRequest
   354  	if err := structs.Decode(buf, &req); err != nil {
   355  		panic(fmt.Errorf("failed to decode request: %v", err))
   356  	}
   357  
   358  	if err := n.state.UpsertEvals(index, req.Evals); err != nil {
   359  		n.logger.Printf("[ERR] nomad.fsm: UpsertEvals failed: %v", err)
   360  		return err
   361  	}
   362  
   363  	for _, eval := range req.Evals {
   364  		if eval.ShouldEnqueue() {
   365  			n.evalBroker.Enqueue(eval)
   366  		} else if eval.ShouldBlock() {
   367  			n.blockedEvals.Block(eval)
   368  		} else if eval.Status == structs.EvalStatusComplete &&
   369  			len(eval.FailedTGAllocs) == 0 {
   370  			// If we have a successful evaluation for a node, untrack any
   371  			// blocked evaluation
   372  			n.blockedEvals.Untrack(eval.JobID)
   373  		}
   374  	}
   375  	return nil
   376  }
   377  
   378  func (n *nomadFSM) applyDeleteEval(buf []byte, index uint64) interface{} {
   379  	defer metrics.MeasureSince([]string{"nomad", "fsm", "delete_eval"}, time.Now())
   380  	var req structs.EvalDeleteRequest
   381  	if err := structs.Decode(buf, &req); err != nil {
   382  		panic(fmt.Errorf("failed to decode request: %v", err))
   383  	}
   384  
   385  	if err := n.state.DeleteEval(index, req.Evals, req.Allocs); err != nil {
   386  		n.logger.Printf("[ERR] nomad.fsm: DeleteEval failed: %v", err)
   387  		return err
   388  	}
   389  	return nil
   390  }
   391  
   392  func (n *nomadFSM) applyAllocUpdate(buf []byte, index uint64) interface{} {
   393  	defer metrics.MeasureSince([]string{"nomad", "fsm", "alloc_update"}, time.Now())
   394  	var req structs.AllocUpdateRequest
   395  	if err := structs.Decode(buf, &req); err != nil {
   396  		panic(fmt.Errorf("failed to decode request: %v", err))
   397  	}
   398  
   399  	// Attach the job to all the allocations. It is pulled out in the
   400  	// payload to avoid the redundancy of encoding, but should be denormalized
   401  	// prior to being inserted into MemDB.
   402  	if j := req.Job; j != nil {
   403  		for _, alloc := range req.Alloc {
   404  			if alloc.Job == nil && !alloc.TerminalStatus() {
   405  				alloc.Job = j
   406  			}
   407  		}
   408  	}
   409  
   410  	// Calculate the total resources of allocations. It is pulled out in the
   411  	// payload to avoid encoding something that can be computed, but should be
   412  	// denormalized prior to being inserted into MemDB.
   413  	for _, alloc := range req.Alloc {
   414  		if alloc.Resources != nil {
   415  			// COMPAT 0.4.1 -> 0.5
   416  			// Set the shared resources for allocations which don't have them
   417  			if alloc.SharedResources == nil {
   418  				alloc.SharedResources = &structs.Resources{
   419  					DiskMB: alloc.Resources.DiskMB,
   420  				}
   421  			}
   422  
   423  			continue
   424  		}
   425  
   426  		alloc.Resources = new(structs.Resources)
   427  		for _, task := range alloc.TaskResources {
   428  			alloc.Resources.Add(task)
   429  		}
   430  
   431  		// Add the shared resources
   432  		alloc.Resources.Add(alloc.SharedResources)
   433  	}
   434  
   435  	if err := n.state.UpsertAllocs(index, req.Alloc); err != nil {
   436  		n.logger.Printf("[ERR] nomad.fsm: UpsertAllocs failed: %v", err)
   437  		return err
   438  	}
   439  	return nil
   440  }
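
        // A worked example of the denormalization above (hedged; Resources.Add is
        // assumed to sum the counted fields of structs.Resources):
        //
        //	TaskResources: "web" {CPU: 500, MemoryMB: 256}, "log" {CPU: 100, MemoryMB: 64}
        //	SharedResources: {DiskMB: 300}
        //	resulting alloc.Resources: {CPU: 600, MemoryMB: 320, DiskMB: 300}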
   441  
   442  func (n *nomadFSM) applyAllocClientUpdate(buf []byte, index uint64) interface{} {
   443  	defer metrics.MeasureSince([]string{"nomad", "fsm", "alloc_client_update"}, time.Now())
   444  	var req structs.AllocUpdateRequest
   445  	if err := structs.Decode(buf, &req); err != nil {
   446  		panic(fmt.Errorf("failed to decode request: %v", err))
   447  	}
   448  	if len(req.Alloc) == 0 {
   449  		return nil
   450  	}
   451  
   452  	// Create a watch set
   453  	ws := memdb.NewWatchSet()
   454  
    455  	// Update the allocs with the job ID and task group name
   456  	for _, alloc := range req.Alloc {
   457  		if existing, _ := n.state.AllocByID(ws, alloc.ID); existing != nil {
   458  			alloc.JobID = existing.JobID
   459  			alloc.TaskGroup = existing.TaskGroup
   460  		}
   461  	}
   462  
   463  	// Update all the client allocations
   464  	if err := n.state.UpdateAllocsFromClient(index, req.Alloc); err != nil {
    465  		n.logger.Printf("[ERR] nomad.fsm: UpdateAllocsFromClient failed: %v", err)
   466  		return err
   467  	}
   468  
    469  	// Unblock evals for the node's computed node class if the client has
    470  	// finished running an allocation.
   471  	for _, alloc := range req.Alloc {
   472  		if alloc.ClientStatus == structs.AllocClientStatusComplete ||
   473  			alloc.ClientStatus == structs.AllocClientStatusFailed {
   474  			nodeID := alloc.NodeID
   475  			node, err := n.state.NodeByID(ws, nodeID)
    476  			if err != nil {
    477  				n.logger.Printf("[ERR] nomad.fsm: looking up node %q failed: %v", nodeID, err)
    478  				return err
    479  			} else if node == nil {
    480  				continue // node has been deregistered; nothing to unblock
        			}
   481  			n.blockedEvals.Unblock(node.ComputedClass, index)
   482  		}
   483  	}
   484  
   485  	return nil
   486  }
   487  
   488  // applyReconcileSummaries reconciles summaries for all the jobs
   489  func (n *nomadFSM) applyReconcileSummaries(buf []byte, index uint64) interface{} {
   490  	if err := n.state.ReconcileJobSummaries(index); err != nil {
   491  		return err
   492  	}
   493  	return n.reconcileQueuedAllocations(index)
   494  }
   495  
   496  // applyUpsertVaultAccessor stores the Vault accessors for a given allocation
   497  // and task
   498  func (n *nomadFSM) applyUpsertVaultAccessor(buf []byte, index uint64) interface{} {
   499  	defer metrics.MeasureSince([]string{"nomad", "fsm", "upsert_vault_accessor"}, time.Now())
   500  	var req structs.VaultAccessorsRequest
   501  	if err := structs.Decode(buf, &req); err != nil {
   502  		panic(fmt.Errorf("failed to decode request: %v", err))
   503  	}
   504  
   505  	if err := n.state.UpsertVaultAccessor(index, req.Accessors); err != nil {
   506  		n.logger.Printf("[ERR] nomad.fsm: UpsertVaultAccessor failed: %v", err)
   507  		return err
   508  	}
   509  
   510  	return nil
   511  }
   512  
   513  // applyDeregisterVaultAccessor deregisters a set of Vault accessors
   514  func (n *nomadFSM) applyDeregisterVaultAccessor(buf []byte, index uint64) interface{} {
   515  	defer metrics.MeasureSince([]string{"nomad", "fsm", "deregister_vault_accessor"}, time.Now())
   516  	var req structs.VaultAccessorsRequest
   517  	if err := structs.Decode(buf, &req); err != nil {
   518  		panic(fmt.Errorf("failed to decode request: %v", err))
   519  	}
   520  
   521  	if err := n.state.DeleteVaultAccessors(index, req.Accessors); err != nil {
   522  		n.logger.Printf("[ERR] nomad.fsm: DeregisterVaultAccessor failed: %v", err)
   523  		return err
   524  	}
   525  
   526  	return nil
   527  }
   528  
   529  func (n *nomadFSM) Snapshot() (raft.FSMSnapshot, error) {
   530  	// Create a new snapshot
   531  	snap, err := n.state.Snapshot()
   532  	if err != nil {
   533  		return nil, err
   534  	}
   535  
   536  	ns := &nomadSnapshot{
   537  		snap:      snap,
   538  		timetable: n.timetable,
   539  	}
   540  	return ns, nil
   541  }
   542  
   543  func (n *nomadFSM) Restore(old io.ReadCloser) error {
   544  	defer old.Close()
   545  
   546  	// Create a new state store
   547  	newState, err := state.NewStateStore(n.logOutput)
   548  	if err != nil {
   549  		return err
   550  	}
   551  
   552  	// Start the state restore
   553  	restore, err := newState.Restore()
   554  	if err != nil {
   555  		return err
   556  	}
   557  	defer restore.Abort()
   558  
   559  	// Create a decoder
   560  	dec := codec.NewDecoder(old, structs.MsgpackHandle)
   561  
   562  	// Read in the header
   563  	var header snapshotHeader
   564  	if err := dec.Decode(&header); err != nil {
   565  		return err
   566  	}
   567  
   568  	// Populate the new state
   569  	msgType := make([]byte, 1)
   570  	for {
   571  		// Read the message type
   572  		_, err := old.Read(msgType)
   573  		if err == io.EOF {
   574  			break
   575  		} else if err != nil {
   576  			return err
   577  		}
   578  
   579  		// Decode
   580  		switch SnapshotType(msgType[0]) {
   581  		case TimeTableSnapshot:
   582  			if err := n.timetable.Deserialize(dec); err != nil {
   583  				return fmt.Errorf("time table deserialize failed: %v", err)
   584  			}
   585  
   586  		case NodeSnapshot:
   587  			node := new(structs.Node)
   588  			if err := dec.Decode(node); err != nil {
   589  				return err
   590  			}
   591  			if err := restore.NodeRestore(node); err != nil {
   592  				return err
   593  			}
   594  
   595  		case JobSnapshot:
   596  			job := new(structs.Job)
   597  			if err := dec.Decode(job); err != nil {
   598  				return err
   599  			}
   600  
   601  			// COMPAT: Remove in 0.5
    602  			// Empty maps and slices should be treated as nil to avoid
    603  			// unintended destructive updates in the scheduler since we use
    604  			// reflect.DeepEqual. Starting with Nomad 0.4.1, job submission sanitizes
    605  			// the incoming job.
   606  			job.Canonicalize()
   607  
   608  			if err := restore.JobRestore(job); err != nil {
   609  				return err
   610  			}
   611  
   612  		case EvalSnapshot:
   613  			eval := new(structs.Evaluation)
   614  			if err := dec.Decode(eval); err != nil {
   615  				return err
   616  			}
   617  			if err := restore.EvalRestore(eval); err != nil {
   618  				return err
   619  			}
   620  
   621  		case AllocSnapshot:
   622  			alloc := new(structs.Allocation)
   623  			if err := dec.Decode(alloc); err != nil {
   624  				return err
   625  			}
   626  			if err := restore.AllocRestore(alloc); err != nil {
   627  				return err
   628  			}
   629  
   630  		case IndexSnapshot:
   631  			idx := new(state.IndexEntry)
   632  			if err := dec.Decode(idx); err != nil {
   633  				return err
   634  			}
   635  			if err := restore.IndexRestore(idx); err != nil {
   636  				return err
   637  			}
   638  
   639  		case PeriodicLaunchSnapshot:
   640  			launch := new(structs.PeriodicLaunch)
   641  			if err := dec.Decode(launch); err != nil {
   642  				return err
   643  			}
   644  			if err := restore.PeriodicLaunchRestore(launch); err != nil {
   645  				return err
   646  			}
   647  
   648  		case JobSummarySnapshot:
   649  			summary := new(structs.JobSummary)
   650  			if err := dec.Decode(summary); err != nil {
   651  				return err
   652  			}
   653  			if err := restore.JobSummaryRestore(summary); err != nil {
   654  				return err
   655  			}
   656  
   657  		case VaultAccessorSnapshot:
   658  			accessor := new(structs.VaultAccessor)
   659  			if err := dec.Decode(accessor); err != nil {
   660  				return err
   661  			}
   662  			if err := restore.VaultAccessorRestore(accessor); err != nil {
   663  				return err
   664  			}
   665  
   666  		default:
    667  			return fmt.Errorf("unrecognized snapshot type: %v", msgType[0])
   668  		}
   669  	}
   670  
   671  	restore.Commit()
   672  
   673  	// Create Job Summaries
   674  	// COMPAT 0.4 -> 0.4.1
   675  	// We can remove this in 0.5. This exists so that the server creates job
   676  	// summaries if they were not present previously. When users upgrade to 0.5
   677  	// from 0.4.1, the snapshot will contain job summaries so it will be safe to
   678  	// remove this block.
   679  	index, err := newState.Index("job_summary")
   680  	if err != nil {
   681  		return fmt.Errorf("couldn't fetch index of job summary table: %v", err)
   682  	}
   683  
    684  	// If the index is 0, there are no job summaries in the snapshot, so
    685  	// we will have to create them.
   686  	if index == 0 {
   687  		// query the latest index
   688  		latestIndex, err := newState.LatestIndex()
   689  		if err != nil {
    690  			return fmt.Errorf("unable to query latest index: %v", err)
   691  		}
   692  		if err := newState.ReconcileJobSummaries(latestIndex); err != nil {
   693  			return fmt.Errorf("error reconciling summaries: %v", err)
   694  		}
   695  	}
   696  
   697  	// External code might be calling State(), so we need to synchronize
   698  	// here to make sure we swap in the new state store atomically.
   699  	n.stateLock.Lock()
   700  	stateOld := n.state
   701  	n.state = newState
   702  	n.stateLock.Unlock()
   703  
   704  	// Signal that the old state store has been abandoned. This is required
   705  	// because we don't operate on it any more, we just throw it away, so
   706  	// blocking queries won't see any changes and need to be woken up.
   707  	stateOld.Abandon()
   708  
   709  	return nil
   710  }
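
        // Why the old store is abandoned, as a hedged sketch: blocking queries are
        // assumed to add the store's abandon channel to their memdb watch set, so
        // closing it wakes them and they re-run against the restored fsm.State()
        // (stateOld stands for the handle such a query captured before the restore):
        //
        //	ws := memdb.NewWatchSet()
        //	ws.Add(stateOld.AbandonCh())
        //	// ws.Watch(timeoutCh) returns once Abandon() closes the channel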
   711  
    712  // reconcileQueuedAllocations re-calculates the queued allocations for every
    713  // job for which we created a job summary during the snapshot restore
   714  func (n *nomadFSM) reconcileQueuedAllocations(index uint64) error {
   715  	// Get all the jobs
   716  	ws := memdb.NewWatchSet()
   717  	iter, err := n.state.Jobs(ws)
   718  	if err != nil {
   719  		return err
   720  	}
   721  
   722  	snap, err := n.state.Snapshot()
   723  	if err != nil {
   724  		return fmt.Errorf("unable to create snapshot: %v", err)
   725  	}
   726  
    727  	// Invoke the scheduler for every job so that we can populate the number
    728  	// of queued allocations for every job
   729  	for {
   730  		rawJob := iter.Next()
   731  		if rawJob == nil {
   732  			break
   733  		}
   734  		job := rawJob.(*structs.Job)
   735  		planner := &scheduler.Harness{
   736  			State: &snap.StateStore,
   737  		}
    738  		// Create an eval and mark it as requiring annotations
   739  		eval := &structs.Evaluation{
   740  			ID:             structs.GenerateUUID(),
   741  			Priority:       job.Priority,
   742  			Type:           job.Type,
   743  			TriggeredBy:    structs.EvalTriggerJobRegister,
   744  			JobID:          job.ID,
   745  			JobModifyIndex: job.JobModifyIndex + 1,
   746  			Status:         structs.EvalStatusPending,
   747  			AnnotatePlan:   true,
   748  		}
   749  
   750  		// Create the scheduler and run it
   751  		sched, err := scheduler.NewScheduler(eval.Type, n.logger, snap, planner)
   752  		if err != nil {
   753  			return err
   754  		}
   755  
   756  		if err := sched.Process(eval); err != nil {
   757  			return err
   758  		}
   759  
   760  		// Get the job summary from the fsm state store
   761  		originalSummary, err := n.state.JobSummaryByID(ws, job.ID)
   762  		if err != nil {
   763  			return err
   764  		}
   765  		summary := originalSummary.Copy()
   766  
    767  		// Add the allocations the scheduler has made to the queued count since
    768  		// these allocations will never be placed until the scheduler is invoked
    769  		// with a real planner
   770  		if l := len(planner.Plans); l != 1 {
   771  			return fmt.Errorf("unexpected number of plans during restore %d. Please file an issue including the logs", l)
   772  		}
   773  		for _, allocations := range planner.Plans[0].NodeAllocation {
   774  			for _, allocation := range allocations {
   775  				tgSummary, ok := summary.Summary[allocation.TaskGroup]
   776  				if !ok {
   777  					return fmt.Errorf("task group %q not found while updating queued count", allocation.TaskGroup)
   778  				}
   779  				tgSummary.Queued += 1
   780  				summary.Summary[allocation.TaskGroup] = tgSummary
   781  			}
   782  		}
   783  
   784  		// Add the queued allocations attached to the evaluation to the queued
   785  		// counter of the job summary
   786  		if l := len(planner.Evals); l != 1 {
   787  			return fmt.Errorf("unexpected number of evals during restore %d. Please file an issue including the logs", l)
   788  		}
   789  		for tg, queued := range planner.Evals[0].QueuedAllocations {
   790  			tgSummary, ok := summary.Summary[tg]
   791  			if !ok {
   792  				return fmt.Errorf("task group %q not found while updating queued count", tg)
   793  			}
   794  
   795  			// We add instead of setting here because we want to take into
   796  			// consideration what the scheduler with a mock planner thinks it
   797  			// placed. Those should be counted as queued as well
   798  			tgSummary.Queued += queued
   799  			summary.Summary[tg] = tgSummary
   800  		}
   801  
   802  		if !reflect.DeepEqual(summary, originalSummary) {
   803  			summary.ModifyIndex = index
   804  			if err := n.state.UpsertJobSummary(index, summary); err != nil {
   805  				return err
   806  			}
   807  		}
   808  	}
   809  	return nil
   810  }
   811  
   812  func (s *nomadSnapshot) Persist(sink raft.SnapshotSink) error {
   813  	defer metrics.MeasureSince([]string{"nomad", "fsm", "persist"}, time.Now())
    814  	// Create an encoder to stream records to the sink
   815  	encoder := codec.NewEncoder(sink, structs.MsgpackHandle)
   816  
   817  	// Write the header
   818  	header := snapshotHeader{}
   819  	if err := encoder.Encode(&header); err != nil {
   820  		sink.Cancel()
   821  		return err
   822  	}
   823  
   824  	// Write the time table
   825  	sink.Write([]byte{byte(TimeTableSnapshot)})
   826  	if err := s.timetable.Serialize(encoder); err != nil {
   827  		sink.Cancel()
   828  		return err
   829  	}
   830  
   831  	// Write all the data out
   832  	if err := s.persistIndexes(sink, encoder); err != nil {
   833  		sink.Cancel()
   834  		return err
   835  	}
   836  	if err := s.persistNodes(sink, encoder); err != nil {
   837  		sink.Cancel()
   838  		return err
   839  	}
   840  	if err := s.persistJobs(sink, encoder); err != nil {
   841  		sink.Cancel()
   842  		return err
   843  	}
   844  	if err := s.persistEvals(sink, encoder); err != nil {
   845  		sink.Cancel()
   846  		return err
   847  	}
   848  	if err := s.persistAllocs(sink, encoder); err != nil {
   849  		sink.Cancel()
   850  		return err
   851  	}
   852  	if err := s.persistPeriodicLaunches(sink, encoder); err != nil {
   853  		sink.Cancel()
   854  		return err
   855  	}
   856  	if err := s.persistJobSummaries(sink, encoder); err != nil {
   857  		sink.Cancel()
   858  		return err
   859  	}
   860  	if err := s.persistVaultAccessors(sink, encoder); err != nil {
   861  		sink.Cancel()
   862  		return err
   863  	}
   864  	return nil
   865  }
   866  
   867  func (s *nomadSnapshot) persistIndexes(sink raft.SnapshotSink,
   868  	encoder *codec.Encoder) error {
   869  	// Get all the indexes
   870  	iter, err := s.snap.Indexes()
   871  	if err != nil {
   872  		return err
   873  	}
   874  
   875  	for {
   876  		// Get the next item
   877  		raw := iter.Next()
   878  		if raw == nil {
   879  			break
   880  		}
   881  
   882  		// Prepare the request struct
   883  		idx := raw.(*state.IndexEntry)
   884  
    885  		// Write out the index entry
   886  		sink.Write([]byte{byte(IndexSnapshot)})
   887  		if err := encoder.Encode(idx); err != nil {
   888  			return err
   889  		}
   890  	}
   891  	return nil
   892  }
   893  
   894  func (s *nomadSnapshot) persistNodes(sink raft.SnapshotSink,
   895  	encoder *codec.Encoder) error {
   896  	// Get all the nodes
   897  	ws := memdb.NewWatchSet()
   898  	nodes, err := s.snap.Nodes(ws)
   899  	if err != nil {
   900  		return err
   901  	}
   902  
   903  	for {
   904  		// Get the next item
   905  		raw := nodes.Next()
   906  		if raw == nil {
   907  			break
   908  		}
   909  
   910  		// Prepare the request struct
   911  		node := raw.(*structs.Node)
   912  
   913  		// Write out a node registration
   914  		sink.Write([]byte{byte(NodeSnapshot)})
   915  		if err := encoder.Encode(node); err != nil {
   916  			return err
   917  		}
   918  	}
   919  	return nil
   920  }
   921  
   922  func (s *nomadSnapshot) persistJobs(sink raft.SnapshotSink,
   923  	encoder *codec.Encoder) error {
   924  	// Get all the jobs
   925  	ws := memdb.NewWatchSet()
   926  	jobs, err := s.snap.Jobs(ws)
   927  	if err != nil {
   928  		return err
   929  	}
   930  
   931  	for {
   932  		// Get the next item
   933  		raw := jobs.Next()
   934  		if raw == nil {
   935  			break
   936  		}
   937  
   938  		// Prepare the request struct
   939  		job := raw.(*structs.Job)
   940  
   941  		// Write out a job registration
   942  		sink.Write([]byte{byte(JobSnapshot)})
   943  		if err := encoder.Encode(job); err != nil {
   944  			return err
   945  		}
   946  	}
   947  	return nil
   948  }
   949  
   950  func (s *nomadSnapshot) persistEvals(sink raft.SnapshotSink,
   951  	encoder *codec.Encoder) error {
   952  	// Get all the evaluations
   953  	ws := memdb.NewWatchSet()
   954  	evals, err := s.snap.Evals(ws)
   955  	if err != nil {
   956  		return err
   957  	}
   958  
   959  	for {
   960  		// Get the next item
   961  		raw := evals.Next()
   962  		if raw == nil {
   963  			break
   964  		}
   965  
   966  		// Prepare the request struct
   967  		eval := raw.(*structs.Evaluation)
   968  
   969  		// Write out the evaluation
   970  		sink.Write([]byte{byte(EvalSnapshot)})
   971  		if err := encoder.Encode(eval); err != nil {
   972  			return err
   973  		}
   974  	}
   975  	return nil
   976  }
   977  
   978  func (s *nomadSnapshot) persistAllocs(sink raft.SnapshotSink,
   979  	encoder *codec.Encoder) error {
   980  	// Get all the allocations
   981  	ws := memdb.NewWatchSet()
   982  	allocs, err := s.snap.Allocs(ws)
   983  	if err != nil {
   984  		return err
   985  	}
   986  
   987  	for {
   988  		// Get the next item
   989  		raw := allocs.Next()
   990  		if raw == nil {
   991  			break
   992  		}
   993  
   994  		// Prepare the request struct
   995  		alloc := raw.(*structs.Allocation)
   996  
    997  		// Write out the allocation
   998  		sink.Write([]byte{byte(AllocSnapshot)})
   999  		if err := encoder.Encode(alloc); err != nil {
  1000  			return err
  1001  		}
  1002  	}
  1003  	return nil
  1004  }
  1005  
  1006  func (s *nomadSnapshot) persistPeriodicLaunches(sink raft.SnapshotSink,
  1007  	encoder *codec.Encoder) error {
   1008  	// Get all the periodic launches
  1009  	ws := memdb.NewWatchSet()
  1010  	launches, err := s.snap.PeriodicLaunches(ws)
  1011  	if err != nil {
  1012  		return err
  1013  	}
  1014  
  1015  	for {
  1016  		// Get the next item
  1017  		raw := launches.Next()
  1018  		if raw == nil {
  1019  			break
  1020  		}
  1021  
  1022  		// Prepare the request struct
  1023  		launch := raw.(*structs.PeriodicLaunch)
  1024  
   1025  		// Write out the periodic launch
  1026  		sink.Write([]byte{byte(PeriodicLaunchSnapshot)})
  1027  		if err := encoder.Encode(launch); err != nil {
  1028  			return err
  1029  		}
  1030  	}
  1031  	return nil
  1032  }
  1033  
  1034  func (s *nomadSnapshot) persistJobSummaries(sink raft.SnapshotSink,
  1035  	encoder *codec.Encoder) error {
  1036  
  1037  	ws := memdb.NewWatchSet()
  1038  	summaries, err := s.snap.JobSummaries(ws)
  1039  	if err != nil {
  1040  		return err
  1041  	}
  1042  
  1043  	for {
  1044  		raw := summaries.Next()
  1045  		if raw == nil {
  1046  			break
  1047  		}
  1048  
  1049  		jobSummary := raw.(*structs.JobSummary)
  1050  
  1051  		sink.Write([]byte{byte(JobSummarySnapshot)})
  1052  		if err := encoder.Encode(jobSummary); err != nil {
  1053  			return err
  1054  		}
  1055  	}
  1056  	return nil
  1057  }
  1058  
  1059  func (s *nomadSnapshot) persistVaultAccessors(sink raft.SnapshotSink,
  1060  	encoder *codec.Encoder) error {
  1061  
  1062  	ws := memdb.NewWatchSet()
  1063  	accessors, err := s.snap.VaultAccessors(ws)
  1064  	if err != nil {
  1065  		return err
  1066  	}
  1067  
  1068  	for {
  1069  		raw := accessors.Next()
  1070  		if raw == nil {
  1071  			break
  1072  		}
  1073  
  1074  		accessor := raw.(*structs.VaultAccessor)
  1075  
  1076  		sink.Write([]byte{byte(VaultAccessorSnapshot)})
  1077  		if err := encoder.Encode(accessor); err != nil {
  1078  			return err
  1079  		}
  1080  	}
  1081  	return nil
  1082  }
  1083  
  1084  // Release is a no-op, as we just need to GC the pointer
  1085  // to the state store snapshot. There is nothing to explicitly
  1086  // cleanup.
  1087  func (s *nomadSnapshot) Release() {}