github.com/hspak/nomad@v0.7.2-0.20180309000617-bc4ae22a39a5/nomad/fsm.go

     1  package nomad
     2  
     3  import (
     4  	"fmt"
     5  	"io"
     6  	"log"
     7  	"reflect"
     8  	"sync"
     9  	"time"
    10  
    11  	"github.com/armon/go-metrics"
    12  	memdb "github.com/hashicorp/go-memdb"
    13  	"github.com/hashicorp/nomad/helper/uuid"
    14  	"github.com/hashicorp/nomad/nomad/state"
    15  	"github.com/hashicorp/nomad/nomad/structs"
    16  	"github.com/hashicorp/nomad/scheduler"
    17  	"github.com/hashicorp/raft"
    18  	"github.com/ugorji/go/codec"
    19  )
    20  
    21  const (
    22  	// timeTableGranularity is the granularity of index to time tracking
    23  	timeTableGranularity = 5 * time.Minute
    24  
    25  	// timeTableLimit is the maximum amount of time we track
    26  	timeTableLimit = 72 * time.Hour
    27  )
    28  
    29  // SnapshotType is prefixed to a record in the FSM snapshot
    30  // so that we can determine the type for restore
    31  type SnapshotType byte
    32  
    33  const (
    34  	NodeSnapshot SnapshotType = iota
    35  	JobSnapshot
    36  	IndexSnapshot
    37  	EvalSnapshot
    38  	AllocSnapshot
    39  	TimeTableSnapshot
    40  	PeriodicLaunchSnapshot
    41  	JobSummarySnapshot
    42  	VaultAccessorSnapshot
    43  	JobVersionSnapshot
    44  	DeploymentSnapshot
    45  	ACLPolicySnapshot
    46  	ACLTokenSnapshot
    47  )
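        // A snapshot is a stream of records, each written as a one-byte
        // SnapshotType tag followed by the msgpack-encoded object; Persist
        // below emits records this way and Restore consumes them, e.g.:
        //
        //	sink.Write([]byte{byte(NodeSnapshot)})
        //	encoder.Encode(node)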
    48  
    49  // LogApplier is the definition of a function that can apply a Raft log
    50  type LogApplier func(buf []byte, index uint64) interface{}
    51  
    52  // LogAppliers is a mapping of the Raft MessageType to the appropriate log
    53  // applier
    54  type LogAppliers map[structs.MessageType]LogApplier
    55  
    56  // SnapshotRestorer is the definition of a function that can restore a snapshot
    57  type SnapshotRestorer func(restore *state.StateRestore, dec *codec.Decoder) error
    58  
    59  // SnapshotRestorers is a mapping of the SnapshotType to the appropriate
    60  // snapshot restorer.
    61  type SnapshotRestorers map[SnapshotType]SnapshotRestorer
    62  
    63  // nomadFSM implements a finite state machine that is used
    64  // along with Raft to provide strong consistency. We implement
    65  // this outside the Server to avoid exposing it outside the package.
    66  type nomadFSM struct {
    67  	evalBroker         *EvalBroker
    68  	blockedEvals       *BlockedEvals
    69  	periodicDispatcher *PeriodicDispatch
    70  	logger             *log.Logger
    71  	state              *state.StateStore
    72  	timetable          *TimeTable
    73  
    74  	// config is the FSM config
    75  	config *FSMConfig
    76  
    77  	// enterpriseAppliers holds the set of enterprise only LogAppliers
    78  	enterpriseAppliers LogAppliers
    79  
    80  	// enterpriseRestorers holds the set of enterprise only snapshot restorers
    81  	enterpriseRestorers SnapshotRestorers
    82  
    83  	// stateLock is only used to protect outside callers to State() from
    84  	// racing with Restore(), which is called by Raft (it puts in a totally
    85  	// new state store). Everything internal here is synchronized by the
    86  	// Raft side, so it doesn't need to lock this.
    87  	stateLock sync.RWMutex
    88  }
    89  
    90  // nomadSnapshot is used to provide a snapshot of the current
    91  // state in a way that can be accessed concurrently with operations
    92  // that may modify the live state.
    93  type nomadSnapshot struct {
    94  	snap      *state.StateSnapshot
    95  	timetable *TimeTable
    96  }
    97  
    98  // snapshotHeader is the first entry in our snapshot
    99  type snapshotHeader struct {
   100  }
   101  
   102  // FSMConfig is used to configure the FSM
   103  type FSMConfig struct {
   104  	// EvalBroker is the evaluation broker evaluations should be added to
   105  	EvalBroker *EvalBroker
   106  
   107  	// Periodic is the periodic job dispatcher that periodic jobs should be
   108  	// added/removed from
   109  	Periodic *PeriodicDispatch
   110  
   111  	// BlockedEvals is the blocked eval tracker that blocked evaluations should
   112  	// be added to.
   113  	Blocked *BlockedEvals
   114  
   115  	// LogOutput is the writer logs should be written to
   116  	LogOutput io.Writer
   117  
   118  	// Region is the region of the server embedding the FSM
   119  	Region string
   120  }
   121  
   122  // NewFSM is used to construct a new FSM with a blank state
   123  func NewFSM(config *FSMConfig) (*nomadFSM, error) {
   124  	// Create a state store
   125  	sconfig := &state.StateStoreConfig{
   126  		LogOutput: config.LogOutput,
   127  		Region:    config.Region,
   128  	}
   129  	state, err := state.NewStateStore(sconfig)
   130  	if err != nil {
   131  		return nil, err
   132  	}
   133  
   134  	fsm := &nomadFSM{
   135  		evalBroker:          config.EvalBroker,
   136  		periodicDispatcher:  config.Periodic,
   137  		blockedEvals:        config.Blocked,
   138  		logger:              log.New(config.LogOutput, "", log.LstdFlags),
   139  		config:              config,
   140  		state:               state,
   141  		timetable:           NewTimeTable(timeTableGranularity, timeTableLimit),
   142  		enterpriseAppliers:  make(map[structs.MessageType]LogApplier, 8),
   143  		enterpriseRestorers: make(map[SnapshotType]SnapshotRestorer, 8),
   144  	}
   145  
   146  	// Register all the log applier functions
   147  	fsm.registerLogAppliers()
   148  
   149  	// Register all the snapshot restorer functions
   150  	fsm.registerSnapshotRestorers()
   151  
   152  	return fsm, nil
   153  }
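        // A minimal construction sketch (illustrative only; the broker,
        // dispatcher, and blocked tracker are assumed to come from the
        // enclosing Server):
        //
        //	fsm, err := NewFSM(&FSMConfig{
        //		EvalBroker: broker,
        //		Periodic:   dispatcher,
        //		Blocked:    blocked,
        //		LogOutput:  os.Stderr,
        //		Region:     "global",
        //	})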
   154  
   155  // Close is used to cleanup resources associated with the FSM
   156  func (n *nomadFSM) Close() error {
   157  	return nil
   158  }
   159  
   160  // State is used to return a handle to the current state
   161  func (n *nomadFSM) State() *state.StateStore {
   162  	n.stateLock.RLock()
   163  	defer n.stateLock.RUnlock()
   164  	return n.state
   165  }
   166  
   167  // TimeTable returns the time table of transactions
   168  func (n *nomadFSM) TimeTable() *TimeTable {
   169  	return n.timetable
   170  }
   171  
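        // Apply is invoked by Raft once a log entry is committed. The first byte
        // of the entry's data encodes the structs.MessageType; the remainder is
        // the msgpack-encoded request for the matching applier.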
   172  func (n *nomadFSM) Apply(log *raft.Log) interface{} {
   173  	buf := log.Data
   174  	msgType := structs.MessageType(buf[0])
   175  
   176  	// Witness this write
   177  	n.timetable.Witness(log.Index, time.Now().UTC())
   178  
   179  	// Check if this message type should be ignored when unknown. This lets
   180  	// new commands be added while giving the developer control over whether
   181  	// older versions should safely ignore the command or crash.
   182  	ignoreUnknown := false
   183  	if msgType&structs.IgnoreUnknownTypeFlag == structs.IgnoreUnknownTypeFlag {
   184  		msgType &= ^structs.IgnoreUnknownTypeFlag
   185  		ignoreUnknown = true
   186  	}
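        	// (The sender side marks a command ignorable by OR-ing the flag into
        	// the type byte, e.g. byte(newType | structs.IgnoreUnknownTypeFlag);
        	// newType is a hypothetical message type used for illustration.)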
   187  
   188  	switch msgType {
   189  	case structs.NodeRegisterRequestType:
   190  		return n.applyUpsertNode(buf[1:], log.Index)
   191  	case structs.NodeDeregisterRequestType:
   192  		return n.applyDeregisterNode(buf[1:], log.Index)
   193  	case structs.NodeUpdateStatusRequestType:
   194  		return n.applyStatusUpdate(buf[1:], log.Index)
   195  	case structs.NodeUpdateDrainRequestType:
   196  		return n.applyDrainUpdate(buf[1:], log.Index)
   197  	case structs.JobRegisterRequestType:
   198  		return n.applyUpsertJob(buf[1:], log.Index)
   199  	case structs.JobDeregisterRequestType:
   200  		return n.applyDeregisterJob(buf[1:], log.Index)
   201  	case structs.EvalUpdateRequestType:
   202  		return n.applyUpdateEval(buf[1:], log.Index)
   203  	case structs.EvalDeleteRequestType:
   204  		return n.applyDeleteEval(buf[1:], log.Index)
   205  	case structs.AllocUpdateRequestType:
   206  		return n.applyAllocUpdate(buf[1:], log.Index)
   207  	case structs.AllocClientUpdateRequestType:
   208  		return n.applyAllocClientUpdate(buf[1:], log.Index)
   209  	case structs.ReconcileJobSummariesRequestType:
   210  		return n.applyReconcileSummaries(buf[1:], log.Index)
   211  	case structs.VaultAccessorRegisterRequestType:
   212  		return n.applyUpsertVaultAccessor(buf[1:], log.Index)
   213  	case structs.VaultAccessorDegisterRequestType:
   214  		return n.applyDeregisterVaultAccessor(buf[1:], log.Index)
   215  	case structs.ApplyPlanResultsRequestType:
   216  		return n.applyPlanResults(buf[1:], log.Index)
   217  	case structs.DeploymentStatusUpdateRequestType:
   218  		return n.applyDeploymentStatusUpdate(buf[1:], log.Index)
   219  	case structs.DeploymentPromoteRequestType:
   220  		return n.applyDeploymentPromotion(buf[1:], log.Index)
   221  	case structs.DeploymentAllocHealthRequestType:
   222  		return n.applyDeploymentAllocHealth(buf[1:], log.Index)
   223  	case structs.DeploymentDeleteRequestType:
   224  		return n.applyDeploymentDelete(buf[1:], log.Index)
   225  	case structs.JobStabilityRequestType:
   226  		return n.applyJobStability(buf[1:], log.Index)
   227  	case structs.ACLPolicyUpsertRequestType:
   228  		return n.applyACLPolicyUpsert(buf[1:], log.Index)
   229  	case structs.ACLPolicyDeleteRequestType:
   230  		return n.applyACLPolicyDelete(buf[1:], log.Index)
   231  	case structs.ACLTokenUpsertRequestType:
   232  		return n.applyACLTokenUpsert(buf[1:], log.Index)
   233  	case structs.ACLTokenDeleteRequestType:
   234  		return n.applyACLTokenDelete(buf[1:], log.Index)
   235  	case structs.ACLTokenBootstrapRequestType:
   236  		return n.applyACLTokenBootstrap(buf[1:], log.Index)
   237  	case structs.AutopilotRequestType:
   238  		return n.applyAutopilotUpdate(buf[1:], log.Index)
   239  	}
   240  
   241  	// Check enterprise only message types.
   242  	if applier, ok := n.enterpriseAppliers[msgType]; ok {
   243  		return applier(buf[1:], log.Index)
   244  	}
   245  
   246  	// We didn't match anything; either panic or ignore
   247  	if ignoreUnknown {
   248  		n.logger.Printf("[WARN] nomad.fsm: ignoring unknown message type (%d), upgrade to newer version", msgType)
   249  		return nil
   250  	}
   251  
   252  	panic(fmt.Errorf("failed to apply request: %#v", buf))
   253  }
   254  
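        // applyUpsertNode decodes a NodeRegisterRequest and upserts the node into
        // the state store, unblocking evals for the node's computed class when
        // the node is ready.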
   255  func (n *nomadFSM) applyUpsertNode(buf []byte, index uint64) interface{} {
   256  	defer metrics.MeasureSince([]string{"nomad", "fsm", "register_node"}, time.Now())
   257  	var req structs.NodeRegisterRequest
   258  	if err := structs.Decode(buf, &req); err != nil {
   259  		panic(fmt.Errorf("failed to decode request: %v", err))
   260  	}
   261  
   262  	if err := n.state.UpsertNode(index, req.Node); err != nil {
   263  		n.logger.Printf("[ERR] nomad.fsm: UpsertNode failed: %v", err)
   264  		return err
   265  	}
   266  
   267  	// Unblock evals for the node's computed node class if it is in a ready
   268  	// state.
   269  	if req.Node.Status == structs.NodeStatusReady {
   270  		n.blockedEvals.Unblock(req.Node.ComputedClass, index)
   271  	}
   272  
   273  	return nil
   274  }
   275  
   276  func (n *nomadFSM) applyDeregisterNode(buf []byte, index uint64) interface{} {
   277  	defer metrics.MeasureSince([]string{"nomad", "fsm", "deregister_node"}, time.Now())
   278  	var req structs.NodeDeregisterRequest
   279  	if err := structs.Decode(buf, &req); err != nil {
   280  		panic(fmt.Errorf("failed to decode request: %v", err))
   281  	}
   282  
   283  	if err := n.state.DeleteNode(index, req.NodeID); err != nil {
   284  		n.logger.Printf("[ERR] nomad.fsm: DeleteNode failed: %v", err)
   285  		return err
   286  	}
   287  	return nil
   288  }
   289  
   290  func (n *nomadFSM) applyStatusUpdate(buf []byte, index uint64) interface{} {
   291  	defer metrics.MeasureSince([]string{"nomad", "fsm", "node_status_update"}, time.Now())
   292  	var req structs.NodeUpdateStatusRequest
   293  	if err := structs.Decode(buf, &req); err != nil {
   294  		panic(fmt.Errorf("failed to decode request: %v", err))
   295  	}
   296  
   297  	if err := n.state.UpdateNodeStatus(index, req.NodeID, req.Status); err != nil {
   298  		n.logger.Printf("[ERR] nomad.fsm: UpdateNodeStatus failed: %v", err)
   299  		return err
   300  	}
   301  
   302  	// Unblock evals for the node's computed node class if it is in a ready
   303  	// state.
   304  	if req.Status == structs.NodeStatusReady {
   305  		ws := memdb.NewWatchSet()
   306  		node, err := n.state.NodeByID(ws, req.NodeID)
   307  		if err != nil {
   308  			n.logger.Printf("[ERR] nomad.fsm: looking up node %q failed: %v", req.NodeID, err)
   309  			return err
   310  
   311  		}
   312  		n.blockedEvals.Unblock(node.ComputedClass, index)
   313  	}
   314  
   315  	return nil
   316  }
   317  
   318  func (n *nomadFSM) applyDrainUpdate(buf []byte, index uint64) interface{} {
   319  	defer metrics.MeasureSince([]string{"nomad", "fsm", "node_drain_update"}, time.Now())
   320  	var req structs.NodeUpdateDrainRequest
   321  	if err := structs.Decode(buf, &req); err != nil {
   322  		panic(fmt.Errorf("failed to decode request: %v", err))
   323  	}
   324  
   325  	if err := n.state.UpdateNodeDrain(index, req.NodeID, req.Drain); err != nil {
   326  		n.logger.Printf("[ERR] nomad.fsm: UpdateNodeDrain failed: %v", err)
   327  		return err
   328  	}
   329  	return nil
   330  }
   331  
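        // applyUpsertJob decodes a JobRegisterRequest, canonicalizes the job for
        // upgrade compatibility, upserts it into the state store, and records
        // periodic launch times where necessary.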
   332  func (n *nomadFSM) applyUpsertJob(buf []byte, index uint64) interface{} {
   333  	defer metrics.MeasureSince([]string{"nomad", "fsm", "register_job"}, time.Now())
   334  	var req structs.JobRegisterRequest
   335  	if err := structs.Decode(buf, &req); err != nil {
   336  		panic(fmt.Errorf("failed to decode request: %v", err))
   337  	}
   338  
   339  	/* Handle upgrade paths:
   340  	 * - Empty maps and slices should be treated as nil to avoid
   341  	 *   unintended destructive updates in the scheduler since we use
   342  	 *   reflect.DeepEqual. Starting with Nomad 0.4.1, job submission sanitizes
   343  	 *   the incoming job.
   344  	 * - Migrate from old style upgrade stanza that used only a stagger.
   345  	 */
   346  	req.Job.Canonicalize()
   347  
   348  	if err := n.state.UpsertJob(index, req.Job); err != nil {
   349  		n.logger.Printf("[ERR] nomad.fsm: UpsertJob failed: %v", err)
   350  		return err
   351  	}
   352  
   353  	// We always add the job to the periodic dispatcher because the periodic
   354  	// spec may have been removed, in which case we should stop
   355  	// tracking it.
   356  	if err := n.periodicDispatcher.Add(req.Job); err != nil {
   357  		n.logger.Printf("[ERR] nomad.fsm: periodicDispatcher.Add failed: %v", err)
   358  		return err
   359  	}
   360  
   361  	// Create a watch set
   362  	ws := memdb.NewWatchSet()
   363  
   364  	// If it is an active periodic job, record the time it was inserted. This is
   365  	// necessary for recovering during leader election. It is possible that from
   366  	// the time it is added to when it was supposed to launch, leader election
   367  	// occurs and the job was not launched. In this case, we use the insertion
   368  	// time to determine if a launch was missed.
   369  	if req.Job.IsPeriodicActive() {
   370  		prevLaunch, err := n.state.PeriodicLaunchByID(ws, req.Namespace, req.Job.ID)
   371  		if err != nil {
   372  			n.logger.Printf("[ERR] nomad.fsm: PeriodicLaunchByID failed: %v", err)
   373  			return err
   374  		}
   375  
   376  		// Record the insertion time as a launch. We overload the launch table
   377  		// such that the first entry is the insertion time.
   378  		if prevLaunch == nil {
   379  			launch := &structs.PeriodicLaunch{
   380  				ID:        req.Job.ID,
   381  				Namespace: req.Namespace,
   382  				Launch:    time.Now(),
   383  			}
   384  			if err := n.state.UpsertPeriodicLaunch(index, launch); err != nil {
   385  				n.logger.Printf("[ERR] nomad.fsm: UpsertPeriodicLaunch failed: %v", err)
   386  				return err
   387  			}
   388  		}
   389  	}
   390  
   391  	// Check if the parent job is periodic and mark the launch time.
   392  	parentID := req.Job.ParentID
   393  	if parentID != "" {
   394  		parent, err := n.state.JobByID(ws, req.Namespace, parentID)
   395  		if err != nil {
   396  			n.logger.Printf("[ERR] nomad.fsm: JobByID(%v) lookup for parent failed: %v", parentID, err)
   397  			return err
   398  		} else if parent == nil {
   399  			// The parent has been deregistered.
   400  			return nil
   401  		}
   402  
   403  		if parent.IsPeriodic() && !parent.IsParameterized() {
   404  			t, err := n.periodicDispatcher.LaunchTime(req.Job.ID)
   405  			if err != nil {
   406  				n.logger.Printf("[ERR] nomad.fsm: LaunchTime(%v) failed: %v", req.Job.ID, err)
   407  				return err
   408  			}
   409  
   410  			launch := &structs.PeriodicLaunch{
   411  				ID:        parentID,
   412  				Namespace: req.Namespace,
   413  				Launch:    t,
   414  			}
   415  			if err := n.state.UpsertPeriodicLaunch(index, launch); err != nil {
   416  				n.logger.Printf("[ERR] nomad.fsm: UpsertPeriodicLaunch failed: %v", err)
   417  				return err
   418  			}
   419  		}
   420  	}
   421  
   422  	return nil
   423  }
   424  
   425  func (n *nomadFSM) applyDeregisterJob(buf []byte, index uint64) interface{} {
   426  	defer metrics.MeasureSince([]string{"nomad", "fsm", "deregister_job"}, time.Now())
   427  	var req structs.JobDeregisterRequest
   428  	if err := structs.Decode(buf, &req); err != nil {
   429  		panic(fmt.Errorf("failed to decode request: %v", err))
   430  	}
   431  
   432  	// If it is periodic, remove it from the dispatcher
   433  	if err := n.periodicDispatcher.Remove(req.Namespace, req.JobID); err != nil {
   434  		n.logger.Printf("[ERR] nomad.fsm: periodicDispatcher.Remove failed: %v", err)
   435  		return err
   436  	}
   437  
   438  	if req.Purge {
   439  		if err := n.state.DeleteJob(index, req.Namespace, req.JobID); err != nil {
   440  			n.logger.Printf("[ERR] nomad.fsm: DeleteJob failed: %v", err)
   441  			return err
   442  		}
   443  
   444  		// We always delete from the periodic launch table because it is possible that
   445  		// the job was updated to be non-periodic, thus checking if it is periodic
   446  		// doesn't ensure we clean it up properly.
   447  		n.state.DeletePeriodicLaunch(index, req.Namespace, req.JobID)
   448  	} else {
   449  		// Get the current job, mark it as stopped, and re-insert it.
   450  		ws := memdb.NewWatchSet()
   451  		current, err := n.state.JobByID(ws, req.Namespace, req.JobID)
   452  		if err != nil {
   453  			n.logger.Printf("[ERR] nomad.fsm: JobByID lookup failed: %v", err)
   454  			return err
   455  		}
   456  
   457  		if current == nil {
   458  			return fmt.Errorf("job %q in namespace %q doesn't exist to be deregistered", req.JobID, req.Namespace)
   459  		}
   460  
   461  		stopped := current.Copy()
   462  		stopped.Stop = true
   463  
   464  		if err := n.state.UpsertJob(index, stopped); err != nil {
   465  			n.logger.Printf("[ERR] nomad.fsm: UpsertJob failed: %v", err)
   466  			return err
   467  		}
   468  	}
   469  
   470  	return nil
   471  }
   472  
   473  func (n *nomadFSM) applyUpdateEval(buf []byte, index uint64) interface{} {
   474  	defer metrics.MeasureSince([]string{"nomad", "fsm", "update_eval"}, time.Now())
   475  	var req structs.EvalUpdateRequest
   476  	if err := structs.Decode(buf, &req); err != nil {
   477  		panic(fmt.Errorf("failed to decode request: %v", err))
   478  	}
   479  	return n.upsertEvals(index, req.Evals)
   480  }
   481  
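        // upsertEvals writes the evaluations to the state store and routes each
        // one: runnable evals are enqueued on the eval broker, blocked evals are
        // handed to the blocked-evals tracker, and a successful completion
        // untracks any blocked evals for the job.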
   482  func (n *nomadFSM) upsertEvals(index uint64, evals []*structs.Evaluation) error {
   483  	if err := n.state.UpsertEvals(index, evals); err != nil {
   484  		n.logger.Printf("[ERR] nomad.fsm: UpsertEvals failed: %v", err)
   485  		return err
   486  	}
   487  
   488  	for _, eval := range evals {
   489  		if eval.ShouldEnqueue() {
   490  			n.evalBroker.Enqueue(eval)
   491  		} else if eval.ShouldBlock() {
   492  			n.blockedEvals.Block(eval)
   493  		} else if eval.Status == structs.EvalStatusComplete &&
   494  			len(eval.FailedTGAllocs) == 0 {
   495  			// If we have a successful evaluation for a job, untrack any
   496  			// blocked evaluation for that job
   497  			n.blockedEvals.Untrack(eval.JobID)
   498  		}
   499  	}
   500  	return nil
   501  }
   502  
   503  func (n *nomadFSM) applyDeleteEval(buf []byte, index uint64) interface{} {
   504  	defer metrics.MeasureSince([]string{"nomad", "fsm", "delete_eval"}, time.Now())
   505  	var req structs.EvalDeleteRequest
   506  	if err := structs.Decode(buf, &req); err != nil {
   507  		panic(fmt.Errorf("failed to decode request: %v", err))
   508  	}
   509  
   510  	if err := n.state.DeleteEval(index, req.Evals, req.Allocs); err != nil {
   511  		n.logger.Printf("[ERR] nomad.fsm: DeleteEval failed: %v", err)
   512  		return err
   513  	}
   514  	return nil
   515  }
   516  
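        // applyAllocUpdate denormalizes the shared job and the computed resource
        // totals back onto each allocation before upserting the allocations into
        // the state store.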
   517  func (n *nomadFSM) applyAllocUpdate(buf []byte, index uint64) interface{} {
   518  	defer metrics.MeasureSince([]string{"nomad", "fsm", "alloc_update"}, time.Now())
   519  	var req structs.AllocUpdateRequest
   520  	if err := structs.Decode(buf, &req); err != nil {
   521  		panic(fmt.Errorf("failed to decode request: %v", err))
   522  	}
   523  
   524  	// Attach the job to all the allocations. It is pulled out in the
   525  	// payload to avoid the redundancy of encoding, but should be denormalized
   526  	// prior to being inserted into MemDB.
   527  	structs.DenormalizeAllocationJobs(req.Job, req.Alloc)
   528  
   529  	// Calculate the total resources of allocations. It is pulled out in the
   530  	// payload to avoid encoding something that can be computed, but should be
   531  	// denormalized prior to being inserted into MemDB.
   532  	for _, alloc := range req.Alloc {
   533  		if alloc.Resources != nil {
   534  			// COMPAT 0.4.1 -> 0.5
   535  			// Set the shared resources for allocations which don't have them
   536  			if alloc.SharedResources == nil {
   537  				alloc.SharedResources = &structs.Resources{
   538  					DiskMB: alloc.Resources.DiskMB,
   539  				}
   540  			}
   541  
   542  			continue
   543  		}
   544  
   545  		alloc.Resources = new(structs.Resources)
   546  		for _, task := range alloc.TaskResources {
   547  			alloc.Resources.Add(task)
   548  		}
   549  
   550  		// Add the shared resources
   551  		alloc.Resources.Add(alloc.SharedResources)
   552  	}
   553  
   554  	if err := n.state.UpsertAllocs(index, req.Alloc); err != nil {
   555  		n.logger.Printf("[ERR] nomad.fsm: UpsertAllocs failed: %v", err)
   556  		return err
   557  	}
   558  	return nil
   559  }
   560  
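        // applyAllocClientUpdate applies client-reported allocation state,
        // upserts any attached evals, and unblocks evals for the nodes whose
        // allocations have finished running.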
   561  func (n *nomadFSM) applyAllocClientUpdate(buf []byte, index uint64) interface{} {
   562  	defer metrics.MeasureSince([]string{"nomad", "fsm", "alloc_client_update"}, time.Now())
   563  	var req structs.AllocUpdateRequest
   564  	if err := structs.Decode(buf, &req); err != nil {
   565  		panic(fmt.Errorf("failed to decode request: %v", err))
   566  	}
   567  	if len(req.Alloc) == 0 {
   568  		return nil
   569  	}
   570  
   571  	// Create a watch set
   572  	ws := memdb.NewWatchSet()
   573  
   574  	// Update the allocs with the job ID and task group name
   575  	for _, alloc := range req.Alloc {
   576  		if existing, _ := n.state.AllocByID(ws, alloc.ID); existing != nil {
   577  			alloc.JobID = existing.JobID
   578  			alloc.TaskGroup = existing.TaskGroup
   579  		}
   580  	}
   581  
   582  	// Update all the client allocations
   583  	if err := n.state.UpdateAllocsFromClient(index, req.Alloc); err != nil {
   584  		n.logger.Printf("[ERR] nomad.fsm: UpdateAllocsFromClient failed: %v", err)
   585  		return err
   586  	}
   587  
   588  	// Update any evals
   589  	if len(req.Evals) > 0 {
   590  		if err := n.upsertEvals(index, req.Evals); err != nil {
   591  			n.logger.Printf("[ERR] nomad.fsm: applyAllocClientUpdate failed to update evaluations: %v", err)
   592  			return err
   593  		}
   594  	}
   595  
   596  	// Unblock evals for the node's computed node class if the client has
   597  	// finished running an allocation.
   598  	for _, alloc := range req.Alloc {
   599  		if alloc.ClientStatus == structs.AllocClientStatusComplete ||
   600  			alloc.ClientStatus == structs.AllocClientStatusFailed {
   601  			nodeID := alloc.NodeID
   602  			node, err := n.state.NodeByID(ws, nodeID)
   603  			if err != nil || node == nil {
   604  				n.logger.Printf("[ERR] nomad.fsm: looking up node %q failed: %v", nodeID, err)
   605  				return err
   606  
   607  			}
   608  
   609  			// Unblock any associated quota
   610  			quota, err := n.allocQuota(alloc.ID)
   611  			if err != nil {
   612  				n.logger.Printf("[ERR] nomad.fsm: looking up quota associated with alloc %q failed: %v", alloc.ID, err)
   613  				return err
   614  			}
   615  
   616  			n.blockedEvals.UnblockClassAndQuota(node.ComputedClass, quota, index)
   617  		}
   618  	}
   619  
   620  	return nil
   621  }
   622  
   623  // applyReconcileSummaries reconciles summaries for all the jobs
   624  func (n *nomadFSM) applyReconcileSummaries(buf []byte, index uint64) interface{} {
   625  	if err := n.state.ReconcileJobSummaries(index); err != nil {
   626  		return err
   627  	}
   628  	return n.reconcileQueuedAllocations(index)
   629  }
   630  
   631  // applyUpsertVaultAccessor stores the Vault accessors for a given allocation
   632  // and task
   633  func (n *nomadFSM) applyUpsertVaultAccessor(buf []byte, index uint64) interface{} {
   634  	defer metrics.MeasureSince([]string{"nomad", "fsm", "upsert_vault_accessor"}, time.Now())
   635  	var req structs.VaultAccessorsRequest
   636  	if err := structs.Decode(buf, &req); err != nil {
   637  		panic(fmt.Errorf("failed to decode request: %v", err))
   638  	}
   639  
   640  	if err := n.state.UpsertVaultAccessor(index, req.Accessors); err != nil {
   641  		n.logger.Printf("[ERR] nomad.fsm: UpsertVaultAccessor failed: %v", err)
   642  		return err
   643  	}
   644  
   645  	return nil
   646  }
   647  
   648  // applyDeregisterVaultAccessor deregisters a set of Vault accessors
   649  func (n *nomadFSM) applyDeregisterVaultAccessor(buf []byte, index uint64) interface{} {
   650  	defer metrics.MeasureSince([]string{"nomad", "fsm", "deregister_vault_accessor"}, time.Now())
   651  	var req structs.VaultAccessorsRequest
   652  	if err := structs.Decode(buf, &req); err != nil {
   653  		panic(fmt.Errorf("failed to decode request: %v", err))
   654  	}
   655  
   656  	if err := n.state.DeleteVaultAccessors(index, req.Accessors); err != nil {
   657  		n.logger.Printf("[ERR] nomad.fsm: DeleteVaultAccessors failed: %v", err)
   658  		return err
   659  	}
   660  
   661  	return nil
   662  }
   663  
   664  // applyPlanResults applies the results of a plan application
   665  func (n *nomadFSM) applyPlanResults(buf []byte, index uint64) interface{} {
   666  	defer metrics.MeasureSince([]string{"nomad", "fsm", "apply_plan_results"}, time.Now())
   667  	var req structs.ApplyPlanResultsRequest
   668  	if err := structs.Decode(buf, &req); err != nil {
   669  		panic(fmt.Errorf("failed to decode request: %v", err))
   670  	}
   671  
   672  	if err := n.state.UpsertPlanResults(index, &req); err != nil {
   673  		n.logger.Printf("[ERR] nomad.fsm: UpsertPlanResults failed: %v", err)
   674  		return err
   675  	}
   676  
   677  	return nil
   678  }
   679  
   680  // applyDeploymentStatusUpdate is used to update the status of an existing
   681  // deployment
   682  func (n *nomadFSM) applyDeploymentStatusUpdate(buf []byte, index uint64) interface{} {
   683  	defer metrics.MeasureSince([]string{"nomad", "fsm", "apply_deployment_status_update"}, time.Now())
   684  	var req structs.DeploymentStatusUpdateRequest
   685  	if err := structs.Decode(buf, &req); err != nil {
   686  		panic(fmt.Errorf("failed to decode request: %v", err))
   687  	}
   688  
   689  	if err := n.state.UpdateDeploymentStatus(index, &req); err != nil {
   690  		n.logger.Printf("[ERR] nomad.fsm: UpdateDeploymentStatus failed: %v", err)
   691  		return err
   692  	}
   693  
   694  	if req.Eval != nil && req.Eval.ShouldEnqueue() {
   695  		n.evalBroker.Enqueue(req.Eval)
   696  	}
   697  
   698  	return nil
   699  }
   700  
   701  // applyDeploymentPromotion is used to promote canaries in a deployment
   702  func (n *nomadFSM) applyDeploymentPromotion(buf []byte, index uint64) interface{} {
   703  	defer metrics.MeasureSince([]string{"nomad", "fsm", "apply_deployment_promotion"}, time.Now())
   704  	var req structs.ApplyDeploymentPromoteRequest
   705  	if err := structs.Decode(buf, &req); err != nil {
   706  		panic(fmt.Errorf("failed to decode request: %v", err))
   707  	}
   708  
   709  	if err := n.state.UpdateDeploymentPromotion(index, &req); err != nil {
   710  		n.logger.Printf("[ERR] nomad.fsm: UpdateDeploymentPromotion failed: %v", err)
   711  		return err
   712  	}
   713  
   714  	if req.Eval != nil && req.Eval.ShouldEnqueue() {
   715  		n.evalBroker.Enqueue(req.Eval)
   716  	}
   717  
   718  	return nil
   719  }
   720  
   721  // applyDeploymentAllocHealth is used to set the health of allocations as part
   722  // of a deployment
   723  func (n *nomadFSM) applyDeploymentAllocHealth(buf []byte, index uint64) interface{} {
   724  	defer metrics.MeasureSince([]string{"nomad", "fsm", "apply_deployment_alloc_health"}, time.Now())
   725  	var req structs.ApplyDeploymentAllocHealthRequest
   726  	if err := structs.Decode(buf, &req); err != nil {
   727  		panic(fmt.Errorf("failed to decode request: %v", err))
   728  	}
   729  
   730  	if err := n.state.UpdateDeploymentAllocHealth(index, &req); err != nil {
   731  		n.logger.Printf("[ERR] nomad.fsm: UpdateDeploymentAllocHealth failed: %v", err)
   732  		return err
   733  	}
   734  
   735  	if req.Eval != nil && req.Eval.ShouldEnqueue() {
   736  		n.evalBroker.Enqueue(req.Eval)
   737  	}
   738  
   739  	return nil
   740  }
   741  
   742  // applyDeploymentDelete is used to delete a set of deployments
   743  func (n *nomadFSM) applyDeploymentDelete(buf []byte, index uint64) interface{} {
   744  	defer metrics.MeasureSince([]string{"nomad", "fsm", "apply_deployment_delete"}, time.Now())
   745  	var req structs.DeploymentDeleteRequest
   746  	if err := structs.Decode(buf, &req); err != nil {
   747  		panic(fmt.Errorf("failed to decode request: %v", err))
   748  	}
   749  
   750  	if err := n.state.DeleteDeployment(index, req.Deployments); err != nil {
   751  		n.logger.Printf("[ERR] nomad.fsm: DeleteDeployment failed: %v", err)
   752  		return err
   753  	}
   754  
   755  	return nil
   756  }
   757  
   758  // applyJobStability is used to set the stability of a job
   759  func (n *nomadFSM) applyJobStability(buf []byte, index uint64) interface{} {
   760  	defer metrics.MeasureSince([]string{"nomad", "fsm", "apply_job_stability"}, time.Now())
   761  	var req structs.JobStabilityRequest
   762  	if err := structs.Decode(buf, &req); err != nil {
   763  		panic(fmt.Errorf("failed to decode request: %v", err))
   764  	}
   765  
   766  	if err := n.state.UpdateJobStability(index, req.Namespace, req.JobID, req.JobVersion, req.Stable); err != nil {
   767  		n.logger.Printf("[ERR] nomad.fsm: UpdateJobStability failed: %v", err)
   768  		return err
   769  	}
   770  
   771  	return nil
   772  }
   773  
   774  // applyACLPolicyUpsert is used to upsert a set of policies
   775  func (n *nomadFSM) applyACLPolicyUpsert(buf []byte, index uint64) interface{} {
   776  	defer metrics.MeasureSince([]string{"nomad", "fsm", "apply_acl_policy_upsert"}, time.Now())
   777  	var req structs.ACLPolicyUpsertRequest
   778  	if err := structs.Decode(buf, &req); err != nil {
   779  		panic(fmt.Errorf("failed to decode request: %v", err))
   780  	}
   781  
   782  	if err := n.state.UpsertACLPolicies(index, req.Policies); err != nil {
   783  		n.logger.Printf("[ERR] nomad.fsm: UpsertACLPolicies failed: %v", err)
   784  		return err
   785  	}
   786  	return nil
   787  }
   788  
   789  // applyACLPolicyDelete is used to delete a set of policies
   790  func (n *nomadFSM) applyACLPolicyDelete(buf []byte, index uint64) interface{} {
   791  	defer metrics.MeasureSince([]string{"nomad", "fsm", "apply_acl_policy_delete"}, time.Now())
   792  	var req structs.ACLPolicyDeleteRequest
   793  	if err := structs.Decode(buf, &req); err != nil {
   794  		panic(fmt.Errorf("failed to decode request: %v", err))
   795  	}
   796  
   797  	if err := n.state.DeleteACLPolicies(index, req.Names); err != nil {
   798  		n.logger.Printf("[ERR] nomad.fsm: DeleteACLPolicies failed: %v", err)
   799  		return err
   800  	}
   801  	return nil
   802  }
   803  
   804  // applyACLTokenUpsert is used to upsert a set of tokens
   805  func (n *nomadFSM) applyACLTokenUpsert(buf []byte, index uint64) interface{} {
   806  	defer metrics.MeasureSince([]string{"nomad", "fsm", "apply_acl_token_upsert"}, time.Now())
   807  	var req structs.ACLTokenUpsertRequest
   808  	if err := structs.Decode(buf, &req); err != nil {
   809  		panic(fmt.Errorf("failed to decode request: %v", err))
   810  	}
   811  
   812  	if err := n.state.UpsertACLTokens(index, req.Tokens); err != nil {
   813  		n.logger.Printf("[ERR] nomad.fsm: UpsertACLTokens failed: %v", err)
   814  		return err
   815  	}
   816  	return nil
   817  }
   818  
   819  // applyACLTokenDelete is used to delete a set of tokens
   820  func (n *nomadFSM) applyACLTokenDelete(buf []byte, index uint64) interface{} {
   821  	defer metrics.MeasureSince([]string{"nomad", "fsm", "apply_acl_token_delete"}, time.Now())
   822  	var req structs.ACLTokenDeleteRequest
   823  	if err := structs.Decode(buf, &req); err != nil {
   824  		panic(fmt.Errorf("failed to decode request: %v", err))
   825  	}
   826  
   827  	if err := n.state.DeleteACLTokens(index, req.AccessorIDs); err != nil {
   828  		n.logger.Printf("[ERR] nomad.fsm: DeleteACLTokens failed: %v", err)
   829  		return err
   830  	}
   831  	return nil
   832  }
   833  
   834  // applyACLTokenBootstrap is used to bootstrap an ACL token
   835  func (n *nomadFSM) applyACLTokenBootstrap(buf []byte, index uint64) interface{} {
   836  	defer metrics.MeasureSince([]string{"nomad", "fsm", "apply_acl_token_bootstrap"}, time.Now())
   837  	var req structs.ACLTokenBootstrapRequest
   838  	if err := structs.Decode(buf, &req); err != nil {
   839  		panic(fmt.Errorf("failed to decode request: %v", err))
   840  	}
   841  
   842  	if err := n.state.BootstrapACLTokens(index, req.ResetIndex, req.Token); err != nil {
   843  		n.logger.Printf("[ERR] nomad.fsm: BootstrapACLTokens failed: %v", err)
   844  		return err
   845  	}
   846  	return nil
   847  }
   848  
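        // applyAutopilotUpdate applies a new Autopilot configuration, optionally
        // as a check-and-set guarded by the configuration's ModifyIndex.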
   849  func (n *nomadFSM) applyAutopilotUpdate(buf []byte, index uint64) interface{} {
   850  	var req structs.AutopilotSetConfigRequest
   851  	if err := structs.Decode(buf, &req); err != nil {
   852  		panic(fmt.Errorf("failed to decode request: %v", err))
   853  	}
   854  	defer metrics.MeasureSince([]string{"nomad", "fsm", "autopilot"}, time.Now())
   855  
   856  	if req.CAS {
   857  		act, err := n.state.AutopilotCASConfig(index, req.Config.ModifyIndex, &req.Config)
   858  		if err != nil {
   859  			return err
   860  		}
   861  		return act
   862  	}
   863  	return n.state.AutopilotSetConfig(index, &req.Config)
   864  }
   865  
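        // Snapshot returns a raft.FSMSnapshot backed by a point-in-time state
        // store snapshot that can be persisted concurrently with new writes.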
   866  func (n *nomadFSM) Snapshot() (raft.FSMSnapshot, error) {
   867  	// Create a new snapshot
   868  	snap, err := n.state.Snapshot()
   869  	if err != nil {
   870  		return nil, err
   871  	}
   872  
   873  	ns := &nomadSnapshot{
   874  		snap:      snap,
   875  		timetable: n.timetable,
   876  	}
   877  	return ns, nil
   878  }
   879  
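        // Restore replaces the FSM's state with the contents of a snapshot
        // stream: it builds a fresh state store, replays each typed record, and
        // atomically swaps the new store in, abandoning the old one so blocking
        // queries are woken.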
   880  func (n *nomadFSM) Restore(old io.ReadCloser) error {
   881  	defer old.Close()
   882  
   883  	// Create a new state store
   884  	config := &state.StateStoreConfig{
   885  		LogOutput: n.config.LogOutput,
   886  		Region:    n.config.Region,
   887  	}
   888  	newState, err := state.NewStateStore(config)
   889  	if err != nil {
   890  		return err
   891  	}
   892  
   893  	// Start the state restore
   894  	restore, err := newState.Restore()
   895  	if err != nil {
   896  		return err
   897  	}
   898  	defer restore.Abort()
   899  
   900  	// Create a decoder
   901  	dec := codec.NewDecoder(old, structs.MsgpackHandle)
   902  
   903  	// Read in the header
   904  	var header snapshotHeader
   905  	if err := dec.Decode(&header); err != nil {
   906  		return err
   907  	}
   908  
   909  	// Populate the new state
   910  	msgType := make([]byte, 1)
   911  	for {
   912  		// Read the message type
   913  		_, err := old.Read(msgType)
   914  		if err == io.EOF {
   915  			break
   916  		} else if err != nil {
   917  			return err
   918  		}
   919  
   920  		// Decode
   921  		snapType := SnapshotType(msgType[0])
   922  		switch snapType {
   923  		case TimeTableSnapshot:
   924  			if err := n.timetable.Deserialize(dec); err != nil {
   925  				return fmt.Errorf("time table deserialize failed: %v", err)
   926  			}
   927  
   928  		case NodeSnapshot:
   929  			node := new(structs.Node)
   930  			if err := dec.Decode(node); err != nil {
   931  				return err
   932  			}
   933  			if err := restore.NodeRestore(node); err != nil {
   934  				return err
   935  			}
   936  
   937  		case JobSnapshot:
   938  			job := new(structs.Job)
   939  			if err := dec.Decode(job); err != nil {
   940  				return err
   941  			}
   942  
   943  			/* Handle upgrade paths:
   944  			 * - Empty maps and slices should be treated as nil to avoid
   945  			 *   unintended destructive updates in the scheduler since we use
   946  			 *   reflect.DeepEqual. Starting with Nomad 0.4.1, job submission sanitizes
   947  			 *   the incoming job.
   948  			 * - Migrate from old style upgrade stanza that used only a stagger.
   949  			 */
   950  			job.Canonicalize()
   951  
   952  			if err := restore.JobRestore(job); err != nil {
   953  				return err
   954  			}
   955  
   956  		case EvalSnapshot:
   957  			eval := new(structs.Evaluation)
   958  			if err := dec.Decode(eval); err != nil {
   959  				return err
   960  			}
   961  
   962  			// COMPAT: Handle upgrade to v0.7.0
   963  			if eval.Namespace == "" {
   964  				eval.Namespace = structs.DefaultNamespace
   965  			}
   966  
   967  			if err := restore.EvalRestore(eval); err != nil {
   968  				return err
   969  			}
   970  
   971  		case AllocSnapshot:
   972  			alloc := new(structs.Allocation)
   973  			if err := dec.Decode(alloc); err != nil {
   974  				return err
   975  			}
   976  
   977  			// COMPAT: Handle upgrade to v0.7.0
   978  			if alloc.Namespace == "" {
   979  				alloc.Namespace = structs.DefaultNamespace
   980  			}
   981  
   982  			if err := restore.AllocRestore(alloc); err != nil {
   983  				return err
   984  			}
   985  
   986  		case IndexSnapshot:
   987  			idx := new(state.IndexEntry)
   988  			if err := dec.Decode(idx); err != nil {
   989  				return err
   990  			}
   991  			if err := restore.IndexRestore(idx); err != nil {
   992  				return err
   993  			}
   994  
   995  		case PeriodicLaunchSnapshot:
   996  			launch := new(structs.PeriodicLaunch)
   997  			if err := dec.Decode(launch); err != nil {
   998  				return err
   999  			}
  1000  
  1001  			// COMPAT: Handle upgrade to v0.7.0
  1002  			if launch.Namespace == "" {
  1003  				launch.Namespace = structs.DefaultNamespace
  1004  			}
  1005  
  1006  			if err := restore.PeriodicLaunchRestore(launch); err != nil {
  1007  				return err
  1008  			}
  1009  
  1010  		case JobSummarySnapshot:
  1011  			summary := new(structs.JobSummary)
  1012  			if err := dec.Decode(summary); err != nil {
  1013  				return err
  1014  			}
  1015  
  1016  			// COMPAT: Handle upgrade to v0.7.0
  1017  			if summary.Namespace == "" {
  1018  				summary.Namespace = structs.DefaultNamespace
  1019  			}
  1020  
  1021  			if err := restore.JobSummaryRestore(summary); err != nil {
  1022  				return err
  1023  			}
  1024  
  1025  		case VaultAccessorSnapshot:
  1026  			accessor := new(structs.VaultAccessor)
  1027  			if err := dec.Decode(accessor); err != nil {
  1028  				return err
  1029  			}
  1030  			if err := restore.VaultAccessorRestore(accessor); err != nil {
  1031  				return err
  1032  			}
  1033  
  1034  		case JobVersionSnapshot:
  1035  			version := new(structs.Job)
  1036  			if err := dec.Decode(version); err != nil {
  1037  				return err
  1038  			}
  1039  
  1040  			// COMPAT: Handle upgrade to v0.7.0
  1041  			if version.Namespace == "" {
  1042  				version.Namespace = structs.DefaultNamespace
  1043  			}
  1044  
  1045  			if err := restore.JobVersionRestore(version); err != nil {
  1046  				return err
  1047  			}
  1048  
  1049  		case DeploymentSnapshot:
  1050  			deployment := new(structs.Deployment)
  1051  			if err := dec.Decode(deployment); err != nil {
  1052  				return err
  1053  			}
  1054  
  1055  			// COMPAT: Handle upgrade to v0.7.0
  1056  			if deployment.Namespace == "" {
  1057  				deployment.Namespace = structs.DefaultNamespace
  1058  			}
  1059  
  1060  			if err := restore.DeploymentRestore(deployment); err != nil {
  1061  				return err
  1062  			}
  1063  
  1064  		case ACLPolicySnapshot:
  1065  			policy := new(structs.ACLPolicy)
  1066  			if err := dec.Decode(policy); err != nil {
  1067  				return err
  1068  			}
  1069  			if err := restore.ACLPolicyRestore(policy); err != nil {
  1070  				return err
  1071  			}
  1072  
  1073  		case ACLTokenSnapshot:
  1074  			token := new(structs.ACLToken)
  1075  			if err := dec.Decode(token); err != nil {
  1076  				return err
  1077  			}
  1078  			if err := restore.ACLTokenRestore(token); err != nil {
  1079  				return err
  1080  			}
  1081  
  1082  		default:
  1083  			// Check if this is an enterprise only object being restored
  1084  			restorer, ok := n.enterpriseRestorers[snapType]
  1085  			if !ok {
  1086  				return fmt.Errorf("Unrecognized snapshot type: %v", msgType)
  1087  			}
  1088  
  1089  			// Restore the enterprise only object
  1090  			if err := restorer(restore, dec); err != nil {
  1091  				return err
  1092  			}
  1093  		}
  1094  	}
  1095  
  1096  	restore.Commit()
  1097  
  1098  	// Create Job Summaries
  1099  	// COMPAT 0.4 -> 0.4.1
  1100  	// We can remove this in 0.5. This exists so that the server creates job
  1101  	// summaries if they were not present previously. When users upgrade to 0.5
  1102  	// from 0.4.1, the snapshot will contain job summaries so it will be safe to
  1103  	// remove this block.
  1104  	index, err := newState.Index("job_summary")
  1105  	if err != nil {
  1106  		return fmt.Errorf("couldn't fetch index of job summary table: %v", err)
  1107  	}
  1108  
  1109  	// If the index is 0 that means there is no job summary in the snapshot so
  1110  	// we will have to create them
  1111  	if index == 0 {
  1112  		// query the latest index
  1113  		latestIndex, err := newState.LatestIndex()
  1114  		if err != nil {
  1115  			return fmt.Errorf("unable to query latest index: %v", err)
  1116  		}
  1117  		if err := newState.ReconcileJobSummaries(latestIndex); err != nil {
  1118  			return fmt.Errorf("error reconciling summaries: %v", err)
  1119  		}
  1120  	}
  1121  
  1122  	// External code might be calling State(), so we need to synchronize
  1123  	// here to make sure we swap in the new state store atomically.
  1124  	n.stateLock.Lock()
  1125  	stateOld := n.state
  1126  	n.state = newState
  1127  	n.stateLock.Unlock()
  1128  
  1129  	// Signal that the old state store has been abandoned. This is required
  1130  	// because we don't operate on it any more, we just throw it away, so
  1131  	// blocking queries won't see any changes and need to be woken up.
  1132  	stateOld.Abandon()
  1133  
  1134  	return nil
  1135  }
  1136  
  1137  // reconcileQueuedAllocations re-calculates the queued allocations for every
  1138  // job for which we created a Job Summary during the snapshot restore
  1139  func (n *nomadFSM) reconcileQueuedAllocations(index uint64) error {
  1140  	// Get all the jobs
  1141  	ws := memdb.NewWatchSet()
  1142  	iter, err := n.state.Jobs(ws)
  1143  	if err != nil {
  1144  		return err
  1145  	}
  1146  
  1147  	snap, err := n.state.Snapshot()
  1148  	if err != nil {
  1149  		return fmt.Errorf("unable to create snapshot: %v", err)
  1150  	}
  1151  
  1152  	// Invoke the scheduler for every job so that we can populate the number
  1153  	// of queued allocations for every job
  1154  	for {
  1155  		rawJob := iter.Next()
  1156  		if rawJob == nil {
  1157  			break
  1158  		}
  1159  		job := rawJob.(*structs.Job)
  1160  		planner := &scheduler.Harness{
  1161  			State: &snap.StateStore,
  1162  		}
  1163  		// Create an eval, mark it as requiring annotations, and insert it as well
  1164  		eval := &structs.Evaluation{
  1165  			ID:             uuid.Generate(),
  1166  			Namespace:      job.Namespace,
  1167  			Priority:       job.Priority,
  1168  			Type:           job.Type,
  1169  			TriggeredBy:    structs.EvalTriggerJobRegister,
  1170  			JobID:          job.ID,
  1171  			JobModifyIndex: job.JobModifyIndex + 1,
  1172  			Status:         structs.EvalStatusPending,
  1173  			AnnotatePlan:   true,
  1174  		}
  1175  		snap.UpsertEvals(100, []*structs.Evaluation{eval})
  1176  		// Create the scheduler and run it
  1177  		sched, err := scheduler.NewScheduler(eval.Type, n.logger, snap, planner)
  1178  		if err != nil {
  1179  			return err
  1180  		}
  1181  
  1182  		if err := sched.Process(eval); err != nil {
  1183  			return err
  1184  		}
  1185  
  1186  		// Get the job summary from the fsm state store
  1187  		originalSummary, err := n.state.JobSummaryByID(ws, job.Namespace, job.ID)
  1188  		if err != nil {
  1189  			return err
  1190  		}
  1191  		summary := originalSummary.Copy()
  1192  
  1193  		// Add the allocations the scheduler has made to the queued count, since
  1194  		// these allocations never get placed until the scheduler is invoked
  1195  		// with a real planner
  1196  		if l := len(planner.Plans); l != 1 {
  1197  			return fmt.Errorf("unexpected number of plans during restore %d. Please file an issue including the logs", l)
  1198  		}
  1199  		for _, allocations := range planner.Plans[0].NodeAllocation {
  1200  			for _, allocation := range allocations {
  1201  				tgSummary, ok := summary.Summary[allocation.TaskGroup]
  1202  				if !ok {
  1203  					return fmt.Errorf("task group %q not found while updating queued count", allocation.TaskGroup)
  1204  				}
  1205  				tgSummary.Queued += 1
  1206  				summary.Summary[allocation.TaskGroup] = tgSummary
  1207  			}
  1208  		}
  1209  
  1210  		// Add the queued allocations attached to the evaluation to the queued
  1211  		// counter of the job summary
  1212  		if l := len(planner.Evals); l != 1 {
  1213  			return fmt.Errorf("unexpected number of evals during restore %d. Please file an issue including the logs", l)
  1214  		}
  1215  		for tg, queued := range planner.Evals[0].QueuedAllocations {
  1216  			tgSummary, ok := summary.Summary[tg]
  1217  			if !ok {
  1218  				return fmt.Errorf("task group %q not found while updating queued count", tg)
  1219  			}
  1220  
  1221  			// We add instead of setting here because we want to take into
  1222  			// consideration what the scheduler with a mock planner thinks it
  1223  			// placed. Those should be counted as queued as well
  1224  			tgSummary.Queued += queued
  1225  			summary.Summary[tg] = tgSummary
  1226  		}
  1227  
  1228  		if !reflect.DeepEqual(summary, originalSummary) {
  1229  			summary.ModifyIndex = index
  1230  			if err := n.state.UpsertJobSummary(index, summary); err != nil {
  1231  				return err
  1232  			}
  1233  		}
  1234  	}
  1235  	return nil
  1236  }
  1237  
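        // Persist writes the snapshot to the sink: a msgpack-encoded header, the
        // time table, and then every table's records, each prefixed with its
        // SnapshotType tag.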
  1238  func (s *nomadSnapshot) Persist(sink raft.SnapshotSink) error {
  1239  	defer metrics.MeasureSince([]string{"nomad", "fsm", "persist"}, time.Now())
  1240  	// Create an encoder for writing snapshot records to the sink
  1241  	encoder := codec.NewEncoder(sink, structs.MsgpackHandle)
  1242  
  1243  	// Write the header
  1244  	header := snapshotHeader{}
  1245  	if err := encoder.Encode(&header); err != nil {
  1246  		sink.Cancel()
  1247  		return err
  1248  	}
  1249  
  1250  	// Write the time table
  1251  	sink.Write([]byte{byte(TimeTableSnapshot)})
  1252  	if err := s.timetable.Serialize(encoder); err != nil {
  1253  		sink.Cancel()
  1254  		return err
  1255  	}
  1256  
  1257  	// Write all the data out
  1258  	if err := s.persistIndexes(sink, encoder); err != nil {
  1259  		sink.Cancel()
  1260  		return err
  1261  	}
  1262  	if err := s.persistNodes(sink, encoder); err != nil {
  1263  		sink.Cancel()
  1264  		return err
  1265  	}
  1266  	if err := s.persistJobs(sink, encoder); err != nil {
  1267  		sink.Cancel()
  1268  		return err
  1269  	}
  1270  	if err := s.persistEvals(sink, encoder); err != nil {
  1271  		sink.Cancel()
  1272  		return err
  1273  	}
  1274  	if err := s.persistAllocs(sink, encoder); err != nil {
  1275  		sink.Cancel()
  1276  		return err
  1277  	}
  1278  	if err := s.persistPeriodicLaunches(sink, encoder); err != nil {
  1279  		sink.Cancel()
  1280  		return err
  1281  	}
  1282  	if err := s.persistJobSummaries(sink, encoder); err != nil {
  1283  		sink.Cancel()
  1284  		return err
  1285  	}
  1286  	if err := s.persistVaultAccessors(sink, encoder); err != nil {
  1287  		sink.Cancel()
  1288  		return err
  1289  	}
  1290  	if err := s.persistJobVersions(sink, encoder); err != nil {
  1291  		sink.Cancel()
  1292  		return err
  1293  	}
  1294  	if err := s.persistDeployments(sink, encoder); err != nil {
  1295  		sink.Cancel()
  1296  		return err
  1297  	}
  1298  	if err := s.persistACLPolicies(sink, encoder); err != nil {
  1299  		sink.Cancel()
  1300  		return err
  1301  	}
  1302  	if err := s.persistACLTokens(sink, encoder); err != nil {
  1303  		sink.Cancel()
  1304  		return err
  1305  	}
  1306  	if err := s.persistEnterpriseTables(sink, encoder); err != nil {
  1307  		sink.Cancel()
  1308  		return err
  1309  	}
  1310  	return nil
  1311  }
  1312  
  1313  func (s *nomadSnapshot) persistIndexes(sink raft.SnapshotSink,
  1314  	encoder *codec.Encoder) error {
  1315  	// Get all the indexes
  1316  	iter, err := s.snap.Indexes()
  1317  	if err != nil {
  1318  		return err
  1319  	}
  1320  
  1321  	for {
  1322  		// Get the next item
  1323  		raw := iter.Next()
  1324  		if raw == nil {
  1325  			break
  1326  		}
  1327  
  1328  		// Prepare the request struct
  1329  		idx := raw.(*state.IndexEntry)
  1330  
  1331  		// Write out an index entry
  1332  		sink.Write([]byte{byte(IndexSnapshot)})
  1333  		if err := encoder.Encode(idx); err != nil {
  1334  			return err
  1335  		}
  1336  	}
  1337  	return nil
  1338  }
  1339  
  1340  func (s *nomadSnapshot) persistNodes(sink raft.SnapshotSink,
  1341  	encoder *codec.Encoder) error {
  1342  	// Get all the nodes
  1343  	ws := memdb.NewWatchSet()
  1344  	nodes, err := s.snap.Nodes(ws)
  1345  	if err != nil {
  1346  		return err
  1347  	}
  1348  
  1349  	for {
  1350  		// Get the next item
  1351  		raw := nodes.Next()
  1352  		if raw == nil {
  1353  			break
  1354  		}
  1355  
  1356  		// Prepare the request struct
  1357  		node := raw.(*structs.Node)
  1358  
  1359  		// Write out a node registration
  1360  		sink.Write([]byte{byte(NodeSnapshot)})
  1361  		if err := encoder.Encode(node); err != nil {
  1362  			return err
  1363  		}
  1364  	}
  1365  	return nil
  1366  }
  1367  
  1368  func (s *nomadSnapshot) persistJobs(sink raft.SnapshotSink,
  1369  	encoder *codec.Encoder) error {
  1370  	// Get all the jobs
  1371  	ws := memdb.NewWatchSet()
  1372  	jobs, err := s.snap.Jobs(ws)
  1373  	if err != nil {
  1374  		return err
  1375  	}
  1376  
  1377  	for {
  1378  		// Get the next item
  1379  		raw := jobs.Next()
  1380  		if raw == nil {
  1381  			break
  1382  		}
  1383  
  1384  		// Prepare the request struct
  1385  		job := raw.(*structs.Job)
  1386  
  1387  		// Write out a job registration
  1388  		sink.Write([]byte{byte(JobSnapshot)})
  1389  		if err := encoder.Encode(job); err != nil {
  1390  			return err
  1391  		}
  1392  	}
  1393  	return nil
  1394  }
  1395  
  1396  func (s *nomadSnapshot) persistEvals(sink raft.SnapshotSink,
  1397  	encoder *codec.Encoder) error {
  1398  	// Get all the evaluations
  1399  	ws := memdb.NewWatchSet()
  1400  	evals, err := s.snap.Evals(ws)
  1401  	if err != nil {
  1402  		return err
  1403  	}
  1404  
  1405  	for {
  1406  		// Get the next item
  1407  		raw := evals.Next()
  1408  		if raw == nil {
  1409  			break
  1410  		}
  1411  
  1412  		// Prepare the request struct
  1413  		eval := raw.(*structs.Evaluation)
  1414  
  1415  		// Write out the evaluation
  1416  		sink.Write([]byte{byte(EvalSnapshot)})
  1417  		if err := encoder.Encode(eval); err != nil {
  1418  			return err
  1419  		}
  1420  	}
  1421  	return nil
  1422  }
  1423  
  1424  func (s *nomadSnapshot) persistAllocs(sink raft.SnapshotSink,
  1425  	encoder *codec.Encoder) error {
  1426  	// Get all the allocations
  1427  	ws := memdb.NewWatchSet()
  1428  	allocs, err := s.snap.Allocs(ws)
  1429  	if err != nil {
  1430  		return err
  1431  	}
  1432  
  1433  	for {
  1434  		// Get the next item
  1435  		raw := allocs.Next()
  1436  		if raw == nil {
  1437  			break
  1438  		}
  1439  
  1440  		// Prepare the request struct
  1441  		alloc := raw.(*structs.Allocation)
  1442  
  1443  		// Write out the allocation
  1444  		sink.Write([]byte{byte(AllocSnapshot)})
  1445  		if err := encoder.Encode(alloc); err != nil {
  1446  			return err
  1447  		}
  1448  	}
  1449  	return nil
  1450  }
  1451  
  1452  func (s *nomadSnapshot) persistPeriodicLaunches(sink raft.SnapshotSink,
  1453  	encoder *codec.Encoder) error {
  1454  	// Get all the periodic launches
  1455  	ws := memdb.NewWatchSet()
  1456  	launches, err := s.snap.PeriodicLaunches(ws)
  1457  	if err != nil {
  1458  		return err
  1459  	}
  1460  
  1461  	for {
  1462  		// Get the next item
  1463  		raw := launches.Next()
  1464  		if raw == nil {
  1465  			break
  1466  		}
  1467  
  1468  		// Prepare the request struct
  1469  		launch := raw.(*structs.PeriodicLaunch)
  1470  
  1471  		// Write out a periodic launch
  1472  		sink.Write([]byte{byte(PeriodicLaunchSnapshot)})
  1473  		if err := encoder.Encode(launch); err != nil {
  1474  			return err
  1475  		}
  1476  	}
  1477  	return nil
  1478  }
  1479  
  1480  func (s *nomadSnapshot) persistJobSummaries(sink raft.SnapshotSink,
  1481  	encoder *codec.Encoder) error {
  1482  
  1483  	ws := memdb.NewWatchSet()
  1484  	summaries, err := s.snap.JobSummaries(ws)
  1485  	if err != nil {
  1486  		return err
  1487  	}
  1488  
  1489  	for {
  1490  		raw := summaries.Next()
  1491  		if raw == nil {
  1492  			break
  1493  		}
  1494  
  1495  		jobSummary := raw.(*structs.JobSummary)
  1496  
  1497  		sink.Write([]byte{byte(JobSummarySnapshot)})
  1498  		if err := encoder.Encode(jobSummary); err != nil {
  1499  			return err
  1500  		}
  1501  	}
  1502  	return nil
  1503  }
  1504  
  1505  func (s *nomadSnapshot) persistVaultAccessors(sink raft.SnapshotSink,
  1506  	encoder *codec.Encoder) error {
  1507  
  1508  	ws := memdb.NewWatchSet()
  1509  	accessors, err := s.snap.VaultAccessors(ws)
  1510  	if err != nil {
  1511  		return err
  1512  	}
  1513  
  1514  	for {
  1515  		raw := accessors.Next()
  1516  		if raw == nil {
  1517  			break
  1518  		}
  1519  
  1520  		accessor := raw.(*structs.VaultAccessor)
  1521  
  1522  		sink.Write([]byte{byte(VaultAccessorSnapshot)})
  1523  		if err := encoder.Encode(accessor); err != nil {
  1524  			return err
  1525  		}
  1526  	}
  1527  	return nil
  1528  }
  1529  
  1530  func (s *nomadSnapshot) persistJobVersions(sink raft.SnapshotSink,
  1531  	encoder *codec.Encoder) error {
  1532  	// Get all the job versions
  1533  	ws := memdb.NewWatchSet()
  1534  	versions, err := s.snap.JobVersions(ws)
  1535  	if err != nil {
  1536  		return err
  1537  	}
  1538  
  1539  	for {
  1540  		// Get the next item
  1541  		raw := versions.Next()
  1542  		if raw == nil {
  1543  			break
  1544  		}
  1545  
  1546  		// Prepare the request struct
  1547  		job := raw.(*structs.Job)
  1548  
  1549  		// Write out a job version
  1550  		sink.Write([]byte{byte(JobVersionSnapshot)})
  1551  		if err := encoder.Encode(job); err != nil {
  1552  			return err
  1553  		}
  1554  	}
  1555  	return nil
  1556  }
  1557  
  1558  func (s *nomadSnapshot) persistDeployments(sink raft.SnapshotSink,
  1559  	encoder *codec.Encoder) error {
  1560  	// Get all the deployments
  1561  	ws := memdb.NewWatchSet()
  1562  	deployments, err := s.snap.Deployments(ws)
  1563  	if err != nil {
  1564  		return err
  1565  	}
  1566  
  1567  	for {
  1568  		// Get the next item
  1569  		raw := deployments.Next()
  1570  		if raw == nil {
  1571  			break
  1572  		}
  1573  
  1574  		// Prepare the request struct
  1575  		deployment := raw.(*structs.Deployment)
  1576  
  1577  		// Write out a deployment
  1578  		sink.Write([]byte{byte(DeploymentSnapshot)})
  1579  		if err := encoder.Encode(deployment); err != nil {
  1580  			return err
  1581  		}
  1582  	}
  1583  	return nil
  1584  }
  1585  
  1586  func (s *nomadSnapshot) persistACLPolicies(sink raft.SnapshotSink,
  1587  	encoder *codec.Encoder) error {
  1588  	// Get all the policies
  1589  	ws := memdb.NewWatchSet()
  1590  	policies, err := s.snap.ACLPolicies(ws)
  1591  	if err != nil {
  1592  		return err
  1593  	}
  1594  
  1595  	for {
  1596  		// Get the next item
  1597  		raw := policies.Next()
  1598  		if raw == nil {
  1599  			break
  1600  		}
  1601  
  1602  		// Prepare the request struct
  1603  		policy := raw.(*structs.ACLPolicy)
  1604  
  1605  		// Write out a policy registration
  1606  		sink.Write([]byte{byte(ACLPolicySnapshot)})
  1607  		if err := encoder.Encode(policy); err != nil {
  1608  			return err
  1609  		}
  1610  	}
  1611  	return nil
  1612  }
  1613  
  1614  func (s *nomadSnapshot) persistACLTokens(sink raft.SnapshotSink,
  1615  	encoder *codec.Encoder) error {
  1616  	// Get all the tokens
  1617  	ws := memdb.NewWatchSet()
  1618  	tokens, err := s.snap.ACLTokens(ws)
  1619  	if err != nil {
  1620  		return err
  1621  	}
  1622  
  1623  	for {
  1624  		// Get the next item
  1625  		raw := tokens.Next()
  1626  		if raw == nil {
  1627  			break
  1628  		}
  1629  
  1630  		// Prepare the request struct
  1631  		token := raw.(*structs.ACLToken)
  1632  
  1633  		// Write out a token registration
  1634  		sink.Write([]byte{byte(ACLTokenSnapshot)})
  1635  		if err := encoder.Encode(token); err != nil {
  1636  			return err
  1637  		}
  1638  	}
  1639  	return nil
  1640  }
  1641  
  1642  // Release is a no-op, as we just need to GC the pointer
  1643  // to the state store snapshot. There is nothing to explicitly
  1644  // cleanup.
  1645  func (s *nomadSnapshot) Release() {}