github.com/smithx10/nomad@v0.9.1-rc1/nomad/state/state_store.go (about)

     1  package state
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"sort"
     7  	"time"
     8  
     9  	"reflect"
    10  
    11  	log "github.com/hashicorp/go-hclog"
    12  	memdb "github.com/hashicorp/go-memdb"
    13  	multierror "github.com/hashicorp/go-multierror"
    14  	"github.com/hashicorp/nomad/helper"
    15  	"github.com/hashicorp/nomad/nomad/structs"
    16  )
    17  
// Txn is a transaction against a state store.
// This can be a read or write transaction.
// It is declared as an alias of *memdb.Txn, so the two types are
// interchangeable.
type Txn = *memdb.Txn
    21  
const (
	// NodeRegisterEventRegistered is the message used when the node becomes
	// registered for the first time.
	NodeRegisterEventRegistered = "Node registered"

	// NodeRegisterEventReregistered is the message used when the node becomes
	// reregistered.
	NodeRegisterEventReregistered = "Node re-registered"
)
    31  
// IndexEntry is used with the "index" table
// for managing the latest Raft index affecting a table.
type IndexEntry struct {
	// Key is the name of the table the entry refers to (for example "nodes"
	// or "deployment").
	Key   string
	// Value is the index recorded for that table.
	Value uint64
}
    38  
// StateStoreConfig is used to configure a new state store. It is passed to
// NewStateStore and handed back unchanged by StateStore.Config.
type StateStoreConfig struct {
	// Logger is used to output the state store's logs
	Logger log.Logger

	// Region is the region of the server embedding the state store.
	Region string
}
    47  
// The StateStore is responsible for maintaining all the Nomad
// state. It is manipulated by the FSM which maintains consistency
// through the use of Raft. The goals of the StateStore are to provide
// high concurrency for read operations without blocking writes, and
// to provide write availability in the face of reads. EVERY object
// returned as a result of a read against the state store should be
// considered a constant and NEVER modified in place.
type StateStore struct {
	// logger is the store's logger; NewStateStore derives it from the
	// configured logger using the name "state_store".
	logger log.Logger
	// db is the underlying MemDB that all transactions are opened against.
	db     *memdb.MemDB

	// config is the passed in configuration
	config *StateStoreConfig

	// abandonCh is used to signal watchers that this state store has been
	// abandoned (usually during a restore). This is only ever closed.
	abandonCh chan struct{}
}
    66  
    67  // NewStateStore is used to create a new state store
    68  func NewStateStore(config *StateStoreConfig) (*StateStore, error) {
    69  	// Create the MemDB
    70  	db, err := memdb.NewMemDB(stateStoreSchema())
    71  	if err != nil {
    72  		return nil, fmt.Errorf("state store setup failed: %v", err)
    73  	}
    74  
    75  	// Create the state store
    76  	s := &StateStore{
    77  		logger:    config.Logger.Named("state_store"),
    78  		db:        db,
    79  		config:    config,
    80  		abandonCh: make(chan struct{}),
    81  	}
    82  	return s, nil
    83  }
    84  
    85  // Config returns the state store configuration.
    86  func (s *StateStore) Config() *StateStoreConfig {
    87  	return s.config
    88  }
    89  
    90  // Snapshot is used to create a point in time snapshot. Because
    91  // we use MemDB, we just need to snapshot the state of the underlying
    92  // database.
    93  func (s *StateStore) Snapshot() (*StateSnapshot, error) {
    94  	snap := &StateSnapshot{
    95  		StateStore: StateStore{
    96  			logger: s.logger,
    97  			config: s.config,
    98  			db:     s.db.Snapshot(),
    99  		},
   100  	}
   101  	return snap, nil
   102  }
   103  
   104  // Restore is used to optimize the efficiency of rebuilding
   105  // state by minimizing the number of transactions and checking
   106  // overhead.
   107  func (s *StateStore) Restore() (*StateRestore, error) {
   108  	txn := s.db.Txn(true)
   109  	r := &StateRestore{
   110  		txn: txn,
   111  	}
   112  	return r, nil
   113  }
   114  
   115  // AbandonCh returns a channel you can wait on to know if the state store was
   116  // abandoned.
   117  func (s *StateStore) AbandonCh() <-chan struct{} {
   118  	return s.abandonCh
   119  }
   120  
   121  // Abandon is used to signal that the given state store has been abandoned.
   122  // Calling this more than one time will panic.
   123  func (s *StateStore) Abandon() {
   124  	close(s.abandonCh)
   125  }
   126  
// QueryFn is the definition of a function that can be used to implement a basic
// blocking query against the state store. BlockingQuery invokes it with the
// watch set to register watches on (nil when the query is not blocking) and
// the StateStore of a fresh snapshot; it returns the response, the index that
// is compared against the minimum query index, and any error.
type QueryFn func(memdb.WatchSet, *StateStore) (resp interface{}, index uint64, err error)
   130  
   131  // BlockingQuery takes a query function and runs the function until the minimum
   132  // query index is met or until the passed context is cancelled.
   133  func (s *StateStore) BlockingQuery(query QueryFn, minIndex uint64, ctx context.Context) (
   134  	resp interface{}, index uint64, err error) {
   135  
   136  RUN_QUERY:
   137  	// We capture the state store and its abandon channel but pass a snapshot to
   138  	// the blocking query function. We operate on the snapshot to allow separate
   139  	// calls to the state store not all wrapped within the same transaction.
   140  	abandonCh := s.AbandonCh()
   141  	snap, _ := s.Snapshot()
   142  	stateSnap := &snap.StateStore
   143  
   144  	// We can skip all watch tracking if this isn't a blocking query.
   145  	var ws memdb.WatchSet
   146  	if minIndex > 0 {
   147  		ws = memdb.NewWatchSet()
   148  
   149  		// This channel will be closed if a snapshot is restored and the
   150  		// whole state store is abandoned.
   151  		ws.Add(abandonCh)
   152  	}
   153  
   154  	resp, index, err = query(ws, stateSnap)
   155  	if err != nil {
   156  		return nil, index, err
   157  	}
   158  
   159  	// We haven't reached the min-index yet.
   160  	if minIndex > 0 && index <= minIndex {
   161  		if err := ws.WatchCtx(ctx); err != nil {
   162  			return nil, index, err
   163  		}
   164  
   165  		goto RUN_QUERY
   166  	}
   167  
   168  	return resp, index, nil
   169  }
   170  
   171  // UpsertPlanResults is used to upsert the results of a plan.
   172  func (s *StateStore) UpsertPlanResults(index uint64, results *structs.ApplyPlanResultsRequest) error {
   173  	txn := s.db.Txn(true)
   174  	defer txn.Abort()
   175  
   176  	// Upsert the newly created or updated deployment
   177  	if results.Deployment != nil {
   178  		if err := s.upsertDeploymentImpl(index, results.Deployment, txn); err != nil {
   179  			return err
   180  		}
   181  	}
   182  
   183  	// Update the status of deployments effected by the plan.
   184  	if len(results.DeploymentUpdates) != 0 {
   185  		s.upsertDeploymentUpdates(index, results.DeploymentUpdates, txn)
   186  	}
   187  
   188  	// Attach the job to all the allocations. It is pulled out in the payload to
   189  	// avoid the redundancy of encoding, but should be denormalized prior to
   190  	// being inserted into MemDB.
   191  	structs.DenormalizeAllocationJobs(results.Job, results.Alloc)
   192  
   193  	// COMPAT(0.11): Remove in 0.11
   194  	// Calculate the total resources of allocations. It is pulled out in the
   195  	// payload to avoid encoding something that can be computed, but should be
   196  	// denormalized prior to being inserted into MemDB.
   197  	for _, alloc := range results.Alloc {
   198  		if alloc.Resources != nil {
   199  			continue
   200  		}
   201  
   202  		alloc.Resources = new(structs.Resources)
   203  		for _, task := range alloc.TaskResources {
   204  			alloc.Resources.Add(task)
   205  		}
   206  
   207  		// Add the shared resources
   208  		alloc.Resources.Add(alloc.SharedResources)
   209  	}
   210  
   211  	// Upsert the allocations
   212  	if err := s.upsertAllocsImpl(index, results.Alloc, txn); err != nil {
   213  		return err
   214  	}
   215  
   216  	// COMPAT: Nomad versions before 0.7.1 did not include the eval ID when
   217  	// applying the plan. Thus while we are upgrading, we ignore updating the
   218  	// modify index of evaluations from older plans.
   219  	if results.EvalID != "" {
   220  		// Update the modify index of the eval id
   221  		if err := s.updateEvalModifyIndex(txn, index, results.EvalID); err != nil {
   222  			return err
   223  		}
   224  	}
   225  
   226  	// Prepare preempted allocs in the plan results for update
   227  	var preemptedAllocs []*structs.Allocation
   228  	for _, preemptedAlloc := range results.NodePreemptions {
   229  		// Look for existing alloc
   230  		existing, err := txn.First("allocs", "id", preemptedAlloc.ID)
   231  		if err != nil {
   232  			return fmt.Errorf("alloc lookup failed: %v", err)
   233  		}
   234  
   235  		// Nothing to do if this does not exist
   236  		if existing == nil {
   237  			continue
   238  		}
   239  		exist := existing.(*structs.Allocation)
   240  
   241  		// Copy everything from the existing allocation
   242  		copyAlloc := exist.Copy()
   243  
   244  		// Only update the fields set by the scheduler
   245  		copyAlloc.DesiredStatus = preemptedAlloc.DesiredStatus
   246  		copyAlloc.PreemptedByAllocation = preemptedAlloc.PreemptedByAllocation
   247  		copyAlloc.DesiredDescription = preemptedAlloc.DesiredDescription
   248  		copyAlloc.ModifyTime = preemptedAlloc.ModifyTime
   249  		preemptedAllocs = append(preemptedAllocs, copyAlloc)
   250  
   251  	}
   252  
   253  	// Upsert the preempted allocations
   254  	if err := s.upsertAllocsImpl(index, preemptedAllocs, txn); err != nil {
   255  		return err
   256  	}
   257  
   258  	// Upsert followup evals for allocs that were preempted
   259  	for _, eval := range results.PreemptionEvals {
   260  		if err := s.nestedUpsertEval(txn, index, eval); err != nil {
   261  			return err
   262  		}
   263  	}
   264  
   265  	txn.Commit()
   266  	return nil
   267  }
   268  
   269  // upsertDeploymentUpdates updates the deployments given the passed status
   270  // updates.
   271  func (s *StateStore) upsertDeploymentUpdates(index uint64, updates []*structs.DeploymentStatusUpdate, txn *memdb.Txn) error {
   272  	for _, u := range updates {
   273  		if err := s.updateDeploymentStatusImpl(index, u, txn); err != nil {
   274  			return err
   275  		}
   276  	}
   277  
   278  	return nil
   279  }
   280  
   281  // UpsertJobSummary upserts a job summary into the state store.
   282  func (s *StateStore) UpsertJobSummary(index uint64, jobSummary *structs.JobSummary) error {
   283  	txn := s.db.Txn(true)
   284  	defer txn.Abort()
   285  
   286  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
   287  	if jobSummary.Namespace == "" {
   288  		jobSummary.Namespace = structs.DefaultNamespace
   289  	}
   290  
   291  	// Check if the job summary already exists
   292  	existing, err := txn.First("job_summary", "id", jobSummary.Namespace, jobSummary.JobID)
   293  	if err != nil {
   294  		return fmt.Errorf("job summary lookup failed: %v", err)
   295  	}
   296  
   297  	// Setup the indexes correctly
   298  	if existing != nil {
   299  		jobSummary.CreateIndex = existing.(*structs.JobSummary).CreateIndex
   300  		jobSummary.ModifyIndex = index
   301  	} else {
   302  		jobSummary.CreateIndex = index
   303  		jobSummary.ModifyIndex = index
   304  	}
   305  
   306  	// Update the index
   307  	if err := txn.Insert("job_summary", jobSummary); err != nil {
   308  		return err
   309  	}
   310  
   311  	// Update the indexes table for job summary
   312  	if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil {
   313  		return fmt.Errorf("index update failed: %v", err)
   314  	}
   315  
   316  	txn.Commit()
   317  	return nil
   318  }
   319  
   320  // DeleteJobSummary deletes the job summary with the given ID. This is for
   321  // testing purposes only.
   322  func (s *StateStore) DeleteJobSummary(index uint64, namespace, id string) error {
   323  	txn := s.db.Txn(true)
   324  	defer txn.Abort()
   325  
   326  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
   327  	if namespace == "" {
   328  		namespace = structs.DefaultNamespace
   329  	}
   330  
   331  	// Delete the job summary
   332  	if _, err := txn.DeleteAll("job_summary", "id", namespace, id); err != nil {
   333  		return fmt.Errorf("deleting job summary failed: %v", err)
   334  	}
   335  	if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil {
   336  		return fmt.Errorf("index update failed: %v", err)
   337  	}
   338  	txn.Commit()
   339  	return nil
   340  }
   341  
   342  // UpsertDeployment is used to insert a new deployment. If cancelPrior is set to
   343  // true, all prior deployments for the same job will be cancelled.
   344  func (s *StateStore) UpsertDeployment(index uint64, deployment *structs.Deployment) error {
   345  	txn := s.db.Txn(true)
   346  	defer txn.Abort()
   347  	if err := s.upsertDeploymentImpl(index, deployment, txn); err != nil {
   348  		return err
   349  	}
   350  	txn.Commit()
   351  	return nil
   352  }
   353  
   354  func (s *StateStore) upsertDeploymentImpl(index uint64, deployment *structs.Deployment, txn *memdb.Txn) error {
   355  	// Check if the deployment already exists
   356  	existing, err := txn.First("deployment", "id", deployment.ID)
   357  	if err != nil {
   358  		return fmt.Errorf("deployment lookup failed: %v", err)
   359  	}
   360  
   361  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
   362  	if deployment.Namespace == "" {
   363  		deployment.Namespace = structs.DefaultNamespace
   364  	}
   365  
   366  	// Setup the indexes correctly
   367  	if existing != nil {
   368  		deployment.CreateIndex = existing.(*structs.Deployment).CreateIndex
   369  		deployment.ModifyIndex = index
   370  	} else {
   371  		deployment.CreateIndex = index
   372  		deployment.ModifyIndex = index
   373  	}
   374  
   375  	// Insert the deployment
   376  	if err := txn.Insert("deployment", deployment); err != nil {
   377  		return err
   378  	}
   379  
   380  	// Update the indexes table for deployment
   381  	if err := txn.Insert("index", &IndexEntry{"deployment", index}); err != nil {
   382  		return fmt.Errorf("index update failed: %v", err)
   383  	}
   384  
   385  	// If the deployment is being marked as complete, set the job to stable.
   386  	if deployment.Status == structs.DeploymentStatusSuccessful {
   387  		if err := s.updateJobStabilityImpl(index, deployment.Namespace, deployment.JobID, deployment.JobVersion, true, txn); err != nil {
   388  			return fmt.Errorf("failed to update job stability: %v", err)
   389  		}
   390  	}
   391  
   392  	return nil
   393  }
   394  
   395  func (s *StateStore) Deployments(ws memdb.WatchSet) (memdb.ResultIterator, error) {
   396  	txn := s.db.Txn(false)
   397  
   398  	// Walk the entire deployments table
   399  	iter, err := txn.Get("deployment", "id")
   400  	if err != nil {
   401  		return nil, err
   402  	}
   403  
   404  	ws.Add(iter.WatchCh())
   405  	return iter, nil
   406  }
   407  
   408  func (s *StateStore) DeploymentsByNamespace(ws memdb.WatchSet, namespace string) (memdb.ResultIterator, error) {
   409  	txn := s.db.Txn(false)
   410  
   411  	// Walk the entire deployments table
   412  	iter, err := txn.Get("deployment", "namespace", namespace)
   413  	if err != nil {
   414  		return nil, err
   415  	}
   416  
   417  	ws.Add(iter.WatchCh())
   418  	return iter, nil
   419  }
   420  
   421  func (s *StateStore) DeploymentsByIDPrefix(ws memdb.WatchSet, namespace, deploymentID string) (memdb.ResultIterator, error) {
   422  	txn := s.db.Txn(false)
   423  
   424  	// Walk the entire deployments table
   425  	iter, err := txn.Get("deployment", "id_prefix", deploymentID)
   426  	if err != nil {
   427  		return nil, err
   428  	}
   429  
   430  	ws.Add(iter.WatchCh())
   431  
   432  	// Wrap the iterator in a filter
   433  	wrap := memdb.NewFilterIterator(iter, deploymentNamespaceFilter(namespace))
   434  	return wrap, nil
   435  }
   436  
   437  // deploymentNamespaceFilter returns a filter function that filters all
   438  // deployment not in the given namespace.
   439  func deploymentNamespaceFilter(namespace string) func(interface{}) bool {
   440  	return func(raw interface{}) bool {
   441  		d, ok := raw.(*structs.Deployment)
   442  		if !ok {
   443  			return true
   444  		}
   445  
   446  		return d.Namespace != namespace
   447  	}
   448  }
   449  
   450  func (s *StateStore) DeploymentByID(ws memdb.WatchSet, deploymentID string) (*structs.Deployment, error) {
   451  	txn := s.db.Txn(false)
   452  	return s.deploymentByIDImpl(ws, deploymentID, txn)
   453  }
   454  
   455  func (s *StateStore) deploymentByIDImpl(ws memdb.WatchSet, deploymentID string, txn *memdb.Txn) (*structs.Deployment, error) {
   456  	watchCh, existing, err := txn.FirstWatch("deployment", "id", deploymentID)
   457  	if err != nil {
   458  		return nil, fmt.Errorf("deployment lookup failed: %v", err)
   459  	}
   460  	ws.Add(watchCh)
   461  
   462  	if existing != nil {
   463  		return existing.(*structs.Deployment), nil
   464  	}
   465  
   466  	return nil, nil
   467  }
   468  
   469  func (s *StateStore) DeploymentsByJobID(ws memdb.WatchSet, namespace, jobID string) ([]*structs.Deployment, error) {
   470  	txn := s.db.Txn(false)
   471  
   472  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
   473  	if namespace == "" {
   474  		namespace = structs.DefaultNamespace
   475  	}
   476  
   477  	// Get an iterator over the deployments
   478  	iter, err := txn.Get("deployment", "job", namespace, jobID)
   479  	if err != nil {
   480  		return nil, err
   481  	}
   482  
   483  	ws.Add(iter.WatchCh())
   484  
   485  	var out []*structs.Deployment
   486  	for {
   487  		raw := iter.Next()
   488  		if raw == nil {
   489  			break
   490  		}
   491  
   492  		d := raw.(*structs.Deployment)
   493  		out = append(out, d)
   494  	}
   495  
   496  	return out, nil
   497  }
   498  
   499  // LatestDeploymentByJobID returns the latest deployment for the given job. The
   500  // latest is determined strictly by CreateIndex.
   501  func (s *StateStore) LatestDeploymentByJobID(ws memdb.WatchSet, namespace, jobID string) (*structs.Deployment, error) {
   502  	txn := s.db.Txn(false)
   503  
   504  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
   505  	if namespace == "" {
   506  		namespace = structs.DefaultNamespace
   507  	}
   508  
   509  	// Get an iterator over the deployments
   510  	iter, err := txn.Get("deployment", "job", namespace, jobID)
   511  	if err != nil {
   512  		return nil, err
   513  	}
   514  
   515  	ws.Add(iter.WatchCh())
   516  
   517  	var out *structs.Deployment
   518  	for {
   519  		raw := iter.Next()
   520  		if raw == nil {
   521  			break
   522  		}
   523  
   524  		d := raw.(*structs.Deployment)
   525  		if out == nil || out.CreateIndex < d.CreateIndex {
   526  			out = d
   527  		}
   528  	}
   529  
   530  	return out, nil
   531  }
   532  
   533  // DeleteDeployment is used to delete a set of deployments by ID
   534  func (s *StateStore) DeleteDeployment(index uint64, deploymentIDs []string) error {
   535  	txn := s.db.Txn(true)
   536  	defer txn.Abort()
   537  
   538  	if len(deploymentIDs) == 0 {
   539  		return nil
   540  	}
   541  
   542  	for _, deploymentID := range deploymentIDs {
   543  		// Lookup the deployment
   544  		existing, err := txn.First("deployment", "id", deploymentID)
   545  		if err != nil {
   546  			return fmt.Errorf("deployment lookup failed: %v", err)
   547  		}
   548  		if existing == nil {
   549  			return fmt.Errorf("deployment not found")
   550  		}
   551  
   552  		// Delete the deployment
   553  		if err := txn.Delete("deployment", existing); err != nil {
   554  			return fmt.Errorf("deployment delete failed: %v", err)
   555  		}
   556  	}
   557  
   558  	if err := txn.Insert("index", &IndexEntry{"deployment", index}); err != nil {
   559  		return fmt.Errorf("index update failed: %v", err)
   560  	}
   561  
   562  	txn.Commit()
   563  	return nil
   564  }
   565  
   566  // UpsertNode is used to register a node or update a node definition
   567  // This is assumed to be triggered by the client, so we retain the value
   568  // of drain/eligibility which is set by the scheduler.
   569  func (s *StateStore) UpsertNode(index uint64, node *structs.Node) error {
   570  	txn := s.db.Txn(true)
   571  	defer txn.Abort()
   572  
   573  	// Check if the node already exists
   574  	existing, err := txn.First("nodes", "id", node.ID)
   575  	if err != nil {
   576  		return fmt.Errorf("node lookup failed: %v", err)
   577  	}
   578  
   579  	// Setup the indexes correctly
   580  	if existing != nil {
   581  		exist := existing.(*structs.Node)
   582  		node.CreateIndex = exist.CreateIndex
   583  		node.ModifyIndex = index
   584  
   585  		// Retain node events that have already been set on the node
   586  		node.Events = exist.Events
   587  
   588  		// If we are transitioning from down, record the re-registration
   589  		if exist.Status == structs.NodeStatusDown && node.Status != structs.NodeStatusDown {
   590  			appendNodeEvents(index, node, []*structs.NodeEvent{
   591  				structs.NewNodeEvent().SetSubsystem(structs.NodeEventSubsystemCluster).
   592  					SetMessage(NodeRegisterEventReregistered).
   593  					SetTimestamp(time.Unix(node.StatusUpdatedAt, 0))})
   594  		}
   595  
   596  		node.Drain = exist.Drain                                 // Retain the drain mode
   597  		node.SchedulingEligibility = exist.SchedulingEligibility // Retain the eligibility
   598  		node.DrainStrategy = exist.DrainStrategy                 // Retain the drain strategy
   599  	} else {
   600  		// Because this is the first time the node is being registered, we should
   601  		// also create a node registration event
   602  		nodeEvent := structs.NewNodeEvent().SetSubsystem(structs.NodeEventSubsystemCluster).
   603  			SetMessage(NodeRegisterEventRegistered).
   604  			SetTimestamp(time.Unix(node.StatusUpdatedAt, 0))
   605  		node.Events = []*structs.NodeEvent{nodeEvent}
   606  		node.CreateIndex = index
   607  		node.ModifyIndex = index
   608  	}
   609  
   610  	// Insert the node
   611  	if err := txn.Insert("nodes", node); err != nil {
   612  		return fmt.Errorf("node insert failed: %v", err)
   613  	}
   614  	if err := txn.Insert("index", &IndexEntry{"nodes", index}); err != nil {
   615  		return fmt.Errorf("index update failed: %v", err)
   616  	}
   617  
   618  	txn.Commit()
   619  	return nil
   620  }
   621  
   622  // DeleteNode is used to deregister a node
   623  func (s *StateStore) DeleteNode(index uint64, nodeID string) error {
   624  	txn := s.db.Txn(true)
   625  	defer txn.Abort()
   626  
   627  	// Lookup the node
   628  	existing, err := txn.First("nodes", "id", nodeID)
   629  	if err != nil {
   630  		return fmt.Errorf("node lookup failed: %v", err)
   631  	}
   632  	if existing == nil {
   633  		return fmt.Errorf("node not found")
   634  	}
   635  
   636  	// Delete the node
   637  	if err := txn.Delete("nodes", existing); err != nil {
   638  		return fmt.Errorf("node delete failed: %v", err)
   639  	}
   640  	if err := txn.Insert("index", &IndexEntry{"nodes", index}); err != nil {
   641  		return fmt.Errorf("index update failed: %v", err)
   642  	}
   643  
   644  	txn.Commit()
   645  	return nil
   646  }
   647  
   648  // UpdateNodeStatus is used to update the status of a node
   649  func (s *StateStore) UpdateNodeStatus(index uint64, nodeID, status string, event *structs.NodeEvent) error {
   650  	txn := s.db.Txn(true)
   651  	defer txn.Abort()
   652  
   653  	// Lookup the node
   654  	existing, err := txn.First("nodes", "id", nodeID)
   655  	if err != nil {
   656  		return fmt.Errorf("node lookup failed: %v", err)
   657  	}
   658  	if existing == nil {
   659  		return fmt.Errorf("node not found")
   660  	}
   661  
   662  	// Copy the existing node
   663  	existingNode := existing.(*structs.Node)
   664  	copyNode := existingNode.Copy()
   665  
   666  	// Add the event if given
   667  	if event != nil {
   668  		appendNodeEvents(index, copyNode, []*structs.NodeEvent{event})
   669  	}
   670  
   671  	// Update the status in the copy
   672  	copyNode.Status = status
   673  	copyNode.ModifyIndex = index
   674  
   675  	// Insert the node
   676  	if err := txn.Insert("nodes", copyNode); err != nil {
   677  		return fmt.Errorf("node update failed: %v", err)
   678  	}
   679  	if err := txn.Insert("index", &IndexEntry{"nodes", index}); err != nil {
   680  		return fmt.Errorf("index update failed: %v", err)
   681  	}
   682  
   683  	txn.Commit()
   684  	return nil
   685  }
   686  
   687  // BatchUpdateNodeDrain is used to update the drain of a node set of nodes
   688  func (s *StateStore) BatchUpdateNodeDrain(index uint64, updates map[string]*structs.DrainUpdate, events map[string]*structs.NodeEvent) error {
   689  	txn := s.db.Txn(true)
   690  	defer txn.Abort()
   691  	for node, update := range updates {
   692  		if err := s.updateNodeDrainImpl(txn, index, node, update.DrainStrategy, update.MarkEligible, events[node]); err != nil {
   693  			return err
   694  		}
   695  	}
   696  	txn.Commit()
   697  	return nil
   698  }
   699  
   700  // UpdateNodeDrain is used to update the drain of a node
   701  func (s *StateStore) UpdateNodeDrain(index uint64, nodeID string,
   702  	drain *structs.DrainStrategy, markEligible bool, event *structs.NodeEvent) error {
   703  
   704  	txn := s.db.Txn(true)
   705  	defer txn.Abort()
   706  	if err := s.updateNodeDrainImpl(txn, index, nodeID, drain, markEligible, event); err != nil {
   707  		return err
   708  	}
   709  	txn.Commit()
   710  	return nil
   711  }
   712  
   713  func (s *StateStore) updateNodeDrainImpl(txn *memdb.Txn, index uint64, nodeID string,
   714  	drain *structs.DrainStrategy, markEligible bool, event *structs.NodeEvent) error {
   715  
   716  	// Lookup the node
   717  	existing, err := txn.First("nodes", "id", nodeID)
   718  	if err != nil {
   719  		return fmt.Errorf("node lookup failed: %v", err)
   720  	}
   721  	if existing == nil {
   722  		return fmt.Errorf("node not found")
   723  	}
   724  
   725  	// Copy the existing node
   726  	existingNode := existing.(*structs.Node)
   727  	copyNode := existingNode.Copy()
   728  
   729  	// Add the event if given
   730  	if event != nil {
   731  		appendNodeEvents(index, copyNode, []*structs.NodeEvent{event})
   732  	}
   733  
   734  	// Update the drain in the copy
   735  	copyNode.Drain = drain != nil // COMPAT: Remove in Nomad 0.9
   736  	copyNode.DrainStrategy = drain
   737  	if drain != nil {
   738  		copyNode.SchedulingEligibility = structs.NodeSchedulingIneligible
   739  	} else if markEligible {
   740  		copyNode.SchedulingEligibility = structs.NodeSchedulingEligible
   741  	}
   742  
   743  	copyNode.ModifyIndex = index
   744  
   745  	// Insert the node
   746  	if err := txn.Insert("nodes", copyNode); err != nil {
   747  		return fmt.Errorf("node update failed: %v", err)
   748  	}
   749  	if err := txn.Insert("index", &IndexEntry{"nodes", index}); err != nil {
   750  		return fmt.Errorf("index update failed: %v", err)
   751  	}
   752  
   753  	return nil
   754  }
   755  
   756  // UpdateNodeEligibility is used to update the scheduling eligibility of a node
   757  func (s *StateStore) UpdateNodeEligibility(index uint64, nodeID string, eligibility string, event *structs.NodeEvent) error {
   758  
   759  	txn := s.db.Txn(true)
   760  	defer txn.Abort()
   761  
   762  	// Lookup the node
   763  	existing, err := txn.First("nodes", "id", nodeID)
   764  	if err != nil {
   765  		return fmt.Errorf("node lookup failed: %v", err)
   766  	}
   767  	if existing == nil {
   768  		return fmt.Errorf("node not found")
   769  	}
   770  
   771  	// Copy the existing node
   772  	existingNode := existing.(*structs.Node)
   773  	copyNode := existingNode.Copy()
   774  
   775  	// Add the event if given
   776  	if event != nil {
   777  		appendNodeEvents(index, copyNode, []*structs.NodeEvent{event})
   778  	}
   779  
   780  	// Check if this is a valid action
   781  	if copyNode.DrainStrategy != nil && eligibility == structs.NodeSchedulingEligible {
   782  		return fmt.Errorf("can not set node's scheduling eligibility to eligible while it is draining")
   783  	}
   784  
   785  	// Update the eligibility in the copy
   786  	copyNode.SchedulingEligibility = eligibility
   787  	copyNode.ModifyIndex = index
   788  
   789  	// Insert the node
   790  	if err := txn.Insert("nodes", copyNode); err != nil {
   791  		return fmt.Errorf("node update failed: %v", err)
   792  	}
   793  	if err := txn.Insert("index", &IndexEntry{"nodes", index}); err != nil {
   794  		return fmt.Errorf("index update failed: %v", err)
   795  	}
   796  
   797  	txn.Commit()
   798  	return nil
   799  }
   800  
   801  // UpsertNodeEvents adds the node events to the nodes, rotating events as
   802  // necessary.
   803  func (s *StateStore) UpsertNodeEvents(index uint64, nodeEvents map[string][]*structs.NodeEvent) error {
   804  	txn := s.db.Txn(true)
   805  	defer txn.Abort()
   806  
   807  	for nodeID, events := range nodeEvents {
   808  		if err := s.upsertNodeEvents(index, nodeID, events, txn); err != nil {
   809  			return err
   810  		}
   811  	}
   812  
   813  	txn.Commit()
   814  	return nil
   815  }
   816  
   817  // upsertNodeEvent upserts a node event for a respective node. It also maintains
   818  // that a fixed number of node events are ever stored simultaneously, deleting
   819  // older events once this bound has been reached.
   820  func (s *StateStore) upsertNodeEvents(index uint64, nodeID string, events []*structs.NodeEvent, txn *memdb.Txn) error {
   821  	// Lookup the node
   822  	existing, err := txn.First("nodes", "id", nodeID)
   823  	if err != nil {
   824  		return fmt.Errorf("node lookup failed: %v", err)
   825  	}
   826  	if existing == nil {
   827  		return fmt.Errorf("node not found")
   828  	}
   829  
   830  	// Copy the existing node
   831  	existingNode := existing.(*structs.Node)
   832  	copyNode := existingNode.Copy()
   833  	appendNodeEvents(index, copyNode, events)
   834  
   835  	// Insert the node
   836  	if err := txn.Insert("nodes", copyNode); err != nil {
   837  		return fmt.Errorf("node update failed: %v", err)
   838  	}
   839  	if err := txn.Insert("index", &IndexEntry{"nodes", index}); err != nil {
   840  		return fmt.Errorf("index update failed: %v", err)
   841  	}
   842  
   843  	return nil
   844  }
   845  
   846  // appendNodeEvents is a helper that takes a node and new events and appends
   847  // them, pruning older events as needed.
   848  func appendNodeEvents(index uint64, node *structs.Node, events []*structs.NodeEvent) {
   849  	// Add the events, updating the indexes
   850  	for _, e := range events {
   851  		e.CreateIndex = index
   852  		node.Events = append(node.Events, e)
   853  	}
   854  
   855  	// Keep node events pruned to not exceed the max allowed
   856  	if l := len(node.Events); l > structs.MaxRetainedNodeEvents {
   857  		delta := l - structs.MaxRetainedNodeEvents
   858  		node.Events = node.Events[delta:]
   859  	}
   860  }
   861  
   862  // NodeByID is used to lookup a node by ID
   863  func (s *StateStore) NodeByID(ws memdb.WatchSet, nodeID string) (*structs.Node, error) {
   864  	txn := s.db.Txn(false)
   865  
   866  	watchCh, existing, err := txn.FirstWatch("nodes", "id", nodeID)
   867  	if err != nil {
   868  		return nil, fmt.Errorf("node lookup failed: %v", err)
   869  	}
   870  	ws.Add(watchCh)
   871  
   872  	if existing != nil {
   873  		return existing.(*structs.Node), nil
   874  	}
   875  	return nil, nil
   876  }
   877  
   878  // NodesByIDPrefix is used to lookup nodes by prefix
   879  func (s *StateStore) NodesByIDPrefix(ws memdb.WatchSet, nodeID string) (memdb.ResultIterator, error) {
   880  	txn := s.db.Txn(false)
   881  
   882  	iter, err := txn.Get("nodes", "id_prefix", nodeID)
   883  	if err != nil {
   884  		return nil, fmt.Errorf("node lookup failed: %v", err)
   885  	}
   886  	ws.Add(iter.WatchCh())
   887  
   888  	return iter, nil
   889  }
   890  
   891  // NodeBySecretID is used to lookup a node by SecretID
   892  func (s *StateStore) NodeBySecretID(ws memdb.WatchSet, secretID string) (*structs.Node, error) {
   893  	txn := s.db.Txn(false)
   894  
   895  	watchCh, existing, err := txn.FirstWatch("nodes", "secret_id", secretID)
   896  	if err != nil {
   897  		return nil, fmt.Errorf("node lookup by SecretID failed: %v", err)
   898  	}
   899  	ws.Add(watchCh)
   900  
   901  	if existing != nil {
   902  		return existing.(*structs.Node), nil
   903  	}
   904  	return nil, nil
   905  }
   906  
   907  // Nodes returns an iterator over all the nodes
   908  func (s *StateStore) Nodes(ws memdb.WatchSet) (memdb.ResultIterator, error) {
   909  	txn := s.db.Txn(false)
   910  
   911  	// Walk the entire nodes table
   912  	iter, err := txn.Get("nodes", "id")
   913  	if err != nil {
   914  		return nil, err
   915  	}
   916  	ws.Add(iter.WatchCh())
   917  	return iter, nil
   918  }
   919  
   920  // UpsertJob is used to register a job or update a job definition
   921  func (s *StateStore) UpsertJob(index uint64, job *structs.Job) error {
   922  	txn := s.db.Txn(true)
   923  	defer txn.Abort()
   924  	if err := s.upsertJobImpl(index, job, false, txn); err != nil {
   925  		return err
   926  	}
   927  	txn.Commit()
   928  	return nil
   929  }
   930  
   931  // UpsertJobTxn is used to register a job or update a job definition, like UpsertJob,
   932  // but in a transaction.  Useful for when making multiple modifications atomically
   933  func (s *StateStore) UpsertJobTxn(index uint64, job *structs.Job, txn Txn) error {
   934  	return s.upsertJobImpl(index, job, false, txn)
   935  }
   936  
   937  // upsertJobImpl is the implementation for registering a job or updating a job definition
   938  func (s *StateStore) upsertJobImpl(index uint64, job *structs.Job, keepVersion bool, txn *memdb.Txn) error {
   939  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
   940  	if job.Namespace == "" {
   941  		job.Namespace = structs.DefaultNamespace
   942  	}
   943  
   944  	// Assert the namespace exists
   945  	if exists, err := s.namespaceExists(txn, job.Namespace); err != nil {
   946  		return err
   947  	} else if !exists {
   948  		return fmt.Errorf("job %q is in nonexistent namespace %q", job.ID, job.Namespace)
   949  	}
   950  
   951  	// Check if the job already exists
   952  	existing, err := txn.First("jobs", "id", job.Namespace, job.ID)
   953  	if err != nil {
   954  		return fmt.Errorf("job lookup failed: %v", err)
   955  	}
   956  
   957  	// Setup the indexes correctly
   958  	if existing != nil {
   959  		job.CreateIndex = existing.(*structs.Job).CreateIndex
   960  		job.ModifyIndex = index
   961  
   962  		// Bump the version unless asked to keep it. This should only be done
   963  		// when changing an internal field such as Stable. A spec change should
   964  		// always come with a version bump
   965  		if !keepVersion {
   966  			job.JobModifyIndex = index
   967  			job.Version = existing.(*structs.Job).Version + 1
   968  		}
   969  
   970  		// Compute the job status
   971  		var err error
   972  		job.Status, err = s.getJobStatus(txn, job, false)
   973  		if err != nil {
   974  			return fmt.Errorf("setting job status for %q failed: %v", job.ID, err)
   975  		}
   976  	} else {
   977  		job.CreateIndex = index
   978  		job.ModifyIndex = index
   979  		job.JobModifyIndex = index
   980  		job.Version = 0
   981  
   982  		if err := s.setJobStatus(index, txn, job, false, ""); err != nil {
   983  			return fmt.Errorf("setting job status for %q failed: %v", job.ID, err)
   984  		}
   985  
   986  		// Have to get the job again since it could have been updated
   987  		updated, err := txn.First("jobs", "id", job.Namespace, job.ID)
   988  		if err != nil {
   989  			return fmt.Errorf("job lookup failed: %v", err)
   990  		}
   991  		if updated != nil {
   992  			job = updated.(*structs.Job)
   993  		}
   994  	}
   995  
   996  	if err := s.updateSummaryWithJob(index, job, txn); err != nil {
   997  		return fmt.Errorf("unable to create job summary: %v", err)
   998  	}
   999  
  1000  	if err := s.upsertJobVersion(index, job, txn); err != nil {
  1001  		return fmt.Errorf("unable to upsert job into job_version table: %v", err)
  1002  	}
  1003  
  1004  	// Insert the job
  1005  	if err := txn.Insert("jobs", job); err != nil {
  1006  		return fmt.Errorf("job insert failed: %v", err)
  1007  	}
  1008  	if err := txn.Insert("index", &IndexEntry{"jobs", index}); err != nil {
  1009  		return fmt.Errorf("index update failed: %v", err)
  1010  	}
  1011  
  1012  	return nil
  1013  }
  1014  
  1015  // DeleteJob is used to deregister a job
  1016  func (s *StateStore) DeleteJob(index uint64, namespace, jobID string) error {
  1017  	txn := s.db.Txn(true)
  1018  	defer txn.Abort()
  1019  
  1020  	err := s.DeleteJobTxn(index, namespace, jobID, txn)
  1021  	if err == nil {
  1022  		txn.Commit()
  1023  	}
  1024  	return err
  1025  }
  1026  
  1027  // DeleteJobTxn is used to deregister a job, like DeleteJob,
  1028  // but in a transaction.  Useful for when making multiple modifications atomically
  1029  func (s *StateStore) DeleteJobTxn(index uint64, namespace, jobID string, txn Txn) error {
  1030  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
  1031  	if namespace == "" {
  1032  		namespace = structs.DefaultNamespace
  1033  	}
  1034  
  1035  	// Lookup the node
  1036  	existing, err := txn.First("jobs", "id", namespace, jobID)
  1037  	if err != nil {
  1038  		return fmt.Errorf("job lookup failed: %v", err)
  1039  	}
  1040  	if existing == nil {
  1041  		return fmt.Errorf("job not found")
  1042  	}
  1043  
  1044  	// Check if we should update a parent job summary
  1045  	job := existing.(*structs.Job)
  1046  	if job.ParentID != "" {
  1047  		summaryRaw, err := txn.First("job_summary", "id", namespace, job.ParentID)
  1048  		if err != nil {
  1049  			return fmt.Errorf("unable to retrieve summary for parent job: %v", err)
  1050  		}
  1051  
  1052  		// Only continue if the summary exists. It could not exist if the parent
  1053  		// job was removed
  1054  		if summaryRaw != nil {
  1055  			existing := summaryRaw.(*structs.JobSummary)
  1056  			pSummary := existing.Copy()
  1057  			if pSummary.Children != nil {
  1058  
  1059  				modified := false
  1060  				switch job.Status {
  1061  				case structs.JobStatusPending:
  1062  					pSummary.Children.Pending--
  1063  					pSummary.Children.Dead++
  1064  					modified = true
  1065  				case structs.JobStatusRunning:
  1066  					pSummary.Children.Running--
  1067  					pSummary.Children.Dead++
  1068  					modified = true
  1069  				case structs.JobStatusDead:
  1070  				default:
  1071  					return fmt.Errorf("unknown old job status %q", job.Status)
  1072  				}
  1073  
  1074  				if modified {
  1075  					// Update the modify index
  1076  					pSummary.ModifyIndex = index
  1077  
  1078  					// COMPAT 0.7: Upgrade old objects that do not have namespaces
  1079  					if pSummary.Namespace == "" {
  1080  						pSummary.Namespace = structs.DefaultNamespace
  1081  					}
  1082  
  1083  					// Insert the summary
  1084  					if err := txn.Insert("job_summary", pSummary); err != nil {
  1085  						return fmt.Errorf("job summary insert failed: %v", err)
  1086  					}
  1087  					if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil {
  1088  						return fmt.Errorf("index update failed: %v", err)
  1089  					}
  1090  				}
  1091  			}
  1092  		}
  1093  	}
  1094  
  1095  	// Delete the job
  1096  	if err := txn.Delete("jobs", existing); err != nil {
  1097  		return fmt.Errorf("job delete failed: %v", err)
  1098  	}
  1099  	if err := txn.Insert("index", &IndexEntry{"jobs", index}); err != nil {
  1100  		return fmt.Errorf("index update failed: %v", err)
  1101  	}
  1102  
  1103  	// Delete the job versions
  1104  	if err := s.deleteJobVersions(index, job, txn); err != nil {
  1105  		return err
  1106  	}
  1107  
  1108  	// Delete the job summary
  1109  	if _, err = txn.DeleteAll("job_summary", "id", namespace, jobID); err != nil {
  1110  		return fmt.Errorf("deleing job summary failed: %v", err)
  1111  	}
  1112  	if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil {
  1113  		return fmt.Errorf("index update failed: %v", err)
  1114  	}
  1115  
  1116  	return nil
  1117  }
  1118  
  1119  // deleteJobVersions deletes all versions of the given job.
  1120  func (s *StateStore) deleteJobVersions(index uint64, job *structs.Job, txn *memdb.Txn) error {
  1121  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
  1122  	if job.Namespace == "" {
  1123  		job.Namespace = structs.DefaultNamespace
  1124  	}
  1125  
  1126  	iter, err := txn.Get("job_version", "id_prefix", job.Namespace, job.ID)
  1127  	if err != nil {
  1128  		return err
  1129  	}
  1130  
  1131  	// Put them into a slice so there are no safety concerns while actually
  1132  	// performing the deletes
  1133  	jobs := []*structs.Job{}
  1134  	for {
  1135  		raw := iter.Next()
  1136  		if raw == nil {
  1137  			break
  1138  		}
  1139  
  1140  		// Ensure the ID is an exact match
  1141  		j := raw.(*structs.Job)
  1142  		if j.ID != job.ID {
  1143  			continue
  1144  		}
  1145  
  1146  		jobs = append(jobs, j)
  1147  	}
  1148  
  1149  	// Do the deletes
  1150  	for _, j := range jobs {
  1151  		if err := txn.Delete("job_version", j); err != nil {
  1152  			return fmt.Errorf("deleting job versions failed: %v", err)
  1153  		}
  1154  	}
  1155  
  1156  	if err := txn.Insert("index", &IndexEntry{"job_version", index}); err != nil {
  1157  		return fmt.Errorf("index update failed: %v", err)
  1158  	}
  1159  
  1160  	return nil
  1161  }
  1162  
  1163  // upsertJobVersion inserts a job into its historic version table and limits the
  1164  // number of job versions that are tracked.
  1165  func (s *StateStore) upsertJobVersion(index uint64, job *structs.Job, txn *memdb.Txn) error {
  1166  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
  1167  	if job.Namespace == "" {
  1168  		job.Namespace = structs.DefaultNamespace
  1169  	}
  1170  
  1171  	// Insert the job
  1172  	if err := txn.Insert("job_version", job); err != nil {
  1173  		return fmt.Errorf("failed to insert job into job_version table: %v", err)
  1174  	}
  1175  
  1176  	if err := txn.Insert("index", &IndexEntry{"job_version", index}); err != nil {
  1177  		return fmt.Errorf("index update failed: %v", err)
  1178  	}
  1179  
  1180  	// Get all the historic jobs for this ID
  1181  	all, err := s.jobVersionByID(txn, nil, job.Namespace, job.ID)
  1182  	if err != nil {
  1183  		return fmt.Errorf("failed to look up job versions for %q: %v", job.ID, err)
  1184  	}
  1185  
  1186  	// If we are below the limit there is no GCing to be done
  1187  	if len(all) <= structs.JobTrackedVersions {
  1188  		return nil
  1189  	}
  1190  
  1191  	// We have to delete a historic job to make room.
  1192  	// Find index of the highest versioned stable job
  1193  	stableIdx := -1
  1194  	for i, j := range all {
  1195  		if j.Stable {
  1196  			stableIdx = i
  1197  			break
  1198  		}
  1199  	}
  1200  
  1201  	// If the stable job is the oldest version, do a swap to bring it into the
  1202  	// keep set.
  1203  	max := structs.JobTrackedVersions
  1204  	if stableIdx == max {
  1205  		all[max-1], all[max] = all[max], all[max-1]
  1206  	}
  1207  
  1208  	// Delete the job outside of the set that are being kept.
  1209  	d := all[max]
  1210  	if err := txn.Delete("job_version", d); err != nil {
  1211  		return fmt.Errorf("failed to delete job %v (%d) from job_version", d.ID, d.Version)
  1212  	}
  1213  
  1214  	return nil
  1215  }
  1216  
  1217  // JobByID is used to lookup a job by its ID. JobByID returns the current/latest job
  1218  // version.
  1219  func (s *StateStore) JobByID(ws memdb.WatchSet, namespace, id string) (*structs.Job, error) {
  1220  	txn := s.db.Txn(false)
  1221  	return s.JobByIDTxn(ws, namespace, id, txn)
  1222  }
  1223  
  1224  // JobByIDTxn is used to lookup a job by its ID, like  JobByID. JobByID returns the job version
  1225  // accessible through in the transaction
  1226  func (s *StateStore) JobByIDTxn(ws memdb.WatchSet, namespace, id string, txn Txn) (*structs.Job, error) {
  1227  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
  1228  	if namespace == "" {
  1229  		namespace = structs.DefaultNamespace
  1230  	}
  1231  
  1232  	watchCh, existing, err := txn.FirstWatch("jobs", "id", namespace, id)
  1233  	if err != nil {
  1234  		return nil, fmt.Errorf("job lookup failed: %v", err)
  1235  	}
  1236  	ws.Add(watchCh)
  1237  
  1238  	if existing != nil {
  1239  		return existing.(*structs.Job), nil
  1240  	}
  1241  	return nil, nil
  1242  }
  1243  
  1244  // JobsByIDPrefix is used to lookup a job by prefix
  1245  func (s *StateStore) JobsByIDPrefix(ws memdb.WatchSet, namespace, id string) (memdb.ResultIterator, error) {
  1246  	txn := s.db.Txn(false)
  1247  
  1248  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
  1249  	if namespace == "" {
  1250  		namespace = structs.DefaultNamespace
  1251  	}
  1252  
  1253  	iter, err := txn.Get("jobs", "id_prefix", namespace, id)
  1254  	if err != nil {
  1255  		return nil, fmt.Errorf("job lookup failed: %v", err)
  1256  	}
  1257  
  1258  	ws.Add(iter.WatchCh())
  1259  
  1260  	return iter, nil
  1261  }
  1262  
  1263  // JobVersionsByID returns all the tracked versions of a job.
  1264  func (s *StateStore) JobVersionsByID(ws memdb.WatchSet, namespace, id string) ([]*structs.Job, error) {
  1265  	txn := s.db.Txn(false)
  1266  
  1267  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
  1268  	if namespace == "" {
  1269  		namespace = structs.DefaultNamespace
  1270  	}
  1271  
  1272  	return s.jobVersionByID(txn, &ws, namespace, id)
  1273  }
  1274  
  1275  // jobVersionByID is the underlying implementation for retrieving all tracked
  1276  // versions of a job and is called under an existing transaction. A watch set
  1277  // can optionally be passed in to add the job histories to the watch set.
  1278  func (s *StateStore) jobVersionByID(txn *memdb.Txn, ws *memdb.WatchSet, namespace, id string) ([]*structs.Job, error) {
  1279  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
  1280  	if namespace == "" {
  1281  		namespace = structs.DefaultNamespace
  1282  	}
  1283  
  1284  	// Get all the historic jobs for this ID
  1285  	iter, err := txn.Get("job_version", "id_prefix", namespace, id)
  1286  	if err != nil {
  1287  		return nil, err
  1288  	}
  1289  
  1290  	if ws != nil {
  1291  		ws.Add(iter.WatchCh())
  1292  	}
  1293  
  1294  	var all []*structs.Job
  1295  	for {
  1296  		raw := iter.Next()
  1297  		if raw == nil {
  1298  			break
  1299  		}
  1300  
  1301  		// Ensure the ID is an exact match
  1302  		j := raw.(*structs.Job)
  1303  		if j.ID != id {
  1304  			continue
  1305  		}
  1306  
  1307  		all = append(all, j)
  1308  	}
  1309  
  1310  	// Sort in reverse order so that the highest version is first
  1311  	sort.Slice(all, func(i, j int) bool {
  1312  		return all[i].Version > all[j].Version
  1313  	})
  1314  
  1315  	return all, nil
  1316  }
  1317  
  1318  // JobByIDAndVersion returns the job identified by its ID and Version. The
  1319  // passed watchset may be nil.
  1320  func (s *StateStore) JobByIDAndVersion(ws memdb.WatchSet, namespace, id string, version uint64) (*structs.Job, error) {
  1321  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
  1322  	if namespace == "" {
  1323  		namespace = structs.DefaultNamespace
  1324  	}
  1325  	txn := s.db.Txn(false)
  1326  	return s.jobByIDAndVersionImpl(ws, namespace, id, version, txn)
  1327  }
  1328  
  1329  // jobByIDAndVersionImpl returns the job identified by its ID and Version. The
  1330  // passed watchset may be nil.
  1331  func (s *StateStore) jobByIDAndVersionImpl(ws memdb.WatchSet, namespace, id string,
  1332  	version uint64, txn *memdb.Txn) (*structs.Job, error) {
  1333  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
  1334  	if namespace == "" {
  1335  		namespace = structs.DefaultNamespace
  1336  	}
  1337  
  1338  	watchCh, existing, err := txn.FirstWatch("job_version", "id", namespace, id, version)
  1339  	if err != nil {
  1340  		return nil, err
  1341  	}
  1342  
  1343  	if ws != nil {
  1344  		ws.Add(watchCh)
  1345  	}
  1346  
  1347  	if existing != nil {
  1348  		job := existing.(*structs.Job)
  1349  		return job, nil
  1350  	}
  1351  
  1352  	return nil, nil
  1353  }
  1354  
  1355  func (s *StateStore) JobVersions(ws memdb.WatchSet) (memdb.ResultIterator, error) {
  1356  	txn := s.db.Txn(false)
  1357  
  1358  	// Walk the entire deployments table
  1359  	iter, err := txn.Get("job_version", "id")
  1360  	if err != nil {
  1361  		return nil, err
  1362  	}
  1363  
  1364  	ws.Add(iter.WatchCh())
  1365  	return iter, nil
  1366  }
  1367  
  1368  // Jobs returns an iterator over all the jobs
  1369  func (s *StateStore) Jobs(ws memdb.WatchSet) (memdb.ResultIterator, error) {
  1370  	txn := s.db.Txn(false)
  1371  
  1372  	// Walk the entire jobs table
  1373  	iter, err := txn.Get("jobs", "id")
  1374  	if err != nil {
  1375  		return nil, err
  1376  	}
  1377  
  1378  	ws.Add(iter.WatchCh())
  1379  
  1380  	return iter, nil
  1381  }
  1382  
  1383  // JobsByNamespace returns an iterator over all the jobs for the given namespace
  1384  func (s *StateStore) JobsByNamespace(ws memdb.WatchSet, namespace string) (memdb.ResultIterator, error) {
  1385  	txn := s.db.Txn(false)
  1386  	return s.jobsByNamespaceImpl(ws, namespace, txn)
  1387  }
  1388  
  1389  // jobsByNamespaceImpl returns an iterator over all the jobs for the given namespace
  1390  func (s *StateStore) jobsByNamespaceImpl(ws memdb.WatchSet, namespace string, txn *memdb.Txn) (memdb.ResultIterator, error) {
  1391  	// Walk the entire jobs table
  1392  	iter, err := txn.Get("jobs", "id_prefix", namespace, "")
  1393  	if err != nil {
  1394  		return nil, err
  1395  	}
  1396  
  1397  	ws.Add(iter.WatchCh())
  1398  
  1399  	return iter, nil
  1400  }
  1401  
  1402  // JobsByPeriodic returns an iterator over all the periodic or non-periodic jobs.
  1403  func (s *StateStore) JobsByPeriodic(ws memdb.WatchSet, periodic bool) (memdb.ResultIterator, error) {
  1404  	txn := s.db.Txn(false)
  1405  
  1406  	iter, err := txn.Get("jobs", "periodic", periodic)
  1407  	if err != nil {
  1408  		return nil, err
  1409  	}
  1410  
  1411  	ws.Add(iter.WatchCh())
  1412  
  1413  	return iter, nil
  1414  }
  1415  
  1416  // JobsByScheduler returns an iterator over all the jobs with the specific
  1417  // scheduler type.
  1418  func (s *StateStore) JobsByScheduler(ws memdb.WatchSet, schedulerType string) (memdb.ResultIterator, error) {
  1419  	txn := s.db.Txn(false)
  1420  
  1421  	// Return an iterator for jobs with the specific type.
  1422  	iter, err := txn.Get("jobs", "type", schedulerType)
  1423  	if err != nil {
  1424  		return nil, err
  1425  	}
  1426  
  1427  	ws.Add(iter.WatchCh())
  1428  
  1429  	return iter, nil
  1430  }
  1431  
  1432  // JobsByGC returns an iterator over all jobs eligible or uneligible for garbage
  1433  // collection.
  1434  func (s *StateStore) JobsByGC(ws memdb.WatchSet, gc bool) (memdb.ResultIterator, error) {
  1435  	txn := s.db.Txn(false)
  1436  
  1437  	iter, err := txn.Get("jobs", "gc", gc)
  1438  	if err != nil {
  1439  		return nil, err
  1440  	}
  1441  
  1442  	ws.Add(iter.WatchCh())
  1443  
  1444  	return iter, nil
  1445  }
  1446  
  1447  // JobSummary returns a job summary object which matches a specific id.
  1448  func (s *StateStore) JobSummaryByID(ws memdb.WatchSet, namespace, jobID string) (*structs.JobSummary, error) {
  1449  	txn := s.db.Txn(false)
  1450  
  1451  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
  1452  	if namespace == "" {
  1453  		namespace = structs.DefaultNamespace
  1454  	}
  1455  
  1456  	watchCh, existing, err := txn.FirstWatch("job_summary", "id", namespace, jobID)
  1457  	if err != nil {
  1458  		return nil, err
  1459  	}
  1460  
  1461  	ws.Add(watchCh)
  1462  
  1463  	if existing != nil {
  1464  		summary := existing.(*structs.JobSummary)
  1465  		return summary, nil
  1466  	}
  1467  
  1468  	return nil, nil
  1469  }
  1470  
  1471  // JobSummaries walks the entire job summary table and returns all the job
  1472  // summary objects
  1473  func (s *StateStore) JobSummaries(ws memdb.WatchSet) (memdb.ResultIterator, error) {
  1474  	txn := s.db.Txn(false)
  1475  
  1476  	iter, err := txn.Get("job_summary", "id")
  1477  	if err != nil {
  1478  		return nil, err
  1479  	}
  1480  
  1481  	ws.Add(iter.WatchCh())
  1482  
  1483  	return iter, nil
  1484  }
  1485  
  1486  // JobSummaryByPrefix is used to look up Job Summary by id prefix
  1487  func (s *StateStore) JobSummaryByPrefix(ws memdb.WatchSet, namespace, id string) (memdb.ResultIterator, error) {
  1488  	txn := s.db.Txn(false)
  1489  
  1490  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
  1491  	if namespace == "" {
  1492  		namespace = structs.DefaultNamespace
  1493  	}
  1494  
  1495  	iter, err := txn.Get("job_summary", "id_prefix", namespace, id)
  1496  	if err != nil {
  1497  		return nil, fmt.Errorf("eval lookup failed: %v", err)
  1498  	}
  1499  
  1500  	ws.Add(iter.WatchCh())
  1501  
  1502  	return iter, nil
  1503  }
  1504  
  1505  // UpsertPeriodicLaunch is used to register a launch or update it.
  1506  func (s *StateStore) UpsertPeriodicLaunch(index uint64, launch *structs.PeriodicLaunch) error {
  1507  	txn := s.db.Txn(true)
  1508  	defer txn.Abort()
  1509  
  1510  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
  1511  	if launch.Namespace == "" {
  1512  		launch.Namespace = structs.DefaultNamespace
  1513  	}
  1514  
  1515  	// Check if the job already exists
  1516  	existing, err := txn.First("periodic_launch", "id", launch.Namespace, launch.ID)
  1517  	if err != nil {
  1518  		return fmt.Errorf("periodic launch lookup failed: %v", err)
  1519  	}
  1520  
  1521  	// Setup the indexes correctly
  1522  	if existing != nil {
  1523  		launch.CreateIndex = existing.(*structs.PeriodicLaunch).CreateIndex
  1524  		launch.ModifyIndex = index
  1525  	} else {
  1526  		launch.CreateIndex = index
  1527  		launch.ModifyIndex = index
  1528  	}
  1529  
  1530  	// Insert the job
  1531  	if err := txn.Insert("periodic_launch", launch); err != nil {
  1532  		return fmt.Errorf("launch insert failed: %v", err)
  1533  	}
  1534  	if err := txn.Insert("index", &IndexEntry{"periodic_launch", index}); err != nil {
  1535  		return fmt.Errorf("index update failed: %v", err)
  1536  	}
  1537  
  1538  	txn.Commit()
  1539  	return nil
  1540  }
  1541  
  1542  // DeletePeriodicLaunch is used to delete the periodic launch
  1543  func (s *StateStore) DeletePeriodicLaunch(index uint64, namespace, jobID string) error {
  1544  	txn := s.db.Txn(true)
  1545  	defer txn.Abort()
  1546  
  1547  	err := s.DeletePeriodicLaunchTxn(index, namespace, jobID, txn)
  1548  	if err == nil {
  1549  		txn.Commit()
  1550  	}
  1551  	return err
  1552  }
  1553  
  1554  // DeletePeriodicLaunchTxn is used to delete the periodic launch, like DeletePeriodicLaunch
  1555  // but in a transaction.  Useful for when making multiple modifications atomically
  1556  func (s *StateStore) DeletePeriodicLaunchTxn(index uint64, namespace, jobID string, txn Txn) error {
  1557  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
  1558  	if namespace == "" {
  1559  		namespace = structs.DefaultNamespace
  1560  	}
  1561  
  1562  	// Lookup the launch
  1563  	existing, err := txn.First("periodic_launch", "id", namespace, jobID)
  1564  	if err != nil {
  1565  		return fmt.Errorf("launch lookup failed: %v", err)
  1566  	}
  1567  	if existing == nil {
  1568  		return fmt.Errorf("launch not found")
  1569  	}
  1570  
  1571  	// Delete the launch
  1572  	if err := txn.Delete("periodic_launch", existing); err != nil {
  1573  		return fmt.Errorf("launch delete failed: %v", err)
  1574  	}
  1575  	if err := txn.Insert("index", &IndexEntry{"periodic_launch", index}); err != nil {
  1576  		return fmt.Errorf("index update failed: %v", err)
  1577  	}
  1578  
  1579  	return nil
  1580  }
  1581  
  1582  // PeriodicLaunchByID is used to lookup a periodic launch by the periodic job
  1583  // ID.
  1584  func (s *StateStore) PeriodicLaunchByID(ws memdb.WatchSet, namespace, id string) (*structs.PeriodicLaunch, error) {
  1585  	txn := s.db.Txn(false)
  1586  
  1587  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
  1588  	if namespace == "" {
  1589  		namespace = structs.DefaultNamespace
  1590  	}
  1591  
  1592  	watchCh, existing, err := txn.FirstWatch("periodic_launch", "id", namespace, id)
  1593  	if err != nil {
  1594  		return nil, fmt.Errorf("periodic launch lookup failed: %v", err)
  1595  	}
  1596  
  1597  	ws.Add(watchCh)
  1598  
  1599  	if existing != nil {
  1600  		return existing.(*structs.PeriodicLaunch), nil
  1601  	}
  1602  	return nil, nil
  1603  }
  1604  
  1605  // PeriodicLaunches returns an iterator over all the periodic launches
  1606  func (s *StateStore) PeriodicLaunches(ws memdb.WatchSet) (memdb.ResultIterator, error) {
  1607  	txn := s.db.Txn(false)
  1608  
  1609  	// Walk the entire table
  1610  	iter, err := txn.Get("periodic_launch", "id")
  1611  	if err != nil {
  1612  		return nil, err
  1613  	}
  1614  
  1615  	ws.Add(iter.WatchCh())
  1616  
  1617  	return iter, nil
  1618  }
  1619  
  1620  // UpsertEvals is used to upsert a set of evaluations
  1621  func (s *StateStore) UpsertEvals(index uint64, evals []*structs.Evaluation) error {
  1622  	txn := s.db.Txn(true)
  1623  	defer txn.Abort()
  1624  
  1625  	err := s.UpsertEvalsTxn(index, evals, txn)
  1626  	if err == nil {
  1627  		txn.Commit()
  1628  	}
  1629  	return err
  1630  }
  1631  
  1632  // UpsertEvals is used to upsert a set of evaluations, like UpsertEvals
  1633  // but in a transaction.  Useful for when making multiple modifications atomically
  1634  func (s *StateStore) UpsertEvalsTxn(index uint64, evals []*structs.Evaluation, txn Txn) error {
  1635  	// Do a nested upsert
  1636  	jobs := make(map[structs.NamespacedID]string, len(evals))
  1637  	for _, eval := range evals {
  1638  		if err := s.nestedUpsertEval(txn, index, eval); err != nil {
  1639  			return err
  1640  		}
  1641  
  1642  		tuple := structs.NamespacedID{
  1643  			ID:        eval.JobID,
  1644  			Namespace: eval.Namespace,
  1645  		}
  1646  		jobs[tuple] = ""
  1647  	}
  1648  
  1649  	// Set the job's status
  1650  	if err := s.setJobStatuses(index, txn, jobs, false); err != nil {
  1651  		return fmt.Errorf("setting job status failed: %v", err)
  1652  	}
  1653  
  1654  	return nil
  1655  }
  1656  
  1657  // nestedUpsertEvaluation is used to nest an evaluation upsert within a transaction
  1658  func (s *StateStore) nestedUpsertEval(txn *memdb.Txn, index uint64, eval *structs.Evaluation) error {
  1659  	// Lookup the evaluation
  1660  	existing, err := txn.First("evals", "id", eval.ID)
  1661  	if err != nil {
  1662  		return fmt.Errorf("eval lookup failed: %v", err)
  1663  	}
  1664  
  1665  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
  1666  	if eval.Namespace == "" {
  1667  		eval.Namespace = structs.DefaultNamespace
  1668  	}
  1669  
  1670  	// Update the indexes
  1671  	if existing != nil {
  1672  		eval.CreateIndex = existing.(*structs.Evaluation).CreateIndex
  1673  		eval.ModifyIndex = index
  1674  	} else {
  1675  		eval.CreateIndex = index
  1676  		eval.ModifyIndex = index
  1677  	}
  1678  
  1679  	// Update the job summary
  1680  	summaryRaw, err := txn.First("job_summary", "id", eval.Namespace, eval.JobID)
  1681  	if err != nil {
  1682  		return fmt.Errorf("job summary lookup failed: %v", err)
  1683  	}
  1684  	if summaryRaw != nil {
  1685  		js := summaryRaw.(*structs.JobSummary).Copy()
  1686  		hasSummaryChanged := false
  1687  		for tg, num := range eval.QueuedAllocations {
  1688  			if summary, ok := js.Summary[tg]; ok {
  1689  				if summary.Queued != num {
  1690  					summary.Queued = num
  1691  					js.Summary[tg] = summary
  1692  					hasSummaryChanged = true
  1693  				}
  1694  			} else {
  1695  				s.logger.Error("unable to update queued for job and task group", "job_id", eval.JobID, "task_group", tg, "namespace", eval.Namespace)
  1696  			}
  1697  		}
  1698  
  1699  		// Insert the job summary
  1700  		if hasSummaryChanged {
  1701  			// COMPAT 0.7: Upgrade old objects that do not have namespaces
  1702  			if js.Namespace == "" {
  1703  				js.Namespace = structs.DefaultNamespace
  1704  			}
  1705  
  1706  			js.ModifyIndex = index
  1707  			if err := txn.Insert("job_summary", js); err != nil {
  1708  				return fmt.Errorf("job summary insert failed: %v", err)
  1709  			}
  1710  			if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil {
  1711  				return fmt.Errorf("index update failed: %v", err)
  1712  			}
  1713  		}
  1714  	}
  1715  
  1716  	// Check if the job has any blocked evaluations and cancel them
  1717  	if eval.Status == structs.EvalStatusComplete && len(eval.FailedTGAllocs) == 0 {
  1718  		// Get the blocked evaluation for a job if it exists
  1719  		iter, err := txn.Get("evals", "job", eval.Namespace, eval.JobID, structs.EvalStatusBlocked)
  1720  		if err != nil {
  1721  			return fmt.Errorf("failed to get blocked evals for job %q in namespace %q: %v", eval.JobID, eval.Namespace, err)
  1722  		}
  1723  
  1724  		var blocked []*structs.Evaluation
  1725  		for {
  1726  			raw := iter.Next()
  1727  			if raw == nil {
  1728  				break
  1729  			}
  1730  			blocked = append(blocked, raw.(*structs.Evaluation))
  1731  		}
  1732  
  1733  		// Go through and update the evals
  1734  		for _, eval := range blocked {
  1735  			newEval := eval.Copy()
  1736  			newEval.Status = structs.EvalStatusCancelled
  1737  			newEval.StatusDescription = fmt.Sprintf("evaluation %q successful", newEval.ID)
  1738  			newEval.ModifyIndex = index
  1739  
  1740  			// COMPAT 0.7: Upgrade old objects that do not have namespaces
  1741  			if newEval.Namespace == "" {
  1742  				newEval.Namespace = structs.DefaultNamespace
  1743  			}
  1744  
  1745  			if err := txn.Insert("evals", newEval); err != nil {
  1746  				return fmt.Errorf("eval insert failed: %v", err)
  1747  			}
  1748  		}
  1749  	}
  1750  
  1751  	// Insert the eval
  1752  	if err := txn.Insert("evals", eval); err != nil {
  1753  		return fmt.Errorf("eval insert failed: %v", err)
  1754  	}
  1755  	if err := txn.Insert("index", &IndexEntry{"evals", index}); err != nil {
  1756  		return fmt.Errorf("index update failed: %v", err)
  1757  	}
  1758  	return nil
  1759  }
  1760  
  1761  // updateEvalModifyIndex is used to update the modify index of an evaluation that has been
  1762  // through a scheduler pass. This is done as part of plan apply. It ensures that when a subsequent
  1763  // scheduler workers process a re-queued evaluation it sees any partial updates from the plan apply.
  1764  func (s *StateStore) updateEvalModifyIndex(txn *memdb.Txn, index uint64, evalID string) error {
  1765  	// Lookup the evaluation
  1766  	existing, err := txn.First("evals", "id", evalID)
  1767  	if err != nil {
  1768  		return fmt.Errorf("eval lookup failed: %v", err)
  1769  	}
  1770  	if existing == nil {
  1771  		s.logger.Error("unable to find eval", "eval_id", evalID)
  1772  		return fmt.Errorf("unable to find eval id %q", evalID)
  1773  	}
  1774  	eval := existing.(*structs.Evaluation).Copy()
  1775  	// Update the indexes
  1776  	eval.ModifyIndex = index
  1777  
  1778  	// Insert the eval
  1779  	if err := txn.Insert("evals", eval); err != nil {
  1780  		return fmt.Errorf("eval insert failed: %v", err)
  1781  	}
  1782  	if err := txn.Insert("index", &IndexEntry{"evals", index}); err != nil {
  1783  		return fmt.Errorf("index update failed: %v", err)
  1784  	}
  1785  	return nil
  1786  }
  1787  
  1788  // DeleteEval is used to delete an evaluation
  1789  func (s *StateStore) DeleteEval(index uint64, evals []string, allocs []string) error {
  1790  	txn := s.db.Txn(true)
  1791  	defer txn.Abort()
  1792  
  1793  	jobs := make(map[structs.NamespacedID]string, len(evals))
  1794  	for _, eval := range evals {
  1795  		existing, err := txn.First("evals", "id", eval)
  1796  		if err != nil {
  1797  			return fmt.Errorf("eval lookup failed: %v", err)
  1798  		}
  1799  		if existing == nil {
  1800  			continue
  1801  		}
  1802  		if err := txn.Delete("evals", existing); err != nil {
  1803  			return fmt.Errorf("eval delete failed: %v", err)
  1804  		}
  1805  		eval := existing.(*structs.Evaluation)
  1806  
  1807  		tuple := structs.NamespacedID{
  1808  			ID:        eval.JobID,
  1809  			Namespace: eval.Namespace,
  1810  		}
  1811  		jobs[tuple] = ""
  1812  	}
  1813  
  1814  	for _, alloc := range allocs {
  1815  		raw, err := txn.First("allocs", "id", alloc)
  1816  		if err != nil {
  1817  			return fmt.Errorf("alloc lookup failed: %v", err)
  1818  		}
  1819  		if raw == nil {
  1820  			continue
  1821  		}
  1822  		if err := txn.Delete("allocs", raw); err != nil {
  1823  			return fmt.Errorf("alloc delete failed: %v", err)
  1824  		}
  1825  	}
  1826  
  1827  	// Update the indexes
  1828  	if err := txn.Insert("index", &IndexEntry{"evals", index}); err != nil {
  1829  		return fmt.Errorf("index update failed: %v", err)
  1830  	}
  1831  	if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil {
  1832  		return fmt.Errorf("index update failed: %v", err)
  1833  	}
  1834  
  1835  	// Set the job's status
  1836  	if err := s.setJobStatuses(index, txn, jobs, true); err != nil {
  1837  		return fmt.Errorf("setting job status failed: %v", err)
  1838  	}
  1839  
  1840  	txn.Commit()
  1841  	return nil
  1842  }
  1843  
  1844  // EvalByID is used to lookup an eval by its ID
  1845  func (s *StateStore) EvalByID(ws memdb.WatchSet, id string) (*structs.Evaluation, error) {
  1846  	txn := s.db.Txn(false)
  1847  
  1848  	watchCh, existing, err := txn.FirstWatch("evals", "id", id)
  1849  	if err != nil {
  1850  		return nil, fmt.Errorf("eval lookup failed: %v", err)
  1851  	}
  1852  
  1853  	ws.Add(watchCh)
  1854  
  1855  	if existing != nil {
  1856  		return existing.(*structs.Evaluation), nil
  1857  	}
  1858  	return nil, nil
  1859  }
  1860  
  1861  // EvalsByIDPrefix is used to lookup evaluations by prefix in a particular
  1862  // namespace
  1863  func (s *StateStore) EvalsByIDPrefix(ws memdb.WatchSet, namespace, id string) (memdb.ResultIterator, error) {
  1864  	txn := s.db.Txn(false)
  1865  
  1866  	// Get an iterator over all evals by the id prefix
  1867  	iter, err := txn.Get("evals", "id_prefix", id)
  1868  	if err != nil {
  1869  		return nil, fmt.Errorf("eval lookup failed: %v", err)
  1870  	}
  1871  
  1872  	ws.Add(iter.WatchCh())
  1873  
  1874  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
  1875  	if namespace == "" {
  1876  		namespace = structs.DefaultNamespace
  1877  	}
  1878  
  1879  	// Wrap the iterator in a filter
  1880  	wrap := memdb.NewFilterIterator(iter, evalNamespaceFilter(namespace))
  1881  	return wrap, nil
  1882  }
  1883  
  1884  // evalNamespaceFilter returns a filter function that filters all evaluations
  1885  // not in the given namespace.
  1886  func evalNamespaceFilter(namespace string) func(interface{}) bool {
  1887  	return func(raw interface{}) bool {
  1888  		eval, ok := raw.(*structs.Evaluation)
  1889  		if !ok {
  1890  			return true
  1891  		}
  1892  
  1893  		return eval.Namespace != namespace
  1894  	}
  1895  }
  1896  
  1897  // EvalsByJob returns all the evaluations by job id
  1898  func (s *StateStore) EvalsByJob(ws memdb.WatchSet, namespace, jobID string) ([]*structs.Evaluation, error) {
  1899  	txn := s.db.Txn(false)
  1900  
  1901  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
  1902  	if namespace == "" {
  1903  		namespace = structs.DefaultNamespace
  1904  	}
  1905  
  1906  	// Get an iterator over the node allocations
  1907  	iter, err := txn.Get("evals", "job_prefix", namespace, jobID)
  1908  	if err != nil {
  1909  		return nil, err
  1910  	}
  1911  
  1912  	ws.Add(iter.WatchCh())
  1913  
  1914  	var out []*structs.Evaluation
  1915  	for {
  1916  		raw := iter.Next()
  1917  		if raw == nil {
  1918  			break
  1919  		}
  1920  
  1921  		e := raw.(*structs.Evaluation)
  1922  
  1923  		// Filter non-exact matches
  1924  		if e.JobID != jobID {
  1925  			continue
  1926  		}
  1927  
  1928  		out = append(out, e)
  1929  	}
  1930  	return out, nil
  1931  }
  1932  
  1933  // Evals returns an iterator over all the evaluations
  1934  func (s *StateStore) Evals(ws memdb.WatchSet) (memdb.ResultIterator, error) {
  1935  	txn := s.db.Txn(false)
  1936  
  1937  	// Walk the entire table
  1938  	iter, err := txn.Get("evals", "id")
  1939  	if err != nil {
  1940  		return nil, err
  1941  	}
  1942  
  1943  	ws.Add(iter.WatchCh())
  1944  
  1945  	return iter, nil
  1946  }
  1947  
  1948  // EvalsByNamespace returns an iterator over all the evaluations in the given
  1949  // namespace
  1950  func (s *StateStore) EvalsByNamespace(ws memdb.WatchSet, namespace string) (memdb.ResultIterator, error) {
  1951  	txn := s.db.Txn(false)
  1952  
  1953  	// Walk the entire table
  1954  	iter, err := txn.Get("evals", "namespace", namespace)
  1955  	if err != nil {
  1956  		return nil, err
  1957  	}
  1958  
  1959  	ws.Add(iter.WatchCh())
  1960  
  1961  	return iter, nil
  1962  }
  1963  
  1964  // UpdateAllocsFromClient is used to update an allocation based on input
  1965  // from a client. While the schedulers are the authority on the allocation for
  1966  // most things, some updates are authoritative from the client. Specifically,
  1967  // the desired state comes from the schedulers, while the actual state comes
  1968  // from clients.
  1969  func (s *StateStore) UpdateAllocsFromClient(index uint64, allocs []*structs.Allocation) error {
  1970  	txn := s.db.Txn(true)
  1971  	defer txn.Abort()
  1972  
  1973  	// Handle each of the updated allocations
  1974  	for _, alloc := range allocs {
  1975  		if err := s.nestedUpdateAllocFromClient(txn, index, alloc); err != nil {
  1976  			return err
  1977  		}
  1978  	}
  1979  
  1980  	// Update the indexes
  1981  	if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil {
  1982  		return fmt.Errorf("index update failed: %v", err)
  1983  	}
  1984  
  1985  	txn.Commit()
  1986  	return nil
  1987  }
  1988  
// nestedUpdateAllocFromClient applies a client-reported update for a single
// allocation inside the caller's write transaction txn. It neither commits nor
// aborts the transaction.
//
// The stored allocation is looked up by alloc.ID; if it does not exist the
// call is a no-op and returns nil. Otherwise a copy of the stored allocation
// is updated with the fields taken from alloc (ClientStatus,
// ClientDescription, TaskStates, deployment health and ModifyTime), its
// ModifyIndex is set to index, and the copy is re-inserted. The deployment,
// job summary and ent helpers are called with both the updated copy and the
// previously stored allocation, and finally setJobStatuses is invoked for the
// allocation's job.
func (s *StateStore) nestedUpdateAllocFromClient(txn *memdb.Txn, index uint64, alloc *structs.Allocation) error {
	// Look for existing alloc
	existing, err := txn.First("allocs", "id", alloc.ID)
	if err != nil {
		return fmt.Errorf("alloc lookup failed: %v", err)
	}

	// Nothing to do if this does not exist
	if existing == nil {
		return nil
	}
	exist := existing.(*structs.Allocation)

	// Copy everything from the existing allocation so the object held by the
	// store is never mutated.
	copyAlloc := exist.Copy()

	// COMPAT 0.7: Upgrade old objects that do not have namespaces
	if copyAlloc.Namespace == "" {
		copyAlloc.Namespace = structs.DefaultNamespace
	}

	// Pull in anything the client is the authority on
	copyAlloc.ClientStatus = alloc.ClientStatus
	copyAlloc.ClientDescription = alloc.ClientDescription
	copyAlloc.TaskStates = alloc.TaskStates

	// The client can only set its deployment health and timestamp, so just take
	// those
	if copyAlloc.DeploymentStatus != nil && alloc.DeploymentStatus != nil {
		oldHasHealthy := copyAlloc.DeploymentStatus.HasHealth()
		newHasHealthy := alloc.DeploymentStatus.HasHealth()

		// We got new health information from the client: either there was no
		// health value stored yet, or the reported value differs from the
		// stored one. The Healthy pointers are only dereferenced after the
		// corresponding HasHealth check has passed.
		if newHasHealthy && (!oldHasHealthy || *copyAlloc.DeploymentStatus.Healthy != *alloc.DeploymentStatus.Healthy) {
			// Updated deployment health and timestamp
			copyAlloc.DeploymentStatus.Healthy = helper.BoolToPtr(*alloc.DeploymentStatus.Healthy)
			copyAlloc.DeploymentStatus.Timestamp = alloc.DeploymentStatus.Timestamp
			copyAlloc.DeploymentStatus.ModifyIndex = index
		}
	} else if alloc.DeploymentStatus != nil {
		// First time getting a deployment status so copy everything and just
		// set the index
		copyAlloc.DeploymentStatus = alloc.DeploymentStatus.Copy()
		copyAlloc.DeploymentStatus.ModifyIndex = index
	}

	// Update the modify index
	copyAlloc.ModifyIndex = index

	// Update the modify time
	copyAlloc.ModifyTime = alloc.ModifyTime

	// The helpers below receive the updated copy together with the previously
	// stored allocation, and run before the copy is inserted.
	if err := s.updateDeploymentWithAlloc(index, copyAlloc, exist, txn); err != nil {
		return fmt.Errorf("error updating deployment: %v", err)
	}

	if err := s.updateSummaryWithAlloc(index, copyAlloc, exist, txn); err != nil {
		return fmt.Errorf("error updating job summary: %v", err)
	}

	if err := s.updateEntWithAlloc(index, copyAlloc, exist, txn); err != nil {
		return err
	}

	// Update the allocation
	if err := txn.Insert("allocs", copyAlloc); err != nil {
		return fmt.Errorf("alloc insert failed: %v", err)
	}

	// Set the job's status: force "running" while the allocation is not
	// terminal, otherwise pass an empty status.
	forceStatus := ""
	if !copyAlloc.TerminalStatus() {
		forceStatus = structs.JobStatusRunning
	}

	// NOTE(review): the key is built from exist.Namespace, which does not get
	// the COMPAT default applied to copyAlloc above; confirm this is intended.
	tuple := structs.NamespacedID{
		ID:        exist.JobID,
		Namespace: exist.Namespace,
	}
	jobs := map[structs.NamespacedID]string{tuple: forceStatus}

	if err := s.setJobStatuses(index, txn, jobs, false); err != nil {
		return fmt.Errorf("setting job status failed: %v", err)
	}
	return nil
}
  2076  
  2077  // UpsertAllocs is used to evict a set of allocations and allocate new ones at
  2078  // the same time.
  2079  func (s *StateStore) UpsertAllocs(index uint64, allocs []*structs.Allocation) error {
  2080  	txn := s.db.Txn(true)
  2081  	defer txn.Abort()
  2082  	if err := s.upsertAllocsImpl(index, allocs, txn); err != nil {
  2083  		return err
  2084  	}
  2085  	txn.Commit()
  2086  	return nil
  2087  }
  2088  
// upsertAllocsImpl is the actual implementation of UpsertAllocs so that it may
// be used with an existing transaction. It neither commits nor aborts txn.
//
// For every allocation it looks up the stored version by ID, fills in the
// create/modify indexes (and, for an existing allocation, the client-owned
// fields and a missing Job from the stored version), calls the deployment,
// job summary and ent helpers, inserts the allocation, and links a previous
// allocation to it through NextAllocation. The passed-in allocation objects
// are modified in place and stored. Afterwards the "allocs" table index is set
// to index and setJobStatuses is invoked for the jobs that were touched.
//
// An error is returned when a new allocation (one not yet in the store) has
// no Job attached.
func (s *StateStore) upsertAllocsImpl(index uint64, allocs []*structs.Allocation, txn *memdb.Txn) error {
	// Handle the allocations
	jobs := make(map[structs.NamespacedID]string, 1)
	for _, alloc := range allocs {
		existing, err := txn.First("allocs", "id", alloc.ID)
		if err != nil {
			return fmt.Errorf("alloc lookup failed: %v", err)
		}
		// exist stays nil when the allocation is not in the store yet.
		exist, _ := existing.(*structs.Allocation)

		if exist == nil {
			alloc.CreateIndex = index
			alloc.ModifyIndex = index
			alloc.AllocModifyIndex = index
			if alloc.DeploymentStatus != nil {
				alloc.DeploymentStatus.ModifyIndex = index
			}

			// Issue https://github.com/hashicorp/nomad/issues/2583 uncovered
			// a race between a forced garbage collection and the scheduler
			// marking an allocation as terminal. The issue is that the
			// allocation from the scheduler has its job normalized and the FSM
			// will only denormalize if the allocation is not terminal.  However
			// if the allocation is garbage collected, that will result in an
			// allocation being upserted for the first time without a job
			// attached. By returning an error here, it will cause the FSM to
			// error, causing the plan_apply to error and thus causing the
			// evaluation to be failed. This will force an index refresh that
			// should solve this issue.
			if alloc.Job == nil {
				return fmt.Errorf("attempting to upsert allocation %q without a job", alloc.ID)
			}
		} else {
			alloc.CreateIndex = exist.CreateIndex
			alloc.ModifyIndex = index
			alloc.AllocModifyIndex = index

			// Keep the clients task states
			alloc.TaskStates = exist.TaskStates

			// If the scheduler is marking this allocation as lost we do not
			// want to reuse the status of the existing allocation.
			if alloc.ClientStatus != structs.AllocClientStatusLost {
				alloc.ClientStatus = exist.ClientStatus
				alloc.ClientDescription = exist.ClientDescription
			}

			// The job has been denormalized so re-attach the original job
			if alloc.Job == nil {
				alloc.Job = exist.Job
			}
		}

		// COMPAT 0.7: Upgrade old objects that do not have namespaces
		if alloc.Namespace == "" {
			alloc.Namespace = structs.DefaultNamespace
		}

		// OPTIMIZATION:
		// These should be given a map of new to old allocation and the updates
		// should be one on all changes. The current implementation causes O(n)
		// lookups/copies/insertions rather than O(1)
		//
		// Each helper receives the new allocation and the stored one (nil for
		// a first-time upsert), and runs before the allocation is inserted.
		if err := s.updateDeploymentWithAlloc(index, alloc, exist, txn); err != nil {
			return fmt.Errorf("error updating deployment: %v", err)
		}

		if err := s.updateSummaryWithAlloc(index, alloc, exist, txn); err != nil {
			return fmt.Errorf("error updating job summary: %v", err)
		}

		if err := s.updateEntWithAlloc(index, alloc, exist, txn); err != nil {
			return err
		}

		if err := txn.Insert("allocs", alloc); err != nil {
			return fmt.Errorf("alloc insert failed: %v", err)
		}

		// Point the previous allocation, if it is still in the store, at this
		// one. A copy is modified and re-inserted rather than the stored
		// object itself.
		if alloc.PreviousAllocation != "" {
			prevAlloc, err := txn.First("allocs", "id", alloc.PreviousAllocation)
			if err != nil {
				return fmt.Errorf("alloc lookup failed: %v", err)
			}
			existingPrevAlloc, _ := prevAlloc.(*structs.Allocation)
			if existingPrevAlloc != nil {
				prevAllocCopy := existingPrevAlloc.Copy()
				prevAllocCopy.NextAllocation = alloc.ID
				prevAllocCopy.ModifyIndex = index
				if err := txn.Insert("allocs", prevAllocCopy); err != nil {
					return fmt.Errorf("alloc insert failed: %v", err)
				}
			}
		}

		// If the allocation is not terminal, force the job to running status.
		forceStatus := ""
		if !alloc.TerminalStatus() {
			forceStatus = structs.JobStatusRunning
		}

		// When several allocations share a job, the entry written by the last
		// one processed is the one that remains in the map.
		tuple := structs.NamespacedID{
			ID:        alloc.JobID,
			Namespace: alloc.Namespace,
		}
		jobs[tuple] = forceStatus
	}

	// Update the indexes
	if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil {
		return fmt.Errorf("index update failed: %v", err)
	}

	// Set the job's status
	if err := s.setJobStatuses(index, txn, jobs, false); err != nil {
		return fmt.Errorf("setting job status failed: %v", err)
	}

	return nil
}
  2210  
  2211  // UpdateAllocsDesiredTransitions is used to update a set of allocations
  2212  // desired transitions.
  2213  func (s *StateStore) UpdateAllocsDesiredTransitions(index uint64, allocs map[string]*structs.DesiredTransition,
  2214  	evals []*structs.Evaluation) error {
  2215  
  2216  	txn := s.db.Txn(true)
  2217  	defer txn.Abort()
  2218  
  2219  	// Handle each of the updated allocations
  2220  	for id, transition := range allocs {
  2221  		if err := s.nestedUpdateAllocDesiredTransition(txn, index, id, transition); err != nil {
  2222  			return err
  2223  		}
  2224  	}
  2225  
  2226  	for _, eval := range evals {
  2227  		if err := s.nestedUpsertEval(txn, index, eval); err != nil {
  2228  			return err
  2229  		}
  2230  	}
  2231  
  2232  	// Update the indexes
  2233  	if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil {
  2234  		return fmt.Errorf("index update failed: %v", err)
  2235  	}
  2236  
  2237  	txn.Commit()
  2238  	return nil
  2239  }
  2240  
  2241  // nestedUpdateAllocDesiredTransition is used to nest an update of an
  2242  // allocations desired transition
  2243  func (s *StateStore) nestedUpdateAllocDesiredTransition(
  2244  	txn *memdb.Txn, index uint64, allocID string,
  2245  	transition *structs.DesiredTransition) error {
  2246  
  2247  	// Look for existing alloc
  2248  	existing, err := txn.First("allocs", "id", allocID)
  2249  	if err != nil {
  2250  		return fmt.Errorf("alloc lookup failed: %v", err)
  2251  	}
  2252  
  2253  	// Nothing to do if this does not exist
  2254  	if existing == nil {
  2255  		return nil
  2256  	}
  2257  	exist := existing.(*structs.Allocation)
  2258  
  2259  	// Copy everything from the existing allocation
  2260  	copyAlloc := exist.Copy()
  2261  
  2262  	// Merge the desired transitions
  2263  	copyAlloc.DesiredTransition.Merge(transition)
  2264  
  2265  	// Update the modify index
  2266  	copyAlloc.ModifyIndex = index
  2267  
  2268  	// Update the allocation
  2269  	if err := txn.Insert("allocs", copyAlloc); err != nil {
  2270  		return fmt.Errorf("alloc insert failed: %v", err)
  2271  	}
  2272  
  2273  	return nil
  2274  }
  2275  
  2276  // AllocByID is used to lookup an allocation by its ID
  2277  func (s *StateStore) AllocByID(ws memdb.WatchSet, id string) (*structs.Allocation, error) {
  2278  	txn := s.db.Txn(false)
  2279  
  2280  	watchCh, existing, err := txn.FirstWatch("allocs", "id", id)
  2281  	if err != nil {
  2282  		return nil, fmt.Errorf("alloc lookup failed: %v", err)
  2283  	}
  2284  
  2285  	ws.Add(watchCh)
  2286  
  2287  	if existing != nil {
  2288  		return existing.(*structs.Allocation), nil
  2289  	}
  2290  	return nil, nil
  2291  }
  2292  
  2293  // AllocsByIDPrefix is used to lookup allocs by prefix
  2294  func (s *StateStore) AllocsByIDPrefix(ws memdb.WatchSet, namespace, id string) (memdb.ResultIterator, error) {
  2295  	txn := s.db.Txn(false)
  2296  
  2297  	iter, err := txn.Get("allocs", "id_prefix", id)
  2298  	if err != nil {
  2299  		return nil, fmt.Errorf("alloc lookup failed: %v", err)
  2300  	}
  2301  
  2302  	ws.Add(iter.WatchCh())
  2303  
  2304  	// Wrap the iterator in a filter
  2305  	wrap := memdb.NewFilterIterator(iter, allocNamespaceFilter(namespace))
  2306  	return wrap, nil
  2307  }
  2308  
  2309  // allocNamespaceFilter returns a filter function that filters all allocations
  2310  // not in the given namespace.
  2311  func allocNamespaceFilter(namespace string) func(interface{}) bool {
  2312  	return func(raw interface{}) bool {
  2313  		alloc, ok := raw.(*structs.Allocation)
  2314  		if !ok {
  2315  			return true
  2316  		}
  2317  
  2318  		return alloc.Namespace != namespace
  2319  	}
  2320  }
  2321  
  2322  // AllocsByNode returns all the allocations by node
  2323  func (s *StateStore) AllocsByNode(ws memdb.WatchSet, node string) ([]*structs.Allocation, error) {
  2324  	txn := s.db.Txn(false)
  2325  
  2326  	// Get an iterator over the node allocations, using only the
  2327  	// node prefix which ignores the terminal status
  2328  	iter, err := txn.Get("allocs", "node_prefix", node)
  2329  	if err != nil {
  2330  		return nil, err
  2331  	}
  2332  
  2333  	ws.Add(iter.WatchCh())
  2334  
  2335  	var out []*structs.Allocation
  2336  	for {
  2337  		raw := iter.Next()
  2338  		if raw == nil {
  2339  			break
  2340  		}
  2341  		out = append(out, raw.(*structs.Allocation))
  2342  	}
  2343  	return out, nil
  2344  }
  2345  
  2346  // AllocsByNode returns all the allocations by node and terminal status
  2347  func (s *StateStore) AllocsByNodeTerminal(ws memdb.WatchSet, node string, terminal bool) ([]*structs.Allocation, error) {
  2348  	txn := s.db.Txn(false)
  2349  
  2350  	// Get an iterator over the node allocations
  2351  	iter, err := txn.Get("allocs", "node", node, terminal)
  2352  	if err != nil {
  2353  		return nil, err
  2354  	}
  2355  
  2356  	ws.Add(iter.WatchCh())
  2357  
  2358  	var out []*structs.Allocation
  2359  	for {
  2360  		raw := iter.Next()
  2361  		if raw == nil {
  2362  			break
  2363  		}
  2364  		out = append(out, raw.(*structs.Allocation))
  2365  	}
  2366  	return out, nil
  2367  }
  2368  
  2369  // AllocsByJob returns all the allocations by job id
  2370  func (s *StateStore) AllocsByJob(ws memdb.WatchSet, namespace, jobID string, all bool) ([]*structs.Allocation, error) {
  2371  	txn := s.db.Txn(false)
  2372  
  2373  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
  2374  	if namespace == "" {
  2375  		namespace = structs.DefaultNamespace
  2376  	}
  2377  
  2378  	// Get the job
  2379  	var job *structs.Job
  2380  	rawJob, err := txn.First("jobs", "id", namespace, jobID)
  2381  	if err != nil {
  2382  		return nil, err
  2383  	}
  2384  	if rawJob != nil {
  2385  		job = rawJob.(*structs.Job)
  2386  	}
  2387  
  2388  	// Get an iterator over the node allocations
  2389  	iter, err := txn.Get("allocs", "job", namespace, jobID)
  2390  	if err != nil {
  2391  		return nil, err
  2392  	}
  2393  
  2394  	ws.Add(iter.WatchCh())
  2395  
  2396  	var out []*structs.Allocation
  2397  	for {
  2398  		raw := iter.Next()
  2399  		if raw == nil {
  2400  			break
  2401  		}
  2402  
  2403  		alloc := raw.(*structs.Allocation)
  2404  		// If the allocation belongs to a job with the same ID but a different
  2405  		// create index and we are not getting all the allocations whose Jobs
  2406  		// matches the same Job ID then we skip it
  2407  		if !all && job != nil && alloc.Job.CreateIndex != job.CreateIndex {
  2408  			continue
  2409  		}
  2410  		out = append(out, raw.(*structs.Allocation))
  2411  	}
  2412  	return out, nil
  2413  }
  2414  
  2415  // AllocsByEval returns all the allocations by eval id
  2416  func (s *StateStore) AllocsByEval(ws memdb.WatchSet, evalID string) ([]*structs.Allocation, error) {
  2417  	txn := s.db.Txn(false)
  2418  
  2419  	// Get an iterator over the eval allocations
  2420  	iter, err := txn.Get("allocs", "eval", evalID)
  2421  	if err != nil {
  2422  		return nil, err
  2423  	}
  2424  
  2425  	ws.Add(iter.WatchCh())
  2426  
  2427  	var out []*structs.Allocation
  2428  	for {
  2429  		raw := iter.Next()
  2430  		if raw == nil {
  2431  			break
  2432  		}
  2433  		out = append(out, raw.(*structs.Allocation))
  2434  	}
  2435  	return out, nil
  2436  }
  2437  
  2438  // AllocsByDeployment returns all the allocations by deployment id
  2439  func (s *StateStore) AllocsByDeployment(ws memdb.WatchSet, deploymentID string) ([]*structs.Allocation, error) {
  2440  	txn := s.db.Txn(false)
  2441  
  2442  	// Get an iterator over the deployments allocations
  2443  	iter, err := txn.Get("allocs", "deployment", deploymentID)
  2444  	if err != nil {
  2445  		return nil, err
  2446  	}
  2447  
  2448  	ws.Add(iter.WatchCh())
  2449  
  2450  	var out []*structs.Allocation
  2451  	for {
  2452  		raw := iter.Next()
  2453  		if raw == nil {
  2454  			break
  2455  		}
  2456  		out = append(out, raw.(*structs.Allocation))
  2457  	}
  2458  	return out, nil
  2459  }
  2460  
  2461  // Allocs returns an iterator over all the evaluations
  2462  func (s *StateStore) Allocs(ws memdb.WatchSet) (memdb.ResultIterator, error) {
  2463  	txn := s.db.Txn(false)
  2464  
  2465  	// Walk the entire table
  2466  	iter, err := txn.Get("allocs", "id")
  2467  	if err != nil {
  2468  		return nil, err
  2469  	}
  2470  
  2471  	ws.Add(iter.WatchCh())
  2472  
  2473  	return iter, nil
  2474  }
  2475  
  2476  // AllocsByNamespace returns an iterator over all the allocations in the
  2477  // namespace
  2478  func (s *StateStore) AllocsByNamespace(ws memdb.WatchSet, namespace string) (memdb.ResultIterator, error) {
  2479  	txn := s.db.Txn(false)
  2480  	return s.allocsByNamespaceImpl(ws, txn, namespace)
  2481  }
  2482  
  2483  // allocsByNamespaceImpl returns an iterator over all the allocations in the
  2484  // namespace
  2485  func (s *StateStore) allocsByNamespaceImpl(ws memdb.WatchSet, txn *memdb.Txn, namespace string) (memdb.ResultIterator, error) {
  2486  	// Walk the entire table
  2487  	iter, err := txn.Get("allocs", "namespace", namespace)
  2488  	if err != nil {
  2489  		return nil, err
  2490  	}
  2491  
  2492  	ws.Add(iter.WatchCh())
  2493  
  2494  	return iter, nil
  2495  }
  2496  
  2497  // UpsertVaultAccessors is used to register a set of Vault Accessors
  2498  func (s *StateStore) UpsertVaultAccessor(index uint64, accessors []*structs.VaultAccessor) error {
  2499  	txn := s.db.Txn(true)
  2500  	defer txn.Abort()
  2501  
  2502  	for _, accessor := range accessors {
  2503  		// Set the create index
  2504  		accessor.CreateIndex = index
  2505  
  2506  		// Insert the accessor
  2507  		if err := txn.Insert("vault_accessors", accessor); err != nil {
  2508  			return fmt.Errorf("accessor insert failed: %v", err)
  2509  		}
  2510  	}
  2511  
  2512  	if err := txn.Insert("index", &IndexEntry{"vault_accessors", index}); err != nil {
  2513  		return fmt.Errorf("index update failed: %v", err)
  2514  	}
  2515  
  2516  	txn.Commit()
  2517  	return nil
  2518  }
  2519  
  2520  // DeleteVaultAccessors is used to delete a set of Vault Accessors
  2521  func (s *StateStore) DeleteVaultAccessors(index uint64, accessors []*structs.VaultAccessor) error {
  2522  	txn := s.db.Txn(true)
  2523  	defer txn.Abort()
  2524  
  2525  	// Lookup the accessor
  2526  	for _, accessor := range accessors {
  2527  		// Delete the accessor
  2528  		if err := txn.Delete("vault_accessors", accessor); err != nil {
  2529  			return fmt.Errorf("accessor delete failed: %v", err)
  2530  		}
  2531  	}
  2532  
  2533  	if err := txn.Insert("index", &IndexEntry{"vault_accessors", index}); err != nil {
  2534  		return fmt.Errorf("index update failed: %v", err)
  2535  	}
  2536  
  2537  	txn.Commit()
  2538  	return nil
  2539  }
  2540  
  2541  // VaultAccessor returns the given Vault accessor
  2542  func (s *StateStore) VaultAccessor(ws memdb.WatchSet, accessor string) (*structs.VaultAccessor, error) {
  2543  	txn := s.db.Txn(false)
  2544  
  2545  	watchCh, existing, err := txn.FirstWatch("vault_accessors", "id", accessor)
  2546  	if err != nil {
  2547  		return nil, fmt.Errorf("accessor lookup failed: %v", err)
  2548  	}
  2549  
  2550  	ws.Add(watchCh)
  2551  
  2552  	if existing != nil {
  2553  		return existing.(*structs.VaultAccessor), nil
  2554  	}
  2555  
  2556  	return nil, nil
  2557  }
  2558  
  2559  // VaultAccessors returns an iterator of Vault accessors.
  2560  func (s *StateStore) VaultAccessors(ws memdb.WatchSet) (memdb.ResultIterator, error) {
  2561  	txn := s.db.Txn(false)
  2562  
  2563  	iter, err := txn.Get("vault_accessors", "id")
  2564  	if err != nil {
  2565  		return nil, err
  2566  	}
  2567  
  2568  	ws.Add(iter.WatchCh())
  2569  
  2570  	return iter, nil
  2571  }
  2572  
  2573  // VaultAccessorsByAlloc returns all the Vault accessors by alloc id
  2574  func (s *StateStore) VaultAccessorsByAlloc(ws memdb.WatchSet, allocID string) ([]*structs.VaultAccessor, error) {
  2575  	txn := s.db.Txn(false)
  2576  
  2577  	// Get an iterator over the accessors
  2578  	iter, err := txn.Get("vault_accessors", "alloc_id", allocID)
  2579  	if err != nil {
  2580  		return nil, err
  2581  	}
  2582  
  2583  	ws.Add(iter.WatchCh())
  2584  
  2585  	var out []*structs.VaultAccessor
  2586  	for {
  2587  		raw := iter.Next()
  2588  		if raw == nil {
  2589  			break
  2590  		}
  2591  		out = append(out, raw.(*structs.VaultAccessor))
  2592  	}
  2593  	return out, nil
  2594  }
  2595  
  2596  // VaultAccessorsByNode returns all the Vault accessors by node id
  2597  func (s *StateStore) VaultAccessorsByNode(ws memdb.WatchSet, nodeID string) ([]*structs.VaultAccessor, error) {
  2598  	txn := s.db.Txn(false)
  2599  
  2600  	// Get an iterator over the accessors
  2601  	iter, err := txn.Get("vault_accessors", "node_id", nodeID)
  2602  	if err != nil {
  2603  		return nil, err
  2604  	}
  2605  
  2606  	ws.Add(iter.WatchCh())
  2607  
  2608  	var out []*structs.VaultAccessor
  2609  	for {
  2610  		raw := iter.Next()
  2611  		if raw == nil {
  2612  			break
  2613  		}
  2614  		out = append(out, raw.(*structs.VaultAccessor))
  2615  	}
  2616  	return out, nil
  2617  }
  2618  
  2619  // UpdateDeploymentStatus is used to make deployment status updates and
  2620  // potentially make a evaluation
  2621  func (s *StateStore) UpdateDeploymentStatus(index uint64, req *structs.DeploymentStatusUpdateRequest) error {
  2622  	txn := s.db.Txn(true)
  2623  	defer txn.Abort()
  2624  
  2625  	if err := s.updateDeploymentStatusImpl(index, req.DeploymentUpdate, txn); err != nil {
  2626  		return err
  2627  	}
  2628  
  2629  	// Upsert the job if necessary
  2630  	if req.Job != nil {
  2631  		if err := s.upsertJobImpl(index, req.Job, false, txn); err != nil {
  2632  			return err
  2633  		}
  2634  	}
  2635  
  2636  	// Upsert the optional eval
  2637  	if req.Eval != nil {
  2638  		if err := s.nestedUpsertEval(txn, index, req.Eval); err != nil {
  2639  			return err
  2640  		}
  2641  	}
  2642  
  2643  	txn.Commit()
  2644  	return nil
  2645  }
  2646  
  2647  // updateDeploymentStatusImpl is used to make deployment status updates
  2648  func (s *StateStore) updateDeploymentStatusImpl(index uint64, u *structs.DeploymentStatusUpdate, txn *memdb.Txn) error {
  2649  	// Retrieve deployment
  2650  	ws := memdb.NewWatchSet()
  2651  	deployment, err := s.deploymentByIDImpl(ws, u.DeploymentID, txn)
  2652  	if err != nil {
  2653  		return err
  2654  	} else if deployment == nil {
  2655  		return fmt.Errorf("Deployment ID %q couldn't be updated as it does not exist", u.DeploymentID)
  2656  	} else if !deployment.Active() {
  2657  		return fmt.Errorf("Deployment %q has terminal status %q:", deployment.ID, deployment.Status)
  2658  	}
  2659  
  2660  	// Apply the new status
  2661  	copy := deployment.Copy()
  2662  	copy.Status = u.Status
  2663  	copy.StatusDescription = u.StatusDescription
  2664  	copy.ModifyIndex = index
  2665  
  2666  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
  2667  	if copy.Namespace == "" {
  2668  		copy.Namespace = structs.DefaultNamespace
  2669  	}
  2670  
  2671  	// Insert the deployment
  2672  	if err := txn.Insert("deployment", copy); err != nil {
  2673  		return err
  2674  	}
  2675  
  2676  	// Update the index
  2677  	if err := txn.Insert("index", &IndexEntry{"deployment", index}); err != nil {
  2678  		return fmt.Errorf("index update failed: %v", err)
  2679  	}
  2680  
  2681  	// If the deployment is being marked as complete, set the job to stable.
  2682  	if copy.Status == structs.DeploymentStatusSuccessful {
  2683  		if err := s.updateJobStabilityImpl(index, copy.Namespace, copy.JobID, copy.JobVersion, true, txn); err != nil {
  2684  			return fmt.Errorf("failed to update job stability: %v", err)
  2685  		}
  2686  	}
  2687  
  2688  	return nil
  2689  }
  2690  
  2691  // UpdateJobStability updates the stability of the given job and version to the
  2692  // desired status.
  2693  func (s *StateStore) UpdateJobStability(index uint64, namespace, jobID string, jobVersion uint64, stable bool) error {
  2694  	txn := s.db.Txn(true)
  2695  	defer txn.Abort()
  2696  
  2697  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
  2698  	if namespace == "" {
  2699  		namespace = structs.DefaultNamespace
  2700  	}
  2701  
  2702  	if err := s.updateJobStabilityImpl(index, namespace, jobID, jobVersion, stable, txn); err != nil {
  2703  		return err
  2704  	}
  2705  
  2706  	txn.Commit()
  2707  	return nil
  2708  }
  2709  
  2710  // updateJobStabilityImpl updates the stability of the given job and version
  2711  func (s *StateStore) updateJobStabilityImpl(index uint64, namespace, jobID string, jobVersion uint64, stable bool, txn *memdb.Txn) error {
  2712  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
  2713  	if namespace == "" {
  2714  		namespace = structs.DefaultNamespace
  2715  	}
  2716  
  2717  	// Get the job that is referenced
  2718  	job, err := s.jobByIDAndVersionImpl(nil, namespace, jobID, jobVersion, txn)
  2719  	if err != nil {
  2720  		return err
  2721  	}
  2722  
  2723  	// Has already been cleared, nothing to do
  2724  	if job == nil {
  2725  		return nil
  2726  	}
  2727  
  2728  	// If the job already has the desired stability, nothing to do
  2729  	if job.Stable == stable {
  2730  		return nil
  2731  	}
  2732  
  2733  	copy := job.Copy()
  2734  	copy.Stable = stable
  2735  	return s.upsertJobImpl(index, copy, true, txn)
  2736  }
  2737  
  2738  // UpdateDeploymentPromotion is used to promote canaries in a deployment and
  2739  // potentially make a evaluation
  2740  func (s *StateStore) UpdateDeploymentPromotion(index uint64, req *structs.ApplyDeploymentPromoteRequest) error {
  2741  	txn := s.db.Txn(true)
  2742  	defer txn.Abort()
  2743  
  2744  	// Retrieve deployment and ensure it is not terminal and is active
  2745  	ws := memdb.NewWatchSet()
  2746  	deployment, err := s.deploymentByIDImpl(ws, req.DeploymentID, txn)
  2747  	if err != nil {
  2748  		return err
  2749  	} else if deployment == nil {
  2750  		return fmt.Errorf("Deployment ID %q couldn't be updated as it does not exist", req.DeploymentID)
  2751  	} else if !deployment.Active() {
  2752  		return fmt.Errorf("Deployment %q has terminal status %q:", deployment.ID, deployment.Status)
  2753  	}
  2754  
  2755  	// Retrieve effected allocations
  2756  	iter, err := txn.Get("allocs", "deployment", req.DeploymentID)
  2757  	if err != nil {
  2758  		return err
  2759  	}
  2760  
  2761  	// groupIndex is a map of groups being promoted
  2762  	groupIndex := make(map[string]struct{}, len(req.Groups))
  2763  	for _, g := range req.Groups {
  2764  		groupIndex[g] = struct{}{}
  2765  	}
  2766  
  2767  	// canaryIndex is the set of placed canaries in the deployment
  2768  	canaryIndex := make(map[string]struct{}, len(deployment.TaskGroups))
  2769  	for _, state := range deployment.TaskGroups {
  2770  		for _, c := range state.PlacedCanaries {
  2771  			canaryIndex[c] = struct{}{}
  2772  		}
  2773  	}
  2774  
  2775  	// healthyCounts is a mapping of group to the number of healthy canaries
  2776  	healthyCounts := make(map[string]int, len(deployment.TaskGroups))
  2777  
  2778  	// promotable is the set of allocations that we can move from canary to
  2779  	// non-canary
  2780  	var promotable []*structs.Allocation
  2781  
  2782  	for {
  2783  		raw := iter.Next()
  2784  		if raw == nil {
  2785  			break
  2786  		}
  2787  
  2788  		alloc := raw.(*structs.Allocation)
  2789  
  2790  		// Check that the alloc is a canary
  2791  		if _, ok := canaryIndex[alloc.ID]; !ok {
  2792  			continue
  2793  		}
  2794  
  2795  		// Check that the canary is part of a group being promoted
  2796  		if _, ok := groupIndex[alloc.TaskGroup]; !req.All && !ok {
  2797  			continue
  2798  		}
  2799  
  2800  		// Ensure the canaries are healthy
  2801  		if alloc.TerminalStatus() || !alloc.DeploymentStatus.IsHealthy() {
  2802  			continue
  2803  		}
  2804  
  2805  		healthyCounts[alloc.TaskGroup]++
  2806  		promotable = append(promotable, alloc)
  2807  	}
  2808  
  2809  	// Determine if we have enough healthy allocations
  2810  	var unhealthyErr multierror.Error
  2811  	for tg, state := range deployment.TaskGroups {
  2812  		if _, ok := groupIndex[tg]; !req.All && !ok {
  2813  			continue
  2814  		}
  2815  
  2816  		need := state.DesiredCanaries
  2817  		if need == 0 {
  2818  			continue
  2819  		}
  2820  
  2821  		if have := healthyCounts[tg]; have < need {
  2822  			multierror.Append(&unhealthyErr, fmt.Errorf("Task group %q has %d/%d healthy allocations", tg, have, need))
  2823  		}
  2824  	}
  2825  
  2826  	if err := unhealthyErr.ErrorOrNil(); err != nil {
  2827  		return err
  2828  	}
  2829  
  2830  	// Update deployment
  2831  	copy := deployment.Copy()
  2832  	copy.ModifyIndex = index
  2833  	for tg, status := range copy.TaskGroups {
  2834  		_, ok := groupIndex[tg]
  2835  		if !req.All && !ok {
  2836  			continue
  2837  		}
  2838  
  2839  		status.Promoted = true
  2840  	}
  2841  
  2842  	// If the deployment no longer needs promotion, update its status
  2843  	if !copy.RequiresPromotion() && copy.Status == structs.DeploymentStatusRunning {
  2844  		copy.StatusDescription = structs.DeploymentStatusDescriptionRunning
  2845  	}
  2846  
  2847  	// Insert the deployment
  2848  	if err := s.upsertDeploymentImpl(index, copy, txn); err != nil {
  2849  		return err
  2850  	}
  2851  
  2852  	// Upsert the optional eval
  2853  	if req.Eval != nil {
  2854  		if err := s.nestedUpsertEval(txn, index, req.Eval); err != nil {
  2855  			return err
  2856  		}
  2857  	}
  2858  
  2859  	// For each promotable allocation remoce the canary field
  2860  	for _, alloc := range promotable {
  2861  		promoted := alloc.Copy()
  2862  		promoted.DeploymentStatus.Canary = false
  2863  		promoted.DeploymentStatus.ModifyIndex = index
  2864  		promoted.ModifyIndex = index
  2865  		promoted.AllocModifyIndex = index
  2866  
  2867  		if err := txn.Insert("allocs", promoted); err != nil {
  2868  			return fmt.Errorf("alloc insert failed: %v", err)
  2869  		}
  2870  	}
  2871  
  2872  	// Update the alloc index
  2873  	if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil {
  2874  		return fmt.Errorf("index update failed: %v", err)
  2875  	}
  2876  
  2877  	txn.Commit()
  2878  	return nil
  2879  }
  2880  
  2881  // UpdateDeploymentAllocHealth is used to update the health of allocations as
  2882  // part of the deployment and potentially make a evaluation
  2883  func (s *StateStore) UpdateDeploymentAllocHealth(index uint64, req *structs.ApplyDeploymentAllocHealthRequest) error {
  2884  	txn := s.db.Txn(true)
  2885  	defer txn.Abort()
  2886  
  2887  	// Retrieve deployment and ensure it is not terminal and is active
  2888  	ws := memdb.NewWatchSet()
  2889  	deployment, err := s.deploymentByIDImpl(ws, req.DeploymentID, txn)
  2890  	if err != nil {
  2891  		return err
  2892  	} else if deployment == nil {
  2893  		return fmt.Errorf("Deployment ID %q couldn't be updated as it does not exist", req.DeploymentID)
  2894  	} else if !deployment.Active() {
  2895  		return fmt.Errorf("Deployment %q has terminal status %q:", deployment.ID, deployment.Status)
  2896  	}
  2897  
  2898  	// Update the health status of each allocation
  2899  	if total := len(req.HealthyAllocationIDs) + len(req.UnhealthyAllocationIDs); total != 0 {
  2900  		setAllocHealth := func(id string, healthy bool, ts time.Time) error {
  2901  			existing, err := txn.First("allocs", "id", id)
  2902  			if err != nil {
  2903  				return fmt.Errorf("alloc %q lookup failed: %v", id, err)
  2904  			}
  2905  			if existing == nil {
  2906  				return fmt.Errorf("unknown alloc %q", id)
  2907  			}
  2908  
  2909  			old := existing.(*structs.Allocation)
  2910  			if old.DeploymentID != req.DeploymentID {
  2911  				return fmt.Errorf("alloc %q is not part of deployment %q", id, req.DeploymentID)
  2912  			}
  2913  
  2914  			// Set the health
  2915  			copy := old.Copy()
  2916  			if copy.DeploymentStatus == nil {
  2917  				copy.DeploymentStatus = &structs.AllocDeploymentStatus{}
  2918  			}
  2919  			copy.DeploymentStatus.Healthy = helper.BoolToPtr(healthy)
  2920  			copy.DeploymentStatus.Timestamp = ts
  2921  			copy.DeploymentStatus.ModifyIndex = index
  2922  			copy.ModifyIndex = index
  2923  
  2924  			if err := s.updateDeploymentWithAlloc(index, copy, old, txn); err != nil {
  2925  				return fmt.Errorf("error updating deployment: %v", err)
  2926  			}
  2927  
  2928  			if err := txn.Insert("allocs", copy); err != nil {
  2929  				return fmt.Errorf("alloc insert failed: %v", err)
  2930  			}
  2931  
  2932  			return nil
  2933  		}
  2934  
  2935  		for _, id := range req.HealthyAllocationIDs {
  2936  			if err := setAllocHealth(id, true, req.Timestamp); err != nil {
  2937  				return err
  2938  			}
  2939  		}
  2940  		for _, id := range req.UnhealthyAllocationIDs {
  2941  			if err := setAllocHealth(id, false, req.Timestamp); err != nil {
  2942  				return err
  2943  			}
  2944  		}
  2945  
  2946  		// Update the indexes
  2947  		if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil {
  2948  			return fmt.Errorf("index update failed: %v", err)
  2949  		}
  2950  	}
  2951  
  2952  	// Update the deployment status as needed.
  2953  	if req.DeploymentUpdate != nil {
  2954  		if err := s.updateDeploymentStatusImpl(index, req.DeploymentUpdate, txn); err != nil {
  2955  			return err
  2956  		}
  2957  	}
  2958  
  2959  	// Upsert the job if necessary
  2960  	if req.Job != nil {
  2961  		if err := s.upsertJobImpl(index, req.Job, false, txn); err != nil {
  2962  			return err
  2963  		}
  2964  	}
  2965  
  2966  	// Upsert the optional eval
  2967  	if req.Eval != nil {
  2968  		if err := s.nestedUpsertEval(txn, index, req.Eval); err != nil {
  2969  			return err
  2970  		}
  2971  	}
  2972  
  2973  	txn.Commit()
  2974  	return nil
  2975  }
  2976  
  2977  // LastIndex returns the greatest index value for all indexes
  2978  func (s *StateStore) LatestIndex() (uint64, error) {
  2979  	indexes, err := s.Indexes()
  2980  	if err != nil {
  2981  		return 0, err
  2982  	}
  2983  
  2984  	var max uint64 = 0
  2985  	for {
  2986  		raw := indexes.Next()
  2987  		if raw == nil {
  2988  			break
  2989  		}
  2990  
  2991  		// Prepare the request struct
  2992  		idx := raw.(*IndexEntry)
  2993  
  2994  		// Determine the max
  2995  		if idx.Value > max {
  2996  			max = idx.Value
  2997  		}
  2998  	}
  2999  
  3000  	return max, nil
  3001  }
  3002  
  3003  // Index finds the matching index value
  3004  func (s *StateStore) Index(name string) (uint64, error) {
  3005  	txn := s.db.Txn(false)
  3006  
  3007  	// Lookup the first matching index
  3008  	out, err := txn.First("index", "id", name)
  3009  	if err != nil {
  3010  		return 0, err
  3011  	}
  3012  	if out == nil {
  3013  		return 0, nil
  3014  	}
  3015  	return out.(*IndexEntry).Value, nil
  3016  }
  3017  
  3018  // RemoveIndex is a helper method to remove an index for testing purposes
  3019  func (s *StateStore) RemoveIndex(name string) error {
  3020  	txn := s.db.Txn(true)
  3021  	defer txn.Abort()
  3022  
  3023  	if _, err := txn.DeleteAll("index", "id", name); err != nil {
  3024  		return err
  3025  	}
  3026  
  3027  	txn.Commit()
  3028  	return nil
  3029  }
  3030  
  3031  // Indexes returns an iterator over all the indexes
  3032  func (s *StateStore) Indexes() (memdb.ResultIterator, error) {
  3033  	txn := s.db.Txn(false)
  3034  
  3035  	// Walk the entire nodes table
  3036  	iter, err := txn.Get("index", "id")
  3037  	if err != nil {
  3038  		return nil, err
  3039  	}
  3040  	return iter, nil
  3041  }
  3042  
  3043  // ReconcileJobSummaries re-creates summaries for all jobs present in the state
  3044  // store
  3045  func (s *StateStore) ReconcileJobSummaries(index uint64) error {
  3046  	txn := s.db.Txn(true)
  3047  	defer txn.Abort()
  3048  
  3049  	// Get all the jobs
  3050  	iter, err := txn.Get("jobs", "id")
  3051  	if err != nil {
  3052  		return err
  3053  	}
  3054  	// COMPAT: Remove after 0.11
  3055  	// Iterate over jobs to build a list of parent jobs and their children
  3056  	parentMap := make(map[string][]*structs.Job)
  3057  	for {
  3058  		rawJob := iter.Next()
  3059  		if rawJob == nil {
  3060  			break
  3061  		}
  3062  		job := rawJob.(*structs.Job)
  3063  		if job.ParentID != "" {
  3064  			children := parentMap[job.ParentID]
  3065  			children = append(children, job)
  3066  			parentMap[job.ParentID] = children
  3067  		}
  3068  	}
  3069  
  3070  	// Get all the jobs again
  3071  	iter, err = txn.Get("jobs", "id")
  3072  	if err != nil {
  3073  		return err
  3074  	}
  3075  
  3076  	for {
  3077  		rawJob := iter.Next()
  3078  		if rawJob == nil {
  3079  			break
  3080  		}
  3081  		job := rawJob.(*structs.Job)
  3082  
  3083  		if job.IsParameterized() || job.IsPeriodic() {
  3084  			// COMPAT: Remove after 0.11
  3085  
  3086  			// The following block of code fixes incorrect child summaries due to a bug
  3087  			// See https://github.com/hashicorp/nomad/issues/3886 for details
  3088  			rawSummary, err := txn.First("job_summary", "id", job.Namespace, job.ID)
  3089  			if err != nil {
  3090  				return err
  3091  			}
  3092  			if rawSummary == nil {
  3093  				continue
  3094  			}
  3095  
  3096  			oldSummary := rawSummary.(*structs.JobSummary)
  3097  
  3098  			// Create an empty summary
  3099  			summary := &structs.JobSummary{
  3100  				JobID:     job.ID,
  3101  				Namespace: job.Namespace,
  3102  				Summary:   make(map[string]structs.TaskGroupSummary),
  3103  				Children:  &structs.JobChildrenSummary{},
  3104  			}
  3105  
  3106  			// Iterate over children of this job if any to fix summary counts
  3107  			children := parentMap[job.ID]
  3108  			for _, childJob := range children {
  3109  				switch childJob.Status {
  3110  				case structs.JobStatusPending:
  3111  					summary.Children.Pending++
  3112  				case structs.JobStatusDead:
  3113  					summary.Children.Dead++
  3114  				case structs.JobStatusRunning:
  3115  					summary.Children.Running++
  3116  				}
  3117  			}
  3118  
  3119  			// Insert the job summary if its different
  3120  			if !reflect.DeepEqual(summary, oldSummary) {
  3121  				// Set the create index of the summary same as the job's create index
  3122  				// and the modify index to the current index
  3123  				summary.CreateIndex = job.CreateIndex
  3124  				summary.ModifyIndex = index
  3125  
  3126  				if err := txn.Insert("job_summary", summary); err != nil {
  3127  					return fmt.Errorf("error inserting job summary: %v", err)
  3128  				}
  3129  			}
  3130  
  3131  			// Done with handling a parent job, continue to next
  3132  			continue
  3133  		}
  3134  
  3135  		// Create a job summary for the job
  3136  		summary := &structs.JobSummary{
  3137  			JobID:     job.ID,
  3138  			Namespace: job.Namespace,
  3139  			Summary:   make(map[string]structs.TaskGroupSummary),
  3140  		}
  3141  		for _, tg := range job.TaskGroups {
  3142  			summary.Summary[tg.Name] = structs.TaskGroupSummary{}
  3143  		}
  3144  
  3145  		// COMPAT 0.7: Upgrade old objects that do not have namespaces
  3146  		if job.Namespace == "" {
  3147  			job.Namespace = structs.DefaultNamespace
  3148  		}
  3149  
  3150  		// Find all the allocations for the jobs
  3151  		iterAllocs, err := txn.Get("allocs", "job", job.Namespace, job.ID)
  3152  		if err != nil {
  3153  			return err
  3154  		}
  3155  
  3156  		// Calculate the summary for the job
  3157  		for {
  3158  			rawAlloc := iterAllocs.Next()
  3159  			if rawAlloc == nil {
  3160  				break
  3161  			}
  3162  			alloc := rawAlloc.(*structs.Allocation)
  3163  
  3164  			// Ignore the allocation if it doesn't belong to the currently
  3165  			// registered job. The allocation is checked because of issue #2304
  3166  			if alloc.Job == nil || alloc.Job.CreateIndex != job.CreateIndex {
  3167  				continue
  3168  			}
  3169  
  3170  			tg := summary.Summary[alloc.TaskGroup]
  3171  			switch alloc.ClientStatus {
  3172  			case structs.AllocClientStatusFailed:
  3173  				tg.Failed += 1
  3174  			case structs.AllocClientStatusLost:
  3175  				tg.Lost += 1
  3176  			case structs.AllocClientStatusComplete:
  3177  				tg.Complete += 1
  3178  			case structs.AllocClientStatusRunning:
  3179  				tg.Running += 1
  3180  			case structs.AllocClientStatusPending:
  3181  				tg.Starting += 1
  3182  			default:
  3183  				s.logger.Error("invalid client status set on allocation", "client_status", alloc.ClientStatus, "alloc_id", alloc.ID)
  3184  			}
  3185  			summary.Summary[alloc.TaskGroup] = tg
  3186  		}
  3187  
  3188  		// Set the create index of the summary same as the job's create index
  3189  		// and the modify index to the current index
  3190  		summary.CreateIndex = job.CreateIndex
  3191  		summary.ModifyIndex = index
  3192  
  3193  		// Insert the job summary
  3194  		if err := txn.Insert("job_summary", summary); err != nil {
  3195  			return fmt.Errorf("error inserting job summary: %v", err)
  3196  		}
  3197  	}
  3198  
  3199  	// Update the indexes table for job summary
  3200  	if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil {
  3201  		return fmt.Errorf("index update failed: %v", err)
  3202  	}
  3203  	txn.Commit()
  3204  	return nil
  3205  }
  3206  
  3207  // setJobStatuses is a helper for calling setJobStatus on multiple jobs by ID.
  3208  // It takes a map of job IDs to an optional forceStatus string. It returns an
  3209  // error if the job doesn't exist or setJobStatus fails.
  3210  func (s *StateStore) setJobStatuses(index uint64, txn *memdb.Txn,
  3211  	jobs map[structs.NamespacedID]string, evalDelete bool) error {
  3212  	for tuple, forceStatus := range jobs {
  3213  		// COMPAT 0.7: Upgrade old objects that do not have namespaces
  3214  		if tuple.Namespace == "" {
  3215  			tuple.Namespace = structs.DefaultNamespace
  3216  		}
  3217  
  3218  		existing, err := txn.First("jobs", "id", tuple.Namespace, tuple.ID)
  3219  		if err != nil {
  3220  			return fmt.Errorf("job lookup failed: %v", err)
  3221  		}
  3222  
  3223  		if existing == nil {
  3224  			continue
  3225  		}
  3226  
  3227  		if err := s.setJobStatus(index, txn, existing.(*structs.Job), evalDelete, forceStatus); err != nil {
  3228  			return err
  3229  		}
  3230  	}
  3231  
  3232  	return nil
  3233  }
  3234  
  3235  // setJobStatus sets the status of the job by looking up associated evaluations
  3236  // and allocations. evalDelete should be set to true if setJobStatus is being
  3237  // called because an evaluation is being deleted (potentially because of garbage
  3238  // collection). If forceStatus is non-empty, the job's status will be set to the
  3239  // passed status.
  3240  func (s *StateStore) setJobStatus(index uint64, txn *memdb.Txn,
  3241  	job *structs.Job, evalDelete bool, forceStatus string) error {
  3242  
  3243  	// Capture the current status so we can check if there is a change
  3244  	oldStatus := job.Status
  3245  	if index == job.CreateIndex {
  3246  		oldStatus = ""
  3247  	}
  3248  	newStatus := forceStatus
  3249  
  3250  	// If forceStatus is not set, compute the jobs status.
  3251  	if forceStatus == "" {
  3252  		var err error
  3253  		newStatus, err = s.getJobStatus(txn, job, evalDelete)
  3254  		if err != nil {
  3255  			return err
  3256  		}
  3257  	}
  3258  
  3259  	// Fast-path if nothing has changed.
  3260  	if oldStatus == newStatus {
  3261  		return nil
  3262  	}
  3263  
  3264  	// Copy and update the existing job
  3265  	updated := job.Copy()
  3266  	updated.Status = newStatus
  3267  	updated.ModifyIndex = index
  3268  
  3269  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
  3270  	if updated.Namespace == "" {
  3271  		updated.Namespace = structs.DefaultNamespace
  3272  	}
  3273  
  3274  	// Insert the job
  3275  	if err := txn.Insert("jobs", updated); err != nil {
  3276  		return fmt.Errorf("job insert failed: %v", err)
  3277  	}
  3278  	if err := txn.Insert("index", &IndexEntry{"jobs", index}); err != nil {
  3279  		return fmt.Errorf("index update failed: %v", err)
  3280  	}
  3281  
  3282  	// Update the children summary
  3283  	if updated.ParentID != "" {
  3284  		// Try to update the summary of the parent job summary
  3285  		summaryRaw, err := txn.First("job_summary", "id", updated.Namespace, updated.ParentID)
  3286  		if err != nil {
  3287  			return fmt.Errorf("unable to retrieve summary for parent job: %v", err)
  3288  		}
  3289  
  3290  		// Only continue if the summary exists. It could not exist if the parent
  3291  		// job was removed
  3292  		if summaryRaw != nil {
  3293  			existing := summaryRaw.(*structs.JobSummary)
  3294  			pSummary := existing.Copy()
  3295  			if pSummary.Children == nil {
  3296  				pSummary.Children = new(structs.JobChildrenSummary)
  3297  			}
  3298  
  3299  			// COMPAT 0.7: Upgrade old objects that do not have namespaces
  3300  			if pSummary.Namespace == "" {
  3301  				pSummary.Namespace = structs.DefaultNamespace
  3302  			}
  3303  
  3304  			// Determine the transition and update the correct fields
  3305  			children := pSummary.Children
  3306  
  3307  			// Decrement old status
  3308  			if oldStatus != "" {
  3309  				switch oldStatus {
  3310  				case structs.JobStatusPending:
  3311  					children.Pending--
  3312  				case structs.JobStatusRunning:
  3313  					children.Running--
  3314  				case structs.JobStatusDead:
  3315  					children.Dead--
  3316  				default:
  3317  					return fmt.Errorf("unknown old job status %q", oldStatus)
  3318  				}
  3319  			}
  3320  
  3321  			// Increment new status
  3322  			switch newStatus {
  3323  			case structs.JobStatusPending:
  3324  				children.Pending++
  3325  			case structs.JobStatusRunning:
  3326  				children.Running++
  3327  			case structs.JobStatusDead:
  3328  				children.Dead++
  3329  			default:
  3330  				return fmt.Errorf("unknown new job status %q", newStatus)
  3331  			}
  3332  
  3333  			// Update the index
  3334  			pSummary.ModifyIndex = index
  3335  
  3336  			// Insert the summary
  3337  			if err := txn.Insert("job_summary", pSummary); err != nil {
  3338  				return fmt.Errorf("job summary insert failed: %v", err)
  3339  			}
  3340  			if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil {
  3341  				return fmt.Errorf("index update failed: %v", err)
  3342  			}
  3343  		}
  3344  	}
  3345  
  3346  	return nil
  3347  }
  3348  
  3349  func (s *StateStore) getJobStatus(txn *memdb.Txn, job *structs.Job, evalDelete bool) (string, error) {
  3350  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
  3351  	if job.Namespace == "" {
  3352  		job.Namespace = structs.DefaultNamespace
  3353  	}
  3354  
  3355  	// System, Periodic and Parameterized jobs are running until explicitly
  3356  	// stopped
  3357  	if job.Type == structs.JobTypeSystem || job.IsParameterized() || job.IsPeriodic() {
  3358  		if job.Stop {
  3359  			return structs.JobStatusDead, nil
  3360  		}
  3361  
  3362  		return structs.JobStatusRunning, nil
  3363  	}
  3364  
  3365  	allocs, err := txn.Get("allocs", "job", job.Namespace, job.ID)
  3366  	if err != nil {
  3367  		return "", err
  3368  	}
  3369  
  3370  	// If there is a non-terminal allocation, the job is running.
  3371  	hasAlloc := false
  3372  	for alloc := allocs.Next(); alloc != nil; alloc = allocs.Next() {
  3373  		hasAlloc = true
  3374  		if !alloc.(*structs.Allocation).TerminalStatus() {
  3375  			return structs.JobStatusRunning, nil
  3376  		}
  3377  	}
  3378  
  3379  	evals, err := txn.Get("evals", "job_prefix", job.Namespace, job.ID)
  3380  	if err != nil {
  3381  		return "", err
  3382  	}
  3383  
  3384  	hasEval := false
  3385  	for raw := evals.Next(); raw != nil; raw = evals.Next() {
  3386  		e := raw.(*structs.Evaluation)
  3387  
  3388  		// Filter non-exact matches
  3389  		if e.JobID != job.ID {
  3390  			continue
  3391  		}
  3392  
  3393  		hasEval = true
  3394  		if !e.TerminalStatus() {
  3395  			return structs.JobStatusPending, nil
  3396  		}
  3397  	}
  3398  
  3399  	// The job is dead if all the allocations and evals are terminal or if there
  3400  	// are no evals because of garbage collection.
  3401  	if evalDelete || hasEval || hasAlloc {
  3402  		return structs.JobStatusDead, nil
  3403  	}
  3404  
  3405  	return structs.JobStatusPending, nil
  3406  }
  3407  
  3408  // updateSummaryWithJob creates or updates job summaries when new jobs are
  3409  // upserted or existing ones are updated
  3410  func (s *StateStore) updateSummaryWithJob(index uint64, job *structs.Job,
  3411  	txn *memdb.Txn) error {
  3412  
  3413  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
  3414  	if job.Namespace == "" {
  3415  		job.Namespace = structs.DefaultNamespace
  3416  	}
  3417  
  3418  	// Update the job summary
  3419  	summaryRaw, err := txn.First("job_summary", "id", job.Namespace, job.ID)
  3420  	if err != nil {
  3421  		return fmt.Errorf("job summary lookup failed: %v", err)
  3422  	}
  3423  
  3424  	// Get the summary or create if necessary
  3425  	var summary *structs.JobSummary
  3426  	hasSummaryChanged := false
  3427  	if summaryRaw != nil {
  3428  		summary = summaryRaw.(*structs.JobSummary).Copy()
  3429  	} else {
  3430  		summary = &structs.JobSummary{
  3431  			JobID:       job.ID,
  3432  			Namespace:   job.Namespace,
  3433  			Summary:     make(map[string]structs.TaskGroupSummary),
  3434  			Children:    new(structs.JobChildrenSummary),
  3435  			CreateIndex: index,
  3436  		}
  3437  		hasSummaryChanged = true
  3438  	}
  3439  
  3440  	for _, tg := range job.TaskGroups {
  3441  		if _, ok := summary.Summary[tg.Name]; !ok {
  3442  			newSummary := structs.TaskGroupSummary{
  3443  				Complete: 0,
  3444  				Failed:   0,
  3445  				Running:  0,
  3446  				Starting: 0,
  3447  			}
  3448  			summary.Summary[tg.Name] = newSummary
  3449  			hasSummaryChanged = true
  3450  		}
  3451  	}
  3452  
  3453  	// The job summary has changed, so update the modify index.
  3454  	if hasSummaryChanged {
  3455  		summary.ModifyIndex = index
  3456  
  3457  		// COMPAT 0.7: Upgrade old objects that do not have namespaces
  3458  		if summary.Namespace == "" {
  3459  			summary.Namespace = structs.DefaultNamespace
  3460  		}
  3461  
  3462  		// Update the indexes table for job summary
  3463  		if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil {
  3464  			return fmt.Errorf("index update failed: %v", err)
  3465  		}
  3466  		if err := txn.Insert("job_summary", summary); err != nil {
  3467  			return err
  3468  		}
  3469  	}
  3470  
  3471  	return nil
  3472  }
  3473  
  3474  // updateDeploymentWithAlloc is used to update the deployment state associated
  3475  // with the given allocation. The passed alloc may be updated if the deployment
  3476  // status has changed to capture the modify index at which it has changed.
  3477  func (s *StateStore) updateDeploymentWithAlloc(index uint64, alloc, existing *structs.Allocation, txn *memdb.Txn) error {
  3478  	// Nothing to do if the allocation is not associated with a deployment
  3479  	if alloc.DeploymentID == "" {
  3480  		return nil
  3481  	}
  3482  
  3483  	// Get the deployment
  3484  	ws := memdb.NewWatchSet()
  3485  	deployment, err := s.deploymentByIDImpl(ws, alloc.DeploymentID, txn)
  3486  	if err != nil {
  3487  		return err
  3488  	}
  3489  	if deployment == nil {
  3490  		return nil
  3491  	}
  3492  
  3493  	// Retrieve the deployment state object
  3494  	_, ok := deployment.TaskGroups[alloc.TaskGroup]
  3495  	if !ok {
  3496  		// If the task group isn't part of the deployment, the task group wasn't
  3497  		// part of a rolling update so nothing to do
  3498  		return nil
  3499  	}
  3500  
  3501  	// Do not modify in-place. Instead keep track of what must be done
  3502  	placed := 0
  3503  	healthy := 0
  3504  	unhealthy := 0
  3505  
  3506  	// If there was no existing allocation, this is a placement and we increment
  3507  	// the placement
  3508  	existingHealthSet := existing != nil && existing.DeploymentStatus.HasHealth()
  3509  	allocHealthSet := alloc.DeploymentStatus.HasHealth()
  3510  	if existing == nil || existing.DeploymentID != alloc.DeploymentID {
  3511  		placed++
  3512  	} else if !existingHealthSet && allocHealthSet {
  3513  		if *alloc.DeploymentStatus.Healthy {
  3514  			healthy++
  3515  		} else {
  3516  			unhealthy++
  3517  		}
  3518  	} else if existingHealthSet && allocHealthSet {
  3519  		// See if it has gone from healthy to unhealthy
  3520  		if *existing.DeploymentStatus.Healthy && !*alloc.DeploymentStatus.Healthy {
  3521  			healthy--
  3522  			unhealthy++
  3523  		}
  3524  	}
  3525  
  3526  	// Nothing to do
  3527  	if placed == 0 && healthy == 0 && unhealthy == 0 {
  3528  		return nil
  3529  	}
  3530  
  3531  	// Update the allocation's deployment status modify index
  3532  	if alloc.DeploymentStatus != nil && healthy+unhealthy != 0 {
  3533  		alloc.DeploymentStatus.ModifyIndex = index
  3534  	}
  3535  
  3536  	// Create a copy of the deployment object
  3537  	deploymentCopy := deployment.Copy()
  3538  	deploymentCopy.ModifyIndex = index
  3539  
  3540  	state := deploymentCopy.TaskGroups[alloc.TaskGroup]
  3541  	state.PlacedAllocs += placed
  3542  	state.HealthyAllocs += healthy
  3543  	state.UnhealthyAllocs += unhealthy
  3544  
  3545  	// Update the progress deadline
  3546  	if pd := state.ProgressDeadline; pd != 0 {
  3547  		// If we are the first placed allocation for the deployment start the progress deadline.
  3548  		if placed != 0 && state.RequireProgressBy.IsZero() {
  3549  			// Use modify time instead of create time because we may in-place
  3550  			// update the allocation to be part of a new deployment.
  3551  			state.RequireProgressBy = time.Unix(0, alloc.ModifyTime).Add(pd)
  3552  		} else if healthy != 0 {
  3553  			if d := alloc.DeploymentStatus.Timestamp.Add(pd); d.After(state.RequireProgressBy) {
  3554  				state.RequireProgressBy = d
  3555  			}
  3556  		}
  3557  	}
  3558  
  3559  	// Upsert the deployment
  3560  	if err := s.upsertDeploymentImpl(index, deploymentCopy, txn); err != nil {
  3561  		return err
  3562  	}
  3563  
  3564  	return nil
  3565  }
  3566  
  3567  // updateSummaryWithAlloc updates the job summary when allocations are updated
  3568  // or inserted
  3569  func (s *StateStore) updateSummaryWithAlloc(index uint64, alloc *structs.Allocation,
  3570  	existingAlloc *structs.Allocation, txn *memdb.Txn) error {
  3571  
  3572  	// We don't have to update the summary if the job is missing
  3573  	if alloc.Job == nil {
  3574  		return nil
  3575  	}
  3576  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
  3577  	if alloc.Namespace == "" {
  3578  		alloc.Namespace = structs.DefaultNamespace
  3579  	}
  3580  
  3581  	summaryRaw, err := txn.First("job_summary", "id", alloc.Namespace, alloc.JobID)
  3582  	if err != nil {
  3583  		return fmt.Errorf("unable to lookup job summary for job id %q in namespace %q: %v", alloc.JobID, alloc.Namespace, err)
  3584  	}
  3585  
  3586  	if summaryRaw == nil {
  3587  		// Check if the job is de-registered
  3588  		rawJob, err := txn.First("jobs", "id", alloc.Namespace, alloc.JobID)
  3589  		if err != nil {
  3590  			return fmt.Errorf("unable to query job: %v", err)
  3591  		}
  3592  
  3593  		// If the job is de-registered then we skip updating it's summary
  3594  		if rawJob == nil {
  3595  			return nil
  3596  		}
  3597  
  3598  		return fmt.Errorf("job summary for job %q in namespace %q is not present", alloc.JobID, alloc.Namespace)
  3599  	}
  3600  
  3601  	// Get a copy of the existing summary
  3602  	jobSummary := summaryRaw.(*structs.JobSummary).Copy()
  3603  
  3604  	// Not updating the job summary because the allocation doesn't belong to the
  3605  	// currently registered job
  3606  	if jobSummary.CreateIndex != alloc.Job.CreateIndex {
  3607  		return nil
  3608  	}
  3609  
  3610  	tgSummary, ok := jobSummary.Summary[alloc.TaskGroup]
  3611  	if !ok {
  3612  		return fmt.Errorf("unable to find task group in the job summary: %v", alloc.TaskGroup)
  3613  	}
  3614  
  3615  	summaryChanged := false
  3616  	if existingAlloc == nil {
  3617  		switch alloc.DesiredStatus {
  3618  		case structs.AllocDesiredStatusStop, structs.AllocDesiredStatusEvict:
  3619  			s.logger.Error("new allocation inserted into state store with bad desired status",
  3620  				"alloc_id", alloc.ID, "desired_status", alloc.DesiredStatus)
  3621  		}
  3622  		switch alloc.ClientStatus {
  3623  		case structs.AllocClientStatusPending:
  3624  			tgSummary.Starting += 1
  3625  			if tgSummary.Queued > 0 {
  3626  				tgSummary.Queued -= 1
  3627  			}
  3628  			summaryChanged = true
  3629  		case structs.AllocClientStatusRunning, structs.AllocClientStatusFailed,
  3630  			structs.AllocClientStatusComplete:
  3631  			s.logger.Error("new allocation inserted into state store with bad client status",
  3632  				"alloc_id", alloc.ID, "client_status", alloc.ClientStatus)
  3633  		}
  3634  	} else if existingAlloc.ClientStatus != alloc.ClientStatus {
  3635  		// Incrementing the client of the bin of the current state
  3636  		switch alloc.ClientStatus {
  3637  		case structs.AllocClientStatusRunning:
  3638  			tgSummary.Running += 1
  3639  		case structs.AllocClientStatusFailed:
  3640  			tgSummary.Failed += 1
  3641  		case structs.AllocClientStatusPending:
  3642  			tgSummary.Starting += 1
  3643  		case structs.AllocClientStatusComplete:
  3644  			tgSummary.Complete += 1
  3645  		case structs.AllocClientStatusLost:
  3646  			tgSummary.Lost += 1
  3647  		}
  3648  
  3649  		// Decrementing the count of the bin of the last state
  3650  		switch existingAlloc.ClientStatus {
  3651  		case structs.AllocClientStatusRunning:
  3652  			if tgSummary.Running > 0 {
  3653  				tgSummary.Running -= 1
  3654  			}
  3655  		case structs.AllocClientStatusPending:
  3656  			if tgSummary.Starting > 0 {
  3657  				tgSummary.Starting -= 1
  3658  			}
  3659  		case structs.AllocClientStatusLost:
  3660  			if tgSummary.Lost > 0 {
  3661  				tgSummary.Lost -= 1
  3662  			}
  3663  		case structs.AllocClientStatusFailed, structs.AllocClientStatusComplete:
  3664  		default:
  3665  			s.logger.Error("invalid old client status for allocatio",
  3666  				"alloc_id", existingAlloc.ID, "client_status", existingAlloc.ClientStatus)
  3667  		}
  3668  		summaryChanged = true
  3669  	}
  3670  	jobSummary.Summary[alloc.TaskGroup] = tgSummary
  3671  
  3672  	if summaryChanged {
  3673  		jobSummary.ModifyIndex = index
  3674  
  3675  		// COMPAT 0.7: Upgrade old objects that do not have namespaces
  3676  		if jobSummary.Namespace == "" {
  3677  			jobSummary.Namespace = structs.DefaultNamespace
  3678  		}
  3679  
  3680  		// Update the indexes table for job summary
  3681  		if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil {
  3682  			return fmt.Errorf("index update failed: %v", err)
  3683  		}
  3684  
  3685  		if err := txn.Insert("job_summary", jobSummary); err != nil {
  3686  			return fmt.Errorf("updating job summary failed: %v", err)
  3687  		}
  3688  	}
  3689  
  3690  	return nil
  3691  }
  3692  
  3693  // addEphemeralDiskToTaskGroups adds missing EphemeralDisk objects to TaskGroups
  3694  func (s *StateStore) addEphemeralDiskToTaskGroups(job *structs.Job) {
  3695  	for _, tg := range job.TaskGroups {
  3696  		var diskMB int
  3697  		for _, task := range tg.Tasks {
  3698  			if task.Resources != nil {
  3699  				diskMB += task.Resources.DiskMB
  3700  				task.Resources.DiskMB = 0
  3701  			}
  3702  		}
  3703  		if tg.EphemeralDisk != nil {
  3704  			continue
  3705  		}
  3706  		tg.EphemeralDisk = &structs.EphemeralDisk{
  3707  			SizeMB: diskMB,
  3708  		}
  3709  	}
  3710  }
  3711  
  3712  // UpsertACLPolicies is used to create or update a set of ACL policies
  3713  func (s *StateStore) UpsertACLPolicies(index uint64, policies []*structs.ACLPolicy) error {
  3714  	txn := s.db.Txn(true)
  3715  	defer txn.Abort()
  3716  
  3717  	for _, policy := range policies {
  3718  		// Ensure the policy hash is non-nil. This should be done outside the state store
  3719  		// for performance reasons, but we check here for defense in depth.
  3720  		if len(policy.Hash) == 0 {
  3721  			policy.SetHash()
  3722  		}
  3723  
  3724  		// Check if the policy already exists
  3725  		existing, err := txn.First("acl_policy", "id", policy.Name)
  3726  		if err != nil {
  3727  			return fmt.Errorf("policy lookup failed: %v", err)
  3728  		}
  3729  
  3730  		// Update all the indexes
  3731  		if existing != nil {
  3732  			policy.CreateIndex = existing.(*structs.ACLPolicy).CreateIndex
  3733  			policy.ModifyIndex = index
  3734  		} else {
  3735  			policy.CreateIndex = index
  3736  			policy.ModifyIndex = index
  3737  		}
  3738  
  3739  		// Update the policy
  3740  		if err := txn.Insert("acl_policy", policy); err != nil {
  3741  			return fmt.Errorf("upserting policy failed: %v", err)
  3742  		}
  3743  	}
  3744  
  3745  	// Update the indexes tabl
  3746  	if err := txn.Insert("index", &IndexEntry{"acl_policy", index}); err != nil {
  3747  		return fmt.Errorf("index update failed: %v", err)
  3748  	}
  3749  
  3750  	txn.Commit()
  3751  	return nil
  3752  }
  3753  
  3754  // DeleteACLPolicies deletes the policies with the given names
  3755  func (s *StateStore) DeleteACLPolicies(index uint64, names []string) error {
  3756  	txn := s.db.Txn(true)
  3757  	defer txn.Abort()
  3758  
  3759  	// Delete the policy
  3760  	for _, name := range names {
  3761  		if _, err := txn.DeleteAll("acl_policy", "id", name); err != nil {
  3762  			return fmt.Errorf("deleting acl policy failed: %v", err)
  3763  		}
  3764  	}
  3765  	if err := txn.Insert("index", &IndexEntry{"acl_policy", index}); err != nil {
  3766  		return fmt.Errorf("index update failed: %v", err)
  3767  	}
  3768  	txn.Commit()
  3769  	return nil
  3770  }
  3771  
  3772  // ACLPolicyByName is used to lookup a policy by name
  3773  func (s *StateStore) ACLPolicyByName(ws memdb.WatchSet, name string) (*structs.ACLPolicy, error) {
  3774  	txn := s.db.Txn(false)
  3775  
  3776  	watchCh, existing, err := txn.FirstWatch("acl_policy", "id", name)
  3777  	if err != nil {
  3778  		return nil, fmt.Errorf("acl policy lookup failed: %v", err)
  3779  	}
  3780  	ws.Add(watchCh)
  3781  
  3782  	if existing != nil {
  3783  		return existing.(*structs.ACLPolicy), nil
  3784  	}
  3785  	return nil, nil
  3786  }
  3787  
  3788  // ACLPolicyByNamePrefix is used to lookup policies by prefix
  3789  func (s *StateStore) ACLPolicyByNamePrefix(ws memdb.WatchSet, prefix string) (memdb.ResultIterator, error) {
  3790  	txn := s.db.Txn(false)
  3791  
  3792  	iter, err := txn.Get("acl_policy", "id_prefix", prefix)
  3793  	if err != nil {
  3794  		return nil, fmt.Errorf("acl policy lookup failed: %v", err)
  3795  	}
  3796  	ws.Add(iter.WatchCh())
  3797  
  3798  	return iter, nil
  3799  }
  3800  
  3801  // ACLPolicies returns an iterator over all the acl policies
  3802  func (s *StateStore) ACLPolicies(ws memdb.WatchSet) (memdb.ResultIterator, error) {
  3803  	txn := s.db.Txn(false)
  3804  
  3805  	// Walk the entire table
  3806  	iter, err := txn.Get("acl_policy", "id")
  3807  	if err != nil {
  3808  		return nil, err
  3809  	}
  3810  	ws.Add(iter.WatchCh())
  3811  	return iter, nil
  3812  }
  3813  
  3814  // UpsertACLTokens is used to create or update a set of ACL tokens
  3815  func (s *StateStore) UpsertACLTokens(index uint64, tokens []*structs.ACLToken) error {
  3816  	txn := s.db.Txn(true)
  3817  	defer txn.Abort()
  3818  
  3819  	for _, token := range tokens {
  3820  		// Ensure the policy hash is non-nil. This should be done outside the state store
  3821  		// for performance reasons, but we check here for defense in depth.
  3822  		if len(token.Hash) == 0 {
  3823  			token.SetHash()
  3824  		}
  3825  
  3826  		// Check if the token already exists
  3827  		existing, err := txn.First("acl_token", "id", token.AccessorID)
  3828  		if err != nil {
  3829  			return fmt.Errorf("token lookup failed: %v", err)
  3830  		}
  3831  
  3832  		// Update all the indexes
  3833  		if existing != nil {
  3834  			existTK := existing.(*structs.ACLToken)
  3835  			token.CreateIndex = existTK.CreateIndex
  3836  			token.ModifyIndex = index
  3837  
  3838  			// Do not allow SecretID or create time to change
  3839  			token.SecretID = existTK.SecretID
  3840  			token.CreateTime = existTK.CreateTime
  3841  
  3842  		} else {
  3843  			token.CreateIndex = index
  3844  			token.ModifyIndex = index
  3845  		}
  3846  
  3847  		// Update the token
  3848  		if err := txn.Insert("acl_token", token); err != nil {
  3849  			return fmt.Errorf("upserting token failed: %v", err)
  3850  		}
  3851  	}
  3852  
  3853  	// Update the indexes table
  3854  	if err := txn.Insert("index", &IndexEntry{"acl_token", index}); err != nil {
  3855  		return fmt.Errorf("index update failed: %v", err)
  3856  	}
  3857  	txn.Commit()
  3858  	return nil
  3859  }
  3860  
  3861  // DeleteACLTokens deletes the tokens with the given accessor ids
  3862  func (s *StateStore) DeleteACLTokens(index uint64, ids []string) error {
  3863  	txn := s.db.Txn(true)
  3864  	defer txn.Abort()
  3865  
  3866  	// Delete the tokens
  3867  	for _, id := range ids {
  3868  		if _, err := txn.DeleteAll("acl_token", "id", id); err != nil {
  3869  			return fmt.Errorf("deleting acl token failed: %v", err)
  3870  		}
  3871  	}
  3872  	if err := txn.Insert("index", &IndexEntry{"acl_token", index}); err != nil {
  3873  		return fmt.Errorf("index update failed: %v", err)
  3874  	}
  3875  	txn.Commit()
  3876  	return nil
  3877  }
  3878  
  3879  // ACLTokenByAccessorID is used to lookup a token by accessor ID
  3880  func (s *StateStore) ACLTokenByAccessorID(ws memdb.WatchSet, id string) (*structs.ACLToken, error) {
  3881  	txn := s.db.Txn(false)
  3882  
  3883  	watchCh, existing, err := txn.FirstWatch("acl_token", "id", id)
  3884  	if err != nil {
  3885  		return nil, fmt.Errorf("acl token lookup failed: %v", err)
  3886  	}
  3887  	ws.Add(watchCh)
  3888  
  3889  	if existing != nil {
  3890  		return existing.(*structs.ACLToken), nil
  3891  	}
  3892  	return nil, nil
  3893  }
  3894  
  3895  // ACLTokenBySecretID is used to lookup a token by secret ID
  3896  func (s *StateStore) ACLTokenBySecretID(ws memdb.WatchSet, secretID string) (*structs.ACLToken, error) {
  3897  	txn := s.db.Txn(false)
  3898  
  3899  	watchCh, existing, err := txn.FirstWatch("acl_token", "secret", secretID)
  3900  	if err != nil {
  3901  		return nil, fmt.Errorf("acl token lookup failed: %v", err)
  3902  	}
  3903  	ws.Add(watchCh)
  3904  
  3905  	if existing != nil {
  3906  		return existing.(*structs.ACLToken), nil
  3907  	}
  3908  	return nil, nil
  3909  }
  3910  
  3911  // ACLTokenByAccessorIDPrefix is used to lookup tokens by prefix
  3912  func (s *StateStore) ACLTokenByAccessorIDPrefix(ws memdb.WatchSet, prefix string) (memdb.ResultIterator, error) {
  3913  	txn := s.db.Txn(false)
  3914  
  3915  	iter, err := txn.Get("acl_token", "id_prefix", prefix)
  3916  	if err != nil {
  3917  		return nil, fmt.Errorf("acl token lookup failed: %v", err)
  3918  	}
  3919  	ws.Add(iter.WatchCh())
  3920  	return iter, nil
  3921  }
  3922  
  3923  // ACLTokens returns an iterator over all the tokens
  3924  func (s *StateStore) ACLTokens(ws memdb.WatchSet) (memdb.ResultIterator, error) {
  3925  	txn := s.db.Txn(false)
  3926  
  3927  	// Walk the entire table
  3928  	iter, err := txn.Get("acl_token", "id")
  3929  	if err != nil {
  3930  		return nil, err
  3931  	}
  3932  	ws.Add(iter.WatchCh())
  3933  	return iter, nil
  3934  }
  3935  
  3936  // ACLTokensByGlobal returns an iterator over all the tokens filtered by global value
  3937  func (s *StateStore) ACLTokensByGlobal(ws memdb.WatchSet, globalVal bool) (memdb.ResultIterator, error) {
  3938  	txn := s.db.Txn(false)
  3939  
  3940  	// Walk the entire table
  3941  	iter, err := txn.Get("acl_token", "global", globalVal)
  3942  	if err != nil {
  3943  		return nil, err
  3944  	}
  3945  	ws.Add(iter.WatchCh())
  3946  	return iter, nil
  3947  }
  3948  
  3949  // CanBootstrapACLToken checks if bootstrapping is possible and returns the reset index
  3950  func (s *StateStore) CanBootstrapACLToken() (bool, uint64, error) {
  3951  	txn := s.db.Txn(false)
  3952  
  3953  	// Lookup the bootstrap sentinel
  3954  	out, err := txn.First("index", "id", "acl_token_bootstrap")
  3955  	if err != nil {
  3956  		return false, 0, err
  3957  	}
  3958  
  3959  	// No entry, we haven't bootstrapped yet
  3960  	if out == nil {
  3961  		return true, 0, nil
  3962  	}
  3963  
  3964  	// Return the reset index if we've already bootstrapped
  3965  	return false, out.(*IndexEntry).Value, nil
  3966  }
  3967  
  3968  // BootstrapACLToken is used to create an initial ACL token
  3969  func (s *StateStore) BootstrapACLTokens(index, resetIndex uint64, token *structs.ACLToken) error {
  3970  	txn := s.db.Txn(true)
  3971  	defer txn.Abort()
  3972  
  3973  	// Check if we have already done a bootstrap
  3974  	existing, err := txn.First("index", "id", "acl_token_bootstrap")
  3975  	if err != nil {
  3976  		return fmt.Errorf("bootstrap check failed: %v", err)
  3977  	}
  3978  	if existing != nil {
  3979  		if resetIndex == 0 {
  3980  			return fmt.Errorf("ACL bootstrap already done")
  3981  		} else if resetIndex != existing.(*IndexEntry).Value {
  3982  			return fmt.Errorf("Invalid reset index for ACL bootstrap")
  3983  		}
  3984  	}
  3985  
  3986  	// Update the Create/Modify time
  3987  	token.CreateIndex = index
  3988  	token.ModifyIndex = index
  3989  
  3990  	// Insert the token
  3991  	if err := txn.Insert("acl_token", token); err != nil {
  3992  		return fmt.Errorf("upserting token failed: %v", err)
  3993  	}
  3994  
  3995  	// Update the indexes table, prevents future bootstrap until reset
  3996  	if err := txn.Insert("index", &IndexEntry{"acl_token", index}); err != nil {
  3997  		return fmt.Errorf("index update failed: %v", err)
  3998  	}
  3999  	if err := txn.Insert("index", &IndexEntry{"acl_token_bootstrap", index}); err != nil {
  4000  		return fmt.Errorf("index update failed: %v", err)
  4001  	}
  4002  	txn.Commit()
  4003  	return nil
  4004  }
  4005  
  4006  // SchedulerConfig is used to get the current Scheduler configuration.
  4007  func (s *StateStore) SchedulerConfig() (uint64, *structs.SchedulerConfiguration, error) {
  4008  	tx := s.db.Txn(false)
  4009  	defer tx.Abort()
  4010  
  4011  	// Get the scheduler config
  4012  	c, err := tx.First("scheduler_config", "id")
  4013  	if err != nil {
  4014  		return 0, nil, fmt.Errorf("failed scheduler config lookup: %s", err)
  4015  	}
  4016  
  4017  	config, ok := c.(*structs.SchedulerConfiguration)
  4018  	if !ok {
  4019  		return 0, nil, nil
  4020  	}
  4021  
  4022  	return config.ModifyIndex, config, nil
  4023  }
  4024  
  4025  // SchedulerSetConfig is used to set the current Scheduler configuration.
  4026  func (s *StateStore) SchedulerSetConfig(idx uint64, config *structs.SchedulerConfiguration) error {
  4027  	tx := s.db.Txn(true)
  4028  	defer tx.Abort()
  4029  
  4030  	s.schedulerSetConfigTxn(idx, tx, config)
  4031  
  4032  	tx.Commit()
  4033  	return nil
  4034  }
  4035  
  4036  // WithWriteTransaction executes the passed function within a write transaction,
  4037  // and returns its result.  If the invocation returns no error, the transaction
  4038  // is committed; otherwise, it's aborted.
  4039  func (s *StateStore) WithWriteTransaction(fn func(Txn) error) error {
  4040  	tx := s.db.Txn(true)
  4041  	defer tx.Abort()
  4042  
  4043  	err := fn(tx)
  4044  	if err == nil {
  4045  		tx.Commit()
  4046  	}
  4047  	return err
  4048  }
  4049  
  4050  // SchedulerCASConfig is used to update the scheduler configuration with a
  4051  // given Raft index. If the CAS index specified is not equal to the last observed index
  4052  // for the config, then the call is a noop.
  4053  func (s *StateStore) SchedulerCASConfig(idx, cidx uint64, config *structs.SchedulerConfiguration) (bool, error) {
  4054  	tx := s.db.Txn(true)
  4055  	defer tx.Abort()
  4056  
  4057  	// Check for an existing config
  4058  	existing, err := tx.First("scheduler_config", "id")
  4059  	if err != nil {
  4060  		return false, fmt.Errorf("failed scheduler config lookup: %s", err)
  4061  	}
  4062  
  4063  	// If the existing index does not match the provided CAS
  4064  	// index arg, then we shouldn't update anything and can safely
  4065  	// return early here.
  4066  	e, ok := existing.(*structs.SchedulerConfiguration)
  4067  	if !ok || (e != nil && e.ModifyIndex != cidx) {
  4068  		return false, nil
  4069  	}
  4070  
  4071  	s.schedulerSetConfigTxn(idx, tx, config)
  4072  
  4073  	tx.Commit()
  4074  	return true, nil
  4075  }
  4076  
  4077  func (s *StateStore) schedulerSetConfigTxn(idx uint64, tx *memdb.Txn, config *structs.SchedulerConfiguration) error {
  4078  	// Check for an existing config
  4079  	existing, err := tx.First("scheduler_config", "id")
  4080  	if err != nil {
  4081  		return fmt.Errorf("failed scheduler config lookup: %s", err)
  4082  	}
  4083  
  4084  	// Set the indexes.
  4085  	if existing != nil {
  4086  		config.CreateIndex = existing.(*structs.SchedulerConfiguration).CreateIndex
  4087  	} else {
  4088  		config.CreateIndex = idx
  4089  	}
  4090  	config.ModifyIndex = idx
  4091  
  4092  	if err := tx.Insert("scheduler_config", config); err != nil {
  4093  		return fmt.Errorf("failed updating scheduler config: %s", err)
  4094  	}
  4095  	return nil
  4096  }
  4097  
// StateSnapshot is used to provide a point-in-time snapshot. It embeds
// StateStore by value, so all StateStore fields and value-receiver methods
// are promoted onto the snapshot.
type StateSnapshot struct {
	StateStore
}
  4102  
// StateRestore is used to optimize the performance when
// restoring state by only using a single large transaction
// instead of thousands of sub transactions. Every *Restore method
// inserts into this one transaction; callers finish with Commit
// or Abort.
type StateRestore struct {
	// txn is the single transaction shared by all restore operations
	txn *memdb.Txn
}
  4109  
  4110  // Abort is used to abort the restore operation
  4111  func (s *StateRestore) Abort() {
  4112  	s.txn.Abort()
  4113  }
  4114  
  4115  // Commit is used to commit the restore operation
  4116  func (s *StateRestore) Commit() {
  4117  	s.txn.Commit()
  4118  }
  4119  
  4120  // NodeRestore is used to restore a node
  4121  func (r *StateRestore) NodeRestore(node *structs.Node) error {
  4122  	if err := r.txn.Insert("nodes", node); err != nil {
  4123  		return fmt.Errorf("node insert failed: %v", err)
  4124  	}
  4125  	return nil
  4126  }
  4127  
  4128  // JobRestore is used to restore a job
  4129  func (r *StateRestore) JobRestore(job *structs.Job) error {
  4130  	if err := r.txn.Insert("jobs", job); err != nil {
  4131  		return fmt.Errorf("job insert failed: %v", err)
  4132  	}
  4133  	return nil
  4134  }
  4135  
  4136  // EvalRestore is used to restore an evaluation
  4137  func (r *StateRestore) EvalRestore(eval *structs.Evaluation) error {
  4138  	if err := r.txn.Insert("evals", eval); err != nil {
  4139  		return fmt.Errorf("eval insert failed: %v", err)
  4140  	}
  4141  	return nil
  4142  }
  4143  
  4144  // AllocRestore is used to restore an allocation
  4145  func (r *StateRestore) AllocRestore(alloc *structs.Allocation) error {
  4146  	if err := r.txn.Insert("allocs", alloc); err != nil {
  4147  		return fmt.Errorf("alloc insert failed: %v", err)
  4148  	}
  4149  	return nil
  4150  }
  4151  
  4152  // IndexRestore is used to restore an index
  4153  func (r *StateRestore) IndexRestore(idx *IndexEntry) error {
  4154  	if err := r.txn.Insert("index", idx); err != nil {
  4155  		return fmt.Errorf("index insert failed: %v", err)
  4156  	}
  4157  	return nil
  4158  }
  4159  
  4160  // PeriodicLaunchRestore is used to restore a periodic launch.
  4161  func (r *StateRestore) PeriodicLaunchRestore(launch *structs.PeriodicLaunch) error {
  4162  	if err := r.txn.Insert("periodic_launch", launch); err != nil {
  4163  		return fmt.Errorf("periodic launch insert failed: %v", err)
  4164  	}
  4165  	return nil
  4166  }
  4167  
  4168  // JobSummaryRestore is used to restore a job summary
  4169  func (r *StateRestore) JobSummaryRestore(jobSummary *structs.JobSummary) error {
  4170  	if err := r.txn.Insert("job_summary", jobSummary); err != nil {
  4171  		return fmt.Errorf("job summary insert failed: %v", err)
  4172  	}
  4173  	return nil
  4174  }
  4175  
  4176  // JobVersionRestore is used to restore a job version
  4177  func (r *StateRestore) JobVersionRestore(version *structs.Job) error {
  4178  	if err := r.txn.Insert("job_version", version); err != nil {
  4179  		return fmt.Errorf("job version insert failed: %v", err)
  4180  	}
  4181  	return nil
  4182  }
  4183  
  4184  // DeploymentRestore is used to restore a deployment
  4185  func (r *StateRestore) DeploymentRestore(deployment *structs.Deployment) error {
  4186  	if err := r.txn.Insert("deployment", deployment); err != nil {
  4187  		return fmt.Errorf("deployment insert failed: %v", err)
  4188  	}
  4189  	return nil
  4190  }
  4191  
  4192  // VaultAccessorRestore is used to restore a vault accessor
  4193  func (r *StateRestore) VaultAccessorRestore(accessor *structs.VaultAccessor) error {
  4194  	if err := r.txn.Insert("vault_accessors", accessor); err != nil {
  4195  		return fmt.Errorf("vault accessor insert failed: %v", err)
  4196  	}
  4197  	return nil
  4198  }
  4199  
  4200  // ACLPolicyRestore is used to restore an ACL policy
  4201  func (r *StateRestore) ACLPolicyRestore(policy *structs.ACLPolicy) error {
  4202  	if err := r.txn.Insert("acl_policy", policy); err != nil {
  4203  		return fmt.Errorf("inserting acl policy failed: %v", err)
  4204  	}
  4205  	return nil
  4206  }
  4207  
  4208  // ACLTokenRestore is used to restore an ACL token
  4209  func (r *StateRestore) ACLTokenRestore(token *structs.ACLToken) error {
  4210  	if err := r.txn.Insert("acl_token", token); err != nil {
  4211  		return fmt.Errorf("inserting acl token failed: %v", err)
  4212  	}
  4213  	return nil
  4214  }
  4215  
  4216  func (r *StateRestore) SchedulerConfigRestore(schedConfig *structs.SchedulerConfiguration) error {
  4217  	if err := r.txn.Insert("scheduler_config", schedConfig); err != nil {
  4218  		return fmt.Errorf("inserting scheduler config failed: %s", err)
  4219  	}
  4220  	return nil
  4221  }
  4222  
  4223  // addEphemeralDiskToTaskGroups adds missing EphemeralDisk objects to TaskGroups
  4224  func (r *StateRestore) addEphemeralDiskToTaskGroups(job *structs.Job) {
  4225  	for _, tg := range job.TaskGroups {
  4226  		if tg.EphemeralDisk != nil {
  4227  			continue
  4228  		}
  4229  		var sizeMB int
  4230  		for _, task := range tg.Tasks {
  4231  			if task.Resources != nil {
  4232  				sizeMB += task.Resources.DiskMB
  4233  				task.Resources.DiskMB = 0
  4234  			}
  4235  		}
  4236  		tg.EphemeralDisk = &structs.EphemeralDisk{
  4237  			SizeMB: sizeMB,
  4238  		}
  4239  	}
  4240  }