github.com/anuvu/nomad@v0.8.7-atom1/nomad/state/state_store.go (about)

     1  package state
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"io"
     7  	"log"
     8  	"sort"
     9  	"time"
    10  
    11  	"github.com/hashicorp/go-memdb"
    12  	multierror "github.com/hashicorp/go-multierror"
    13  	"github.com/hashicorp/nomad/helper"
    14  	"github.com/hashicorp/nomad/nomad/structs"
    15  )
    16  
    17  // Txn is an alias for *memdb.Txn, a transaction against a state store.
    18  // It may be either a read or a write transaction.
    19  type Txn = *memdb.Txn
    20  
    21  const (
    22  	// NodeRegisterEventRegistered is the message used when the node becomes
    23  	// registered for the first time.
    24  	NodeRegisterEventRegistered = "Node registered"
    25  
    26  	// NodeRegisterEventReregistered is the message used when the node becomes
    27  	// reregistered.
    28  	NodeRegisterEventReregistered = "Node re-registered"
    29  )
    30  
    31  // IndexEntry is a row of the "index" table. Key names a table and Value
    32  // holds the latest Raft index affecting that table.
    33  type IndexEntry struct {
    34  	Key   string
    35  	Value uint64
    36  }
    37  
    38  // StateStoreConfig holds the settings used to configure a new state store.
    39  type StateStoreConfig struct {
    40  	// LogOutput is the writer that receives the state store's log output.
    41  	LogOutput io.Writer
    42  
    43  	// Region is the region of the server embedding the state store.
    44  	Region string
    45  }
    46  
    47  // StateStore is responsible for maintaining all the Nomad
    48  // state. It is manipulated by the FSM which maintains consistency
    49  // through the use of Raft. The goals of the StateStore are to provide
    50  // high concurrency for read operations without blocking writes, and
    51  // to provide write availability in the face of reads. EVERY object
    52  // returned as a result of a read against the state store should be
    53  // considered a constant and NEVER modified in place.
    54  type StateStore struct {
    55  	logger *log.Logger // logger is built in NewStateStore from config.LogOutput
    56  	db     *memdb.MemDB // db is the MemDB that all transactions are opened on
    57  
    58  	// config is the configuration passed to NewStateStore.
    59  	config *StateStoreConfig
    60  
    61  	// abandonCh is used to signal watchers that this state store has been
    62  	// abandoned (usually during a restore). It is never sent on, only closed.
    63  	abandonCh chan struct{}
    64  }
    65  
    66  // NewStateStore is used to create a new state store
    67  func NewStateStore(config *StateStoreConfig) (*StateStore, error) {
    68  	// Create the MemDB
    69  	db, err := memdb.NewMemDB(stateStoreSchema())
    70  	if err != nil {
    71  		return nil, fmt.Errorf("state store setup failed: %v", err)
    72  	}
    73  
    74  	// Create the state store
    75  	s := &StateStore{
    76  		logger:    log.New(config.LogOutput, "", log.LstdFlags),
    77  		db:        db,
    78  		config:    config,
    79  		abandonCh: make(chan struct{}),
    80  	}
    81  	return s, nil
    82  }
    83  
    84  // Config returns the state store configuration.
    85  func (s *StateStore) Config() *StateStoreConfig {
    86  	return s.config
    87  }
    88  
    89  // Snapshot is used to create a point in time snapshot. Because
    90  // we use MemDB, we just need to snapshot the state of the underlying
    91  // database.
    92  func (s *StateStore) Snapshot() (*StateSnapshot, error) {
    93  	snap := &StateSnapshot{
    94  		StateStore: StateStore{
    95  			logger: s.logger,
    96  			config: s.config,
    97  			db:     s.db.Snapshot(),
    98  		},
    99  	}
   100  	return snap, nil
   101  }
   102  
   103  // Restore is used to optimize the efficiency of rebuilding
   104  // state by minimizing the number of transactions and checking
   105  // overhead.
   106  func (s *StateStore) Restore() (*StateRestore, error) {
   107  	txn := s.db.Txn(true)
   108  	r := &StateRestore{
   109  		txn: txn,
   110  	}
   111  	return r, nil
   112  }
   113  
   114  // AbandonCh returns a channel you can wait on to know if the state store was
   115  // abandoned.
   116  func (s *StateStore) AbandonCh() <-chan struct{} {
   117  	return s.abandonCh
   118  }
   119  
   120  // Abandon is used to signal that the given state store has been abandoned.
   121  // Calling this more than one time will panic.
   122  func (s *StateStore) Abandon() {
   123  	close(s.abandonCh)
   124  }
   125  
   126  // QueryFn is the signature of a function that implements a basic blocking query
   127  // against the state store; it returns the response, its index and any error.
   128  type QueryFn func(memdb.WatchSet, *StateStore) (resp interface{}, index uint64, err error)
   129  
   130  // BlockingQuery takes a query function and runs the function until the minimum
   131  // query index is met or until the passed context is cancelled.
   132  func (s *StateStore) BlockingQuery(query QueryFn, minIndex uint64, ctx context.Context) (
   133  	resp interface{}, index uint64, err error) {
   134  
   135  RUN_QUERY:
   136  	// We capture the state store and its abandon channel but pass a snapshot to
   137  	// the blocking query function. We operate on the snapshot to allow separate
   138  	// calls to the state store not all wrapped within the same transaction.
   139  	abandonCh := s.AbandonCh()
   140  	snap, _ := s.Snapshot()
   141  	stateSnap := &snap.StateStore
   142  
   143  	// We can skip all watch tracking if this isn't a blocking query.
   144  	var ws memdb.WatchSet
   145  	if minIndex > 0 {
   146  		ws = memdb.NewWatchSet()
   147  
   148  		// This channel will be closed if a snapshot is restored and the
   149  		// whole state store is abandoned.
   150  		ws.Add(abandonCh)
   151  	}
   152  
   153  	resp, index, err = query(ws, stateSnap)
   154  	if err != nil {
   155  		return nil, index, err
   156  	}
   157  
   158  	// We haven't reached the min-index yet.
   159  	if minIndex > 0 && index <= minIndex {
   160  		if err := ws.WatchCtx(ctx); err != nil {
   161  			return nil, index, err
   162  		}
   163  
   164  		goto RUN_QUERY
   165  	}
   166  
   167  	return resp, index, nil
   168  }
   169  
   170  // UpsertPlanResults is used to upsert the results of a plan.
   171  func (s *StateStore) UpsertPlanResults(index uint64, results *structs.ApplyPlanResultsRequest) error {
   172  	txn := s.db.Txn(true)
   173  	defer txn.Abort()
   174  
   175  	// Upsert the newly created or updated deployment
   176  	if results.Deployment != nil {
   177  		if err := s.upsertDeploymentImpl(index, results.Deployment, txn); err != nil {
   178  			return err
   179  		}
   180  	}
   181  
   182  	// Update the status of deployments effected by the plan.
   183  	if len(results.DeploymentUpdates) != 0 {
   184  		s.upsertDeploymentUpdates(index, results.DeploymentUpdates, txn)
   185  	}
   186  
   187  	// Attach the job to all the allocations. It is pulled out in the payload to
   188  	// avoid the redundancy of encoding, but should be denormalized prior to
   189  	// being inserted into MemDB.
   190  	structs.DenormalizeAllocationJobs(results.Job, results.Alloc)
   191  
   192  	// Calculate the total resources of allocations. It is pulled out in the
   193  	// payload to avoid encoding something that can be computed, but should be
   194  	// denormalized prior to being inserted into MemDB.
   195  	for _, alloc := range results.Alloc {
   196  		if alloc.Resources != nil {
   197  			continue
   198  		}
   199  
   200  		alloc.Resources = new(structs.Resources)
   201  		for _, task := range alloc.TaskResources {
   202  			alloc.Resources.Add(task)
   203  		}
   204  
   205  		// Add the shared resources
   206  		alloc.Resources.Add(alloc.SharedResources)
   207  	}
   208  
   209  	// Upsert the allocations
   210  	if err := s.upsertAllocsImpl(index, results.Alloc, txn); err != nil {
   211  		return err
   212  	}
   213  
   214  	// COMPAT: Nomad versions before 0.7.1 did not include the eval ID when
   215  	// applying the plan. Thus while we are upgrading, we ignore updating the
   216  	// modify index of evaluations from older plans.
   217  	if results.EvalID != "" {
   218  		// Update the modify index of the eval id
   219  		if err := s.updateEvalModifyIndex(txn, index, results.EvalID); err != nil {
   220  			return err
   221  		}
   222  	}
   223  
   224  	txn.Commit()
   225  	return nil
   226  }
   227  
   228  // upsertDeploymentUpdates updates the deployments given the passed status
   229  // updates.
   230  func (s *StateStore) upsertDeploymentUpdates(index uint64, updates []*structs.DeploymentStatusUpdate, txn *memdb.Txn) error {
   231  	for _, u := range updates {
   232  		if err := s.updateDeploymentStatusImpl(index, u, txn); err != nil {
   233  			return err
   234  		}
   235  	}
   236  
   237  	return nil
   238  }
   239  
   240  // UpsertJobSummary upserts a job summary into the state store.
   241  func (s *StateStore) UpsertJobSummary(index uint64, jobSummary *structs.JobSummary) error {
   242  	txn := s.db.Txn(true)
   243  	defer txn.Abort()
   244  
   245  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
   246  	if jobSummary.Namespace == "" {
   247  		jobSummary.Namespace = structs.DefaultNamespace
   248  	}
   249  
   250  	// Check if the job summary already exists
   251  	existing, err := txn.First("job_summary", "id", jobSummary.Namespace, jobSummary.JobID)
   252  	if err != nil {
   253  		return fmt.Errorf("job summary lookup failed: %v", err)
   254  	}
   255  
   256  	// Setup the indexes correctly
   257  	if existing != nil {
   258  		jobSummary.CreateIndex = existing.(*structs.JobSummary).CreateIndex
   259  		jobSummary.ModifyIndex = index
   260  	} else {
   261  		jobSummary.CreateIndex = index
   262  		jobSummary.ModifyIndex = index
   263  	}
   264  
   265  	// Update the index
   266  	if err := txn.Insert("job_summary", jobSummary); err != nil {
   267  		return err
   268  	}
   269  
   270  	// Update the indexes table for job summary
   271  	if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil {
   272  		return fmt.Errorf("index update failed: %v", err)
   273  	}
   274  
   275  	txn.Commit()
   276  	return nil
   277  }
   278  
   279  // DeleteJobSummary deletes the job summary with the given ID. This is for
   280  // testing purposes only.
   281  func (s *StateStore) DeleteJobSummary(index uint64, namespace, id string) error {
   282  	txn := s.db.Txn(true)
   283  	defer txn.Abort()
   284  
   285  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
   286  	if namespace == "" {
   287  		namespace = structs.DefaultNamespace
   288  	}
   289  
   290  	// Delete the job summary
   291  	if _, err := txn.DeleteAll("job_summary", "id", namespace, id); err != nil {
   292  		return fmt.Errorf("deleting job summary failed: %v", err)
   293  	}
   294  	if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil {
   295  		return fmt.Errorf("index update failed: %v", err)
   296  	}
   297  	txn.Commit()
   298  	return nil
   299  }
   300  
   301  // UpsertDeployment is used to insert a new deployment. If cancelPrior is set to
   302  // true, all prior deployments for the same job will be cancelled.
   303  func (s *StateStore) UpsertDeployment(index uint64, deployment *structs.Deployment) error {
   304  	txn := s.db.Txn(true)
   305  	defer txn.Abort()
   306  	if err := s.upsertDeploymentImpl(index, deployment, txn); err != nil {
   307  		return err
   308  	}
   309  	txn.Commit()
   310  	return nil
   311  }
   312  
   313  func (s *StateStore) upsertDeploymentImpl(index uint64, deployment *structs.Deployment, txn *memdb.Txn) error {
   314  	// Check if the deployment already exists
   315  	existing, err := txn.First("deployment", "id", deployment.ID)
   316  	if err != nil {
   317  		return fmt.Errorf("deployment lookup failed: %v", err)
   318  	}
   319  
   320  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
   321  	if deployment.Namespace == "" {
   322  		deployment.Namespace = structs.DefaultNamespace
   323  	}
   324  
   325  	// Setup the indexes correctly
   326  	if existing != nil {
   327  		deployment.CreateIndex = existing.(*structs.Deployment).CreateIndex
   328  		deployment.ModifyIndex = index
   329  	} else {
   330  		deployment.CreateIndex = index
   331  		deployment.ModifyIndex = index
   332  	}
   333  
   334  	// Insert the deployment
   335  	if err := txn.Insert("deployment", deployment); err != nil {
   336  		return err
   337  	}
   338  
   339  	// Update the indexes table for deployment
   340  	if err := txn.Insert("index", &IndexEntry{"deployment", index}); err != nil {
   341  		return fmt.Errorf("index update failed: %v", err)
   342  	}
   343  
   344  	// If the deployment is being marked as complete, set the job to stable.
   345  	if deployment.Status == structs.DeploymentStatusSuccessful {
   346  		if err := s.updateJobStabilityImpl(index, deployment.Namespace, deployment.JobID, deployment.JobVersion, true, txn); err != nil {
   347  			return fmt.Errorf("failed to update job stability: %v", err)
   348  		}
   349  	}
   350  
   351  	return nil
   352  }
   353  
   354  func (s *StateStore) Deployments(ws memdb.WatchSet) (memdb.ResultIterator, error) {
   355  	txn := s.db.Txn(false)
   356  
   357  	// Walk the entire deployments table
   358  	iter, err := txn.Get("deployment", "id")
   359  	if err != nil {
   360  		return nil, err
   361  	}
   362  
   363  	ws.Add(iter.WatchCh())
   364  	return iter, nil
   365  }
   366  
   367  func (s *StateStore) DeploymentsByNamespace(ws memdb.WatchSet, namespace string) (memdb.ResultIterator, error) {
   368  	txn := s.db.Txn(false)
   369  
   370  	// Walk the entire deployments table
   371  	iter, err := txn.Get("deployment", "namespace", namespace)
   372  	if err != nil {
   373  		return nil, err
   374  	}
   375  
   376  	ws.Add(iter.WatchCh())
   377  	return iter, nil
   378  }
   379  
   380  func (s *StateStore) DeploymentsByIDPrefix(ws memdb.WatchSet, namespace, deploymentID string) (memdb.ResultIterator, error) {
   381  	txn := s.db.Txn(false)
   382  
   383  	// Walk the entire deployments table
   384  	iter, err := txn.Get("deployment", "id_prefix", deploymentID)
   385  	if err != nil {
   386  		return nil, err
   387  	}
   388  
   389  	ws.Add(iter.WatchCh())
   390  
   391  	// Wrap the iterator in a filter
   392  	wrap := memdb.NewFilterIterator(iter, deploymentNamespaceFilter(namespace))
   393  	return wrap, nil
   394  }
   395  
   396  // deploymentNamespaceFilter returns a filter function that filters all
   397  // deployment not in the given namespace.
   398  func deploymentNamespaceFilter(namespace string) func(interface{}) bool {
   399  	return func(raw interface{}) bool {
   400  		d, ok := raw.(*structs.Deployment)
   401  		if !ok {
   402  			return true
   403  		}
   404  
   405  		return d.Namespace != namespace
   406  	}
   407  }
   408  
   409  func (s *StateStore) DeploymentByID(ws memdb.WatchSet, deploymentID string) (*structs.Deployment, error) {
   410  	txn := s.db.Txn(false)
   411  	return s.deploymentByIDImpl(ws, deploymentID, txn)
   412  }
   413  
   414  func (s *StateStore) deploymentByIDImpl(ws memdb.WatchSet, deploymentID string, txn *memdb.Txn) (*structs.Deployment, error) {
   415  	watchCh, existing, err := txn.FirstWatch("deployment", "id", deploymentID)
   416  	if err != nil {
   417  		return nil, fmt.Errorf("deployment lookup failed: %v", err)
   418  	}
   419  	ws.Add(watchCh)
   420  
   421  	if existing != nil {
   422  		return existing.(*structs.Deployment), nil
   423  	}
   424  
   425  	return nil, nil
   426  }
   427  
   428  func (s *StateStore) DeploymentsByJobID(ws memdb.WatchSet, namespace, jobID string) ([]*structs.Deployment, error) {
   429  	txn := s.db.Txn(false)
   430  
   431  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
   432  	if namespace == "" {
   433  		namespace = structs.DefaultNamespace
   434  	}
   435  
   436  	// Get an iterator over the deployments
   437  	iter, err := txn.Get("deployment", "job", namespace, jobID)
   438  	if err != nil {
   439  		return nil, err
   440  	}
   441  
   442  	ws.Add(iter.WatchCh())
   443  
   444  	var out []*structs.Deployment
   445  	for {
   446  		raw := iter.Next()
   447  		if raw == nil {
   448  			break
   449  		}
   450  
   451  		d := raw.(*structs.Deployment)
   452  		out = append(out, d)
   453  	}
   454  
   455  	return out, nil
   456  }
   457  
   458  // LatestDeploymentByJobID returns the latest deployment for the given job. The
   459  // latest is determined strictly by CreateIndex.
   460  func (s *StateStore) LatestDeploymentByJobID(ws memdb.WatchSet, namespace, jobID string) (*structs.Deployment, error) {
   461  	txn := s.db.Txn(false)
   462  
   463  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
   464  	if namespace == "" {
   465  		namespace = structs.DefaultNamespace
   466  	}
   467  
   468  	// Get an iterator over the deployments
   469  	iter, err := txn.Get("deployment", "job", namespace, jobID)
   470  	if err != nil {
   471  		return nil, err
   472  	}
   473  
   474  	ws.Add(iter.WatchCh())
   475  
   476  	var out *structs.Deployment
   477  	for {
   478  		raw := iter.Next()
   479  		if raw == nil {
   480  			break
   481  		}
   482  
   483  		d := raw.(*structs.Deployment)
   484  		if out == nil || out.CreateIndex < d.CreateIndex {
   485  			out = d
   486  		}
   487  	}
   488  
   489  	return out, nil
   490  }
   491  
   492  // DeleteDeployment is used to delete a set of deployments by ID
   493  func (s *StateStore) DeleteDeployment(index uint64, deploymentIDs []string) error {
   494  	txn := s.db.Txn(true)
   495  	defer txn.Abort()
   496  
   497  	if len(deploymentIDs) == 0 {
   498  		return nil
   499  	}
   500  
   501  	for _, deploymentID := range deploymentIDs {
   502  		// Lookup the deployment
   503  		existing, err := txn.First("deployment", "id", deploymentID)
   504  		if err != nil {
   505  			return fmt.Errorf("deployment lookup failed: %v", err)
   506  		}
   507  		if existing == nil {
   508  			return fmt.Errorf("deployment not found")
   509  		}
   510  
   511  		// Delete the deployment
   512  		if err := txn.Delete("deployment", existing); err != nil {
   513  			return fmt.Errorf("deployment delete failed: %v", err)
   514  		}
   515  	}
   516  
   517  	if err := txn.Insert("index", &IndexEntry{"deployment", index}); err != nil {
   518  		return fmt.Errorf("index update failed: %v", err)
   519  	}
   520  
   521  	txn.Commit()
   522  	return nil
   523  }
   524  
   525  // UpsertNode is used to register a node or update a node definition
   526  // This is assumed to be triggered by the client, so we retain the value
   527  // of drain/eligibility which is set by the scheduler.
   528  func (s *StateStore) UpsertNode(index uint64, node *structs.Node) error {
   529  	txn := s.db.Txn(true)
   530  	defer txn.Abort()
   531  
   532  	// Check if the node already exists
   533  	existing, err := txn.First("nodes", "id", node.ID)
   534  	if err != nil {
   535  		return fmt.Errorf("node lookup failed: %v", err)
   536  	}
   537  
   538  	// Setup the indexes correctly
   539  	if existing != nil {
   540  		exist := existing.(*structs.Node)
   541  		node.CreateIndex = exist.CreateIndex
   542  		node.ModifyIndex = index
   543  
   544  		// Retain node events that have already been set on the node
   545  		node.Events = exist.Events
   546  
   547  		// If we are transitioning from down, record the re-registration
   548  		if exist.Status == structs.NodeStatusDown && node.Status != structs.NodeStatusDown {
   549  			appendNodeEvents(index, node, []*structs.NodeEvent{
   550  				structs.NewNodeEvent().SetSubsystem(structs.NodeEventSubsystemCluster).
   551  					SetMessage(NodeRegisterEventReregistered).
   552  					SetTimestamp(time.Unix(node.StatusUpdatedAt, 0))})
   553  		}
   554  
   555  		node.Drain = exist.Drain                                 // Retain the drain mode
   556  		node.SchedulingEligibility = exist.SchedulingEligibility // Retain the eligibility
   557  		node.DrainStrategy = exist.DrainStrategy                 // Retain the drain strategy
   558  	} else {
   559  		// Because this is the first time the node is being registered, we should
   560  		// also create a node registration event
   561  		nodeEvent := structs.NewNodeEvent().SetSubsystem(structs.NodeEventSubsystemCluster).
   562  			SetMessage(NodeRegisterEventRegistered).
   563  			SetTimestamp(time.Unix(node.StatusUpdatedAt, 0))
   564  		node.Events = []*structs.NodeEvent{nodeEvent}
   565  		node.CreateIndex = index
   566  		node.ModifyIndex = index
   567  	}
   568  
   569  	// Insert the node
   570  	if err := txn.Insert("nodes", node); err != nil {
   571  		return fmt.Errorf("node insert failed: %v", err)
   572  	}
   573  	if err := txn.Insert("index", &IndexEntry{"nodes", index}); err != nil {
   574  		return fmt.Errorf("index update failed: %v", err)
   575  	}
   576  
   577  	txn.Commit()
   578  	return nil
   579  }
   580  
   581  // DeleteNode is used to deregister a node
   582  func (s *StateStore) DeleteNode(index uint64, nodeID string) error {
   583  	txn := s.db.Txn(true)
   584  	defer txn.Abort()
   585  
   586  	// Lookup the node
   587  	existing, err := txn.First("nodes", "id", nodeID)
   588  	if err != nil {
   589  		return fmt.Errorf("node lookup failed: %v", err)
   590  	}
   591  	if existing == nil {
   592  		return fmt.Errorf("node not found")
   593  	}
   594  
   595  	// Delete the node
   596  	if err := txn.Delete("nodes", existing); err != nil {
   597  		return fmt.Errorf("node delete failed: %v", err)
   598  	}
   599  	if err := txn.Insert("index", &IndexEntry{"nodes", index}); err != nil {
   600  		return fmt.Errorf("index update failed: %v", err)
   601  	}
   602  
   603  	txn.Commit()
   604  	return nil
   605  }
   606  
   607  // UpdateNodeStatus is used to update the status of a node
   608  func (s *StateStore) UpdateNodeStatus(index uint64, nodeID, status string, event *structs.NodeEvent) error {
   609  	txn := s.db.Txn(true)
   610  	defer txn.Abort()
   611  
   612  	// Lookup the node
   613  	existing, err := txn.First("nodes", "id", nodeID)
   614  	if err != nil {
   615  		return fmt.Errorf("node lookup failed: %v", err)
   616  	}
   617  	if existing == nil {
   618  		return fmt.Errorf("node not found")
   619  	}
   620  
   621  	// Copy the existing node
   622  	existingNode := existing.(*structs.Node)
   623  	copyNode := existingNode.Copy()
   624  
   625  	// Add the event if given
   626  	if event != nil {
   627  		appendNodeEvents(index, copyNode, []*structs.NodeEvent{event})
   628  	}
   629  
   630  	// Update the status in the copy
   631  	copyNode.Status = status
   632  	copyNode.ModifyIndex = index
   633  
   634  	// Insert the node
   635  	if err := txn.Insert("nodes", copyNode); err != nil {
   636  		return fmt.Errorf("node update failed: %v", err)
   637  	}
   638  	if err := txn.Insert("index", &IndexEntry{"nodes", index}); err != nil {
   639  		return fmt.Errorf("index update failed: %v", err)
   640  	}
   641  
   642  	txn.Commit()
   643  	return nil
   644  }
   645  
   646  // BatchUpdateNodeDrain is used to update the drain of a node set of nodes
   647  func (s *StateStore) BatchUpdateNodeDrain(index uint64, updates map[string]*structs.DrainUpdate, events map[string]*structs.NodeEvent) error {
   648  	txn := s.db.Txn(true)
   649  	defer txn.Abort()
   650  	for node, update := range updates {
   651  		if err := s.updateNodeDrainImpl(txn, index, node, update.DrainStrategy, update.MarkEligible, events[node]); err != nil {
   652  			return err
   653  		}
   654  	}
   655  	txn.Commit()
   656  	return nil
   657  }
   658  
   659  // UpdateNodeDrain is used to update the drain of a node
   660  func (s *StateStore) UpdateNodeDrain(index uint64, nodeID string,
   661  	drain *structs.DrainStrategy, markEligible bool, event *structs.NodeEvent) error {
   662  
   663  	txn := s.db.Txn(true)
   664  	defer txn.Abort()
   665  	if err := s.updateNodeDrainImpl(txn, index, nodeID, drain, markEligible, event); err != nil {
   666  		return err
   667  	}
   668  	txn.Commit()
   669  	return nil
   670  }
   671  
   672  func (s *StateStore) updateNodeDrainImpl(txn *memdb.Txn, index uint64, nodeID string,
   673  	drain *structs.DrainStrategy, markEligible bool, event *structs.NodeEvent) error {
   674  
   675  	// Lookup the node
   676  	existing, err := txn.First("nodes", "id", nodeID)
   677  	if err != nil {
   678  		return fmt.Errorf("node lookup failed: %v", err)
   679  	}
   680  	if existing == nil {
   681  		return fmt.Errorf("node not found")
   682  	}
   683  
   684  	// Copy the existing node
   685  	existingNode := existing.(*structs.Node)
   686  	copyNode := existingNode.Copy()
   687  
   688  	// Add the event if given
   689  	if event != nil {
   690  		appendNodeEvents(index, copyNode, []*structs.NodeEvent{event})
   691  	}
   692  
   693  	// Update the drain in the copy
   694  	copyNode.Drain = drain != nil // COMPAT: Remove in Nomad 0.9
   695  	copyNode.DrainStrategy = drain
   696  	if drain != nil {
   697  		copyNode.SchedulingEligibility = structs.NodeSchedulingIneligible
   698  	} else if markEligible {
   699  		copyNode.SchedulingEligibility = structs.NodeSchedulingEligible
   700  	}
   701  
   702  	copyNode.ModifyIndex = index
   703  
   704  	// Insert the node
   705  	if err := txn.Insert("nodes", copyNode); err != nil {
   706  		return fmt.Errorf("node update failed: %v", err)
   707  	}
   708  	if err := txn.Insert("index", &IndexEntry{"nodes", index}); err != nil {
   709  		return fmt.Errorf("index update failed: %v", err)
   710  	}
   711  
   712  	return nil
   713  }
   714  
   715  // UpdateNodeEligibility is used to update the scheduling eligibility of a node
   716  func (s *StateStore) UpdateNodeEligibility(index uint64, nodeID string, eligibility string, event *structs.NodeEvent) error {
   717  
   718  	txn := s.db.Txn(true)
   719  	defer txn.Abort()
   720  
   721  	// Lookup the node
   722  	existing, err := txn.First("nodes", "id", nodeID)
   723  	if err != nil {
   724  		return fmt.Errorf("node lookup failed: %v", err)
   725  	}
   726  	if existing == nil {
   727  		return fmt.Errorf("node not found")
   728  	}
   729  
   730  	// Copy the existing node
   731  	existingNode := existing.(*structs.Node)
   732  	copyNode := existingNode.Copy()
   733  
   734  	// Add the event if given
   735  	if event != nil {
   736  		appendNodeEvents(index, copyNode, []*structs.NodeEvent{event})
   737  	}
   738  
   739  	// Check if this is a valid action
   740  	if copyNode.DrainStrategy != nil && eligibility == structs.NodeSchedulingEligible {
   741  		return fmt.Errorf("can not set node's scheduling eligibility to eligible while it is draining")
   742  	}
   743  
   744  	// Update the eligibility in the copy
   745  	copyNode.SchedulingEligibility = eligibility
   746  	copyNode.ModifyIndex = index
   747  
   748  	// Insert the node
   749  	if err := txn.Insert("nodes", copyNode); err != nil {
   750  		return fmt.Errorf("node update failed: %v", err)
   751  	}
   752  	if err := txn.Insert("index", &IndexEntry{"nodes", index}); err != nil {
   753  		return fmt.Errorf("index update failed: %v", err)
   754  	}
   755  
   756  	txn.Commit()
   757  	return nil
   758  }
   759  
   760  // UpsertNodeEvents adds the node events to the nodes, rotating events as
   761  // necessary.
   762  func (s *StateStore) UpsertNodeEvents(index uint64, nodeEvents map[string][]*structs.NodeEvent) error {
   763  	txn := s.db.Txn(true)
   764  	defer txn.Abort()
   765  
   766  	for nodeID, events := range nodeEvents {
   767  		if err := s.upsertNodeEvents(index, nodeID, events, txn); err != nil {
   768  			return err
   769  		}
   770  	}
   771  
   772  	txn.Commit()
   773  	return nil
   774  }
   775  
   776  // upsertNodeEvent upserts a node event for a respective node. It also maintains
   777  // that a fixed number of node events are ever stored simultaneously, deleting
   778  // older events once this bound has been reached.
   779  func (s *StateStore) upsertNodeEvents(index uint64, nodeID string, events []*structs.NodeEvent, txn *memdb.Txn) error {
   780  	// Lookup the node
   781  	existing, err := txn.First("nodes", "id", nodeID)
   782  	if err != nil {
   783  		return fmt.Errorf("node lookup failed: %v", err)
   784  	}
   785  	if existing == nil {
   786  		return fmt.Errorf("node not found")
   787  	}
   788  
   789  	// Copy the existing node
   790  	existingNode := existing.(*structs.Node)
   791  	copyNode := existingNode.Copy()
   792  	appendNodeEvents(index, copyNode, events)
   793  
   794  	// Insert the node
   795  	if err := txn.Insert("nodes", copyNode); err != nil {
   796  		return fmt.Errorf("node update failed: %v", err)
   797  	}
   798  	if err := txn.Insert("index", &IndexEntry{"nodes", index}); err != nil {
   799  		return fmt.Errorf("index update failed: %v", err)
   800  	}
   801  
   802  	return nil
   803  }
   804  
   805  // appendNodeEvents is a helper that takes a node and new events and appends
   806  // them, pruning older events as needed.
   807  func appendNodeEvents(index uint64, node *structs.Node, events []*structs.NodeEvent) {
   808  	// Add the events, updating the indexes
   809  	for _, e := range events {
   810  		e.CreateIndex = index
   811  		node.Events = append(node.Events, e)
   812  	}
   813  
   814  	// Keep node events pruned to not exceed the max allowed
   815  	if l := len(node.Events); l > structs.MaxRetainedNodeEvents {
   816  		delta := l - structs.MaxRetainedNodeEvents
   817  		node.Events = node.Events[delta:]
   818  	}
   819  }
   820  
   821  // NodeByID is used to lookup a node by ID
   822  func (s *StateStore) NodeByID(ws memdb.WatchSet, nodeID string) (*structs.Node, error) {
   823  	txn := s.db.Txn(false)
   824  
   825  	watchCh, existing, err := txn.FirstWatch("nodes", "id", nodeID)
   826  	if err != nil {
   827  		return nil, fmt.Errorf("node lookup failed: %v", err)
   828  	}
   829  	ws.Add(watchCh)
   830  
   831  	if existing != nil {
   832  		return existing.(*structs.Node), nil
   833  	}
   834  	return nil, nil
   835  }
   836  
   837  // NodesByIDPrefix is used to lookup nodes by prefix
   838  func (s *StateStore) NodesByIDPrefix(ws memdb.WatchSet, nodeID string) (memdb.ResultIterator, error) {
   839  	txn := s.db.Txn(false)
   840  
   841  	iter, err := txn.Get("nodes", "id_prefix", nodeID)
   842  	if err != nil {
   843  		return nil, fmt.Errorf("node lookup failed: %v", err)
   844  	}
   845  	ws.Add(iter.WatchCh())
   846  
   847  	return iter, nil
   848  }
   849  
   850  // NodeBySecretID is used to lookup a node by SecretID
   851  func (s *StateStore) NodeBySecretID(ws memdb.WatchSet, secretID string) (*structs.Node, error) {
   852  	txn := s.db.Txn(false)
   853  
   854  	watchCh, existing, err := txn.FirstWatch("nodes", "secret_id", secretID)
   855  	if err != nil {
   856  		return nil, fmt.Errorf("node lookup by SecretID failed: %v", err)
   857  	}
   858  	ws.Add(watchCh)
   859  
   860  	if existing != nil {
   861  		return existing.(*structs.Node), nil
   862  	}
   863  	return nil, nil
   864  }
   865  
   866  // Nodes returns an iterator over all the nodes
   867  func (s *StateStore) Nodes(ws memdb.WatchSet) (memdb.ResultIterator, error) {
   868  	txn := s.db.Txn(false)
   869  
   870  	// Walk the entire nodes table
   871  	iter, err := txn.Get("nodes", "id")
   872  	if err != nil {
   873  		return nil, err
   874  	}
   875  	ws.Add(iter.WatchCh())
   876  	return iter, nil
   877  }
   878  
   879  // UpsertJob is used to register a job or update a job definition
   880  func (s *StateStore) UpsertJob(index uint64, job *structs.Job) error {
   881  	txn := s.db.Txn(true)
   882  	defer txn.Abort()
   883  	if err := s.upsertJobImpl(index, job, false, txn); err != nil {
   884  		return err
   885  	}
   886  	txn.Commit()
   887  	return nil
   888  }
   889  
   890  // UpsertJobTxn is used to register a job or update a job definition, like UpsertJob,
   891  // but in a transcation.  Useful for when making multiple modifications atomically
   892  func (s *StateStore) UpsertJobTxn(index uint64, job *structs.Job, txn Txn) error {
   893  	return s.upsertJobImpl(index, job, false, txn)
   894  }
   895  
   896  // upsertJobImpl is the implementation for registering a job or updating a job definition
   897  func (s *StateStore) upsertJobImpl(index uint64, job *structs.Job, keepVersion bool, txn *memdb.Txn) error {
   898  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
   899  	if job.Namespace == "" {
   900  		job.Namespace = structs.DefaultNamespace
   901  	}
   902  
   903  	// Assert the namespace exists
   904  	if exists, err := s.namespaceExists(txn, job.Namespace); err != nil {
   905  		return err
   906  	} else if !exists {
   907  		return fmt.Errorf("job %q is in nonexistent namespace %q", job.ID, job.Namespace)
   908  	}
   909  
   910  	// Check if the job already exists
   911  	existing, err := txn.First("jobs", "id", job.Namespace, job.ID)
   912  	if err != nil {
   913  		return fmt.Errorf("job lookup failed: %v", err)
   914  	}
   915  
   916  	// Setup the indexes correctly
   917  	if existing != nil {
   918  		job.CreateIndex = existing.(*structs.Job).CreateIndex
   919  		job.ModifyIndex = index
   920  
   921  		// Bump the version unless asked to keep it. This should only be done
   922  		// when changing an internal field such as Stable. A spec change should
   923  		// always come with a version bump
   924  		if !keepVersion {
   925  			job.JobModifyIndex = index
   926  			job.Version = existing.(*structs.Job).Version + 1
   927  		}
   928  
   929  		// Compute the job status
   930  		var err error
   931  		job.Status, err = s.getJobStatus(txn, job, false)
   932  		if err != nil {
   933  			return fmt.Errorf("setting job status for %q failed: %v", job.ID, err)
   934  		}
   935  	} else {
   936  		job.CreateIndex = index
   937  		job.ModifyIndex = index
   938  		job.JobModifyIndex = index
   939  		job.Version = 0
   940  
   941  		if err := s.setJobStatus(index, txn, job, false, ""); err != nil {
   942  			return fmt.Errorf("setting job status for %q failed: %v", job.ID, err)
   943  		}
   944  
   945  		// Have to get the job again since it could have been updated
   946  		updated, err := txn.First("jobs", "id", job.Namespace, job.ID)
   947  		if err != nil {
   948  			return fmt.Errorf("job lookup failed: %v", err)
   949  		}
   950  		if updated != nil {
   951  			job = updated.(*structs.Job)
   952  		}
   953  	}
   954  
   955  	if err := s.updateSummaryWithJob(index, job, txn); err != nil {
   956  		return fmt.Errorf("unable to create job summary: %v", err)
   957  	}
   958  
   959  	if err := s.upsertJobVersion(index, job, txn); err != nil {
   960  		return fmt.Errorf("unable to upsert job into job_version table: %v", err)
   961  	}
   962  
   963  	// Create the EphemeralDisk if it's nil by adding up DiskMB from task resources.
   964  	// COMPAT 0.4.1 -> 0.5
   965  	s.addEphemeralDiskToTaskGroups(job)
   966  
   967  	// Insert the job
   968  	if err := txn.Insert("jobs", job); err != nil {
   969  		return fmt.Errorf("job insert failed: %v", err)
   970  	}
   971  	if err := txn.Insert("index", &IndexEntry{"jobs", index}); err != nil {
   972  		return fmt.Errorf("index update failed: %v", err)
   973  	}
   974  
   975  	return nil
   976  }
   977  
   978  // DeleteJob is used to deregister a job
   979  func (s *StateStore) DeleteJob(index uint64, namespace, jobID string) error {
   980  	txn := s.db.Txn(true)
   981  	defer txn.Abort()
   982  
   983  	err := s.DeleteJobTxn(index, namespace, jobID, txn)
   984  	if err == nil {
   985  		txn.Commit()
   986  	}
   987  	return err
   988  }
   989  
   990  // DeleteJobTxn is used to deregister a job, like DeleteJob,
   991  // but in a transcation.  Useful for when making multiple modifications atomically
   992  func (s *StateStore) DeleteJobTxn(index uint64, namespace, jobID string, txn Txn) error {
   993  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
   994  	if namespace == "" {
   995  		namespace = structs.DefaultNamespace
   996  	}
   997  
   998  	// Lookup the node
   999  	existing, err := txn.First("jobs", "id", namespace, jobID)
  1000  	if err != nil {
  1001  		return fmt.Errorf("job lookup failed: %v", err)
  1002  	}
  1003  	if existing == nil {
  1004  		return fmt.Errorf("job not found")
  1005  	}
  1006  
  1007  	// Check if we should update a parent job summary
  1008  	job := existing.(*structs.Job)
  1009  	if job.ParentID != "" {
  1010  		summaryRaw, err := txn.First("job_summary", "id", namespace, job.ParentID)
  1011  		if err != nil {
  1012  			return fmt.Errorf("unable to retrieve summary for parent job: %v", err)
  1013  		}
  1014  
  1015  		// Only continue if the summary exists. It could not exist if the parent
  1016  		// job was removed
  1017  		if summaryRaw != nil {
  1018  			existing := summaryRaw.(*structs.JobSummary)
  1019  			pSummary := existing.Copy()
  1020  			if pSummary.Children != nil {
  1021  
  1022  				modified := false
  1023  				switch job.Status {
  1024  				case structs.JobStatusPending:
  1025  					pSummary.Children.Pending--
  1026  					pSummary.Children.Dead++
  1027  					modified = true
  1028  				case structs.JobStatusRunning:
  1029  					pSummary.Children.Running--
  1030  					pSummary.Children.Dead++
  1031  					modified = true
  1032  				case structs.JobStatusDead:
  1033  				default:
  1034  					return fmt.Errorf("unknown old job status %q", job.Status)
  1035  				}
  1036  
  1037  				if modified {
  1038  					// Update the modify index
  1039  					pSummary.ModifyIndex = index
  1040  
  1041  					// COMPAT 0.7: Upgrade old objects that do not have namespaces
  1042  					if pSummary.Namespace == "" {
  1043  						pSummary.Namespace = structs.DefaultNamespace
  1044  					}
  1045  
  1046  					// Insert the summary
  1047  					if err := txn.Insert("job_summary", pSummary); err != nil {
  1048  						return fmt.Errorf("job summary insert failed: %v", err)
  1049  					}
  1050  					if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil {
  1051  						return fmt.Errorf("index update failed: %v", err)
  1052  					}
  1053  				}
  1054  			}
  1055  		}
  1056  	}
  1057  
  1058  	// Delete the job
  1059  	if err := txn.Delete("jobs", existing); err != nil {
  1060  		return fmt.Errorf("job delete failed: %v", err)
  1061  	}
  1062  	if err := txn.Insert("index", &IndexEntry{"jobs", index}); err != nil {
  1063  		return fmt.Errorf("index update failed: %v", err)
  1064  	}
  1065  
  1066  	// Delete the job versions
  1067  	if err := s.deleteJobVersions(index, job, txn); err != nil {
  1068  		return err
  1069  	}
  1070  
  1071  	// Delete the job summary
  1072  	if _, err = txn.DeleteAll("job_summary", "id", namespace, jobID); err != nil {
  1073  		return fmt.Errorf("deleing job summary failed: %v", err)
  1074  	}
  1075  	if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil {
  1076  		return fmt.Errorf("index update failed: %v", err)
  1077  	}
  1078  
  1079  	return nil
  1080  }
  1081  
  1082  // deleteJobVersions deletes all versions of the given job.
  1083  func (s *StateStore) deleteJobVersions(index uint64, job *structs.Job, txn *memdb.Txn) error {
  1084  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
  1085  	if job.Namespace == "" {
  1086  		job.Namespace = structs.DefaultNamespace
  1087  	}
  1088  
  1089  	iter, err := txn.Get("job_version", "id_prefix", job.Namespace, job.ID)
  1090  	if err != nil {
  1091  		return err
  1092  	}
  1093  
  1094  	// Put them into a slice so there are no safety concerns while actually
  1095  	// performing the deletes
  1096  	jobs := []*structs.Job{}
  1097  	for {
  1098  		raw := iter.Next()
  1099  		if raw == nil {
  1100  			break
  1101  		}
  1102  
  1103  		// Ensure the ID is an exact match
  1104  		j := raw.(*structs.Job)
  1105  		if j.ID != job.ID {
  1106  			continue
  1107  		}
  1108  
  1109  		jobs = append(jobs, j)
  1110  	}
  1111  
  1112  	// Do the deletes
  1113  	for _, j := range jobs {
  1114  		if err := txn.Delete("job_version", j); err != nil {
  1115  			return fmt.Errorf("deleting job versions failed: %v", err)
  1116  		}
  1117  	}
  1118  
  1119  	if err := txn.Insert("index", &IndexEntry{"job_version", index}); err != nil {
  1120  		return fmt.Errorf("index update failed: %v", err)
  1121  	}
  1122  
  1123  	return nil
  1124  }
  1125  
  1126  // upsertJobVersion inserts a job into its historic version table and limits the
  1127  // number of job versions that are tracked.
  1128  func (s *StateStore) upsertJobVersion(index uint64, job *structs.Job, txn *memdb.Txn) error {
  1129  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
  1130  	if job.Namespace == "" {
  1131  		job.Namespace = structs.DefaultNamespace
  1132  	}
  1133  
  1134  	// Insert the job
  1135  	if err := txn.Insert("job_version", job); err != nil {
  1136  		return fmt.Errorf("failed to insert job into job_version table: %v", err)
  1137  	}
  1138  
  1139  	if err := txn.Insert("index", &IndexEntry{"job_version", index}); err != nil {
  1140  		return fmt.Errorf("index update failed: %v", err)
  1141  	}
  1142  
  1143  	// Get all the historic jobs for this ID
  1144  	all, err := s.jobVersionByID(txn, nil, job.Namespace, job.ID)
  1145  	if err != nil {
  1146  		return fmt.Errorf("failed to look up job versions for %q: %v", job.ID, err)
  1147  	}
  1148  
  1149  	// If we are below the limit there is no GCing to be done
  1150  	if len(all) <= structs.JobTrackedVersions {
  1151  		return nil
  1152  	}
  1153  
  1154  	// We have to delete a historic job to make room.
  1155  	// Find index of the highest versioned stable job
  1156  	stableIdx := -1
  1157  	for i, j := range all {
  1158  		if j.Stable {
  1159  			stableIdx = i
  1160  			break
  1161  		}
  1162  	}
  1163  
  1164  	// If the stable job is the oldest version, do a swap to bring it into the
  1165  	// keep set.
  1166  	max := structs.JobTrackedVersions
  1167  	if stableIdx == max {
  1168  		all[max-1], all[max] = all[max], all[max-1]
  1169  	}
  1170  
  1171  	// Delete the job outside of the set that are being kept.
  1172  	d := all[max]
  1173  	if err := txn.Delete("job_version", d); err != nil {
  1174  		return fmt.Errorf("failed to delete job %v (%d) from job_version", d.ID, d.Version)
  1175  	}
  1176  
  1177  	return nil
  1178  }
  1179  
  1180  // JobByID is used to lookup a job by its ID. JobByID returns the current/latest job
  1181  // version.
  1182  func (s *StateStore) JobByID(ws memdb.WatchSet, namespace, id string) (*structs.Job, error) {
  1183  	txn := s.db.Txn(false)
  1184  	return s.JobByIDTxn(ws, namespace, id, txn)
  1185  }
  1186  
  1187  // JobByIDTxn is used to lookup a job by its ID, like  JobByID. JobByID returns the job version
  1188  // accessible through in the transaction
  1189  func (s *StateStore) JobByIDTxn(ws memdb.WatchSet, namespace, id string, txn Txn) (*structs.Job, error) {
  1190  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
  1191  	if namespace == "" {
  1192  		namespace = structs.DefaultNamespace
  1193  	}
  1194  
  1195  	watchCh, existing, err := txn.FirstWatch("jobs", "id", namespace, id)
  1196  	if err != nil {
  1197  		return nil, fmt.Errorf("job lookup failed: %v", err)
  1198  	}
  1199  	ws.Add(watchCh)
  1200  
  1201  	if existing != nil {
  1202  		return existing.(*structs.Job), nil
  1203  	}
  1204  	return nil, nil
  1205  }
  1206  
  1207  // JobsByIDPrefix is used to lookup a job by prefix
  1208  func (s *StateStore) JobsByIDPrefix(ws memdb.WatchSet, namespace, id string) (memdb.ResultIterator, error) {
  1209  	txn := s.db.Txn(false)
  1210  
  1211  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
  1212  	if namespace == "" {
  1213  		namespace = structs.DefaultNamespace
  1214  	}
  1215  
  1216  	iter, err := txn.Get("jobs", "id_prefix", namespace, id)
  1217  	if err != nil {
  1218  		return nil, fmt.Errorf("job lookup failed: %v", err)
  1219  	}
  1220  
  1221  	ws.Add(iter.WatchCh())
  1222  
  1223  	return iter, nil
  1224  }
  1225  
  1226  // JobVersionsByID returns all the tracked versions of a job.
  1227  func (s *StateStore) JobVersionsByID(ws memdb.WatchSet, namespace, id string) ([]*structs.Job, error) {
  1228  	txn := s.db.Txn(false)
  1229  
  1230  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
  1231  	if namespace == "" {
  1232  		namespace = structs.DefaultNamespace
  1233  	}
  1234  
  1235  	return s.jobVersionByID(txn, &ws, namespace, id)
  1236  }
  1237  
  1238  // jobVersionByID is the underlying implementation for retrieving all tracked
  1239  // versions of a job and is called under an existing transaction. A watch set
  1240  // can optionally be passed in to add the job histories to the watch set.
  1241  func (s *StateStore) jobVersionByID(txn *memdb.Txn, ws *memdb.WatchSet, namespace, id string) ([]*structs.Job, error) {
  1242  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
  1243  	if namespace == "" {
  1244  		namespace = structs.DefaultNamespace
  1245  	}
  1246  
  1247  	// Get all the historic jobs for this ID
  1248  	iter, err := txn.Get("job_version", "id_prefix", namespace, id)
  1249  	if err != nil {
  1250  		return nil, err
  1251  	}
  1252  
  1253  	if ws != nil {
  1254  		ws.Add(iter.WatchCh())
  1255  	}
  1256  
  1257  	var all []*structs.Job
  1258  	for {
  1259  		raw := iter.Next()
  1260  		if raw == nil {
  1261  			break
  1262  		}
  1263  
  1264  		// Ensure the ID is an exact match
  1265  		j := raw.(*structs.Job)
  1266  		if j.ID != id {
  1267  			continue
  1268  		}
  1269  
  1270  		all = append(all, j)
  1271  	}
  1272  
  1273  	// Sort in reverse order so that the highest version is first
  1274  	sort.Slice(all, func(i, j int) bool {
  1275  		return all[i].Version > all[j].Version
  1276  	})
  1277  
  1278  	return all, nil
  1279  }
  1280  
  1281  // JobByIDAndVersion returns the job identified by its ID and Version. The
  1282  // passed watchset may be nil.
  1283  func (s *StateStore) JobByIDAndVersion(ws memdb.WatchSet, namespace, id string, version uint64) (*structs.Job, error) {
  1284  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
  1285  	if namespace == "" {
  1286  		namespace = structs.DefaultNamespace
  1287  	}
  1288  	txn := s.db.Txn(false)
  1289  	return s.jobByIDAndVersionImpl(ws, namespace, id, version, txn)
  1290  }
  1291  
  1292  // jobByIDAndVersionImpl returns the job identified by its ID and Version. The
  1293  // passed watchset may be nil.
  1294  func (s *StateStore) jobByIDAndVersionImpl(ws memdb.WatchSet, namespace, id string,
  1295  	version uint64, txn *memdb.Txn) (*structs.Job, error) {
  1296  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
  1297  	if namespace == "" {
  1298  		namespace = structs.DefaultNamespace
  1299  	}
  1300  
  1301  	watchCh, existing, err := txn.FirstWatch("job_version", "id", namespace, id, version)
  1302  	if err != nil {
  1303  		return nil, err
  1304  	}
  1305  
  1306  	if ws != nil {
  1307  		ws.Add(watchCh)
  1308  	}
  1309  
  1310  	if existing != nil {
  1311  		job := existing.(*structs.Job)
  1312  		return job, nil
  1313  	}
  1314  
  1315  	return nil, nil
  1316  }
  1317  
  1318  func (s *StateStore) JobVersions(ws memdb.WatchSet) (memdb.ResultIterator, error) {
  1319  	txn := s.db.Txn(false)
  1320  
  1321  	// Walk the entire deployments table
  1322  	iter, err := txn.Get("job_version", "id")
  1323  	if err != nil {
  1324  		return nil, err
  1325  	}
  1326  
  1327  	ws.Add(iter.WatchCh())
  1328  	return iter, nil
  1329  }
  1330  
  1331  // Jobs returns an iterator over all the jobs
  1332  func (s *StateStore) Jobs(ws memdb.WatchSet) (memdb.ResultIterator, error) {
  1333  	txn := s.db.Txn(false)
  1334  
  1335  	// Walk the entire jobs table
  1336  	iter, err := txn.Get("jobs", "id")
  1337  	if err != nil {
  1338  		return nil, err
  1339  	}
  1340  
  1341  	ws.Add(iter.WatchCh())
  1342  
  1343  	return iter, nil
  1344  }
  1345  
  1346  // JobsByNamespace returns an iterator over all the jobs for the given namespace
  1347  func (s *StateStore) JobsByNamespace(ws memdb.WatchSet, namespace string) (memdb.ResultIterator, error) {
  1348  	txn := s.db.Txn(false)
  1349  	return s.jobsByNamespaceImpl(ws, namespace, txn)
  1350  }
  1351  
  1352  // jobsByNamespaceImpl returns an iterator over all the jobs for the given namespace
  1353  func (s *StateStore) jobsByNamespaceImpl(ws memdb.WatchSet, namespace string, txn *memdb.Txn) (memdb.ResultIterator, error) {
  1354  	// Walk the entire jobs table
  1355  	iter, err := txn.Get("jobs", "id_prefix", namespace, "")
  1356  	if err != nil {
  1357  		return nil, err
  1358  	}
  1359  
  1360  	ws.Add(iter.WatchCh())
  1361  
  1362  	return iter, nil
  1363  }
  1364  
  1365  // JobsByPeriodic returns an iterator over all the periodic or non-periodic jobs.
  1366  func (s *StateStore) JobsByPeriodic(ws memdb.WatchSet, periodic bool) (memdb.ResultIterator, error) {
  1367  	txn := s.db.Txn(false)
  1368  
  1369  	iter, err := txn.Get("jobs", "periodic", periodic)
  1370  	if err != nil {
  1371  		return nil, err
  1372  	}
  1373  
  1374  	ws.Add(iter.WatchCh())
  1375  
  1376  	return iter, nil
  1377  }
  1378  
  1379  // JobsByScheduler returns an iterator over all the jobs with the specific
  1380  // scheduler type.
  1381  func (s *StateStore) JobsByScheduler(ws memdb.WatchSet, schedulerType string) (memdb.ResultIterator, error) {
  1382  	txn := s.db.Txn(false)
  1383  
  1384  	// Return an iterator for jobs with the specific type.
  1385  	iter, err := txn.Get("jobs", "type", schedulerType)
  1386  	if err != nil {
  1387  		return nil, err
  1388  	}
  1389  
  1390  	ws.Add(iter.WatchCh())
  1391  
  1392  	return iter, nil
  1393  }
  1394  
  1395  // JobsByGC returns an iterator over all jobs eligible or uneligible for garbage
  1396  // collection.
  1397  func (s *StateStore) JobsByGC(ws memdb.WatchSet, gc bool) (memdb.ResultIterator, error) {
  1398  	txn := s.db.Txn(false)
  1399  
  1400  	iter, err := txn.Get("jobs", "gc", gc)
  1401  	if err != nil {
  1402  		return nil, err
  1403  	}
  1404  
  1405  	ws.Add(iter.WatchCh())
  1406  
  1407  	return iter, nil
  1408  }
  1409  
  1410  // JobSummary returns a job summary object which matches a specific id.
  1411  func (s *StateStore) JobSummaryByID(ws memdb.WatchSet, namespace, jobID string) (*structs.JobSummary, error) {
  1412  	txn := s.db.Txn(false)
  1413  
  1414  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
  1415  	if namespace == "" {
  1416  		namespace = structs.DefaultNamespace
  1417  	}
  1418  
  1419  	watchCh, existing, err := txn.FirstWatch("job_summary", "id", namespace, jobID)
  1420  	if err != nil {
  1421  		return nil, err
  1422  	}
  1423  
  1424  	ws.Add(watchCh)
  1425  
  1426  	if existing != nil {
  1427  		summary := existing.(*structs.JobSummary)
  1428  		return summary, nil
  1429  	}
  1430  
  1431  	return nil, nil
  1432  }
  1433  
  1434  // JobSummaries walks the entire job summary table and returns all the job
  1435  // summary objects
  1436  func (s *StateStore) JobSummaries(ws memdb.WatchSet) (memdb.ResultIterator, error) {
  1437  	txn := s.db.Txn(false)
  1438  
  1439  	iter, err := txn.Get("job_summary", "id")
  1440  	if err != nil {
  1441  		return nil, err
  1442  	}
  1443  
  1444  	ws.Add(iter.WatchCh())
  1445  
  1446  	return iter, nil
  1447  }
  1448  
  1449  // JobSummaryByPrefix is used to look up Job Summary by id prefix
  1450  func (s *StateStore) JobSummaryByPrefix(ws memdb.WatchSet, namespace, id string) (memdb.ResultIterator, error) {
  1451  	txn := s.db.Txn(false)
  1452  
  1453  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
  1454  	if namespace == "" {
  1455  		namespace = structs.DefaultNamespace
  1456  	}
  1457  
  1458  	iter, err := txn.Get("job_summary", "id_prefix", namespace, id)
  1459  	if err != nil {
  1460  		return nil, fmt.Errorf("eval lookup failed: %v", err)
  1461  	}
  1462  
  1463  	ws.Add(iter.WatchCh())
  1464  
  1465  	return iter, nil
  1466  }
  1467  
  1468  // UpsertPeriodicLaunch is used to register a launch or update it.
  1469  func (s *StateStore) UpsertPeriodicLaunch(index uint64, launch *structs.PeriodicLaunch) error {
  1470  	txn := s.db.Txn(true)
  1471  	defer txn.Abort()
  1472  
  1473  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
  1474  	if launch.Namespace == "" {
  1475  		launch.Namespace = structs.DefaultNamespace
  1476  	}
  1477  
  1478  	// Check if the job already exists
  1479  	existing, err := txn.First("periodic_launch", "id", launch.Namespace, launch.ID)
  1480  	if err != nil {
  1481  		return fmt.Errorf("periodic launch lookup failed: %v", err)
  1482  	}
  1483  
  1484  	// Setup the indexes correctly
  1485  	if existing != nil {
  1486  		launch.CreateIndex = existing.(*structs.PeriodicLaunch).CreateIndex
  1487  		launch.ModifyIndex = index
  1488  	} else {
  1489  		launch.CreateIndex = index
  1490  		launch.ModifyIndex = index
  1491  	}
  1492  
  1493  	// Insert the job
  1494  	if err := txn.Insert("periodic_launch", launch); err != nil {
  1495  		return fmt.Errorf("launch insert failed: %v", err)
  1496  	}
  1497  	if err := txn.Insert("index", &IndexEntry{"periodic_launch", index}); err != nil {
  1498  		return fmt.Errorf("index update failed: %v", err)
  1499  	}
  1500  
  1501  	txn.Commit()
  1502  	return nil
  1503  }
  1504  
  1505  // DeletePeriodicLaunch is used to delete the periodic launch
  1506  func (s *StateStore) DeletePeriodicLaunch(index uint64, namespace, jobID string) error {
  1507  	txn := s.db.Txn(true)
  1508  	defer txn.Abort()
  1509  
  1510  	err := s.DeletePeriodicLaunchTxn(index, namespace, jobID, txn)
  1511  	if err == nil {
  1512  		txn.Commit()
  1513  	}
  1514  	return err
  1515  }
  1516  
  1517  // DeletePeriodicLaunchTxn is used to delete the periodic launch, like DeletePeriodicLaunch
  1518  // but in a transcation.  Useful for when making multiple modifications atomically
  1519  func (s *StateStore) DeletePeriodicLaunchTxn(index uint64, namespace, jobID string, txn Txn) error {
  1520  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
  1521  	if namespace == "" {
  1522  		namespace = structs.DefaultNamespace
  1523  	}
  1524  
  1525  	// Lookup the launch
  1526  	existing, err := txn.First("periodic_launch", "id", namespace, jobID)
  1527  	if err != nil {
  1528  		return fmt.Errorf("launch lookup failed: %v", err)
  1529  	}
  1530  	if existing == nil {
  1531  		return fmt.Errorf("launch not found")
  1532  	}
  1533  
  1534  	// Delete the launch
  1535  	if err := txn.Delete("periodic_launch", existing); err != nil {
  1536  		return fmt.Errorf("launch delete failed: %v", err)
  1537  	}
  1538  	if err := txn.Insert("index", &IndexEntry{"periodic_launch", index}); err != nil {
  1539  		return fmt.Errorf("index update failed: %v", err)
  1540  	}
  1541  
  1542  	return nil
  1543  }
  1544  
  1545  // PeriodicLaunchByID is used to lookup a periodic launch by the periodic job
  1546  // ID.
  1547  func (s *StateStore) PeriodicLaunchByID(ws memdb.WatchSet, namespace, id string) (*structs.PeriodicLaunch, error) {
  1548  	txn := s.db.Txn(false)
  1549  
  1550  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
  1551  	if namespace == "" {
  1552  		namespace = structs.DefaultNamespace
  1553  	}
  1554  
  1555  	watchCh, existing, err := txn.FirstWatch("periodic_launch", "id", namespace, id)
  1556  	if err != nil {
  1557  		return nil, fmt.Errorf("periodic launch lookup failed: %v", err)
  1558  	}
  1559  
  1560  	ws.Add(watchCh)
  1561  
  1562  	if existing != nil {
  1563  		return existing.(*structs.PeriodicLaunch), nil
  1564  	}
  1565  	return nil, nil
  1566  }
  1567  
  1568  // PeriodicLaunches returns an iterator over all the periodic launches
  1569  func (s *StateStore) PeriodicLaunches(ws memdb.WatchSet) (memdb.ResultIterator, error) {
  1570  	txn := s.db.Txn(false)
  1571  
  1572  	// Walk the entire table
  1573  	iter, err := txn.Get("periodic_launch", "id")
  1574  	if err != nil {
  1575  		return nil, err
  1576  	}
  1577  
  1578  	ws.Add(iter.WatchCh())
  1579  
  1580  	return iter, nil
  1581  }
  1582  
  1583  // UpsertEvals is used to upsert a set of evaluations
  1584  func (s *StateStore) UpsertEvals(index uint64, evals []*structs.Evaluation) error {
  1585  	txn := s.db.Txn(true)
  1586  	defer txn.Abort()
  1587  
  1588  	err := s.UpsertEvalsTxn(index, evals, txn)
  1589  	if err == nil {
  1590  		txn.Commit()
  1591  	}
  1592  	return err
  1593  }
  1594  
  1595  // UpsertEvals is used to upsert a set of evaluations, like UpsertEvals
  1596  // but in a transcation.  Useful for when making multiple modifications atomically
  1597  func (s *StateStore) UpsertEvalsTxn(index uint64, evals []*structs.Evaluation, txn Txn) error {
  1598  	// Do a nested upsert
  1599  	jobs := make(map[structs.NamespacedID]string, len(evals))
  1600  	for _, eval := range evals {
  1601  		if err := s.nestedUpsertEval(txn, index, eval); err != nil {
  1602  			return err
  1603  		}
  1604  
  1605  		tuple := structs.NamespacedID{
  1606  			ID:        eval.JobID,
  1607  			Namespace: eval.Namespace,
  1608  		}
  1609  		jobs[tuple] = ""
  1610  	}
  1611  
  1612  	// Set the job's status
  1613  	if err := s.setJobStatuses(index, txn, jobs, false); err != nil {
  1614  		return fmt.Errorf("setting job status failed: %v", err)
  1615  	}
  1616  
  1617  	return nil
  1618  }
  1619  
  1620  // nestedUpsertEvaluation is used to nest an evaluation upsert within a transaction
  1621  func (s *StateStore) nestedUpsertEval(txn *memdb.Txn, index uint64, eval *structs.Evaluation) error {
  1622  	// Lookup the evaluation
  1623  	existing, err := txn.First("evals", "id", eval.ID)
  1624  	if err != nil {
  1625  		return fmt.Errorf("eval lookup failed: %v", err)
  1626  	}
  1627  
  1628  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
  1629  	if eval.Namespace == "" {
  1630  		eval.Namespace = structs.DefaultNamespace
  1631  	}
  1632  
  1633  	// Update the indexes
  1634  	if existing != nil {
  1635  		eval.CreateIndex = existing.(*structs.Evaluation).CreateIndex
  1636  		eval.ModifyIndex = index
  1637  	} else {
  1638  		eval.CreateIndex = index
  1639  		eval.ModifyIndex = index
  1640  	}
  1641  
  1642  	// Update the job summary
  1643  	summaryRaw, err := txn.First("job_summary", "id", eval.Namespace, eval.JobID)
  1644  	if err != nil {
  1645  		return fmt.Errorf("job summary lookup failed: %v", err)
  1646  	}
  1647  	if summaryRaw != nil {
  1648  		js := summaryRaw.(*structs.JobSummary).Copy()
  1649  		hasSummaryChanged := false
  1650  		for tg, num := range eval.QueuedAllocations {
  1651  			if summary, ok := js.Summary[tg]; ok {
  1652  				if summary.Queued != num {
  1653  					summary.Queued = num
  1654  					js.Summary[tg] = summary
  1655  					hasSummaryChanged = true
  1656  				}
  1657  			} else {
  1658  				s.logger.Printf("[ERR] state_store: unable to update queued for job %q and task group %q", eval.JobID, tg)
  1659  			}
  1660  		}
  1661  
  1662  		// Insert the job summary
  1663  		if hasSummaryChanged {
  1664  			// COMPAT 0.7: Upgrade old objects that do not have namespaces
  1665  			if js.Namespace == "" {
  1666  				js.Namespace = structs.DefaultNamespace
  1667  			}
  1668  
  1669  			js.ModifyIndex = index
  1670  			if err := txn.Insert("job_summary", js); err != nil {
  1671  				return fmt.Errorf("job summary insert failed: %v", err)
  1672  			}
  1673  			if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil {
  1674  				return fmt.Errorf("index update failed: %v", err)
  1675  			}
  1676  		}
  1677  	}
  1678  
  1679  	// Check if the job has any blocked evaluations and cancel them
  1680  	if eval.Status == structs.EvalStatusComplete && len(eval.FailedTGAllocs) == 0 {
  1681  		// Get the blocked evaluation for a job if it exists
  1682  		iter, err := txn.Get("evals", "job", eval.Namespace, eval.JobID, structs.EvalStatusBlocked)
  1683  		if err != nil {
  1684  			return fmt.Errorf("failed to get blocked evals for job %q in namespace %q: %v", eval.JobID, eval.Namespace, err)
  1685  		}
  1686  
  1687  		var blocked []*structs.Evaluation
  1688  		for {
  1689  			raw := iter.Next()
  1690  			if raw == nil {
  1691  				break
  1692  			}
  1693  			blocked = append(blocked, raw.(*structs.Evaluation))
  1694  		}
  1695  
  1696  		// Go through and update the evals
  1697  		for _, eval := range blocked {
  1698  			newEval := eval.Copy()
  1699  			newEval.Status = structs.EvalStatusCancelled
  1700  			newEval.StatusDescription = fmt.Sprintf("evaluation %q successful", newEval.ID)
  1701  			newEval.ModifyIndex = index
  1702  
  1703  			// COMPAT 0.7: Upgrade old objects that do not have namespaces
  1704  			if newEval.Namespace == "" {
  1705  				newEval.Namespace = structs.DefaultNamespace
  1706  			}
  1707  
  1708  			if err := txn.Insert("evals", newEval); err != nil {
  1709  				return fmt.Errorf("eval insert failed: %v", err)
  1710  			}
  1711  		}
  1712  	}
  1713  
  1714  	// Insert the eval
  1715  	if err := txn.Insert("evals", eval); err != nil {
  1716  		return fmt.Errorf("eval insert failed: %v", err)
  1717  	}
  1718  	if err := txn.Insert("index", &IndexEntry{"evals", index}); err != nil {
  1719  		return fmt.Errorf("index update failed: %v", err)
  1720  	}
  1721  	return nil
  1722  }
  1723  
  1724  // updateEvalModifyIndex is used to update the modify index of an evaluation that has been
  1725  // through a scheduler pass. This is done as part of plan apply. It ensures that when a subsequent
  1726  // scheduler workers process a re-queued evaluation it sees any partial updates from the plan apply.
  1727  func (s *StateStore) updateEvalModifyIndex(txn *memdb.Txn, index uint64, evalID string) error {
  1728  	// Lookup the evaluation
  1729  	existing, err := txn.First("evals", "id", evalID)
  1730  	if err != nil {
  1731  		return fmt.Errorf("eval lookup failed: %v", err)
  1732  	}
  1733  	if existing == nil {
  1734  		err := fmt.Errorf("unable to find eval id %q", evalID)
  1735  		s.logger.Printf("[ERR] state_store: %v", err)
  1736  		return err
  1737  	}
  1738  	eval := existing.(*structs.Evaluation).Copy()
  1739  	// Update the indexes
  1740  	eval.ModifyIndex = index
  1741  
  1742  	// Insert the eval
  1743  	if err := txn.Insert("evals", eval); err != nil {
  1744  		return fmt.Errorf("eval insert failed: %v", err)
  1745  	}
  1746  	if err := txn.Insert("index", &IndexEntry{"evals", index}); err != nil {
  1747  		return fmt.Errorf("index update failed: %v", err)
  1748  	}
  1749  	return nil
  1750  }
  1751  
  1752  // DeleteEval is used to delete an evaluation
  1753  func (s *StateStore) DeleteEval(index uint64, evals []string, allocs []string) error {
  1754  	txn := s.db.Txn(true)
  1755  	defer txn.Abort()
  1756  
  1757  	jobs := make(map[structs.NamespacedID]string, len(evals))
  1758  	for _, eval := range evals {
  1759  		existing, err := txn.First("evals", "id", eval)
  1760  		if err != nil {
  1761  			return fmt.Errorf("eval lookup failed: %v", err)
  1762  		}
  1763  		if existing == nil {
  1764  			continue
  1765  		}
  1766  		if err := txn.Delete("evals", existing); err != nil {
  1767  			return fmt.Errorf("eval delete failed: %v", err)
  1768  		}
  1769  		eval := existing.(*structs.Evaluation)
  1770  
  1771  		tuple := structs.NamespacedID{
  1772  			ID:        eval.JobID,
  1773  			Namespace: eval.Namespace,
  1774  		}
  1775  		jobs[tuple] = ""
  1776  	}
  1777  
  1778  	for _, alloc := range allocs {
  1779  		raw, err := txn.First("allocs", "id", alloc)
  1780  		if err != nil {
  1781  			return fmt.Errorf("alloc lookup failed: %v", err)
  1782  		}
  1783  		if raw == nil {
  1784  			continue
  1785  		}
  1786  		if err := txn.Delete("allocs", raw); err != nil {
  1787  			return fmt.Errorf("alloc delete failed: %v", err)
  1788  		}
  1789  	}
  1790  
  1791  	// Update the indexes
  1792  	if err := txn.Insert("index", &IndexEntry{"evals", index}); err != nil {
  1793  		return fmt.Errorf("index update failed: %v", err)
  1794  	}
  1795  	if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil {
  1796  		return fmt.Errorf("index update failed: %v", err)
  1797  	}
  1798  
  1799  	// Set the job's status
  1800  	if err := s.setJobStatuses(index, txn, jobs, true); err != nil {
  1801  		return fmt.Errorf("setting job status failed: %v", err)
  1802  	}
  1803  
  1804  	txn.Commit()
  1805  	return nil
  1806  }
  1807  
  1808  // EvalByID is used to lookup an eval by its ID
  1809  func (s *StateStore) EvalByID(ws memdb.WatchSet, id string) (*structs.Evaluation, error) {
  1810  	txn := s.db.Txn(false)
  1811  
  1812  	watchCh, existing, err := txn.FirstWatch("evals", "id", id)
  1813  	if err != nil {
  1814  		return nil, fmt.Errorf("eval lookup failed: %v", err)
  1815  	}
  1816  
  1817  	ws.Add(watchCh)
  1818  
  1819  	if existing != nil {
  1820  		return existing.(*structs.Evaluation), nil
  1821  	}
  1822  	return nil, nil
  1823  }
  1824  
  1825  // EvalsByIDPrefix is used to lookup evaluations by prefix in a particular
  1826  // namespace
  1827  func (s *StateStore) EvalsByIDPrefix(ws memdb.WatchSet, namespace, id string) (memdb.ResultIterator, error) {
  1828  	txn := s.db.Txn(false)
  1829  
  1830  	// Get an iterator over all evals by the id prefix
  1831  	iter, err := txn.Get("evals", "id_prefix", id)
  1832  	if err != nil {
  1833  		return nil, fmt.Errorf("eval lookup failed: %v", err)
  1834  	}
  1835  
  1836  	ws.Add(iter.WatchCh())
  1837  
  1838  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
  1839  	if namespace == "" {
  1840  		namespace = structs.DefaultNamespace
  1841  	}
  1842  
  1843  	// Wrap the iterator in a filter
  1844  	wrap := memdb.NewFilterIterator(iter, evalNamespaceFilter(namespace))
  1845  	return wrap, nil
  1846  }
  1847  
  1848  // evalNamespaceFilter returns a filter function that filters all evaluations
  1849  // not in the given namespace.
  1850  func evalNamespaceFilter(namespace string) func(interface{}) bool {
  1851  	return func(raw interface{}) bool {
  1852  		eval, ok := raw.(*structs.Evaluation)
  1853  		if !ok {
  1854  			return true
  1855  		}
  1856  
  1857  		return eval.Namespace != namespace
  1858  	}
  1859  }
  1860  
  1861  // EvalsByJob returns all the evaluations by job id
  1862  func (s *StateStore) EvalsByJob(ws memdb.WatchSet, namespace, jobID string) ([]*structs.Evaluation, error) {
  1863  	txn := s.db.Txn(false)
  1864  
  1865  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
  1866  	if namespace == "" {
  1867  		namespace = structs.DefaultNamespace
  1868  	}
  1869  
  1870  	// Get an iterator over the node allocations
  1871  	iter, err := txn.Get("evals", "job_prefix", namespace, jobID)
  1872  	if err != nil {
  1873  		return nil, err
  1874  	}
  1875  
  1876  	ws.Add(iter.WatchCh())
  1877  
  1878  	var out []*structs.Evaluation
  1879  	for {
  1880  		raw := iter.Next()
  1881  		if raw == nil {
  1882  			break
  1883  		}
  1884  
  1885  		e := raw.(*structs.Evaluation)
  1886  
  1887  		// Filter non-exact matches
  1888  		if e.JobID != jobID {
  1889  			continue
  1890  		}
  1891  
  1892  		out = append(out, e)
  1893  	}
  1894  	return out, nil
  1895  }
  1896  
  1897  // Evals returns an iterator over all the evaluations
  1898  func (s *StateStore) Evals(ws memdb.WatchSet) (memdb.ResultIterator, error) {
  1899  	txn := s.db.Txn(false)
  1900  
  1901  	// Walk the entire table
  1902  	iter, err := txn.Get("evals", "id")
  1903  	if err != nil {
  1904  		return nil, err
  1905  	}
  1906  
  1907  	ws.Add(iter.WatchCh())
  1908  
  1909  	return iter, nil
  1910  }
  1911  
  1912  // EvalsByNamespace returns an iterator over all the evaluations in the given
  1913  // namespace
  1914  func (s *StateStore) EvalsByNamespace(ws memdb.WatchSet, namespace string) (memdb.ResultIterator, error) {
  1915  	txn := s.db.Txn(false)
  1916  
  1917  	// Walk the entire table
  1918  	iter, err := txn.Get("evals", "namespace", namespace)
  1919  	if err != nil {
  1920  		return nil, err
  1921  	}
  1922  
  1923  	ws.Add(iter.WatchCh())
  1924  
  1925  	return iter, nil
  1926  }
  1927  
  1928  // UpdateAllocsFromClient is used to update an allocation based on input
  1929  // from a client. While the schedulers are the authority on the allocation for
  1930  // most things, some updates are authoritative from the client. Specifically,
  1931  // the desired state comes from the schedulers, while the actual state comes
  1932  // from clients.
  1933  func (s *StateStore) UpdateAllocsFromClient(index uint64, allocs []*structs.Allocation) error {
  1934  	txn := s.db.Txn(true)
  1935  	defer txn.Abort()
  1936  
  1937  	// Handle each of the updated allocations
  1938  	for _, alloc := range allocs {
  1939  		if err := s.nestedUpdateAllocFromClient(txn, index, alloc); err != nil {
  1940  			return err
  1941  		}
  1942  	}
  1943  
  1944  	// Update the indexes
  1945  	if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil {
  1946  		return fmt.Errorf("index update failed: %v", err)
  1947  	}
  1948  
  1949  	txn.Commit()
  1950  	return nil
  1951  }
  1952  
  1953  // nestedUpdateAllocFromClient is used to nest an update of an allocation with client status
  1954  func (s *StateStore) nestedUpdateAllocFromClient(txn *memdb.Txn, index uint64, alloc *structs.Allocation) error {
  1955  	// Look for existing alloc
  1956  	existing, err := txn.First("allocs", "id", alloc.ID)
  1957  	if err != nil {
  1958  		return fmt.Errorf("alloc lookup failed: %v", err)
  1959  	}
  1960  
  1961  	// Nothing to do if this does not exist
  1962  	if existing == nil {
  1963  		return nil
  1964  	}
  1965  	exist := existing.(*structs.Allocation)
  1966  
  1967  	// Copy everything from the existing allocation
  1968  	copyAlloc := exist.Copy()
  1969  
  1970  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
  1971  	if copyAlloc.Namespace == "" {
  1972  		copyAlloc.Namespace = structs.DefaultNamespace
  1973  	}
  1974  
  1975  	// Pull in anything the client is the authority on
  1976  	copyAlloc.ClientStatus = alloc.ClientStatus
  1977  	copyAlloc.ClientDescription = alloc.ClientDescription
  1978  	copyAlloc.TaskStates = alloc.TaskStates
  1979  
  1980  	// The client can only set its deployment health and timestamp, so just take
  1981  	// those
  1982  	if copyAlloc.DeploymentStatus != nil && alloc.DeploymentStatus != nil {
  1983  		oldHasHealthy := copyAlloc.DeploymentStatus.HasHealth()
  1984  		newHasHealthy := alloc.DeploymentStatus.HasHealth()
  1985  
  1986  		// We got new health information from the client
  1987  		if newHasHealthy && (!oldHasHealthy || *copyAlloc.DeploymentStatus.Healthy != *alloc.DeploymentStatus.Healthy) {
  1988  			// Updated deployment health and timestamp
  1989  			copyAlloc.DeploymentStatus.Healthy = helper.BoolToPtr(*alloc.DeploymentStatus.Healthy)
  1990  			copyAlloc.DeploymentStatus.Timestamp = alloc.DeploymentStatus.Timestamp
  1991  			copyAlloc.DeploymentStatus.ModifyIndex = index
  1992  		}
  1993  	} else if alloc.DeploymentStatus != nil {
  1994  		// First time getting a deployment status so copy everything and just
  1995  		// set the index
  1996  		copyAlloc.DeploymentStatus = alloc.DeploymentStatus.Copy()
  1997  		copyAlloc.DeploymentStatus.ModifyIndex = index
  1998  	}
  1999  
  2000  	// Update the modify index
  2001  	copyAlloc.ModifyIndex = index
  2002  
  2003  	// Update the modify time
  2004  	copyAlloc.ModifyTime = alloc.ModifyTime
  2005  
  2006  	if err := s.updateDeploymentWithAlloc(index, copyAlloc, exist, txn); err != nil {
  2007  		return fmt.Errorf("error updating deployment: %v", err)
  2008  	}
  2009  
  2010  	if err := s.updateSummaryWithAlloc(index, copyAlloc, exist, txn); err != nil {
  2011  		return fmt.Errorf("error updating job summary: %v", err)
  2012  	}
  2013  
  2014  	if err := s.updateEntWithAlloc(index, copyAlloc, exist, txn); err != nil {
  2015  		return err
  2016  	}
  2017  
  2018  	// Update the allocation
  2019  	if err := txn.Insert("allocs", copyAlloc); err != nil {
  2020  		return fmt.Errorf("alloc insert failed: %v", err)
  2021  	}
  2022  
  2023  	// Set the job's status
  2024  	forceStatus := ""
  2025  	if !copyAlloc.TerminalStatus() {
  2026  		forceStatus = structs.JobStatusRunning
  2027  	}
  2028  
  2029  	tuple := structs.NamespacedID{
  2030  		ID:        exist.JobID,
  2031  		Namespace: exist.Namespace,
  2032  	}
  2033  	jobs := map[structs.NamespacedID]string{tuple: forceStatus}
  2034  
  2035  	if err := s.setJobStatuses(index, txn, jobs, false); err != nil {
  2036  		return fmt.Errorf("setting job status failed: %v", err)
  2037  	}
  2038  	return nil
  2039  }
  2040  
  2041  // UpsertAllocs is used to evict a set of allocations and allocate new ones at
  2042  // the same time.
  2043  func (s *StateStore) UpsertAllocs(index uint64, allocs []*structs.Allocation) error {
  2044  	txn := s.db.Txn(true)
  2045  	defer txn.Abort()
  2046  	if err := s.upsertAllocsImpl(index, allocs, txn); err != nil {
  2047  		return err
  2048  	}
  2049  	txn.Commit()
  2050  	return nil
  2051  }
  2052  
  2053  // upsertAllocs is the actual implementation of UpsertAllocs so that it may be
  2054  // used with an existing transaction.
  2055  func (s *StateStore) upsertAllocsImpl(index uint64, allocs []*structs.Allocation, txn *memdb.Txn) error {
  2056  	// Handle the allocations
  2057  	jobs := make(map[structs.NamespacedID]string, 1)
  2058  	for _, alloc := range allocs {
  2059  		existing, err := txn.First("allocs", "id", alloc.ID)
  2060  		if err != nil {
  2061  			return fmt.Errorf("alloc lookup failed: %v", err)
  2062  		}
  2063  		exist, _ := existing.(*structs.Allocation)
  2064  
  2065  		if exist == nil {
  2066  			alloc.CreateIndex = index
  2067  			alloc.ModifyIndex = index
  2068  			alloc.AllocModifyIndex = index
  2069  			if alloc.DeploymentStatus != nil {
  2070  				alloc.DeploymentStatus.ModifyIndex = index
  2071  			}
  2072  
  2073  			// Issue https://github.com/hashicorp/nomad/issues/2583 uncovered
  2074  			// the a race between a forced garbage collection and the scheduler
  2075  			// marking an allocation as terminal. The issue is that the
  2076  			// allocation from the scheduler has its job normalized and the FSM
  2077  			// will only denormalize if the allocation is not terminal.  However
  2078  			// if the allocation is garbage collected, that will result in a
  2079  			// allocation being upserted for the first time without a job
  2080  			// attached. By returning an error here, it will cause the FSM to
  2081  			// error, causing the plan_apply to error and thus causing the
  2082  			// evaluation to be failed. This will force an index refresh that
  2083  			// should solve this issue.
  2084  			if alloc.Job == nil {
  2085  				return fmt.Errorf("attempting to upsert allocation %q without a job", alloc.ID)
  2086  			}
  2087  		} else {
  2088  			alloc.CreateIndex = exist.CreateIndex
  2089  			alloc.ModifyIndex = index
  2090  			alloc.AllocModifyIndex = index
  2091  
  2092  			// Keep the clients task states
  2093  			alloc.TaskStates = exist.TaskStates
  2094  
  2095  			// If the scheduler is marking this allocation as lost we do not
  2096  			// want to reuse the status of the existing allocation.
  2097  			if alloc.ClientStatus != structs.AllocClientStatusLost {
  2098  				alloc.ClientStatus = exist.ClientStatus
  2099  				alloc.ClientDescription = exist.ClientDescription
  2100  			}
  2101  
  2102  			// The job has been denormalized so re-attach the original job
  2103  			if alloc.Job == nil {
  2104  				alloc.Job = exist.Job
  2105  			}
  2106  		}
  2107  
  2108  		// COMPAT 0.7: Upgrade old objects that do not have namespaces
  2109  		if alloc.Namespace == "" {
  2110  			alloc.Namespace = structs.DefaultNamespace
  2111  		}
  2112  
  2113  		// OPTIMIZATION:
  2114  		// These should be given a map of new to old allocation and the updates
  2115  		// should be one on all changes. The current implementation causes O(n)
  2116  		// lookups/copies/insertions rather than O(1)
  2117  		if err := s.updateDeploymentWithAlloc(index, alloc, exist, txn); err != nil {
  2118  			return fmt.Errorf("error updating deployment: %v", err)
  2119  		}
  2120  
  2121  		if err := s.updateSummaryWithAlloc(index, alloc, exist, txn); err != nil {
  2122  			return fmt.Errorf("error updating job summary: %v", err)
  2123  		}
  2124  
  2125  		if err := s.updateEntWithAlloc(index, alloc, exist, txn); err != nil {
  2126  			return err
  2127  		}
  2128  
  2129  		// Create the EphemeralDisk if it's nil by adding up DiskMB from task resources.
  2130  		// COMPAT 0.4.1 -> 0.5
  2131  		if alloc.Job != nil {
  2132  			s.addEphemeralDiskToTaskGroups(alloc.Job)
  2133  		}
  2134  
  2135  		if err := txn.Insert("allocs", alloc); err != nil {
  2136  			return fmt.Errorf("alloc insert failed: %v", err)
  2137  		}
  2138  
  2139  		if alloc.PreviousAllocation != "" {
  2140  			prevAlloc, err := txn.First("allocs", "id", alloc.PreviousAllocation)
  2141  			if err != nil {
  2142  				return fmt.Errorf("alloc lookup failed: %v", err)
  2143  			}
  2144  			existingPrevAlloc, _ := prevAlloc.(*structs.Allocation)
  2145  			if existingPrevAlloc != nil {
  2146  				prevAllocCopy := existingPrevAlloc.Copy()
  2147  				prevAllocCopy.NextAllocation = alloc.ID
  2148  				prevAllocCopy.ModifyIndex = index
  2149  				if err := txn.Insert("allocs", prevAllocCopy); err != nil {
  2150  					return fmt.Errorf("alloc insert failed: %v", err)
  2151  				}
  2152  			}
  2153  		}
  2154  
  2155  		// If the allocation is running, force the job to running status.
  2156  		forceStatus := ""
  2157  		if !alloc.TerminalStatus() {
  2158  			forceStatus = structs.JobStatusRunning
  2159  		}
  2160  
  2161  		tuple := structs.NamespacedID{
  2162  			ID:        alloc.JobID,
  2163  			Namespace: alloc.Namespace,
  2164  		}
  2165  		jobs[tuple] = forceStatus
  2166  	}
  2167  
  2168  	// Update the indexes
  2169  	if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil {
  2170  		return fmt.Errorf("index update failed: %v", err)
  2171  	}
  2172  
  2173  	// Set the job's status
  2174  	if err := s.setJobStatuses(index, txn, jobs, false); err != nil {
  2175  		return fmt.Errorf("setting job status failed: %v", err)
  2176  	}
  2177  
  2178  	return nil
  2179  }
  2180  
  2181  // UpdateAllocsDesiredTransitions is used to update a set of allocations
  2182  // desired transitions.
  2183  func (s *StateStore) UpdateAllocsDesiredTransitions(index uint64, allocs map[string]*structs.DesiredTransition,
  2184  	evals []*structs.Evaluation) error {
  2185  
  2186  	txn := s.db.Txn(true)
  2187  	defer txn.Abort()
  2188  
  2189  	// Handle each of the updated allocations
  2190  	for id, transition := range allocs {
  2191  		if err := s.nestedUpdateAllocDesiredTransition(txn, index, id, transition); err != nil {
  2192  			return err
  2193  		}
  2194  	}
  2195  
  2196  	for _, eval := range evals {
  2197  		if err := s.nestedUpsertEval(txn, index, eval); err != nil {
  2198  			return err
  2199  		}
  2200  	}
  2201  
  2202  	// Update the indexes
  2203  	if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil {
  2204  		return fmt.Errorf("index update failed: %v", err)
  2205  	}
  2206  
  2207  	txn.Commit()
  2208  	return nil
  2209  }
  2210  
  2211  // nestedUpdateAllocDesiredTransition is used to nest an update of an
  2212  // allocations desired transition
  2213  func (s *StateStore) nestedUpdateAllocDesiredTransition(
  2214  	txn *memdb.Txn, index uint64, allocID string,
  2215  	transition *structs.DesiredTransition) error {
  2216  
  2217  	// Look for existing alloc
  2218  	existing, err := txn.First("allocs", "id", allocID)
  2219  	if err != nil {
  2220  		return fmt.Errorf("alloc lookup failed: %v", err)
  2221  	}
  2222  
  2223  	// Nothing to do if this does not exist
  2224  	if existing == nil {
  2225  		return nil
  2226  	}
  2227  	exist := existing.(*structs.Allocation)
  2228  
  2229  	// Copy everything from the existing allocation
  2230  	copyAlloc := exist.Copy()
  2231  
  2232  	// Merge the desired transitions
  2233  	copyAlloc.DesiredTransition.Merge(transition)
  2234  
  2235  	// Update the modify index
  2236  	copyAlloc.ModifyIndex = index
  2237  
  2238  	// Update the allocation
  2239  	if err := txn.Insert("allocs", copyAlloc); err != nil {
  2240  		return fmt.Errorf("alloc insert failed: %v", err)
  2241  	}
  2242  
  2243  	return nil
  2244  }
  2245  
  2246  // AllocByID is used to lookup an allocation by its ID
  2247  func (s *StateStore) AllocByID(ws memdb.WatchSet, id string) (*structs.Allocation, error) {
  2248  	txn := s.db.Txn(false)
  2249  
  2250  	watchCh, existing, err := txn.FirstWatch("allocs", "id", id)
  2251  	if err != nil {
  2252  		return nil, fmt.Errorf("alloc lookup failed: %v", err)
  2253  	}
  2254  
  2255  	ws.Add(watchCh)
  2256  
  2257  	if existing != nil {
  2258  		return existing.(*structs.Allocation), nil
  2259  	}
  2260  	return nil, nil
  2261  }
  2262  
  2263  // AllocsByIDPrefix is used to lookup allocs by prefix
  2264  func (s *StateStore) AllocsByIDPrefix(ws memdb.WatchSet, namespace, id string) (memdb.ResultIterator, error) {
  2265  	txn := s.db.Txn(false)
  2266  
  2267  	iter, err := txn.Get("allocs", "id_prefix", id)
  2268  	if err != nil {
  2269  		return nil, fmt.Errorf("alloc lookup failed: %v", err)
  2270  	}
  2271  
  2272  	ws.Add(iter.WatchCh())
  2273  
  2274  	// Wrap the iterator in a filter
  2275  	wrap := memdb.NewFilterIterator(iter, allocNamespaceFilter(namespace))
  2276  	return wrap, nil
  2277  }
  2278  
  2279  // allocNamespaceFilter returns a filter function that filters all allocations
  2280  // not in the given namespace.
  2281  func allocNamespaceFilter(namespace string) func(interface{}) bool {
  2282  	return func(raw interface{}) bool {
  2283  		alloc, ok := raw.(*structs.Allocation)
  2284  		if !ok {
  2285  			return true
  2286  		}
  2287  
  2288  		return alloc.Namespace != namespace
  2289  	}
  2290  }
  2291  
  2292  // AllocsByNode returns all the allocations by node
  2293  func (s *StateStore) AllocsByNode(ws memdb.WatchSet, node string) ([]*structs.Allocation, error) {
  2294  	txn := s.db.Txn(false)
  2295  
  2296  	// Get an iterator over the node allocations, using only the
  2297  	// node prefix which ignores the terminal status
  2298  	iter, err := txn.Get("allocs", "node_prefix", node)
  2299  	if err != nil {
  2300  		return nil, err
  2301  	}
  2302  
  2303  	ws.Add(iter.WatchCh())
  2304  
  2305  	var out []*structs.Allocation
  2306  	for {
  2307  		raw := iter.Next()
  2308  		if raw == nil {
  2309  			break
  2310  		}
  2311  		out = append(out, raw.(*structs.Allocation))
  2312  	}
  2313  	return out, nil
  2314  }
  2315  
  2316  // AllocsByNode returns all the allocations by node and terminal status
  2317  func (s *StateStore) AllocsByNodeTerminal(ws memdb.WatchSet, node string, terminal bool) ([]*structs.Allocation, error) {
  2318  	txn := s.db.Txn(false)
  2319  
  2320  	// Get an iterator over the node allocations
  2321  	iter, err := txn.Get("allocs", "node", node, terminal)
  2322  	if err != nil {
  2323  		return nil, err
  2324  	}
  2325  
  2326  	ws.Add(iter.WatchCh())
  2327  
  2328  	var out []*structs.Allocation
  2329  	for {
  2330  		raw := iter.Next()
  2331  		if raw == nil {
  2332  			break
  2333  		}
  2334  		out = append(out, raw.(*structs.Allocation))
  2335  	}
  2336  	return out, nil
  2337  }
  2338  
  2339  // AllocsByJob returns all the allocations by job id
  2340  func (s *StateStore) AllocsByJob(ws memdb.WatchSet, namespace, jobID string, all bool) ([]*structs.Allocation, error) {
  2341  	txn := s.db.Txn(false)
  2342  
  2343  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
  2344  	if namespace == "" {
  2345  		namespace = structs.DefaultNamespace
  2346  	}
  2347  
  2348  	// Get the job
  2349  	var job *structs.Job
  2350  	rawJob, err := txn.First("jobs", "id", namespace, jobID)
  2351  	if err != nil {
  2352  		return nil, err
  2353  	}
  2354  	if rawJob != nil {
  2355  		job = rawJob.(*structs.Job)
  2356  	}
  2357  
  2358  	// Get an iterator over the node allocations
  2359  	iter, err := txn.Get("allocs", "job", namespace, jobID)
  2360  	if err != nil {
  2361  		return nil, err
  2362  	}
  2363  
  2364  	ws.Add(iter.WatchCh())
  2365  
  2366  	var out []*structs.Allocation
  2367  	for {
  2368  		raw := iter.Next()
  2369  		if raw == nil {
  2370  			break
  2371  		}
  2372  
  2373  		alloc := raw.(*structs.Allocation)
  2374  		// If the allocation belongs to a job with the same ID but a different
  2375  		// create index and we are not getting all the allocations whose Jobs
  2376  		// matches the same Job ID then we skip it
  2377  		if !all && job != nil && alloc.Job.CreateIndex != job.CreateIndex {
  2378  			continue
  2379  		}
  2380  		out = append(out, raw.(*structs.Allocation))
  2381  	}
  2382  	return out, nil
  2383  }
  2384  
  2385  // AllocsByEval returns all the allocations by eval id
  2386  func (s *StateStore) AllocsByEval(ws memdb.WatchSet, evalID string) ([]*structs.Allocation, error) {
  2387  	txn := s.db.Txn(false)
  2388  
  2389  	// Get an iterator over the eval allocations
  2390  	iter, err := txn.Get("allocs", "eval", evalID)
  2391  	if err != nil {
  2392  		return nil, err
  2393  	}
  2394  
  2395  	ws.Add(iter.WatchCh())
  2396  
  2397  	var out []*structs.Allocation
  2398  	for {
  2399  		raw := iter.Next()
  2400  		if raw == nil {
  2401  			break
  2402  		}
  2403  		out = append(out, raw.(*structs.Allocation))
  2404  	}
  2405  	return out, nil
  2406  }
  2407  
  2408  // AllocsByDeployment returns all the allocations by deployment id
  2409  func (s *StateStore) AllocsByDeployment(ws memdb.WatchSet, deploymentID string) ([]*structs.Allocation, error) {
  2410  	txn := s.db.Txn(false)
  2411  
  2412  	// Get an iterator over the deployments allocations
  2413  	iter, err := txn.Get("allocs", "deployment", deploymentID)
  2414  	if err != nil {
  2415  		return nil, err
  2416  	}
  2417  
  2418  	ws.Add(iter.WatchCh())
  2419  
  2420  	var out []*structs.Allocation
  2421  	for {
  2422  		raw := iter.Next()
  2423  		if raw == nil {
  2424  			break
  2425  		}
  2426  		out = append(out, raw.(*structs.Allocation))
  2427  	}
  2428  	return out, nil
  2429  }
  2430  
  2431  // Allocs returns an iterator over all the evaluations
  2432  func (s *StateStore) Allocs(ws memdb.WatchSet) (memdb.ResultIterator, error) {
  2433  	txn := s.db.Txn(false)
  2434  
  2435  	// Walk the entire table
  2436  	iter, err := txn.Get("allocs", "id")
  2437  	if err != nil {
  2438  		return nil, err
  2439  	}
  2440  
  2441  	ws.Add(iter.WatchCh())
  2442  
  2443  	return iter, nil
  2444  }
  2445  
  2446  // AllocsByNamespace returns an iterator over all the allocations in the
  2447  // namespace
  2448  func (s *StateStore) AllocsByNamespace(ws memdb.WatchSet, namespace string) (memdb.ResultIterator, error) {
  2449  	txn := s.db.Txn(false)
  2450  	return s.allocsByNamespaceImpl(ws, txn, namespace)
  2451  }
  2452  
  2453  // allocsByNamespaceImpl returns an iterator over all the allocations in the
  2454  // namespace
  2455  func (s *StateStore) allocsByNamespaceImpl(ws memdb.WatchSet, txn *memdb.Txn, namespace string) (memdb.ResultIterator, error) {
  2456  	// Walk the entire table
  2457  	iter, err := txn.Get("allocs", "namespace", namespace)
  2458  	if err != nil {
  2459  		return nil, err
  2460  	}
  2461  
  2462  	ws.Add(iter.WatchCh())
  2463  
  2464  	return iter, nil
  2465  }
  2466  
  2467  // UpsertVaultAccessors is used to register a set of Vault Accessors
  2468  func (s *StateStore) UpsertVaultAccessor(index uint64, accessors []*structs.VaultAccessor) error {
  2469  	txn := s.db.Txn(true)
  2470  	defer txn.Abort()
  2471  
  2472  	for _, accessor := range accessors {
  2473  		// Set the create index
  2474  		accessor.CreateIndex = index
  2475  
  2476  		// Insert the accessor
  2477  		if err := txn.Insert("vault_accessors", accessor); err != nil {
  2478  			return fmt.Errorf("accessor insert failed: %v", err)
  2479  		}
  2480  	}
  2481  
  2482  	if err := txn.Insert("index", &IndexEntry{"vault_accessors", index}); err != nil {
  2483  		return fmt.Errorf("index update failed: %v", err)
  2484  	}
  2485  
  2486  	txn.Commit()
  2487  	return nil
  2488  }
  2489  
  2490  // DeleteVaultAccessors is used to delete a set of Vault Accessors
  2491  func (s *StateStore) DeleteVaultAccessors(index uint64, accessors []*structs.VaultAccessor) error {
  2492  	txn := s.db.Txn(true)
  2493  	defer txn.Abort()
  2494  
  2495  	// Lookup the accessor
  2496  	for _, accessor := range accessors {
  2497  		// Delete the accessor
  2498  		if err := txn.Delete("vault_accessors", accessor); err != nil {
  2499  			return fmt.Errorf("accessor delete failed: %v", err)
  2500  		}
  2501  	}
  2502  
  2503  	if err := txn.Insert("index", &IndexEntry{"vault_accessors", index}); err != nil {
  2504  		return fmt.Errorf("index update failed: %v", err)
  2505  	}
  2506  
  2507  	txn.Commit()
  2508  	return nil
  2509  }
  2510  
  2511  // VaultAccessor returns the given Vault accessor
  2512  func (s *StateStore) VaultAccessor(ws memdb.WatchSet, accessor string) (*structs.VaultAccessor, error) {
  2513  	txn := s.db.Txn(false)
  2514  
  2515  	watchCh, existing, err := txn.FirstWatch("vault_accessors", "id", accessor)
  2516  	if err != nil {
  2517  		return nil, fmt.Errorf("accessor lookup failed: %v", err)
  2518  	}
  2519  
  2520  	ws.Add(watchCh)
  2521  
  2522  	if existing != nil {
  2523  		return existing.(*structs.VaultAccessor), nil
  2524  	}
  2525  
  2526  	return nil, nil
  2527  }
  2528  
  2529  // VaultAccessors returns an iterator of Vault accessors.
  2530  func (s *StateStore) VaultAccessors(ws memdb.WatchSet) (memdb.ResultIterator, error) {
  2531  	txn := s.db.Txn(false)
  2532  
  2533  	iter, err := txn.Get("vault_accessors", "id")
  2534  	if err != nil {
  2535  		return nil, err
  2536  	}
  2537  
  2538  	ws.Add(iter.WatchCh())
  2539  
  2540  	return iter, nil
  2541  }
  2542  
  2543  // VaultAccessorsByAlloc returns all the Vault accessors by alloc id
  2544  func (s *StateStore) VaultAccessorsByAlloc(ws memdb.WatchSet, allocID string) ([]*structs.VaultAccessor, error) {
  2545  	txn := s.db.Txn(false)
  2546  
  2547  	// Get an iterator over the accessors
  2548  	iter, err := txn.Get("vault_accessors", "alloc_id", allocID)
  2549  	if err != nil {
  2550  		return nil, err
  2551  	}
  2552  
  2553  	ws.Add(iter.WatchCh())
  2554  
  2555  	var out []*structs.VaultAccessor
  2556  	for {
  2557  		raw := iter.Next()
  2558  		if raw == nil {
  2559  			break
  2560  		}
  2561  		out = append(out, raw.(*structs.VaultAccessor))
  2562  	}
  2563  	return out, nil
  2564  }
  2565  
  2566  // VaultAccessorsByNode returns all the Vault accessors by node id
  2567  func (s *StateStore) VaultAccessorsByNode(ws memdb.WatchSet, nodeID string) ([]*structs.VaultAccessor, error) {
  2568  	txn := s.db.Txn(false)
  2569  
  2570  	// Get an iterator over the accessors
  2571  	iter, err := txn.Get("vault_accessors", "node_id", nodeID)
  2572  	if err != nil {
  2573  		return nil, err
  2574  	}
  2575  
  2576  	ws.Add(iter.WatchCh())
  2577  
  2578  	var out []*structs.VaultAccessor
  2579  	for {
  2580  		raw := iter.Next()
  2581  		if raw == nil {
  2582  			break
  2583  		}
  2584  		out = append(out, raw.(*structs.VaultAccessor))
  2585  	}
  2586  	return out, nil
  2587  }
  2588  
  2589  // UpdateDeploymentStatus is used to make deployment status updates and
  2590  // potentially make a evaluation
  2591  func (s *StateStore) UpdateDeploymentStatus(index uint64, req *structs.DeploymentStatusUpdateRequest) error {
  2592  	txn := s.db.Txn(true)
  2593  	defer txn.Abort()
  2594  
  2595  	if err := s.updateDeploymentStatusImpl(index, req.DeploymentUpdate, txn); err != nil {
  2596  		return err
  2597  	}
  2598  
  2599  	// Upsert the job if necessary
  2600  	if req.Job != nil {
  2601  		if err := s.upsertJobImpl(index, req.Job, false, txn); err != nil {
  2602  			return err
  2603  		}
  2604  	}
  2605  
  2606  	// Upsert the optional eval
  2607  	if req.Eval != nil {
  2608  		if err := s.nestedUpsertEval(txn, index, req.Eval); err != nil {
  2609  			return err
  2610  		}
  2611  	}
  2612  
  2613  	txn.Commit()
  2614  	return nil
  2615  }
  2616  
  2617  // updateDeploymentStatusImpl is used to make deployment status updates
  2618  func (s *StateStore) updateDeploymentStatusImpl(index uint64, u *structs.DeploymentStatusUpdate, txn *memdb.Txn) error {
  2619  	// Retrieve deployment
  2620  	ws := memdb.NewWatchSet()
  2621  	deployment, err := s.deploymentByIDImpl(ws, u.DeploymentID, txn)
  2622  	if err != nil {
  2623  		return err
  2624  	} else if deployment == nil {
  2625  		return fmt.Errorf("Deployment ID %q couldn't be updated as it does not exist", u.DeploymentID)
  2626  	} else if !deployment.Active() {
  2627  		return fmt.Errorf("Deployment %q has terminal status %q:", deployment.ID, deployment.Status)
  2628  	}
  2629  
  2630  	// Apply the new status
  2631  	copy := deployment.Copy()
  2632  	copy.Status = u.Status
  2633  	copy.StatusDescription = u.StatusDescription
  2634  	copy.ModifyIndex = index
  2635  
  2636  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
  2637  	if copy.Namespace == "" {
  2638  		copy.Namespace = structs.DefaultNamespace
  2639  	}
  2640  
  2641  	// Insert the deployment
  2642  	if err := txn.Insert("deployment", copy); err != nil {
  2643  		return err
  2644  	}
  2645  
  2646  	// Update the index
  2647  	if err := txn.Insert("index", &IndexEntry{"deployment", index}); err != nil {
  2648  		return fmt.Errorf("index update failed: %v", err)
  2649  	}
  2650  
  2651  	// If the deployment is being marked as complete, set the job to stable.
  2652  	if copy.Status == structs.DeploymentStatusSuccessful {
  2653  		if err := s.updateJobStabilityImpl(index, copy.Namespace, copy.JobID, copy.JobVersion, true, txn); err != nil {
  2654  			return fmt.Errorf("failed to update job stability: %v", err)
  2655  		}
  2656  	}
  2657  
  2658  	return nil
  2659  }
  2660  
  2661  // UpdateJobStability updates the stability of the given job and version to the
  2662  // desired status.
  2663  func (s *StateStore) UpdateJobStability(index uint64, namespace, jobID string, jobVersion uint64, stable bool) error {
  2664  	txn := s.db.Txn(true)
  2665  	defer txn.Abort()
  2666  
  2667  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
  2668  	if namespace == "" {
  2669  		namespace = structs.DefaultNamespace
  2670  	}
  2671  
  2672  	if err := s.updateJobStabilityImpl(index, namespace, jobID, jobVersion, stable, txn); err != nil {
  2673  		return err
  2674  	}
  2675  
  2676  	txn.Commit()
  2677  	return nil
  2678  }
  2679  
  2680  // updateJobStabilityImpl updates the stability of the given job and version
  2681  func (s *StateStore) updateJobStabilityImpl(index uint64, namespace, jobID string, jobVersion uint64, stable bool, txn *memdb.Txn) error {
  2682  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
  2683  	if namespace == "" {
  2684  		namespace = structs.DefaultNamespace
  2685  	}
  2686  
  2687  	// Get the job that is referenced
  2688  	job, err := s.jobByIDAndVersionImpl(nil, namespace, jobID, jobVersion, txn)
  2689  	if err != nil {
  2690  		return err
  2691  	}
  2692  
  2693  	// Has already been cleared, nothing to do
  2694  	if job == nil {
  2695  		return nil
  2696  	}
  2697  
  2698  	// If the job already has the desired stability, nothing to do
  2699  	if job.Stable == stable {
  2700  		return nil
  2701  	}
  2702  
  2703  	copy := job.Copy()
  2704  	copy.Stable = stable
  2705  	return s.upsertJobImpl(index, copy, true, txn)
  2706  }
  2707  
  2708  // UpdateDeploymentPromotion is used to promote canaries in a deployment and
  2709  // potentially make a evaluation
  2710  func (s *StateStore) UpdateDeploymentPromotion(index uint64, req *structs.ApplyDeploymentPromoteRequest) error {
  2711  	txn := s.db.Txn(true)
  2712  	defer txn.Abort()
  2713  
  2714  	// Retrieve deployment and ensure it is not terminal and is active
  2715  	ws := memdb.NewWatchSet()
  2716  	deployment, err := s.deploymentByIDImpl(ws, req.DeploymentID, txn)
  2717  	if err != nil {
  2718  		return err
  2719  	} else if deployment == nil {
  2720  		return fmt.Errorf("Deployment ID %q couldn't be updated as it does not exist", req.DeploymentID)
  2721  	} else if !deployment.Active() {
  2722  		return fmt.Errorf("Deployment %q has terminal status %q:", deployment.ID, deployment.Status)
  2723  	}
  2724  
  2725  	// Retrieve effected allocations
  2726  	iter, err := txn.Get("allocs", "deployment", req.DeploymentID)
  2727  	if err != nil {
  2728  		return err
  2729  	}
  2730  
  2731  	// groupIndex is a map of groups being promoted
  2732  	groupIndex := make(map[string]struct{}, len(req.Groups))
  2733  	for _, g := range req.Groups {
  2734  		groupIndex[g] = struct{}{}
  2735  	}
  2736  
  2737  	// canaryIndex is the set of placed canaries in the deployment
  2738  	canaryIndex := make(map[string]struct{}, len(deployment.TaskGroups))
  2739  	for _, state := range deployment.TaskGroups {
  2740  		for _, c := range state.PlacedCanaries {
  2741  			canaryIndex[c] = struct{}{}
  2742  		}
  2743  	}
  2744  
  2745  	// healthyCounts is a mapping of group to the number of healthy canaries
  2746  	healthyCounts := make(map[string]int, len(deployment.TaskGroups))
  2747  
  2748  	// promotable is the set of allocations that we can move from canary to
  2749  	// non-canary
  2750  	var promotable []*structs.Allocation
  2751  
  2752  	for {
  2753  		raw := iter.Next()
  2754  		if raw == nil {
  2755  			break
  2756  		}
  2757  
  2758  		alloc := raw.(*structs.Allocation)
  2759  
  2760  		// Check that the alloc is a canary
  2761  		if _, ok := canaryIndex[alloc.ID]; !ok {
  2762  			continue
  2763  		}
  2764  
  2765  		// Check that the canary is part of a group being promoted
  2766  		if _, ok := groupIndex[alloc.TaskGroup]; !req.All && !ok {
  2767  			continue
  2768  		}
  2769  
  2770  		// Ensure the canaries are healthy
  2771  		if alloc.TerminalStatus() || !alloc.DeploymentStatus.IsHealthy() {
  2772  			continue
  2773  		}
  2774  
  2775  		healthyCounts[alloc.TaskGroup]++
  2776  		promotable = append(promotable, alloc)
  2777  	}
  2778  
  2779  	// Determine if we have enough healthy allocations
  2780  	var unhealthyErr multierror.Error
  2781  	for tg, state := range deployment.TaskGroups {
  2782  		if _, ok := groupIndex[tg]; !req.All && !ok {
  2783  			continue
  2784  		}
  2785  
  2786  		need := state.DesiredCanaries
  2787  		if need == 0 {
  2788  			continue
  2789  		}
  2790  
  2791  		if have := healthyCounts[tg]; have < need {
  2792  			multierror.Append(&unhealthyErr, fmt.Errorf("Task group %q has %d/%d healthy allocations", tg, have, need))
  2793  		}
  2794  	}
  2795  
  2796  	if err := unhealthyErr.ErrorOrNil(); err != nil {
  2797  		return err
  2798  	}
  2799  
  2800  	// Update deployment
  2801  	copy := deployment.Copy()
  2802  	copy.ModifyIndex = index
  2803  	for tg, status := range copy.TaskGroups {
  2804  		_, ok := groupIndex[tg]
  2805  		if !req.All && !ok {
  2806  			continue
  2807  		}
  2808  
  2809  		status.Promoted = true
  2810  	}
  2811  
  2812  	// If the deployment no longer needs promotion, update its status
  2813  	if !copy.RequiresPromotion() && copy.Status == structs.DeploymentStatusRunning {
  2814  		copy.StatusDescription = structs.DeploymentStatusDescriptionRunning
  2815  	}
  2816  
  2817  	// Insert the deployment
  2818  	if err := s.upsertDeploymentImpl(index, copy, txn); err != nil {
  2819  		return err
  2820  	}
  2821  
  2822  	// Upsert the optional eval
  2823  	if req.Eval != nil {
  2824  		if err := s.nestedUpsertEval(txn, index, req.Eval); err != nil {
  2825  			return err
  2826  		}
  2827  	}
  2828  
  2829  	// For each promotable allocation remoce the canary field
  2830  	for _, alloc := range promotable {
  2831  		promoted := alloc.Copy()
  2832  		promoted.DeploymentStatus.Canary = false
  2833  		promoted.DeploymentStatus.ModifyIndex = index
  2834  		promoted.ModifyIndex = index
  2835  		promoted.AllocModifyIndex = index
  2836  
  2837  		if err := txn.Insert("allocs", promoted); err != nil {
  2838  			return fmt.Errorf("alloc insert failed: %v", err)
  2839  		}
  2840  	}
  2841  
  2842  	// Update the alloc index
  2843  	if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil {
  2844  		return fmt.Errorf("index update failed: %v", err)
  2845  	}
  2846  
  2847  	txn.Commit()
  2848  	return nil
  2849  }
  2850  
  2851  // UpdateDeploymentAllocHealth is used to update the health of allocations as
  2852  // part of the deployment and potentially make a evaluation
  2853  func (s *StateStore) UpdateDeploymentAllocHealth(index uint64, req *structs.ApplyDeploymentAllocHealthRequest) error {
  2854  	txn := s.db.Txn(true)
  2855  	defer txn.Abort()
  2856  
  2857  	// Retrieve deployment and ensure it is not terminal and is active
  2858  	ws := memdb.NewWatchSet()
  2859  	deployment, err := s.deploymentByIDImpl(ws, req.DeploymentID, txn)
  2860  	if err != nil {
  2861  		return err
  2862  	} else if deployment == nil {
  2863  		return fmt.Errorf("Deployment ID %q couldn't be updated as it does not exist", req.DeploymentID)
  2864  	} else if !deployment.Active() {
  2865  		return fmt.Errorf("Deployment %q has terminal status %q:", deployment.ID, deployment.Status)
  2866  	}
  2867  
  2868  	// Update the health status of each allocation
  2869  	if total := len(req.HealthyAllocationIDs) + len(req.UnhealthyAllocationIDs); total != 0 {
  2870  		setAllocHealth := func(id string, healthy bool, ts time.Time) error {
  2871  			existing, err := txn.First("allocs", "id", id)
  2872  			if err != nil {
  2873  				return fmt.Errorf("alloc %q lookup failed: %v", id, err)
  2874  			}
  2875  			if existing == nil {
  2876  				return fmt.Errorf("unknown alloc %q", id)
  2877  			}
  2878  
  2879  			old := existing.(*structs.Allocation)
  2880  			if old.DeploymentID != req.DeploymentID {
  2881  				return fmt.Errorf("alloc %q is not part of deployment %q", id, req.DeploymentID)
  2882  			}
  2883  
  2884  			// Set the health
  2885  			copy := old.Copy()
  2886  			if copy.DeploymentStatus == nil {
  2887  				copy.DeploymentStatus = &structs.AllocDeploymentStatus{}
  2888  			}
  2889  			copy.DeploymentStatus.Healthy = helper.BoolToPtr(healthy)
  2890  			copy.DeploymentStatus.Timestamp = ts
  2891  			copy.DeploymentStatus.ModifyIndex = index
  2892  			copy.ModifyIndex = index
  2893  
  2894  			if err := s.updateDeploymentWithAlloc(index, copy, old, txn); err != nil {
  2895  				return fmt.Errorf("error updating deployment: %v", err)
  2896  			}
  2897  
  2898  			if err := txn.Insert("allocs", copy); err != nil {
  2899  				return fmt.Errorf("alloc insert failed: %v", err)
  2900  			}
  2901  
  2902  			return nil
  2903  		}
  2904  
  2905  		for _, id := range req.HealthyAllocationIDs {
  2906  			if err := setAllocHealth(id, true, req.Timestamp); err != nil {
  2907  				return err
  2908  			}
  2909  		}
  2910  		for _, id := range req.UnhealthyAllocationIDs {
  2911  			if err := setAllocHealth(id, false, req.Timestamp); err != nil {
  2912  				return err
  2913  			}
  2914  		}
  2915  
  2916  		// Update the indexes
  2917  		if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil {
  2918  			return fmt.Errorf("index update failed: %v", err)
  2919  		}
  2920  	}
  2921  
  2922  	// Update the deployment status as needed.
  2923  	if req.DeploymentUpdate != nil {
  2924  		if err := s.updateDeploymentStatusImpl(index, req.DeploymentUpdate, txn); err != nil {
  2925  			return err
  2926  		}
  2927  	}
  2928  
  2929  	// Upsert the job if necessary
  2930  	if req.Job != nil {
  2931  		if err := s.upsertJobImpl(index, req.Job, false, txn); err != nil {
  2932  			return err
  2933  		}
  2934  	}
  2935  
  2936  	// Upsert the optional eval
  2937  	if req.Eval != nil {
  2938  		if err := s.nestedUpsertEval(txn, index, req.Eval); err != nil {
  2939  			return err
  2940  		}
  2941  	}
  2942  
  2943  	txn.Commit()
  2944  	return nil
  2945  }
  2946  
  2947  // LastIndex returns the greatest index value for all indexes
  2948  func (s *StateStore) LatestIndex() (uint64, error) {
  2949  	indexes, err := s.Indexes()
  2950  	if err != nil {
  2951  		return 0, err
  2952  	}
  2953  
  2954  	var max uint64 = 0
  2955  	for {
  2956  		raw := indexes.Next()
  2957  		if raw == nil {
  2958  			break
  2959  		}
  2960  
  2961  		// Prepare the request struct
  2962  		idx := raw.(*IndexEntry)
  2963  
  2964  		// Determine the max
  2965  		if idx.Value > max {
  2966  			max = idx.Value
  2967  		}
  2968  	}
  2969  
  2970  	return max, nil
  2971  }
  2972  
  2973  // Index finds the matching index value
  2974  func (s *StateStore) Index(name string) (uint64, error) {
  2975  	txn := s.db.Txn(false)
  2976  
  2977  	// Lookup the first matching index
  2978  	out, err := txn.First("index", "id", name)
  2979  	if err != nil {
  2980  		return 0, err
  2981  	}
  2982  	if out == nil {
  2983  		return 0, nil
  2984  	}
  2985  	return out.(*IndexEntry).Value, nil
  2986  }
  2987  
  2988  // RemoveIndex is a helper method to remove an index for testing purposes
  2989  func (s *StateStore) RemoveIndex(name string) error {
  2990  	txn := s.db.Txn(true)
  2991  	defer txn.Abort()
  2992  
  2993  	if _, err := txn.DeleteAll("index", "id", name); err != nil {
  2994  		return err
  2995  	}
  2996  
  2997  	txn.Commit()
  2998  	return nil
  2999  }
  3000  
  3001  // Indexes returns an iterator over all the indexes
  3002  func (s *StateStore) Indexes() (memdb.ResultIterator, error) {
  3003  	txn := s.db.Txn(false)
  3004  
  3005  	// Walk the entire nodes table
  3006  	iter, err := txn.Get("index", "id")
  3007  	if err != nil {
  3008  		return nil, err
  3009  	}
  3010  	return iter, nil
  3011  }
  3012  
  3013  // ReconcileJobSummaries re-creates summaries for all jobs present in the state
  3014  // store
  3015  func (s *StateStore) ReconcileJobSummaries(index uint64) error {
  3016  	txn := s.db.Txn(true)
  3017  	defer txn.Abort()
  3018  
  3019  	// Get all the jobs
  3020  	iter, err := txn.Get("jobs", "id")
  3021  	if err != nil {
  3022  		return err
  3023  	}
  3024  	for {
  3025  		rawJob := iter.Next()
  3026  		if rawJob == nil {
  3027  			break
  3028  		}
  3029  		job := rawJob.(*structs.Job)
  3030  
  3031  		// Create a job summary for the job
  3032  		summary := &structs.JobSummary{
  3033  			JobID:     job.ID,
  3034  			Namespace: job.Namespace,
  3035  			Summary:   make(map[string]structs.TaskGroupSummary),
  3036  		}
  3037  		for _, tg := range job.TaskGroups {
  3038  			summary.Summary[tg.Name] = structs.TaskGroupSummary{}
  3039  		}
  3040  
  3041  		// COMPAT 0.7: Upgrade old objects that do not have namespaces
  3042  		if job.Namespace == "" {
  3043  			job.Namespace = structs.DefaultNamespace
  3044  		}
  3045  
  3046  		// Find all the allocations for the jobs
  3047  		iterAllocs, err := txn.Get("allocs", "job", job.Namespace, job.ID)
  3048  		if err != nil {
  3049  			return err
  3050  		}
  3051  
  3052  		// Calculate the summary for the job
  3053  		for {
  3054  			rawAlloc := iterAllocs.Next()
  3055  			if rawAlloc == nil {
  3056  				break
  3057  			}
  3058  			alloc := rawAlloc.(*structs.Allocation)
  3059  
  3060  			// Ignore the allocation if it doesn't belong to the currently
  3061  			// registered job. The allocation is checked because of issue #2304
  3062  			if alloc.Job == nil || alloc.Job.CreateIndex != job.CreateIndex {
  3063  				continue
  3064  			}
  3065  
  3066  			tg := summary.Summary[alloc.TaskGroup]
  3067  			switch alloc.ClientStatus {
  3068  			case structs.AllocClientStatusFailed:
  3069  				tg.Failed += 1
  3070  			case structs.AllocClientStatusLost:
  3071  				tg.Lost += 1
  3072  			case structs.AllocClientStatusComplete:
  3073  				tg.Complete += 1
  3074  			case structs.AllocClientStatusRunning:
  3075  				tg.Running += 1
  3076  			case structs.AllocClientStatusPending:
  3077  				tg.Starting += 1
  3078  			default:
  3079  				s.logger.Printf("[ERR] state_store: invalid client status: %v in allocation %q", alloc.ClientStatus, alloc.ID)
  3080  			}
  3081  			summary.Summary[alloc.TaskGroup] = tg
  3082  		}
  3083  
  3084  		// Set the create index of the summary same as the job's create index
  3085  		// and the modify index to the current index
  3086  		summary.CreateIndex = job.CreateIndex
  3087  		summary.ModifyIndex = index
  3088  
  3089  		// Insert the job summary
  3090  		if err := txn.Insert("job_summary", summary); err != nil {
  3091  			return fmt.Errorf("error inserting job summary: %v", err)
  3092  		}
  3093  	}
  3094  
  3095  	// Update the indexes table for job summary
  3096  	if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil {
  3097  		return fmt.Errorf("index update failed: %v", err)
  3098  	}
  3099  	txn.Commit()
  3100  	return nil
  3101  }
  3102  
  3103  // setJobStatuses is a helper for calling setJobStatus on multiple jobs by ID.
  3104  // It takes a map of job IDs to an optional forceStatus string. It returns an
  3105  // error if the job doesn't exist or setJobStatus fails.
  3106  func (s *StateStore) setJobStatuses(index uint64, txn *memdb.Txn,
  3107  	jobs map[structs.NamespacedID]string, evalDelete bool) error {
  3108  	for tuple, forceStatus := range jobs {
  3109  		// COMPAT 0.7: Upgrade old objects that do not have namespaces
  3110  		if tuple.Namespace == "" {
  3111  			tuple.Namespace = structs.DefaultNamespace
  3112  		}
  3113  
  3114  		existing, err := txn.First("jobs", "id", tuple.Namespace, tuple.ID)
  3115  		if err != nil {
  3116  			return fmt.Errorf("job lookup failed: %v", err)
  3117  		}
  3118  
  3119  		if existing == nil {
  3120  			continue
  3121  		}
  3122  
  3123  		if err := s.setJobStatus(index, txn, existing.(*structs.Job), evalDelete, forceStatus); err != nil {
  3124  			return err
  3125  		}
  3126  	}
  3127  
  3128  	return nil
  3129  }
  3130  
  3131  // setJobStatus sets the status of the job by looking up associated evaluations
  3132  // and allocations. evalDelete should be set to true if setJobStatus is being
  3133  // called because an evaluation is being deleted (potentially because of garbage
  3134  // collection). If forceStatus is non-empty, the job's status will be set to the
  3135  // passed status.
  3136  func (s *StateStore) setJobStatus(index uint64, txn *memdb.Txn,
  3137  	job *structs.Job, evalDelete bool, forceStatus string) error {
  3138  
  3139  	// Capture the current status so we can check if there is a change
  3140  	oldStatus := job.Status
  3141  	if index == job.CreateIndex {
  3142  		oldStatus = ""
  3143  	}
  3144  	newStatus := forceStatus
  3145  
  3146  	// If forceStatus is not set, compute the jobs status.
  3147  	if forceStatus == "" {
  3148  		var err error
  3149  		newStatus, err = s.getJobStatus(txn, job, evalDelete)
  3150  		if err != nil {
  3151  			return err
  3152  		}
  3153  	}
  3154  
  3155  	// Fast-path if nothing has changed.
  3156  	if oldStatus == newStatus {
  3157  		return nil
  3158  	}
  3159  
  3160  	// Copy and update the existing job
  3161  	updated := job.Copy()
  3162  	updated.Status = newStatus
  3163  	updated.ModifyIndex = index
  3164  
  3165  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
  3166  	if updated.Namespace == "" {
  3167  		updated.Namespace = structs.DefaultNamespace
  3168  	}
  3169  
  3170  	// Insert the job
  3171  	if err := txn.Insert("jobs", updated); err != nil {
  3172  		return fmt.Errorf("job insert failed: %v", err)
  3173  	}
  3174  	if err := txn.Insert("index", &IndexEntry{"jobs", index}); err != nil {
  3175  		return fmt.Errorf("index update failed: %v", err)
  3176  	}
  3177  
  3178  	// Update the children summary
  3179  	if updated.ParentID != "" {
  3180  		// Try to update the summary of the parent job summary
  3181  		summaryRaw, err := txn.First("job_summary", "id", updated.Namespace, updated.ParentID)
  3182  		if err != nil {
  3183  			return fmt.Errorf("unable to retrieve summary for parent job: %v", err)
  3184  		}
  3185  
  3186  		// Only continue if the summary exists. It could not exist if the parent
  3187  		// job was removed
  3188  		if summaryRaw != nil {
  3189  			existing := summaryRaw.(*structs.JobSummary)
  3190  			pSummary := existing.Copy()
  3191  			if pSummary.Children == nil {
  3192  				pSummary.Children = new(structs.JobChildrenSummary)
  3193  			}
  3194  
  3195  			// COMPAT 0.7: Upgrade old objects that do not have namespaces
  3196  			if pSummary.Namespace == "" {
  3197  				pSummary.Namespace = structs.DefaultNamespace
  3198  			}
  3199  
  3200  			// Determine the transition and update the correct fields
  3201  			children := pSummary.Children
  3202  
  3203  			// Decrement old status
  3204  			if oldStatus != "" {
  3205  				switch oldStatus {
  3206  				case structs.JobStatusPending:
  3207  					children.Pending--
  3208  				case structs.JobStatusRunning:
  3209  					children.Running--
  3210  				case structs.JobStatusDead:
  3211  					children.Dead--
  3212  				default:
  3213  					return fmt.Errorf("unknown old job status %q", oldStatus)
  3214  				}
  3215  			}
  3216  
  3217  			// Increment new status
  3218  			switch newStatus {
  3219  			case structs.JobStatusPending:
  3220  				children.Pending++
  3221  			case structs.JobStatusRunning:
  3222  				children.Running++
  3223  			case structs.JobStatusDead:
  3224  				children.Dead++
  3225  			default:
  3226  				return fmt.Errorf("unknown new job status %q", newStatus)
  3227  			}
  3228  
  3229  			// Update the index
  3230  			pSummary.ModifyIndex = index
  3231  
  3232  			// Insert the summary
  3233  			if err := txn.Insert("job_summary", pSummary); err != nil {
  3234  				return fmt.Errorf("job summary insert failed: %v", err)
  3235  			}
  3236  			if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil {
  3237  				return fmt.Errorf("index update failed: %v", err)
  3238  			}
  3239  		}
  3240  	}
  3241  
  3242  	return nil
  3243  }
  3244  
  3245  func (s *StateStore) getJobStatus(txn *memdb.Txn, job *structs.Job, evalDelete bool) (string, error) {
  3246  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
  3247  	if job.Namespace == "" {
  3248  		job.Namespace = structs.DefaultNamespace
  3249  	}
  3250  
  3251  	// System, Periodic and Parameterized jobs are running until explicitly
  3252  	// stopped
  3253  	if job.Type == structs.JobTypeSystem || job.IsParameterized() || job.IsPeriodic() {
  3254  		if job.Stop {
  3255  			return structs.JobStatusDead, nil
  3256  		}
  3257  
  3258  		return structs.JobStatusRunning, nil
  3259  	}
  3260  
  3261  	allocs, err := txn.Get("allocs", "job", job.Namespace, job.ID)
  3262  	if err != nil {
  3263  		return "", err
  3264  	}
  3265  
  3266  	// If there is a non-terminal allocation, the job is running.
  3267  	hasAlloc := false
  3268  	for alloc := allocs.Next(); alloc != nil; alloc = allocs.Next() {
  3269  		hasAlloc = true
  3270  		if !alloc.(*structs.Allocation).TerminalStatus() {
  3271  			return structs.JobStatusRunning, nil
  3272  		}
  3273  	}
  3274  
  3275  	evals, err := txn.Get("evals", "job_prefix", job.Namespace, job.ID)
  3276  	if err != nil {
  3277  		return "", err
  3278  	}
  3279  
  3280  	hasEval := false
  3281  	for raw := evals.Next(); raw != nil; raw = evals.Next() {
  3282  		e := raw.(*structs.Evaluation)
  3283  
  3284  		// Filter non-exact matches
  3285  		if e.JobID != job.ID {
  3286  			continue
  3287  		}
  3288  
  3289  		hasEval = true
  3290  		if !e.TerminalStatus() {
  3291  			return structs.JobStatusPending, nil
  3292  		}
  3293  	}
  3294  
  3295  	// The job is dead if all the allocations and evals are terminal or if there
  3296  	// are no evals because of garbage collection.
  3297  	if evalDelete || hasEval || hasAlloc {
  3298  		return structs.JobStatusDead, nil
  3299  	}
  3300  
  3301  	return structs.JobStatusPending, nil
  3302  }
  3303  
  3304  // updateSummaryWithJob creates or updates job summaries when new jobs are
  3305  // upserted or existing ones are updated
  3306  func (s *StateStore) updateSummaryWithJob(index uint64, job *structs.Job,
  3307  	txn *memdb.Txn) error {
  3308  
  3309  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
  3310  	if job.Namespace == "" {
  3311  		job.Namespace = structs.DefaultNamespace
  3312  	}
  3313  
  3314  	// Update the job summary
  3315  	summaryRaw, err := txn.First("job_summary", "id", job.Namespace, job.ID)
  3316  	if err != nil {
  3317  		return fmt.Errorf("job summary lookup failed: %v", err)
  3318  	}
  3319  
  3320  	// Get the summary or create if necessary
  3321  	var summary *structs.JobSummary
  3322  	hasSummaryChanged := false
  3323  	if summaryRaw != nil {
  3324  		summary = summaryRaw.(*structs.JobSummary).Copy()
  3325  	} else {
  3326  		summary = &structs.JobSummary{
  3327  			JobID:       job.ID,
  3328  			Namespace:   job.Namespace,
  3329  			Summary:     make(map[string]structs.TaskGroupSummary),
  3330  			Children:    new(structs.JobChildrenSummary),
  3331  			CreateIndex: index,
  3332  		}
  3333  		hasSummaryChanged = true
  3334  	}
  3335  
  3336  	for _, tg := range job.TaskGroups {
  3337  		if _, ok := summary.Summary[tg.Name]; !ok {
  3338  			newSummary := structs.TaskGroupSummary{
  3339  				Complete: 0,
  3340  				Failed:   0,
  3341  				Running:  0,
  3342  				Starting: 0,
  3343  			}
  3344  			summary.Summary[tg.Name] = newSummary
  3345  			hasSummaryChanged = true
  3346  		}
  3347  	}
  3348  
  3349  	// The job summary has changed, so update the modify index.
  3350  	if hasSummaryChanged {
  3351  		summary.ModifyIndex = index
  3352  
  3353  		// COMPAT 0.7: Upgrade old objects that do not have namespaces
  3354  		if summary.Namespace == "" {
  3355  			summary.Namespace = structs.DefaultNamespace
  3356  		}
  3357  
  3358  		// Update the indexes table for job summary
  3359  		if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil {
  3360  			return fmt.Errorf("index update failed: %v", err)
  3361  		}
  3362  		if err := txn.Insert("job_summary", summary); err != nil {
  3363  			return err
  3364  		}
  3365  	}
  3366  
  3367  	return nil
  3368  }
  3369  
  3370  // updateDeploymentWithAlloc is used to update the deployment state associated
  3371  // with the given allocation. The passed alloc may be updated if the deployment
  3372  // status has changed to capture the modify index at which it has changed.
  3373  func (s *StateStore) updateDeploymentWithAlloc(index uint64, alloc, existing *structs.Allocation, txn *memdb.Txn) error {
  3374  	// Nothing to do if the allocation is not associated with a deployment
  3375  	if alloc.DeploymentID == "" {
  3376  		return nil
  3377  	}
  3378  
  3379  	// Get the deployment
  3380  	ws := memdb.NewWatchSet()
  3381  	deployment, err := s.deploymentByIDImpl(ws, alloc.DeploymentID, txn)
  3382  	if err != nil {
  3383  		return err
  3384  	}
  3385  	if deployment == nil {
  3386  		return nil
  3387  	}
  3388  
  3389  	// Retrieve the deployment state object
  3390  	_, ok := deployment.TaskGroups[alloc.TaskGroup]
  3391  	if !ok {
  3392  		// If the task group isn't part of the deployment, the task group wasn't
  3393  		// part of a rolling update so nothing to do
  3394  		return nil
  3395  	}
  3396  
  3397  	// Do not modify in-place. Instead keep track of what must be done
  3398  	placed := 0
  3399  	healthy := 0
  3400  	unhealthy := 0
  3401  
  3402  	// If there was no existing allocation, this is a placement and we increment
  3403  	// the placement
  3404  	existingHealthSet := existing != nil && existing.DeploymentStatus.HasHealth()
  3405  	allocHealthSet := alloc.DeploymentStatus.HasHealth()
  3406  	if existing == nil || existing.DeploymentID != alloc.DeploymentID {
  3407  		placed++
  3408  	} else if !existingHealthSet && allocHealthSet {
  3409  		if *alloc.DeploymentStatus.Healthy {
  3410  			healthy++
  3411  		} else {
  3412  			unhealthy++
  3413  		}
  3414  	} else if existingHealthSet && allocHealthSet {
  3415  		// See if it has gone from healthy to unhealthy
  3416  		if *existing.DeploymentStatus.Healthy && !*alloc.DeploymentStatus.Healthy {
  3417  			healthy--
  3418  			unhealthy++
  3419  		}
  3420  	}
  3421  
  3422  	// Nothing to do
  3423  	if placed == 0 && healthy == 0 && unhealthy == 0 {
  3424  		return nil
  3425  	}
  3426  
  3427  	// Update the allocation's deployment status modify index
  3428  	if alloc.DeploymentStatus != nil && healthy+unhealthy != 0 {
  3429  		alloc.DeploymentStatus.ModifyIndex = index
  3430  	}
  3431  
  3432  	// Create a copy of the deployment object
  3433  	deploymentCopy := deployment.Copy()
  3434  	deploymentCopy.ModifyIndex = index
  3435  
  3436  	state := deploymentCopy.TaskGroups[alloc.TaskGroup]
  3437  	state.PlacedAllocs += placed
  3438  	state.HealthyAllocs += healthy
  3439  	state.UnhealthyAllocs += unhealthy
  3440  
  3441  	// Update the progress deadline
  3442  	if pd := state.ProgressDeadline; pd != 0 {
  3443  		// If we are the first placed allocation for the deployment start the progress deadline.
  3444  		if placed != 0 && state.RequireProgressBy.IsZero() {
  3445  			// Use modify time instead of create time because we may in-place
  3446  			// update the allocation to be part of a new deployment.
  3447  			state.RequireProgressBy = time.Unix(0, alloc.ModifyTime).Add(pd)
  3448  		} else if healthy != 0 {
  3449  			if d := alloc.DeploymentStatus.Timestamp.Add(pd); d.After(state.RequireProgressBy) {
  3450  				state.RequireProgressBy = d
  3451  			}
  3452  		}
  3453  	}
  3454  
  3455  	// Upsert the deployment
  3456  	if err := s.upsertDeploymentImpl(index, deploymentCopy, txn); err != nil {
  3457  		return err
  3458  	}
  3459  
  3460  	return nil
  3461  }
  3462  
  3463  // updateSummaryWithAlloc updates the job summary when allocations are updated
  3464  // or inserted
  3465  func (s *StateStore) updateSummaryWithAlloc(index uint64, alloc *structs.Allocation,
  3466  	existingAlloc *structs.Allocation, txn *memdb.Txn) error {
  3467  
  3468  	// We don't have to update the summary if the job is missing
  3469  	if alloc.Job == nil {
  3470  		return nil
  3471  	}
  3472  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
  3473  	if alloc.Namespace == "" {
  3474  		alloc.Namespace = structs.DefaultNamespace
  3475  	}
  3476  
  3477  	summaryRaw, err := txn.First("job_summary", "id", alloc.Namespace, alloc.JobID)
  3478  	if err != nil {
  3479  		return fmt.Errorf("unable to lookup job summary for job id %q in namespace %q: %v", alloc.JobID, alloc.Namespace, err)
  3480  	}
  3481  
  3482  	if summaryRaw == nil {
  3483  		// Check if the job is de-registered
  3484  		rawJob, err := txn.First("jobs", "id", alloc.Namespace, alloc.JobID)
  3485  		if err != nil {
  3486  			return fmt.Errorf("unable to query job: %v", err)
  3487  		}
  3488  
  3489  		// If the job is de-registered then we skip updating it's summary
  3490  		if rawJob == nil {
  3491  			return nil
  3492  		}
  3493  
  3494  		return fmt.Errorf("job summary for job %q in namespace %q is not present", alloc.JobID, alloc.Namespace)
  3495  	}
  3496  
  3497  	// Get a copy of the existing summary
  3498  	jobSummary := summaryRaw.(*structs.JobSummary).Copy()
  3499  
  3500  	// Not updating the job summary because the allocation doesn't belong to the
  3501  	// currently registered job
  3502  	if jobSummary.CreateIndex != alloc.Job.CreateIndex {
  3503  		return nil
  3504  	}
  3505  
  3506  	tgSummary, ok := jobSummary.Summary[alloc.TaskGroup]
  3507  	if !ok {
  3508  		return fmt.Errorf("unable to find task group in the job summary: %v", alloc.TaskGroup)
  3509  	}
  3510  
  3511  	summaryChanged := false
  3512  	if existingAlloc == nil {
  3513  		switch alloc.DesiredStatus {
  3514  		case structs.AllocDesiredStatusStop, structs.AllocDesiredStatusEvict:
  3515  			s.logger.Printf("[ERR] state_store: new allocation inserted into state store with id: %v and state: %v",
  3516  				alloc.ID, alloc.DesiredStatus)
  3517  		}
  3518  		switch alloc.ClientStatus {
  3519  		case structs.AllocClientStatusPending:
  3520  			tgSummary.Starting += 1
  3521  			if tgSummary.Queued > 0 {
  3522  				tgSummary.Queued -= 1
  3523  			}
  3524  			summaryChanged = true
  3525  		case structs.AllocClientStatusRunning, structs.AllocClientStatusFailed,
  3526  			structs.AllocClientStatusComplete:
  3527  			s.logger.Printf("[ERR] state_store: new allocation inserted into state store with id: %v and state: %v",
  3528  				alloc.ID, alloc.ClientStatus)
  3529  		}
  3530  	} else if existingAlloc.ClientStatus != alloc.ClientStatus {
  3531  		// Incrementing the client of the bin of the current state
  3532  		switch alloc.ClientStatus {
  3533  		case structs.AllocClientStatusRunning:
  3534  			tgSummary.Running += 1
  3535  		case structs.AllocClientStatusFailed:
  3536  			tgSummary.Failed += 1
  3537  		case structs.AllocClientStatusPending:
  3538  			tgSummary.Starting += 1
  3539  		case structs.AllocClientStatusComplete:
  3540  			tgSummary.Complete += 1
  3541  		case structs.AllocClientStatusLost:
  3542  			tgSummary.Lost += 1
  3543  		}
  3544  
  3545  		// Decrementing the count of the bin of the last state
  3546  		switch existingAlloc.ClientStatus {
  3547  		case structs.AllocClientStatusRunning:
  3548  			if tgSummary.Running > 0 {
  3549  				tgSummary.Running -= 1
  3550  			}
  3551  		case structs.AllocClientStatusPending:
  3552  			if tgSummary.Starting > 0 {
  3553  				tgSummary.Starting -= 1
  3554  			}
  3555  		case structs.AllocClientStatusLost:
  3556  			if tgSummary.Lost > 0 {
  3557  				tgSummary.Lost -= 1
  3558  			}
  3559  		case structs.AllocClientStatusFailed, structs.AllocClientStatusComplete:
  3560  		default:
  3561  			s.logger.Printf("[ERR] state_store: invalid old state of allocation with id: %v, and state: %v",
  3562  				existingAlloc.ID, existingAlloc.ClientStatus)
  3563  		}
  3564  		summaryChanged = true
  3565  	}
  3566  	jobSummary.Summary[alloc.TaskGroup] = tgSummary
  3567  
  3568  	if summaryChanged {
  3569  		jobSummary.ModifyIndex = index
  3570  
  3571  		// COMPAT 0.7: Upgrade old objects that do not have namespaces
  3572  		if jobSummary.Namespace == "" {
  3573  			jobSummary.Namespace = structs.DefaultNamespace
  3574  		}
  3575  
  3576  		// Update the indexes table for job summary
  3577  		if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil {
  3578  			return fmt.Errorf("index update failed: %v", err)
  3579  		}
  3580  
  3581  		if err := txn.Insert("job_summary", jobSummary); err != nil {
  3582  			return fmt.Errorf("updating job summary failed: %v", err)
  3583  		}
  3584  	}
  3585  
  3586  	return nil
  3587  }
  3588  
  3589  // addEphemeralDiskToTaskGroups adds missing EphemeralDisk objects to TaskGroups
  3590  func (s *StateStore) addEphemeralDiskToTaskGroups(job *structs.Job) {
  3591  	for _, tg := range job.TaskGroups {
  3592  		var diskMB int
  3593  		for _, task := range tg.Tasks {
  3594  			if task.Resources != nil {
  3595  				diskMB += task.Resources.DiskMB
  3596  				task.Resources.DiskMB = 0
  3597  			}
  3598  		}
  3599  		if tg.EphemeralDisk != nil {
  3600  			continue
  3601  		}
  3602  		tg.EphemeralDisk = &structs.EphemeralDisk{
  3603  			SizeMB: diskMB,
  3604  		}
  3605  	}
  3606  }
  3607  
  3608  // UpsertACLPolicies is used to create or update a set of ACL policies
  3609  func (s *StateStore) UpsertACLPolicies(index uint64, policies []*structs.ACLPolicy) error {
  3610  	txn := s.db.Txn(true)
  3611  	defer txn.Abort()
  3612  
  3613  	for _, policy := range policies {
  3614  		// Ensure the policy hash is non-nil. This should be done outside the state store
  3615  		// for performance reasons, but we check here for defense in depth.
  3616  		if len(policy.Hash) == 0 {
  3617  			policy.SetHash()
  3618  		}
  3619  
  3620  		// Check if the policy already exists
  3621  		existing, err := txn.First("acl_policy", "id", policy.Name)
  3622  		if err != nil {
  3623  			return fmt.Errorf("policy lookup failed: %v", err)
  3624  		}
  3625  
  3626  		// Update all the indexes
  3627  		if existing != nil {
  3628  			policy.CreateIndex = existing.(*structs.ACLPolicy).CreateIndex
  3629  			policy.ModifyIndex = index
  3630  		} else {
  3631  			policy.CreateIndex = index
  3632  			policy.ModifyIndex = index
  3633  		}
  3634  
  3635  		// Update the policy
  3636  		if err := txn.Insert("acl_policy", policy); err != nil {
  3637  			return fmt.Errorf("upserting policy failed: %v", err)
  3638  		}
  3639  	}
  3640  
  3641  	// Update the indexes tabl
  3642  	if err := txn.Insert("index", &IndexEntry{"acl_policy", index}); err != nil {
  3643  		return fmt.Errorf("index update failed: %v", err)
  3644  	}
  3645  
  3646  	txn.Commit()
  3647  	return nil
  3648  }
  3649  
  3650  // DeleteACLPolicies deletes the policies with the given names
  3651  func (s *StateStore) DeleteACLPolicies(index uint64, names []string) error {
  3652  	txn := s.db.Txn(true)
  3653  	defer txn.Abort()
  3654  
  3655  	// Delete the policy
  3656  	for _, name := range names {
  3657  		if _, err := txn.DeleteAll("acl_policy", "id", name); err != nil {
  3658  			return fmt.Errorf("deleting acl policy failed: %v", err)
  3659  		}
  3660  	}
  3661  	if err := txn.Insert("index", &IndexEntry{"acl_policy", index}); err != nil {
  3662  		return fmt.Errorf("index update failed: %v", err)
  3663  	}
  3664  	txn.Commit()
  3665  	return nil
  3666  }
  3667  
  3668  // ACLPolicyByName is used to lookup a policy by name
  3669  func (s *StateStore) ACLPolicyByName(ws memdb.WatchSet, name string) (*structs.ACLPolicy, error) {
  3670  	txn := s.db.Txn(false)
  3671  
  3672  	watchCh, existing, err := txn.FirstWatch("acl_policy", "id", name)
  3673  	if err != nil {
  3674  		return nil, fmt.Errorf("acl policy lookup failed: %v", err)
  3675  	}
  3676  	ws.Add(watchCh)
  3677  
  3678  	if existing != nil {
  3679  		return existing.(*structs.ACLPolicy), nil
  3680  	}
  3681  	return nil, nil
  3682  }
  3683  
  3684  // ACLPolicyByNamePrefix is used to lookup policies by prefix
  3685  func (s *StateStore) ACLPolicyByNamePrefix(ws memdb.WatchSet, prefix string) (memdb.ResultIterator, error) {
  3686  	txn := s.db.Txn(false)
  3687  
  3688  	iter, err := txn.Get("acl_policy", "id_prefix", prefix)
  3689  	if err != nil {
  3690  		return nil, fmt.Errorf("acl policy lookup failed: %v", err)
  3691  	}
  3692  	ws.Add(iter.WatchCh())
  3693  
  3694  	return iter, nil
  3695  }
  3696  
  3697  // ACLPolicies returns an iterator over all the acl policies
  3698  func (s *StateStore) ACLPolicies(ws memdb.WatchSet) (memdb.ResultIterator, error) {
  3699  	txn := s.db.Txn(false)
  3700  
  3701  	// Walk the entire table
  3702  	iter, err := txn.Get("acl_policy", "id")
  3703  	if err != nil {
  3704  		return nil, err
  3705  	}
  3706  	ws.Add(iter.WatchCh())
  3707  	return iter, nil
  3708  }
  3709  
  3710  // UpsertACLTokens is used to create or update a set of ACL tokens
  3711  func (s *StateStore) UpsertACLTokens(index uint64, tokens []*structs.ACLToken) error {
  3712  	txn := s.db.Txn(true)
  3713  	defer txn.Abort()
  3714  
  3715  	for _, token := range tokens {
  3716  		// Ensure the policy hash is non-nil. This should be done outside the state store
  3717  		// for performance reasons, but we check here for defense in depth.
  3718  		if len(token.Hash) == 0 {
  3719  			token.SetHash()
  3720  		}
  3721  
  3722  		// Check if the token already exists
  3723  		existing, err := txn.First("acl_token", "id", token.AccessorID)
  3724  		if err != nil {
  3725  			return fmt.Errorf("token lookup failed: %v", err)
  3726  		}
  3727  
  3728  		// Update all the indexes
  3729  		if existing != nil {
  3730  			existTK := existing.(*structs.ACLToken)
  3731  			token.CreateIndex = existTK.CreateIndex
  3732  			token.ModifyIndex = index
  3733  
  3734  			// Do not allow SecretID or create time to change
  3735  			token.SecretID = existTK.SecretID
  3736  			token.CreateTime = existTK.CreateTime
  3737  
  3738  		} else {
  3739  			token.CreateIndex = index
  3740  			token.ModifyIndex = index
  3741  		}
  3742  
  3743  		// Update the token
  3744  		if err := txn.Insert("acl_token", token); err != nil {
  3745  			return fmt.Errorf("upserting token failed: %v", err)
  3746  		}
  3747  	}
  3748  
  3749  	// Update the indexes table
  3750  	if err := txn.Insert("index", &IndexEntry{"acl_token", index}); err != nil {
  3751  		return fmt.Errorf("index update failed: %v", err)
  3752  	}
  3753  	txn.Commit()
  3754  	return nil
  3755  }
  3756  
  3757  // DeleteACLTokens deletes the tokens with the given accessor ids
  3758  func (s *StateStore) DeleteACLTokens(index uint64, ids []string) error {
  3759  	txn := s.db.Txn(true)
  3760  	defer txn.Abort()
  3761  
  3762  	// Delete the tokens
  3763  	for _, id := range ids {
  3764  		if _, err := txn.DeleteAll("acl_token", "id", id); err != nil {
  3765  			return fmt.Errorf("deleting acl token failed: %v", err)
  3766  		}
  3767  	}
  3768  	if err := txn.Insert("index", &IndexEntry{"acl_token", index}); err != nil {
  3769  		return fmt.Errorf("index update failed: %v", err)
  3770  	}
  3771  	txn.Commit()
  3772  	return nil
  3773  }
  3774  
  3775  // ACLTokenByAccessorID is used to lookup a token by accessor ID
  3776  func (s *StateStore) ACLTokenByAccessorID(ws memdb.WatchSet, id string) (*structs.ACLToken, error) {
  3777  	txn := s.db.Txn(false)
  3778  
  3779  	watchCh, existing, err := txn.FirstWatch("acl_token", "id", id)
  3780  	if err != nil {
  3781  		return nil, fmt.Errorf("acl token lookup failed: %v", err)
  3782  	}
  3783  	ws.Add(watchCh)
  3784  
  3785  	if existing != nil {
  3786  		return existing.(*structs.ACLToken), nil
  3787  	}
  3788  	return nil, nil
  3789  }
  3790  
  3791  // ACLTokenBySecretID is used to lookup a token by secret ID
  3792  func (s *StateStore) ACLTokenBySecretID(ws memdb.WatchSet, secretID string) (*structs.ACLToken, error) {
  3793  	txn := s.db.Txn(false)
  3794  
  3795  	watchCh, existing, err := txn.FirstWatch("acl_token", "secret", secretID)
  3796  	if err != nil {
  3797  		return nil, fmt.Errorf("acl token lookup failed: %v", err)
  3798  	}
  3799  	ws.Add(watchCh)
  3800  
  3801  	if existing != nil {
  3802  		return existing.(*structs.ACLToken), nil
  3803  	}
  3804  	return nil, nil
  3805  }
  3806  
  3807  // ACLTokenByAccessorIDPrefix is used to lookup tokens by prefix
  3808  func (s *StateStore) ACLTokenByAccessorIDPrefix(ws memdb.WatchSet, prefix string) (memdb.ResultIterator, error) {
  3809  	txn := s.db.Txn(false)
  3810  
  3811  	iter, err := txn.Get("acl_token", "id_prefix", prefix)
  3812  	if err != nil {
  3813  		return nil, fmt.Errorf("acl token lookup failed: %v", err)
  3814  	}
  3815  	ws.Add(iter.WatchCh())
  3816  	return iter, nil
  3817  }
  3818  
  3819  // ACLTokens returns an iterator over all the tokens
  3820  func (s *StateStore) ACLTokens(ws memdb.WatchSet) (memdb.ResultIterator, error) {
  3821  	txn := s.db.Txn(false)
  3822  
  3823  	// Walk the entire table
  3824  	iter, err := txn.Get("acl_token", "id")
  3825  	if err != nil {
  3826  		return nil, err
  3827  	}
  3828  	ws.Add(iter.WatchCh())
  3829  	return iter, nil
  3830  }
  3831  
  3832  // ACLTokensByGlobal returns an iterator over all the tokens filtered by global value
  3833  func (s *StateStore) ACLTokensByGlobal(ws memdb.WatchSet, globalVal bool) (memdb.ResultIterator, error) {
  3834  	txn := s.db.Txn(false)
  3835  
  3836  	// Walk the entire table
  3837  	iter, err := txn.Get("acl_token", "global", globalVal)
  3838  	if err != nil {
  3839  		return nil, err
  3840  	}
  3841  	ws.Add(iter.WatchCh())
  3842  	return iter, nil
  3843  }
  3844  
  3845  // CanBootstrapACLToken checks if bootstrapping is possible and returns the reset index
  3846  func (s *StateStore) CanBootstrapACLToken() (bool, uint64, error) {
  3847  	txn := s.db.Txn(false)
  3848  
  3849  	// Lookup the bootstrap sentinel
  3850  	out, err := txn.First("index", "id", "acl_token_bootstrap")
  3851  	if err != nil {
  3852  		return false, 0, err
  3853  	}
  3854  
  3855  	// No entry, we haven't bootstrapped yet
  3856  	if out == nil {
  3857  		return true, 0, nil
  3858  	}
  3859  
  3860  	// Return the reset index if we've already bootstrapped
  3861  	return false, out.(*IndexEntry).Value, nil
  3862  }
  3863  
  3864  // BootstrapACLToken is used to create an initial ACL token
  3865  func (s *StateStore) BootstrapACLTokens(index, resetIndex uint64, token *structs.ACLToken) error {
  3866  	txn := s.db.Txn(true)
  3867  	defer txn.Abort()
  3868  
  3869  	// Check if we have already done a bootstrap
  3870  	existing, err := txn.First("index", "id", "acl_token_bootstrap")
  3871  	if err != nil {
  3872  		return fmt.Errorf("bootstrap check failed: %v", err)
  3873  	}
  3874  	if existing != nil {
  3875  		if resetIndex == 0 {
  3876  			return fmt.Errorf("ACL bootstrap already done")
  3877  		} else if resetIndex != existing.(*IndexEntry).Value {
  3878  			return fmt.Errorf("Invalid reset index for ACL bootstrap")
  3879  		}
  3880  	}
  3881  
  3882  	// Update the Create/Modify time
  3883  	token.CreateIndex = index
  3884  	token.ModifyIndex = index
  3885  
  3886  	// Insert the token
  3887  	if err := txn.Insert("acl_token", token); err != nil {
  3888  		return fmt.Errorf("upserting token failed: %v", err)
  3889  	}
  3890  
  3891  	// Update the indexes table, prevents future bootstrap until reset
  3892  	if err := txn.Insert("index", &IndexEntry{"acl_token", index}); err != nil {
  3893  		return fmt.Errorf("index update failed: %v", err)
  3894  	}
  3895  	if err := txn.Insert("index", &IndexEntry{"acl_token_bootstrap", index}); err != nil {
  3896  		return fmt.Errorf("index update failed: %v", err)
  3897  	}
  3898  	txn.Commit()
  3899  	return nil
  3900  }
  3901  
  3902  // WithWriteTransaction executes the passed function within a write transaction,
  3903  // and returns its result.  If the invocation returns no error, the transaction
  3904  // is committed; otherwise, it's aborted.
  3905  func (s *StateStore) WithWriteTransaction(fn func(Txn) error) error {
  3906  	tx := s.db.Txn(true)
  3907  	defer tx.Abort()
  3908  
  3909  	err := fn(tx)
  3910  	if err == nil {
  3911  		tx.Commit()
  3912  	}
  3913  	return err
  3914  }
  3915  
// StateSnapshot is used to provide a point-in-time snapshot.
// It embeds StateStore by value, so StateStore's fields and methods are
// promoted onto the snapshot.
type StateSnapshot struct {
	StateStore
}
  3920  
// StateRestore is used to optimize the performance when
// restoring state by only using a single large transaction
// instead of thousands of sub transactions. Every *Restore method inserts
// into the wrapped transaction; Commit or Abort finishes it.
type StateRestore struct {
	// txn is the transaction that all restore inserts are issued against.
	txn *memdb.Txn
}
  3927  
  3928  // Abort is used to abort the restore operation
  3929  func (s *StateRestore) Abort() {
  3930  	s.txn.Abort()
  3931  }
  3932  
  3933  // Commit is used to commit the restore operation
  3934  func (s *StateRestore) Commit() {
  3935  	s.txn.Commit()
  3936  }
  3937  
  3938  // NodeRestore is used to restore a node
  3939  func (r *StateRestore) NodeRestore(node *structs.Node) error {
  3940  	if err := r.txn.Insert("nodes", node); err != nil {
  3941  		return fmt.Errorf("node insert failed: %v", err)
  3942  	}
  3943  	return nil
  3944  }
  3945  
  3946  // JobRestore is used to restore a job
  3947  func (r *StateRestore) JobRestore(job *structs.Job) error {
  3948  	// Create the EphemeralDisk if it's nil by adding up DiskMB from task resources.
  3949  	// COMPAT 0.4.1 -> 0.5
  3950  	r.addEphemeralDiskToTaskGroups(job)
  3951  
  3952  	if err := r.txn.Insert("jobs", job); err != nil {
  3953  		return fmt.Errorf("job insert failed: %v", err)
  3954  	}
  3955  	return nil
  3956  }
  3957  
  3958  // EvalRestore is used to restore an evaluation
  3959  func (r *StateRestore) EvalRestore(eval *structs.Evaluation) error {
  3960  	if err := r.txn.Insert("evals", eval); err != nil {
  3961  		return fmt.Errorf("eval insert failed: %v", err)
  3962  	}
  3963  	return nil
  3964  }
  3965  
  3966  // AllocRestore is used to restore an allocation
  3967  func (r *StateRestore) AllocRestore(alloc *structs.Allocation) error {
  3968  	// Set the shared resources if it's not present
  3969  	// COMPAT 0.4.1 -> 0.5
  3970  	if alloc.SharedResources == nil {
  3971  		alloc.SharedResources = &structs.Resources{
  3972  			DiskMB: alloc.Resources.DiskMB,
  3973  		}
  3974  	}
  3975  
  3976  	// Create the EphemeralDisk if it's nil by adding up DiskMB from task resources.
  3977  	if alloc.Job != nil {
  3978  		r.addEphemeralDiskToTaskGroups(alloc.Job)
  3979  	}
  3980  
  3981  	if err := r.txn.Insert("allocs", alloc); err != nil {
  3982  		return fmt.Errorf("alloc insert failed: %v", err)
  3983  	}
  3984  	return nil
  3985  }
  3986  
  3987  // IndexRestore is used to restore an index
  3988  func (r *StateRestore) IndexRestore(idx *IndexEntry) error {
  3989  	if err := r.txn.Insert("index", idx); err != nil {
  3990  		return fmt.Errorf("index insert failed: %v", err)
  3991  	}
  3992  	return nil
  3993  }
  3994  
  3995  // PeriodicLaunchRestore is used to restore a periodic launch.
  3996  func (r *StateRestore) PeriodicLaunchRestore(launch *structs.PeriodicLaunch) error {
  3997  	if err := r.txn.Insert("periodic_launch", launch); err != nil {
  3998  		return fmt.Errorf("periodic launch insert failed: %v", err)
  3999  	}
  4000  	return nil
  4001  }
  4002  
  4003  // JobSummaryRestore is used to restore a job summary
  4004  func (r *StateRestore) JobSummaryRestore(jobSummary *structs.JobSummary) error {
  4005  	if err := r.txn.Insert("job_summary", jobSummary); err != nil {
  4006  		return fmt.Errorf("job summary insert failed: %v", err)
  4007  	}
  4008  	return nil
  4009  }
  4010  
  4011  // JobVersionRestore is used to restore a job version
  4012  func (r *StateRestore) JobVersionRestore(version *structs.Job) error {
  4013  	if err := r.txn.Insert("job_version", version); err != nil {
  4014  		return fmt.Errorf("job version insert failed: %v", err)
  4015  	}
  4016  	return nil
  4017  }
  4018  
  4019  // DeploymentRestore is used to restore a deployment
  4020  func (r *StateRestore) DeploymentRestore(deployment *structs.Deployment) error {
  4021  	if err := r.txn.Insert("deployment", deployment); err != nil {
  4022  		return fmt.Errorf("deployment insert failed: %v", err)
  4023  	}
  4024  	return nil
  4025  }
  4026  
  4027  // VaultAccessorRestore is used to restore a vault accessor
  4028  func (r *StateRestore) VaultAccessorRestore(accessor *structs.VaultAccessor) error {
  4029  	if err := r.txn.Insert("vault_accessors", accessor); err != nil {
  4030  		return fmt.Errorf("vault accessor insert failed: %v", err)
  4031  	}
  4032  	return nil
  4033  }
  4034  
  4035  // ACLPolicyRestore is used to restore an ACL policy
  4036  func (r *StateRestore) ACLPolicyRestore(policy *structs.ACLPolicy) error {
  4037  	if err := r.txn.Insert("acl_policy", policy); err != nil {
  4038  		return fmt.Errorf("inserting acl policy failed: %v", err)
  4039  	}
  4040  	return nil
  4041  }
  4042  
  4043  // ACLTokenRestore is used to restore an ACL token
  4044  func (r *StateRestore) ACLTokenRestore(token *structs.ACLToken) error {
  4045  	if err := r.txn.Insert("acl_token", token); err != nil {
  4046  		return fmt.Errorf("inserting acl token failed: %v", err)
  4047  	}
  4048  	return nil
  4049  }
  4050  
  4051  // addEphemeralDiskToTaskGroups adds missing EphemeralDisk objects to TaskGroups
  4052  func (r *StateRestore) addEphemeralDiskToTaskGroups(job *structs.Job) {
  4053  	for _, tg := range job.TaskGroups {
  4054  		if tg.EphemeralDisk != nil {
  4055  			continue
  4056  		}
  4057  		var sizeMB int
  4058  		for _, task := range tg.Tasks {
  4059  			if task.Resources != nil {
  4060  				sizeMB += task.Resources.DiskMB
  4061  				task.Resources.DiskMB = 0
  4062  			}
  4063  		}
  4064  		tg.EphemeralDisk = &structs.EphemeralDisk{
  4065  			SizeMB: sizeMB,
  4066  		}
  4067  	}
  4068  }