github.com/Ilhicas/nomad@v1.0.4-0.20210304152020-e86851182bc3/nomad/state/state_store.go

     1  package state
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"reflect"
     7  	"sort"
     8  	"strings"
     9  	"time"
    10  
    11  	log "github.com/hashicorp/go-hclog"
    12  	memdb "github.com/hashicorp/go-memdb"
    13  	multierror "github.com/hashicorp/go-multierror"
    14  	"github.com/pkg/errors"
    15  
    16  	"github.com/hashicorp/nomad/helper"
    17  	"github.com/hashicorp/nomad/nomad/stream"
    18  	"github.com/hashicorp/nomad/nomad/structs"
    19  )
    20  
    21  // Txn is a transaction against a state store.
    22  // This can be a read or write transaction.
    23  type Txn = *txn
    24  
    25  const (
    26  	// NodeRegisterEventRegistered is the message used when the node is
    27  	// registered for the first time.
    28  	NodeRegisterEventRegistered = "Node registered"
    29  
    30  	// NodeRegisterEventReregistered is the message used when the node
    31  	// becomes re-registered.
    32  	NodeRegisterEventReregistered = "Node re-registered"
    33  )
    34  
    35  // IndexEntry is used with the "index" table
    36  // for managing the latest Raft index affecting a table.
    37  type IndexEntry struct {
    38  	Key   string
    39  	Value uint64
    40  }
    41  
    42  // StateStoreConfig is used to configure a new state store
    43  type StateStoreConfig struct {
    44  	// Logger is used to output the state store's logs
    45  	Logger log.Logger
    46  
    47  	// Region is the region of the server embedding the state store.
    48  	Region string
    49  
    50  	// EnablePublisher is used to enable or disable the event publisher
    51  	EnablePublisher bool
    52  
    53  	// EventBufferSize configures the number of events to hold in memory
    54  	EventBufferSize int64
    55  }
    56  
    57  // The StateStore is responsible for maintaining all the Nomad
    58  // state. It is manipulated by the FSM which maintains consistency
    59  // through the use of Raft. The goals of the StateStore are to provide
    60  // high concurrency for read operations without blocking writes, and
    61  // to provide write availability in the face of reads. EVERY object
    62  // returned as a result of a read against the state store should be
    63  // considered a constant and NEVER modified in place.
    64  type StateStore struct {
    65  	logger log.Logger
    66  	db     *changeTrackerDB
    67  
    68  	// config is the passed in configuration
    69  	config *StateStoreConfig
    70  
    71  	// abandonCh is used to signal watchers that this state store has been
    72  	// abandoned (usually during a restore). This is only ever closed.
    73  	abandonCh chan struct{}
    74  
    75  	// TODO: refactor abandonCh to use a context so that both can use the same
    76  	// cancel mechanism.
    77  	stopEventBroker func()
    78  }
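
// Sketch (illustrative, not part of the original source): the copy-on-write
// pattern the doc comment above requires. Objects returned from reads are
// shared with other readers, so any mutation must operate on a Copy(). This
// helper and its names are hypothetical.
func exampleCopyOnWrite(s *StateStore, nodeID string) (*structs.Node, error) {
	ws := memdb.NewWatchSet()
	node, err := s.NodeByID(ws, nodeID)
	if err != nil || node == nil {
		return nil, err
	}
	// Never mutate the object returned by the read; copy it first.
	copyNode := node.Copy()
	copyNode.Status = structs.NodeStatusDown
	return copyNode, nil
}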
    79  
    80  type streamACLDelegate struct {
    81  	s *StateStore
    82  }
    83  
    84  func (a *streamACLDelegate) TokenProvider() stream.ACLTokenProvider {
    85  	resolver, _ := a.s.Snapshot()
    86  	return resolver
    87  }
    88  
    89  // NewStateStore is used to create a new state store
    90  func NewStateStore(config *StateStoreConfig) (*StateStore, error) {
    91  	// Create the MemDB
    92  	db, err := memdb.NewMemDB(stateStoreSchema())
    93  	if err != nil {
    94  		return nil, fmt.Errorf("state store setup failed: %v", err)
    95  	}
    96  
    97  	// Create the state store
    98  	ctx, cancel := context.WithCancel(context.TODO())
    99  	s := &StateStore{
   100  		logger:          config.Logger.Named("state_store"),
   101  		config:          config,
   102  		abandonCh:       make(chan struct{}),
   103  		stopEventBroker: cancel,
   104  	}
   105  
   106  	if config.EnablePublisher {
   107  		// Create new event publisher using provided config
   108  		broker, err := stream.NewEventBroker(ctx, &streamACLDelegate{s}, stream.EventBrokerCfg{
   109  			EventBufferSize: config.EventBufferSize,
   110  			Logger:          config.Logger,
   111  		})
   112  		if err != nil {
   113  			return nil, fmt.Errorf("creating state store event broker: %w", err)
   114  		}
   115  		s.db = NewChangeTrackerDB(db, broker, eventsFromChanges)
   116  	} else {
   117  		s.db = NewChangeTrackerDB(db, nil, noOpProcessChanges)
   118  	}
   119  
   120  	// Initialize the state store with the default namespace.
   121  	if err := s.namespaceInit(); err != nil {
   122  		return nil, fmt.Errorf("enterprise state store initialization failed: %v", err)
   123  	}
   124  
   125  	return s, nil
   126  }
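
// Sketch (illustrative, not part of the original source): constructing a
// state store with the event broker enabled. The region, buffer size, and
// use of hclog.NewNullLogger are illustrative values chosen to keep the
// example self-contained.
func exampleNewStateStore() (*StateStore, error) {
	return NewStateStore(&StateStoreConfig{
		Logger:          log.NewNullLogger(),
		Region:          "global",
		EnablePublisher: true,
		EventBufferSize: 100,
	})
}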
   127  
   128  // NewWatchSet returns a new memdb.WatchSet that adds the state store's
   129  // abandonCh as a watcher. This will notify when this specific state store
   130  // is no longer valid, usually due to a new snapshot being loaded.
   131  func (s *StateStore) NewWatchSet() memdb.WatchSet {
   132  	ws := memdb.NewWatchSet()
   133  	ws.Add(s.AbandonCh())
   134  	return ws
   135  }
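
// Sketch (illustrative, not part of the original source): a typical wait
// built on NewWatchSet. The watch set already includes AbandonCh, so
// WatchCtx returns when the watched data changes, the state store is
// abandoned, or the context is done.
func exampleWaitForNodeChange(ctx context.Context, s *StateStore, nodeID string) error {
	ws := s.NewWatchSet()
	// Register interest in the node by reading it through the watch set.
	if _, err := s.NodeByID(ws, nodeID); err != nil {
		return err
	}
	return ws.WatchCtx(ctx)
}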
   136  
   137  func (s *StateStore) EventBroker() (*stream.EventBroker, error) {
   138  	if s.db.publisher == nil {
   139  		return nil, fmt.Errorf("EventBroker not configured")
   140  	}
   141  	return s.db.publisher, nil
   142  }
   143  
   144  // namespaceInit ensures the default namespace exists.
   145  func (s *StateStore) namespaceInit() error {
   146  	// Create the default namespace. This is safe to do every time we create the
   147  	// state store. There are two main cases: a brand new cluster, in which case
   148  	// each server will have the same default namespace object, or an existing
   149  	// cluster, in which case if the default namespace has been modified it will
   150  	// be overridden by the restore code path.
   151  	defaultNs := &structs.Namespace{
   152  		Name:        structs.DefaultNamespace,
   153  		Description: structs.DefaultNamespaceDescription,
   154  	}
   155  
   156  	if err := s.UpsertNamespaces(1, []*structs.Namespace{defaultNs}); err != nil {
   157  		return fmt.Errorf("inserting default namespace failed: %v", err)
   158  	}
   159  
   160  	return nil
   161  }
   162  
   163  // Config returns the state store configuration.
   164  func (s *StateStore) Config() *StateStoreConfig {
   165  	return s.config
   166  }
   167  
   168  // Snapshot is used to create a point in time snapshot. Because
   169  // we use MemDB, we just need to snapshot the state of the underlying
   170  // database.
   171  func (s *StateStore) Snapshot() (*StateSnapshot, error) {
   172  	memDBSnap := s.db.memdb.Snapshot()
   173  
   174  	store := StateStore{
   175  		logger: s.logger,
   176  		config: s.config,
   177  	}
   178  
   179  	// Create a new change tracker DB that does not publish or track changes
   180  	store.db = NewChangeTrackerDB(memDBSnap, nil, noOpProcessChanges)
   181  
   182  	snap := &StateSnapshot{
   183  		StateStore: store,
   184  	}
   185  	return snap, nil
   186  }
   187  
   188  // SnapshotMinIndex is used to create a state snapshot where the index is
   189  // guaranteed to be greater than or equal to the index parameter.
   190  //
   191  // Some server operations (such as scheduling) exchange objects via RPC
   192  // concurrent with Raft log application, so they must ensure the state store
   193  // snapshot they are operating on is at or after the index the objects
   194  // retrieved via RPC were applied to the Raft log at.
   195  //
   196  // Callers should maintain their own timer metric as the time this method
   197  // blocks indicates Raft log application latency relative to scheduling.
   198  func (s *StateStore) SnapshotMinIndex(ctx context.Context, index uint64) (*StateSnapshot, error) {
   199  	// Ported from work.go:waitForIndex prior to 0.9
   200  
   201  	const backoffBase = 20 * time.Millisecond
   202  	const backoffLimit = 1 * time.Second
   203  	var retries uint
   204  	var retryTimer *time.Timer
   205  
   206  	// XXX: Potential optimization is to set up a watch on the state
   207  	// store's index table and only unblock via a trigger rather than
   208  	// polling.
   209  	for {
   210  		// Get the state store's current index
   211  		snapshotIndex, err := s.LatestIndex()
   212  		if err != nil {
   213  			return nil, fmt.Errorf("failed to determine state store's index: %v", err)
   214  		}
   215  
   216  		// We only need the FSM state to be as recent as the given index
   217  		if snapshotIndex >= index {
   218  			return s.Snapshot()
   219  		}
   220  
   221  		// Exponential back off
   222  		retries++
   223  		if retryTimer == nil {
   224  			// First retry, start at baseline
   225  			retryTimer = time.NewTimer(backoffBase)
   226  		} else {
   227  			// Subsequent retry: quadruple the wait each attempt, capped at backoffLimit
   228  			deadline := 1 << (2 * retries) * backoffBase
   229  			if deadline > backoffLimit {
   230  				deadline = backoffLimit
   231  			}
   232  			retryTimer.Reset(deadline)
   233  		}
   234  
   235  		select {
   236  		case <-ctx.Done():
   237  			return nil, ctx.Err()
   238  		case <-retryTimer.C:
   239  		}
   240  	}
   241  }
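
// Sketch (illustrative, not part of the original source): bounding
// SnapshotMinIndex with a context deadline so that a stalled Raft apply
// pipeline surfaces as an error rather than blocking forever. The timeout
// value is illustrative.
func exampleSnapshotAtIndex(s *StateStore, index uint64) (*StateSnapshot, error) {
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()
	return s.SnapshotMinIndex(ctx, index)
}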
   242  
   243  // Restore is used to optimize the efficiency of rebuilding
   244  // state by minimizing the number of transactions and checking
   245  // overhead.
   246  func (s *StateStore) Restore() (*StateRestore, error) {
   247  	txn := s.db.WriteTxnRestore()
   248  	r := &StateRestore{
   249  		txn: txn,
   250  	}
   251  	return r, nil
   252  }
   253  
   254  // AbandonCh returns a channel you can wait on to know if the state store was
   255  // abandoned.
   256  func (s *StateStore) AbandonCh() <-chan struct{} {
   257  	return s.abandonCh
   258  }
   259  
   260  // Abandon is used to signal that the given state store has been abandoned.
   261  // Calling this more than one time will panic.
   262  func (s *StateStore) Abandon() {
   263  	s.StopEventBroker()
   264  	close(s.abandonCh)
   265  }
   266  
   267  // StopEventBroker calls the cancel func for the state store's event
   268  // publisher. It should be called during server shutdown.
   269  func (s *StateStore) StopEventBroker() {
   270  	s.stopEventBroker()
   271  }
   272  
   273  // QueryFn is the definition of a function that can be used to implement a basic
   274  // blocking query against the state store.
   275  type QueryFn func(memdb.WatchSet, *StateStore) (resp interface{}, index uint64, err error)
   276  
   277  // BlockingQuery takes a query function and runs the function until the minimum
   278  // query index is met or until the passed context is cancelled.
   279  func (s *StateStore) BlockingQuery(query QueryFn, minIndex uint64, ctx context.Context) (
   280  	resp interface{}, index uint64, err error) {
   281  
   282  RUN_QUERY:
   283  	// We capture the state store and its abandon channel but pass a snapshot to
   284  	// the blocking query function. We operate on the snapshot so that separate
   285  	// calls to the state store are not all wrapped within the same transaction.
   286  	abandonCh := s.AbandonCh()
   287  	snap, _ := s.Snapshot()
   288  	stateSnap := &snap.StateStore
   289  
   290  	// We can skip all watch tracking if this isn't a blocking query.
   291  	var ws memdb.WatchSet
   292  	if minIndex > 0 {
   293  		ws = memdb.NewWatchSet()
   294  
   295  		// This channel will be closed if a snapshot is restored and the
   296  		// whole state store is abandoned.
   297  		ws.Add(abandonCh)
   298  	}
   299  
   300  	resp, index, err = query(ws, stateSnap)
   301  	if err != nil {
   302  		return nil, index, err
   303  	}
   304  
   305  	// We haven't reached the min-index yet.
   306  	if minIndex > 0 && index <= minIndex {
   307  		if err := ws.WatchCtx(ctx); err != nil {
   308  			return nil, index, err
   309  		}
   310  
   311  		goto RUN_QUERY
   312  	}
   313  
   314  	return resp, index, nil
   315  }
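
// Sketch (illustrative, not part of the original source): a QueryFn that
// lists nodes and blocks until the nodes table advances past minIndex. It
// assumes the Index method defined later in this file for reading a table's
// latest index from the "index" table; the function name is hypothetical.
func exampleBlockingNodesQuery(ctx context.Context, s *StateStore, minIndex uint64) (interface{}, uint64, error) {
	query := func(ws memdb.WatchSet, store *StateStore) (interface{}, uint64, error) {
		iter, err := store.Nodes(ws)
		if err != nil {
			return nil, 0, err
		}
		var nodes []*structs.Node
		for raw := iter.Next(); raw != nil; raw = iter.Next() {
			nodes = append(nodes, raw.(*structs.Node))
		}
		// Read the latest Raft index recorded for the "nodes" table.
		index, err := store.Index("nodes")
		if err != nil {
			return nil, 0, err
		}
		return nodes, index, nil
	}
	return s.BlockingQuery(query, minIndex, ctx)
}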
   316  
   317  // UpsertPlanResults is used to upsert the results of a plan.
   318  func (s *StateStore) UpsertPlanResults(msgType structs.MessageType, index uint64, results *structs.ApplyPlanResultsRequest) error {
   319  	snapshot, err := s.Snapshot()
   320  	if err != nil {
   321  		return err
   322  	}
   323  
   324  	allocsStopped, err := snapshot.DenormalizeAllocationDiffSlice(results.AllocsStopped)
   325  	if err != nil {
   326  		return err
   327  	}
   328  
   329  	allocsPreempted, err := snapshot.DenormalizeAllocationDiffSlice(results.AllocsPreempted)
   330  	if err != nil {
   331  		return err
   332  	}
   333  
   334  	// COMPAT 0.11: Remove this denormalization when NodePreemptions is removed
   335  	results.NodePreemptions, err = snapshot.DenormalizeAllocationSlice(results.NodePreemptions)
   336  	if err != nil {
   337  		return err
   338  	}
   339  
   340  	txn := s.db.WriteTxnMsgT(msgType, index)
   341  	defer txn.Abort()
   342  
   343  	// Upsert the newly created or updated deployment
   344  	if results.Deployment != nil {
   345  		if err := s.upsertDeploymentImpl(index, results.Deployment, txn); err != nil {
   346  			return err
   347  		}
   348  	}
   349  
   350  	// Update the status of deployments affected by the plan.
   351  	if err := s.upsertDeploymentUpdates(index, results.DeploymentUpdates, txn); err != nil {
   352  		return err
   353  	}
   354  
   355  	if results.EvalID != "" {
   356  		// Update the modify index of the eval id
   357  		if err := s.updateEvalModifyIndex(txn, index, results.EvalID); err != nil {
   358  			return err
   359  		}
   360  	}
   361  
   362  	numAllocs := 0
   363  	if len(results.Alloc) > 0 || len(results.NodePreemptions) > 0 {
   364  		// COMPAT 0.11: This branch will be removed, when Alloc is removed
   365  		// Attach the job to all the allocations. It is pulled out in the payload to
   366  		// avoid the redundancy of encoding, but should be denormalized prior to
   367  		// being inserted into MemDB.
   368  		addComputedAllocAttrs(results.Alloc, results.Job)
   369  		numAllocs = len(results.Alloc) + len(results.NodePreemptions)
   370  	} else {
   371  		// Attach the job to all the allocations. It is pulled out in the payload to
   372  		// avoid the redundancy of encoding, but should be denormalized prior to
   373  		// being inserted into MemDB.
   374  		addComputedAllocAttrs(results.AllocsUpdated, results.Job)
   375  		numAllocs = len(allocsStopped) + len(results.AllocsUpdated) + len(allocsPreempted)
   376  	}
   377  
   378  	allocsToUpsert := make([]*structs.Allocation, 0, numAllocs)
   379  
   380  	// COMPAT 0.11: Both these appends should be removed when Alloc and NodePreemptions are removed
   381  	allocsToUpsert = append(allocsToUpsert, results.Alloc...)
   382  	allocsToUpsert = append(allocsToUpsert, results.NodePreemptions...)
   383  
   384  	allocsToUpsert = append(allocsToUpsert, allocsStopped...)
   385  	allocsToUpsert = append(allocsToUpsert, results.AllocsUpdated...)
   386  	allocsToUpsert = append(allocsToUpsert, allocsPreempted...)
   387  
   388  	// Handle the upgrade path: canonicalize allocations from older versions
   389  	for _, alloc := range allocsToUpsert {
   390  		alloc.Canonicalize()
   391  	}
   392  
   393  	if err := s.upsertAllocsImpl(index, allocsToUpsert, txn); err != nil {
   394  		return err
   395  	}
   396  
   397  	// Upsert followup evals for allocs that were preempted
   398  	for _, eval := range results.PreemptionEvals {
   399  		if err := s.nestedUpsertEval(txn, index, eval); err != nil {
   400  			return err
   401  		}
   402  	}
   403  
   404  	return txn.Commit()
   405  }
   406  
   407  // addComputedAllocAttrs adds the computed/derived attributes to the allocation.
   408  // This method is used when an allocation is being denormalized.
   409  func addComputedAllocAttrs(allocs []*structs.Allocation, job *structs.Job) {
   410  	structs.DenormalizeAllocationJobs(job, allocs)
   411  
   412  	// COMPAT(0.11): Remove in 0.11
   413  	// Calculate the total resources of allocations. It is pulled out in the
   414  	// payload to avoid encoding something that can be computed, but should be
   415  	// denormalized prior to being inserted into MemDB.
   416  	for _, alloc := range allocs {
   417  		if alloc.Resources != nil {
   418  			continue
   419  		}
   420  
   421  		alloc.Resources = new(structs.Resources)
   422  		for _, task := range alloc.TaskResources {
   423  			alloc.Resources.Add(task)
   424  		}
   425  
   426  		// Add the shared resources
   427  		alloc.Resources.Add(alloc.SharedResources)
   428  	}
   429  }
   430  
   431  // upsertDeploymentUpdates updates the deployments given the passed status
   432  // updates.
   433  func (s *StateStore) upsertDeploymentUpdates(index uint64, updates []*structs.DeploymentStatusUpdate, txn *txn) error {
   434  	for _, u := range updates {
   435  		if err := s.updateDeploymentStatusImpl(index, u, txn); err != nil {
   436  			return err
   437  		}
   438  	}
   439  
   440  	return nil
   441  }
   442  
   443  // UpsertJobSummary upserts a job summary into the state store.
   444  func (s *StateStore) UpsertJobSummary(index uint64, jobSummary *structs.JobSummary) error {
   445  	txn := s.db.WriteTxn(index)
   446  	defer txn.Abort()
   447  
   448  	// Check if the job summary already exists
   449  	existing, err := txn.First("job_summary", "id", jobSummary.Namespace, jobSummary.JobID)
   450  	if err != nil {
   451  		return fmt.Errorf("job summary lookup failed: %v", err)
   452  	}
   453  
   454  	// Setup the indexes correctly
   455  	if existing != nil {
   456  		jobSummary.CreateIndex = existing.(*structs.JobSummary).CreateIndex
   457  		jobSummary.ModifyIndex = index
   458  	} else {
   459  		jobSummary.CreateIndex = index
   460  		jobSummary.ModifyIndex = index
   461  	}
   462  
   463  	// Update the index
   464  	if err := txn.Insert("job_summary", jobSummary); err != nil {
   465  		return err
   466  	}
   467  
   468  	// Update the indexes table for job summary
   469  	if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil {
   470  		return fmt.Errorf("index update failed: %v", err)
   471  	}
   472  
   473  	return txn.Commit()
   474  }
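
// Sketch (illustrative, not part of the original source): seeding a job
// summary, as a test might do given that the helper above exists for
// testing. The index and IDs are illustrative.
func exampleSeedJobSummary(s *StateStore) error {
	return s.UpsertJobSummary(1000, &structs.JobSummary{
		Namespace: structs.DefaultNamespace,
		JobID:     "example-job",
	})
}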
   475  
   476  // DeleteJobSummary deletes the job summary with the given ID. This is for
   477  // testing purposes only.
   478  func (s *StateStore) DeleteJobSummary(index uint64, namespace, id string) error {
   479  	txn := s.db.WriteTxn(index)
   480  	defer txn.Abort()
   481  
   482  	// Delete the job summary
   483  	if _, err := txn.DeleteAll("job_summary", "id", namespace, id); err != nil {
   484  		return fmt.Errorf("deleting job summary failed: %v", err)
   485  	}
   486  	if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil {
   487  		return fmt.Errorf("index update failed: %v", err)
   488  	}
   489  	return txn.Commit()
   490  }
   491  
   492  // UpsertDeployment is used to insert or update a deployment.
   494  func (s *StateStore) UpsertDeployment(index uint64, deployment *structs.Deployment) error {
   495  	txn := s.db.WriteTxn(index)
   496  	defer txn.Abort()
   497  	if err := s.upsertDeploymentImpl(index, deployment, txn); err != nil {
   498  		return err
   499  	}
   500  	return txn.Commit()
   501  }
   502  
   503  func (s *StateStore) upsertDeploymentImpl(index uint64, deployment *structs.Deployment, txn *txn) error {
   504  	// Check if the deployment already exists
   505  	existing, err := txn.First("deployment", "id", deployment.ID)
   506  	if err != nil {
   507  		return fmt.Errorf("deployment lookup failed: %v", err)
   508  	}
   509  
   510  	// Setup the indexes correctly
   511  	if existing != nil {
   512  		deployment.CreateIndex = existing.(*structs.Deployment).CreateIndex
   513  		deployment.ModifyIndex = index
   514  	} else {
   515  		deployment.CreateIndex = index
   516  		deployment.ModifyIndex = index
   517  	}
   518  
   519  	// Insert the deployment
   520  	if err := txn.Insert("deployment", deployment); err != nil {
   521  		return err
   522  	}
   523  
   524  	// Update the indexes table for deployment
   525  	if err := txn.Insert("index", &IndexEntry{"deployment", index}); err != nil {
   526  		return fmt.Errorf("index update failed: %v", err)
   527  	}
   528  
   529  	// If the deployment is being marked as complete, set the job to stable.
   530  	if deployment.Status == structs.DeploymentStatusSuccessful {
   531  		if err := s.updateJobStabilityImpl(index, deployment.Namespace, deployment.JobID, deployment.JobVersion, true, txn); err != nil {
   532  			return fmt.Errorf("failed to update job stability: %v", err)
   533  		}
   534  	}
   535  
   536  	return nil
   537  }
   538  
   539  func (s *StateStore) Deployments(ws memdb.WatchSet) (memdb.ResultIterator, error) {
   540  	txn := s.db.ReadTxn()
   541  
   542  	// Walk the entire deployments table
   543  	iter, err := txn.Get("deployment", "id")
   544  	if err != nil {
   545  		return nil, err
   546  	}
   547  
   548  	ws.Add(iter.WatchCh())
   549  	return iter, nil
   550  }
   551  
   552  func (s *StateStore) DeploymentsByNamespace(ws memdb.WatchSet, namespace string) (memdb.ResultIterator, error) {
   553  	txn := s.db.ReadTxn()
   554  
   555  	// Walk the deployments table scoped to the given namespace
   556  	iter, err := txn.Get("deployment", "namespace", namespace)
   557  	if err != nil {
   558  		return nil, err
   559  	}
   560  
   561  	ws.Add(iter.WatchCh())
   562  	return iter, nil
   563  }
   564  
   565  func (s *StateStore) DeploymentsByIDPrefix(ws memdb.WatchSet, namespace, deploymentID string) (memdb.ResultIterator, error) {
   566  	txn := s.db.ReadTxn()
   567  
   568  	// Walk the deployments table by ID prefix
   569  	iter, err := txn.Get("deployment", "id_prefix", deploymentID)
   570  	if err != nil {
   571  		return nil, err
   572  	}
   573  
   574  	ws.Add(iter.WatchCh())
   575  
   576  	// Wrap the iterator in a filter
   577  	wrap := memdb.NewFilterIterator(iter, deploymentNamespaceFilter(namespace))
   578  	return wrap, nil
   579  }
   580  
   581  // deploymentNamespaceFilter returns a filter function that filters out all
   582  // deployments not in the given namespace. memdb.NewFilterIterator drops
   583  // objects for which the filter function returns true.
   583  func deploymentNamespaceFilter(namespace string) func(interface{}) bool {
   584  	return func(raw interface{}) bool {
   585  		d, ok := raw.(*structs.Deployment)
   586  		if !ok {
   587  			return true
   588  		}
   589  
   590  		return d.Namespace != namespace
   591  	}
   592  }
   593  
   594  func (s *StateStore) DeploymentByID(ws memdb.WatchSet, deploymentID string) (*structs.Deployment, error) {
   595  	txn := s.db.ReadTxn()
   596  	return s.deploymentByIDImpl(ws, deploymentID, txn)
   597  }
   598  
   599  func (s *StateStore) deploymentByIDImpl(ws memdb.WatchSet, deploymentID string, txn *txn) (*structs.Deployment, error) {
   600  	watchCh, existing, err := txn.FirstWatch("deployment", "id", deploymentID)
   601  	if err != nil {
   602  		return nil, fmt.Errorf("deployment lookup failed: %v", err)
   603  	}
   604  	ws.Add(watchCh)
   605  
   606  	if existing != nil {
   607  		return existing.(*structs.Deployment), nil
   608  	}
   609  
   610  	return nil, nil
   611  }
   612  
   613  func (s *StateStore) DeploymentsByJobID(ws memdb.WatchSet, namespace, jobID string, all bool) ([]*structs.Deployment, error) {
   614  	txn := s.db.ReadTxn()
   615  
   616  	var job *structs.Job
   617  	// Read job from state store
   618  	_, existing, err := txn.FirstWatch("jobs", "id", namespace, jobID)
   619  	if err != nil {
   620  		return nil, fmt.Errorf("job lookup failed: %v", err)
   621  	}
   622  	if existing != nil {
   623  		job = existing.(*structs.Job)
   624  	}
   625  
   626  	// Get an iterator over the deployments
   627  	iter, err := txn.Get("deployment", "job", namespace, jobID)
   628  	if err != nil {
   629  		return nil, err
   630  	}
   631  
   632  	ws.Add(iter.WatchCh())
   633  
   634  	var out []*structs.Deployment
   635  	for {
   636  		raw := iter.Next()
   637  		if raw == nil {
   638  			break
   639  		}
   640  		d := raw.(*structs.Deployment)
   641  
   642  		// If the deployment belongs to a job with the same ID but a different
   643  		// create index, and we were not asked for all deployments matching the
   644  		// job ID, then skip it
   645  		if !all && job != nil && d.JobCreateIndex != job.CreateIndex {
   646  			continue
   647  		}
   648  		out = append(out, d)
   649  	}
   650  
   651  	return out, nil
   652  }
   653  
   654  // LatestDeploymentByJobID returns the latest deployment for the given job. The
   655  // latest is determined strictly by CreateIndex.
   656  func (s *StateStore) LatestDeploymentByJobID(ws memdb.WatchSet, namespace, jobID string) (*structs.Deployment, error) {
   657  	txn := s.db.ReadTxn()
   658  
   659  	// Get an iterator over the deployments
   660  	iter, err := txn.Get("deployment", "job", namespace, jobID)
   661  	if err != nil {
   662  		return nil, err
   663  	}
   664  
   665  	ws.Add(iter.WatchCh())
   666  
   667  	var out *structs.Deployment
   668  	for {
   669  		raw := iter.Next()
   670  		if raw == nil {
   671  			break
   672  		}
   673  
   674  		d := raw.(*structs.Deployment)
   675  		if out == nil || out.CreateIndex < d.CreateIndex {
   676  			out = d
   677  		}
   678  	}
   679  
   680  	return out, nil
   681  }
   682  
   683  // DeleteDeployment is used to delete a set of deployments by ID
   684  func (s *StateStore) DeleteDeployment(index uint64, deploymentIDs []string) error {
   685  	txn := s.db.WriteTxn(index)
   686  	defer txn.Abort()
   687  
   688  	if len(deploymentIDs) == 0 {
   689  		return nil
   690  	}
   691  
   692  	for _, deploymentID := range deploymentIDs {
   693  		// Lookup the deployment
   694  		existing, err := txn.First("deployment", "id", deploymentID)
   695  		if err != nil {
   696  			return fmt.Errorf("deployment lookup failed: %v", err)
   697  		}
   698  		if existing == nil {
   699  			return fmt.Errorf("deployment not found")
   700  		}
   701  
   702  		// Delete the deployment
   703  		if err := txn.Delete("deployment", existing); err != nil {
   704  			return fmt.Errorf("deployment delete failed: %v", err)
   705  		}
   706  	}
   707  
   708  	if err := txn.Insert("index", &IndexEntry{"deployment", index}); err != nil {
   709  		return fmt.Errorf("index update failed: %v", err)
   710  	}
   711  
   712  	return txn.Commit()
   713  }
   714  
   715  // UpsertScalingEvent is used to insert a new scaling event.
   716  // Only the most recent structs.JobTrackedScalingEvents events are kept per
   717  // task group.
   717  func (s *StateStore) UpsertScalingEvent(index uint64, req *structs.ScalingEventRequest) error {
   718  	txn := s.db.WriteTxn(index)
   719  	defer txn.Abort()
   720  
   721  	// Get the existing events
   722  	existing, err := txn.First("scaling_event", "id", req.Namespace, req.JobID)
   723  	if err != nil {
   724  		return fmt.Errorf("scaling event lookup failed: %v", err)
   725  	}
   726  
   727  	var jobEvents *structs.JobScalingEvents
   728  	if existing != nil {
   729  		jobEvents = existing.(*structs.JobScalingEvents)
   730  	} else {
   731  		jobEvents = &structs.JobScalingEvents{
   732  			Namespace:     req.Namespace,
   733  			JobID:         req.JobID,
   734  			ScalingEvents: make(map[string][]*structs.ScalingEvent),
   735  		}
   736  	}
   737  
   738  	jobEvents.ModifyIndex = index
   739  	req.ScalingEvent.CreateIndex = index
   740  
   741  	events := jobEvents.ScalingEvents[req.TaskGroup]
   742  	// Prepend this latest event
   743  	events = append(
   744  		[]*structs.ScalingEvent{req.ScalingEvent},
   745  		events...,
   746  	)
   747  	// Truncate older events
   748  	if len(events) > structs.JobTrackedScalingEvents {
   749  		events = events[0:structs.JobTrackedScalingEvents]
   750  	}
   751  	jobEvents.ScalingEvents[req.TaskGroup] = events
   752  
   753  	// Insert the new event
   754  	if err := txn.Insert("scaling_event", jobEvents); err != nil {
   755  		return fmt.Errorf("scaling event insert failed: %v", err)
   756  	}
   757  
   758  	// Update the indexes table for scaling_event
   759  	if err := txn.Insert("index", &IndexEntry{"scaling_event", index}); err != nil {
   760  		return fmt.Errorf("index update failed: %v", err)
   761  	}
   762  
   763  	return txn.Commit()
   764  }
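
// Sketch (illustrative, not part of the original source): recording a
// scaling event. Only the newest structs.JobTrackedScalingEvents entries
// are kept per task group. structs.NewScalingEvent is assumed to be the
// structs-package constructor; the IDs are illustrative.
func exampleRecordScalingEvent(s *StateStore, index uint64) error {
	return s.UpsertScalingEvent(index, &structs.ScalingEventRequest{
		Namespace:    structs.DefaultNamespace,
		JobID:        "example-job",
		TaskGroup:    "web",
		ScalingEvent: structs.NewScalingEvent("manual scale-up"),
	})
}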
   765  
   766  // ScalingEvents returns an iterator over all the job scaling events
   767  func (s *StateStore) ScalingEvents(ws memdb.WatchSet) (memdb.ResultIterator, error) {
   768  	txn := s.db.ReadTxn()
   769  
   770  	// Walk the entire scaling_event table
   771  	iter, err := txn.Get("scaling_event", "id")
   772  	if err != nil {
   773  		return nil, err
   774  	}
   775  
   776  	ws.Add(iter.WatchCh())
   777  
   778  	return iter, nil
   779  }
   780  
   781  func (s *StateStore) ScalingEventsByJob(ws memdb.WatchSet, namespace, jobID string) (map[string][]*structs.ScalingEvent, uint64, error) {
   782  	txn := s.db.ReadTxn()
   783  
   784  	watchCh, existing, err := txn.FirstWatch("scaling_event", "id", namespace, jobID)
   785  	if err != nil {
   786  		return nil, 0, fmt.Errorf("job scaling events lookup failed: %v", err)
   787  	}
   788  	ws.Add(watchCh)
   789  
   790  	if existing != nil {
   791  		events := existing.(*structs.JobScalingEvents)
   792  		return events.ScalingEvents, events.ModifyIndex, nil
   793  	}
   794  	return nil, 0, nil
   795  }
   796  
   797  // UpsertNode is used to register a node or update a node definition
   798  // This is assumed to be triggered by the client, so we retain the value
   799  // of drain/eligibility which is set by the scheduler.
   800  func (s *StateStore) UpsertNode(msgType structs.MessageType, index uint64, node *structs.Node) error {
   801  	txn := s.db.WriteTxnMsgT(msgType, index)
   802  	defer txn.Abort()
   803  
   804  	if err := upsertNodeTxn(txn, index, node); err != nil {
   805  		return err
   806  	}
   808  	return txn.Commit()
   809  }
   810  
   811  func upsertNodeTxn(txn *txn, index uint64, node *structs.Node) error {
   812  	// Check if the node already exists
   813  	existing, err := txn.First("nodes", "id", node.ID)
   814  	if err != nil {
   815  		return fmt.Errorf("node lookup failed: %v", err)
   816  	}
   817  
   818  	// Setup the indexes correctly
   819  	if existing != nil {
   820  		exist := existing.(*structs.Node)
   821  		node.CreateIndex = exist.CreateIndex
   822  		node.ModifyIndex = index
   823  
   824  		// Retain node events that have already been set on the node
   825  		node.Events = exist.Events
   826  
   827  		// If we are transitioning from down, record the re-registration
   828  		if exist.Status == structs.NodeStatusDown && node.Status != structs.NodeStatusDown {
   829  			appendNodeEvents(index, node, []*structs.NodeEvent{
   830  				structs.NewNodeEvent().SetSubsystem(structs.NodeEventSubsystemCluster).
   831  					SetMessage(NodeRegisterEventReregistered).
   832  					SetTimestamp(time.Unix(node.StatusUpdatedAt, 0))})
   833  		}
   834  
   835  		node.Drain = exist.Drain                                 // Retain the drain mode
   836  		node.SchedulingEligibility = exist.SchedulingEligibility // Retain the eligibility
   837  		node.DrainStrategy = exist.DrainStrategy                 // Retain the drain strategy
   838  	} else {
   839  		// Because this is the first time the node is being registered, we should
   840  		// also create a node registration event
   841  		nodeEvent := structs.NewNodeEvent().SetSubsystem(structs.NodeEventSubsystemCluster).
   842  			SetMessage(NodeRegisterEventRegistered).
   843  			SetTimestamp(time.Unix(node.StatusUpdatedAt, 0))
   844  		node.Events = []*structs.NodeEvent{nodeEvent}
   845  		node.CreateIndex = index
   846  		node.ModifyIndex = index
   847  	}
   848  
   849  	// Insert the node
   850  	if err := txn.Insert("nodes", node); err != nil {
   851  		return fmt.Errorf("node insert failed: %v", err)
   852  	}
   853  	if err := txn.Insert("index", &IndexEntry{"nodes", index}); err != nil {
   854  		return fmt.Errorf("index update failed: %v", err)
   855  	}
   856  	if err := upsertNodeCSIPlugins(txn, node, index); err != nil {
   857  		return fmt.Errorf("csi plugin update failed: %v", err)
   858  	}
   859  
   860  	return nil
   861  }
   862  
   863  // DeleteNode deregisters a batch of nodes
   864  func (s *StateStore) DeleteNode(msgType structs.MessageType, index uint64, nodes []string) error {
   865  	txn := s.db.WriteTxn(index)
   866  	defer txn.Abort()
   867  
   868  	if err := deleteNodeTxn(txn, index, nodes); err != nil {
   869  		return err
   870  	}
   872  	return txn.Commit()
   873  }
   874  
   875  func deleteNodeTxn(txn *txn, index uint64, nodes []string) error {
   876  	if len(nodes) == 0 {
   877  		return fmt.Errorf("node ids missing")
   878  	}
   879  
   880  	for _, nodeID := range nodes {
   881  		existing, err := txn.First("nodes", "id", nodeID)
   882  		if err != nil {
   883  			return fmt.Errorf("node lookup failed: %s: %v", nodeID, err)
   884  		}
   885  		if existing == nil {
   886  			return fmt.Errorf("node not found: %s", nodeID)
   887  		}
   888  
   889  		// Delete the node
   890  		if err := txn.Delete("nodes", existing); err != nil {
   891  			return fmt.Errorf("node delete failed: %s: %v", nodeID, err)
   892  		}
   893  
   894  		node := existing.(*structs.Node)
   895  		if err := deleteNodeCSIPlugins(txn, node, index); err != nil {
   896  			return fmt.Errorf("csi plugin delete failed: %v", err)
   897  		}
   898  	}
   899  
   900  	if err := txn.Insert("index", &IndexEntry{"nodes", index}); err != nil {
   901  		return fmt.Errorf("index update failed: %v", err)
   902  	}
   903  
   904  	return nil
   905  }
   906  
   907  // UpdateNodeStatus is used to update the status of a node
   908  func (s *StateStore) UpdateNodeStatus(msgType structs.MessageType, index uint64, nodeID, status string, updatedAt int64, event *structs.NodeEvent) error {
   909  	txn := s.db.WriteTxnMsgT(msgType, index)
   910  	defer txn.Abort()
   911  
   912  	if err := s.updateNodeStatusTxn(txn, nodeID, status, updatedAt, event); err != nil {
   913  		return err
   914  	}
   915  
   916  	return txn.Commit()
   917  }
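
// Sketch (illustrative, not part of the original source): marking a node
// down while recording a cluster-subsystem node event, mirroring the event
// construction used in upsertNodeTxn above. The message type constant is
// assumed from the structs package.
func exampleMarkNodeDown(s *StateStore, index uint64, nodeID string) error {
	now := time.Now()
	event := structs.NewNodeEvent().
		SetSubsystem(structs.NodeEventSubsystemCluster).
		SetMessage("Node marked down").
		SetTimestamp(now)
	return s.UpdateNodeStatus(structs.NodeUpdateStatusRequestType, index,
		nodeID, structs.NodeStatusDown, now.Unix(), event)
}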
   918  
   919  func (s *StateStore) updateNodeStatusTxn(txn *txn, nodeID, status string, updatedAt int64, event *structs.NodeEvent) error {
   920  
   921  	// Lookup the node
   922  	existing, err := txn.First("nodes", "id", nodeID)
   923  	if err != nil {
   924  		return fmt.Errorf("node lookup failed: %v", err)
   925  	}
   926  	if existing == nil {
   927  		return fmt.Errorf("node not found")
   928  	}
   929  
   930  	// Copy the existing node
   931  	existingNode := existing.(*structs.Node)
   932  	copyNode := existingNode.Copy()
   933  	copyNode.StatusUpdatedAt = updatedAt
   934  
   935  	// Add the event if given
   936  	if event != nil {
   937  		appendNodeEvents(txn.Index, copyNode, []*structs.NodeEvent{event})
   938  	}
   939  
   940  	// Update the status in the copy
   941  	copyNode.Status = status
   942  	copyNode.ModifyIndex = txn.Index
   943  
   944  	// Insert the node
   945  	if err := txn.Insert("nodes", copyNode); err != nil {
   946  		return fmt.Errorf("node update failed: %v", err)
   947  	}
   948  	if err := txn.Insert("index", &IndexEntry{"nodes", txn.Index}); err != nil {
   949  		return fmt.Errorf("index update failed: %v", err)
   950  	}
   951  	return nil
   952  }
   953  
   954  // BatchUpdateNodeDrain is used to update the drain of a set of nodes
   955  func (s *StateStore) BatchUpdateNodeDrain(msgType structs.MessageType, index uint64, updatedAt int64, updates map[string]*structs.DrainUpdate, events map[string]*structs.NodeEvent) error {
   956  	txn := s.db.WriteTxnMsgT(msgType, index)
   957  	defer txn.Abort()
   958  	for node, update := range updates {
   959  		if err := s.updateNodeDrainImpl(txn, index, node, update.DrainStrategy, update.MarkEligible, updatedAt, events[node]); err != nil {
   960  			return err
   961  		}
   962  	}
   963  	return txn.Commit()
   964  }
   965  
   966  // UpdateNodeDrain is used to update the drain of a node
   967  func (s *StateStore) UpdateNodeDrain(msgType structs.MessageType, index uint64, nodeID string, drain *structs.DrainStrategy, markEligible bool, updatedAt int64, event *structs.NodeEvent) error {
   968  
   969  	txn := s.db.WriteTxn(index)
   970  	defer txn.Abort()
   971  	if err := s.updateNodeDrainImpl(txn, index, nodeID, drain, markEligible, updatedAt, event); err != nil {
   972  		return err
   973  	}
   974  	return txn.Commit()
   975  }
   976  
   977  func (s *StateStore) updateNodeDrainImpl(txn *txn, index uint64, nodeID string,
   978  	drain *structs.DrainStrategy, markEligible bool, updatedAt int64, event *structs.NodeEvent) error {
   979  
   980  	// Lookup the node
   981  	existing, err := txn.First("nodes", "id", nodeID)
   982  	if err != nil {
   983  		return fmt.Errorf("node lookup failed: %v", err)
   984  	}
   985  	if existing == nil {
   986  		return fmt.Errorf("node not found")
   987  	}
   988  
   989  	// Copy the existing node
   990  	existingNode := existing.(*structs.Node)
   991  	copyNode := existingNode.Copy()
   992  	copyNode.StatusUpdatedAt = updatedAt
   993  
   994  	// Add the event if given
   995  	if event != nil {
   996  		appendNodeEvents(index, copyNode, []*structs.NodeEvent{event})
   997  	}
   998  
   999  	// Update the drain in the copy
  1000  	copyNode.Drain = drain != nil // COMPAT: Remove in Nomad 0.10
  1001  	copyNode.DrainStrategy = drain
  1002  	if drain != nil {
  1003  		copyNode.SchedulingEligibility = structs.NodeSchedulingIneligible
  1004  	} else if markEligible {
  1005  		copyNode.SchedulingEligibility = structs.NodeSchedulingEligible
  1006  	}
  1007  
  1008  	copyNode.ModifyIndex = index
  1009  
  1010  	// Insert the node
  1011  	if err := txn.Insert("nodes", copyNode); err != nil {
  1012  		return fmt.Errorf("node update failed: %v", err)
  1013  	}
  1014  	if err := txn.Insert("index", &IndexEntry{"nodes", index}); err != nil {
  1015  		return fmt.Errorf("index update failed: %v", err)
  1016  	}
  1017  
  1018  	return nil
  1019  }
  1020  
  1021  // UpdateNodeEligibility is used to update the scheduling eligibility of a node
  1022  func (s *StateStore) UpdateNodeEligibility(msgType structs.MessageType, index uint64, nodeID string, eligibility string, updatedAt int64, event *structs.NodeEvent) error {
  1023  
  1024  	txn := s.db.WriteTxnMsgT(msgType, index)
  1025  	defer txn.Abort()
  1026  
  1027  	// Lookup the node
  1028  	existing, err := txn.First("nodes", "id", nodeID)
  1029  	if err != nil {
  1030  		return fmt.Errorf("node lookup failed: %v", err)
  1031  	}
  1032  	if existing == nil {
  1033  		return fmt.Errorf("node not found")
  1034  	}
  1035  
  1036  	// Copy the existing node
  1037  	existingNode := existing.(*structs.Node)
  1038  	copyNode := existingNode.Copy()
  1039  	copyNode.StatusUpdatedAt = updatedAt
  1040  
  1041  	// Add the event if given
  1042  	if event != nil {
  1043  		appendNodeEvents(index, copyNode, []*structs.NodeEvent{event})
  1044  	}
  1045  
  1046  	// Check if this is a valid action
  1047  	if copyNode.DrainStrategy != nil && eligibility == structs.NodeSchedulingEligible {
  1048  		return fmt.Errorf("can not set node's scheduling eligibility to eligible while it is draining")
  1049  	}
  1050  
  1051  	// Update the eligibility in the copy
  1052  	copyNode.SchedulingEligibility = eligibility
  1053  	copyNode.ModifyIndex = index
  1054  
  1055  	// Insert the node
  1056  	if err := txn.Insert("nodes", copyNode); err != nil {
  1057  		return fmt.Errorf("node update failed: %v", err)
  1058  	}
  1059  	if err := txn.Insert("index", &IndexEntry{"nodes", index}); err != nil {
  1060  		return fmt.Errorf("index update failed: %v", err)
  1061  	}
  1062  
  1063  	return txn.Commit()
  1064  }
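
// Sketch (illustrative, not part of the original source): marking a node
// ineligible for scheduling. Note the inverse transition is rejected while
// a drain is active, per the check above. The message type constant is
// assumed from the structs package.
func exampleMarkNodeIneligible(s *StateStore, index uint64, nodeID string) error {
	return s.UpdateNodeEligibility(structs.NodeUpdateEligibilityRequestType,
		index, nodeID, structs.NodeSchedulingIneligible, time.Now().Unix(), nil)
}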
  1065  
  1066  // UpsertNodeEvents adds the node events to the nodes, rotating events as
  1067  // necessary.
  1068  func (s *StateStore) UpsertNodeEvents(msgType structs.MessageType, index uint64, nodeEvents map[string][]*structs.NodeEvent) error {
  1069  	txn := s.db.WriteTxnMsgT(msgType, index)
  1070  	defer txn.Abort()
  1071  
  1072  	for nodeID, events := range nodeEvents {
  1073  		if err := s.upsertNodeEvents(index, nodeID, events, txn); err != nil {
  1074  			return err
  1075  		}
  1076  	}
  1077  
  1078  	return txn.Commit()
  1079  }
  1080  
  1081  // upsertNodeEvents upserts node events for a given node. It also enforces an
  1082  // upper bound on the number of node events stored at any time, deleting the
  1083  // oldest events once this bound has been exceeded.
  1084  func (s *StateStore) upsertNodeEvents(index uint64, nodeID string, events []*structs.NodeEvent, txn *txn) error {
  1085  	// Lookup the node
  1086  	existing, err := txn.First("nodes", "id", nodeID)
  1087  	if err != nil {
  1088  		return fmt.Errorf("node lookup failed: %v", err)
  1089  	}
  1090  	if existing == nil {
  1091  		return fmt.Errorf("node not found")
  1092  	}
  1093  
  1094  	// Copy the existing node
  1095  	existingNode := existing.(*structs.Node)
  1096  	copyNode := existingNode.Copy()
  1097  	appendNodeEvents(index, copyNode, events)
  1098  
  1099  	// Insert the node
  1100  	if err := txn.Insert("nodes", copyNode); err != nil {
  1101  		return fmt.Errorf("node update failed: %v", err)
  1102  	}
  1103  	if err := txn.Insert("index", &IndexEntry{"nodes", index}); err != nil {
  1104  		return fmt.Errorf("index update failed: %v", err)
  1105  	}
  1106  
  1107  	return nil
  1108  }
  1109  
  1110  // appendNodeEvents is a helper that takes a node and new events and appends
  1111  // them, pruning older events as needed.
  1112  func appendNodeEvents(index uint64, node *structs.Node, events []*structs.NodeEvent) {
  1113  	// Add the events, updating the indexes
  1114  	for _, e := range events {
  1115  		e.CreateIndex = index
  1116  		node.Events = append(node.Events, e)
  1117  	}
  1118  
  1119  	// Keep node events pruned to not exceed the max allowed
  1120  	if l := len(node.Events); l > structs.MaxRetainedNodeEvents {
  1121  		delta := l - structs.MaxRetainedNodeEvents
  1122  		node.Events = node.Events[delta:]
  1123  	}
  1124  }
  1125  
  1126  // upsertNodeCSIPlugins indexes CSI plugins for volume retrieval, with health
  1127  // state. It's called from upsertNodeTxn, so fingerprint-driven health changes
  1128  // are updated.
  1128  func upsertNodeCSIPlugins(txn *txn, node *structs.Node, index uint64) error {
  1129  
  1130  	loop := func(info *structs.CSIInfo) error {
  1131  		raw, err := txn.First("csi_plugins", "id", info.PluginID)
  1132  		if err != nil {
  1133  			return fmt.Errorf("csi_plugin lookup error: %s %v", info.PluginID, err)
  1134  		}
  1135  
  1136  		var plug *structs.CSIPlugin
  1137  		if raw != nil {
  1138  			plug = raw.(*structs.CSIPlugin).Copy()
  1139  		} else {
  1140  			if !info.Healthy {
  1141  				// we don't want to create new plugins for unhealthy
  1142  				// allocs, otherwise we'd recreate the plugin when we
  1143  				// get the update for the alloc becoming terminal
  1144  				return nil
  1145  			}
  1146  			plug = structs.NewCSIPlugin(info.PluginID, index)
  1147  		}
  1148  
  1149  		// The plugin may have been created by the job being updated, in which
  1150  		// case this data will not be configured; it's only available to the
  1151  		// fingerprint system.
  1152  		plug.Provider = info.Provider
  1153  		plug.Version = info.ProviderVersion
  1154  
  1155  		err = plug.AddPlugin(node.ID, info)
  1156  		if err != nil {
  1157  			return err
  1158  		}
  1159  
  1160  		plug.ModifyIndex = index
  1161  
  1162  		err = txn.Insert("csi_plugins", plug)
  1163  		if err != nil {
  1164  			return fmt.Errorf("csi_plugins insert error: %v", err)
  1165  		}
  1166  
  1167  		return nil
  1168  	}
  1169  
  1170  	inUseController := map[string]struct{}{}
  1171  	inUseNode := map[string]struct{}{}
  1172  
  1173  	for _, info := range node.CSIControllerPlugins {
  1174  		err := loop(info)
  1175  		if err != nil {
  1176  			return err
  1177  		}
  1178  		inUseController[info.PluginID] = struct{}{}
  1179  	}
  1180  
  1181  	for _, info := range node.CSINodePlugins {
  1182  		err := loop(info)
  1183  		if err != nil {
  1184  			return err
  1185  		}
  1186  		inUseNode[info.PluginID] = struct{}{}
  1187  	}
  1188  
  1189  	// Remove the client node from any plugin that is no longer
  1190  	// running on it.
  1191  	iter, err := txn.Get("csi_plugins", "id")
  1192  	if err != nil {
  1193  		return fmt.Errorf("csi_plugins lookup failed: %v", err)
  1194  	}
  1195  	for {
  1196  		raw := iter.Next()
  1197  		if raw == nil {
  1198  			break
  1199  		}
  1200  		plug, ok := raw.(*structs.CSIPlugin)
  1201  		if !ok {
  1202  			continue
  1203  		}
  1204  		plug = plug.Copy()
  1205  
  1206  		var hadDelete bool
  1207  		if _, ok := inUseController[plug.ID]; !ok {
  1208  			if _, asController := plug.Controllers[node.ID]; asController {
  1209  				err := plug.DeleteNodeForType(node.ID, structs.CSIPluginTypeController)
  1210  				if err != nil {
  1211  					return err
  1212  				}
  1213  				hadDelete = true
  1214  			}
  1215  		}
  1216  		if _, ok := inUseNode[plug.ID]; !ok {
  1217  			if _, asNode := plug.Nodes[node.ID]; asNode {
  1218  				err := plug.DeleteNodeForType(node.ID, structs.CSIPluginTypeNode)
  1219  				if err != nil {
  1220  					return err
  1221  				}
  1222  				hadDelete = true
  1223  			}
  1224  		}
  1225  		// we check this flag both for performance and to make sure we
  1226  		// don't delete a plugin when registering a node plugin but
  1227  		// no controller
  1228  		if hadDelete {
  1229  			err = updateOrGCPlugin(index, txn, plug)
  1230  			if err != nil {
  1231  				return err
  1232  			}
  1233  		}
  1234  	}
  1235  
  1236  	if err := txn.Insert("index", &IndexEntry{"csi_plugins", index}); err != nil {
  1237  		return fmt.Errorf("index update failed: %v", err)
  1238  	}
  1239  
  1240  	return nil
  1241  }
  1242  
  1243  // deleteNodeCSIPlugins cleans up CSIInfo node health status, called in DeleteNode
  1244  func deleteNodeCSIPlugins(txn *txn, node *structs.Node, index uint64) error {
  1245  	if len(node.CSIControllerPlugins) == 0 && len(node.CSINodePlugins) == 0 {
  1246  		return nil
  1247  	}
  1248  
  1249  	names := map[string]struct{}{}
  1250  	for _, info := range node.CSIControllerPlugins {
  1251  		names[info.PluginID] = struct{}{}
  1252  	}
  1253  	for _, info := range node.CSINodePlugins {
  1254  		names[info.PluginID] = struct{}{}
  1255  	}
  1256  
  1257  	for id := range names {
  1258  		raw, err := txn.First("csi_plugins", "id", id)
  1259  		if err != nil {
  1260  			return fmt.Errorf("csi_plugins lookup error %s: %v", id, err)
  1261  		}
  1262  		if raw == nil {
  1263  			// plugin may have been deregistered but we didn't
  1264  			// update the fingerprint yet
  1265  			continue
  1266  		}
  1267  
  1268  		plug := raw.(*structs.CSIPlugin).Copy()
  1269  		err = plug.DeleteNode(node.ID)
  1270  		if err != nil {
  1271  			return err
  1272  		}
  1273  		err = updateOrGCPlugin(index, txn, plug)
  1274  		if err != nil {
  1275  			return err
  1276  		}
  1277  	}
  1278  
  1279  	if err := txn.Insert("index", &IndexEntry{"csi_plugins", index}); err != nil {
  1280  		return fmt.Errorf("index update failed: %v", err)
  1281  	}
  1282  
  1283  	return nil
  1284  }
  1285  
  1286  // updateOrGCPlugin updates a plugin but will delete it if the plugin is empty
  1287  func updateOrGCPlugin(index uint64, txn Txn, plug *structs.CSIPlugin) error {
  1288  	plug.ModifyIndex = index
  1289  
  1290  	if plug.IsEmpty() {
  1291  		err := txn.Delete("csi_plugins", plug)
  1292  		if err != nil {
  1293  			return fmt.Errorf("csi_plugins delete error: %v", err)
  1294  		}
  1295  	} else {
  1296  		err := txn.Insert("csi_plugins", plug)
  1297  		if err != nil {
  1298  			return fmt.Errorf("csi_plugins update error %s: %v", plug.ID, err)
  1299  		}
  1300  	}
  1301  	return nil
  1302  }
  1303  
  1304  // deleteJobFromPlugins removes the allocations of this job from any plugins the job is
  1305  // running, possibly deleting the plugin if it's no longer in use. It's called in DeleteJobTxn
  1306  func (s *StateStore) deleteJobFromPlugins(index uint64, txn Txn, job *structs.Job) error {
  1307  	ws := memdb.NewWatchSet()
  1308  	summary, err := s.JobSummaryByID(ws, job.Namespace, job.ID)
  1309  	if err != nil {
  1310  		return fmt.Errorf("error getting job summary: %v", err)
  1311  	}
  1312  
  1313  	allocs, err := s.AllocsByJob(ws, job.Namespace, job.ID, false)
  1314  	if err != nil {
  1315  		return fmt.Errorf("error getting allocations: %v", err)
  1316  	}
  1317  
  1318  	type pair struct {
  1319  		pluginID string
  1320  		alloc    *structs.Allocation
  1321  	}
  1322  
  1323  	plugAllocs := []*pair{}
  1324  	found := map[string]struct{}{}
  1325  
  1326  	// Find plugins for allocs that belong to this job
  1327  	for _, a := range allocs {
  1328  		tg := a.Job.LookupTaskGroup(a.TaskGroup)
  1329  		found[tg.Name] = struct{}{}
  1330  		for _, t := range tg.Tasks {
  1331  			if t.CSIPluginConfig == nil {
  1332  				continue
  1333  			}
  1334  			plugAllocs = append(plugAllocs, &pair{
  1335  				pluginID: t.CSIPluginConfig.ID,
  1336  				alloc:    a,
  1337  			})
  1338  		}
  1339  	}
  1340  
  1341  	// Find any plugins that do not yet have allocs for this job
  1342  	for _, tg := range job.TaskGroups {
  1343  		if _, ok := found[tg.Name]; ok {
  1344  			continue
  1345  		}
  1346  
  1347  		for _, t := range tg.Tasks {
  1348  			if t.CSIPluginConfig == nil {
  1349  				continue
  1350  			}
  1351  			plugAllocs = append(plugAllocs, &pair{
  1352  				pluginID: t.CSIPluginConfig.ID,
  1353  			})
  1354  		}
  1355  	}
  1356  
  1357  	plugins := map[string]*structs.CSIPlugin{}
  1358  
  1359  	for _, x := range plugAllocs {
  1360  		plug, ok := plugins[x.pluginID]
  1361  
  1362  		if !ok {
  1363  			plug, err = s.CSIPluginByIDTxn(txn, nil, x.pluginID)
  1364  			if err != nil {
  1365  				return fmt.Errorf("error getting plugin: %s, %v", x.pluginID, err)
  1366  			}
  1367  			if plug == nil {
  1368  				return fmt.Errorf("plugin missing: %s %v", x.pluginID, err)
  1369  			}
  1370  			// only copy once, so we update the same plugin on each alloc
  1371  			plugins[x.pluginID] = plug.Copy()
  1372  			plug = plugins[x.pluginID]
  1373  		}
  1374  
  1375  		if x.alloc == nil {
  1376  			continue
  1377  		}
  1378  		err := plug.DeleteAlloc(x.alloc.ID, x.alloc.NodeID)
  1379  		if err != nil {
  1380  			return err
  1381  		}
  1382  	}
  1383  
  1384  	for _, plug := range plugins {
  1385  		plug.DeleteJob(job, summary)
  1386  		err = updateOrGCPlugin(index, txn, plug)
  1387  		if err != nil {
  1388  			return err
  1389  		}
  1390  	}
  1391  
  1392  	if err = txn.Insert("index", &IndexEntry{"csi_plugins", index}); err != nil {
  1393  		return fmt.Errorf("index update failed: %v", err)
  1394  	}
  1395  
  1396  	return nil
  1397  }
  1398  
  1399  // NodeByID is used to lookup a node by ID
  1400  func (s *StateStore) NodeByID(ws memdb.WatchSet, nodeID string) (*structs.Node, error) {
  1401  	txn := s.db.ReadTxn()
  1402  
  1403  	watchCh, existing, err := txn.FirstWatch("nodes", "id", nodeID)
  1404  	if err != nil {
  1405  		return nil, fmt.Errorf("node lookup failed: %v", err)
  1406  	}
  1407  	ws.Add(watchCh)
  1408  
  1409  	if existing != nil {
  1410  		return existing.(*structs.Node), nil
  1411  	}
  1412  	return nil, nil
  1413  }
  1414  
  1415  // NodesByIDPrefix is used to lookup nodes by prefix
  1416  func (s *StateStore) NodesByIDPrefix(ws memdb.WatchSet, nodeID string) (memdb.ResultIterator, error) {
  1417  	txn := s.db.ReadTxn()
  1418  
  1419  	iter, err := txn.Get("nodes", "id_prefix", nodeID)
  1420  	if err != nil {
  1421  		return nil, fmt.Errorf("node lookup failed: %v", err)
  1422  	}
  1423  	ws.Add(iter.WatchCh())
  1424  
  1425  	return iter, nil
  1426  }
  1427  
  1428  // NodeBySecretID is used to lookup a node by SecretID
  1429  func (s *StateStore) NodeBySecretID(ws memdb.WatchSet, secretID string) (*structs.Node, error) {
  1430  	txn := s.db.ReadTxn()
  1431  
  1432  	watchCh, existing, err := txn.FirstWatch("nodes", "secret_id", secretID)
  1433  	if err != nil {
  1434  		return nil, fmt.Errorf("node lookup by SecretID failed: %v", err)
  1435  	}
  1436  	ws.Add(watchCh)
  1437  
  1438  	if existing != nil {
  1439  		return existing.(*structs.Node), nil
  1440  	}
  1441  	return nil, nil
  1442  }
  1443  
  1444  // Nodes returns an iterator over all the nodes
  1445  func (s *StateStore) Nodes(ws memdb.WatchSet) (memdb.ResultIterator, error) {
  1446  	txn := s.db.ReadTxn()
  1447  
  1448  	// Walk the entire nodes table
  1449  	iter, err := txn.Get("nodes", "id")
  1450  	if err != nil {
  1451  		return nil, err
  1452  	}
  1453  	ws.Add(iter.WatchCh())
  1454  	return iter, nil
  1455  }
  1456  
  1457  // UpsertJob is used to register a job or update a job definition
  1458  func (s *StateStore) UpsertJob(msgType structs.MessageType, index uint64, job *structs.Job) error {
  1459  	txn := s.db.WriteTxnMsgT(msgType, index)
  1460  	defer txn.Abort()
  1461  	if err := s.upsertJobImpl(index, job, false, txn); err != nil {
  1462  		return err
  1463  	}
  1464  	return txn.Commit()
  1465  }
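
// Sketch (illustrative, not part of the original source): registering a job
// at a given Raft index. structs.JobRegisterRequestType is assumed to be the
// message type the FSM uses for job registration.
func exampleRegisterJob(s *StateStore, index uint64, job *structs.Job) error {
	return s.UpsertJob(structs.JobRegisterRequestType, index, job)
}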
  1466  
  1467  // UpsertJobTxn is used to register a job or update a job definition, like
  1468  // UpsertJob, but in a transaction. Useful when making multiple modifications
  1469  // atomically.
  1469  func (s *StateStore) UpsertJobTxn(index uint64, job *structs.Job, txn Txn) error {
  1470  	return s.upsertJobImpl(index, job, false, txn)
  1471  }
  1472  
  1473  // upsertJobImpl is the implementation for registering a job or updating a job definition
  1474  func (s *StateStore) upsertJobImpl(index uint64, job *structs.Job, keepVersion bool, txn *txn) error {
  1475  	// Assert the namespace exists
  1476  	if exists, err := s.namespaceExists(txn, job.Namespace); err != nil {
  1477  		return err
  1478  	} else if !exists {
  1479  		return fmt.Errorf("job %q is in nonexistent namespace %q", job.ID, job.Namespace)
  1480  	}
  1481  
  1482  	// Check if the job already exists
  1483  	existing, err := txn.First("jobs", "id", job.Namespace, job.ID)
  1484  	var existingJob *structs.Job
  1485  	if err != nil {
  1486  		return fmt.Errorf("job lookup failed: %v", err)
  1487  	}
  1488  
  1489  	// Setup the indexes correctly
  1490  	if existing != nil {
  1491  		job.CreateIndex = existing.(*structs.Job).CreateIndex
  1492  		job.ModifyIndex = index
  1493  
  1494  		existingJob = existing.(*structs.Job)
  1495  
  1496  		// Bump the version unless asked to keep it. This should only be done
  1497  		// when changing an internal field such as Stable. A spec change should
  1498  		// always come with a version bump
  1499  		if !keepVersion {
  1500  			job.JobModifyIndex = index
  1501  			if job.Version <= existingJob.Version {
  1502  				job.Version = existingJob.Version + 1
  1503  			}
  1504  		}
  1505  
  1506  		// Compute the job status
  1507  		var err error
  1508  		job.Status, err = s.getJobStatus(txn, job, false)
  1509  		if err != nil {
  1510  			return fmt.Errorf("setting job status for %q failed: %v", job.ID, err)
  1511  		}
  1512  	} else {
  1513  		job.CreateIndex = index
  1514  		job.ModifyIndex = index
  1515  		job.JobModifyIndex = index
  1516  
  1517  		if err := s.setJobStatus(index, txn, job, false, ""); err != nil {
  1518  			return fmt.Errorf("setting job status for %q failed: %v", job.ID, err)
  1519  		}
  1520  
  1521  		// Have to get the job again since it could have been updated
  1522  		updated, err := txn.First("jobs", "id", job.Namespace, job.ID)
  1523  		if err != nil {
  1524  			return fmt.Errorf("job lookup failed: %v", err)
  1525  		}
  1526  		if updated != nil {
  1527  			job = updated.(*structs.Job)
  1528  		}
  1529  	}
  1530  
  1531  	if err := s.updateSummaryWithJob(index, job, txn); err != nil {
  1532  		return fmt.Errorf("unable to create job summary: %v", err)
  1533  	}
  1534  
  1535  	if err := s.upsertJobVersion(index, job, txn); err != nil {
  1536  		return fmt.Errorf("unable to upsert job into job_version table: %v", err)
  1537  	}
  1538  
  1539  	if err := s.updateJobScalingPolicies(index, job, txn); err != nil {
  1540  		return fmt.Errorf("unable to update job scaling policies: %v", err)
  1541  	}
  1542  
  1543  	if err := s.updateJobRecommendations(index, txn, existingJob, job); err != nil {
  1544  		return fmt.Errorf("unable to update job recommendations: %v", err)
  1545  	}
  1546  
  1547  	if err := s.updateJobCSIPlugins(index, job, existingJob, txn); err != nil {
  1548  		return fmt.Errorf("unable to update job scaling policies: %v", err)
  1549  	}
  1550  
  1551  	// Insert the job
  1552  	if err := txn.Insert("jobs", job); err != nil {
  1553  		return fmt.Errorf("job insert failed: %v", err)
  1554  	}
  1555  	if err := txn.Insert("index", &IndexEntry{"jobs", index}); err != nil {
  1556  		return fmt.Errorf("index update failed: %v", err)
  1557  	}
  1558  
  1559  	return nil
  1560  }
  1561  
  1562  // DeleteJob is used to deregister a job
  1563  func (s *StateStore) DeleteJob(index uint64, namespace, jobID string) error {
  1564  	txn := s.db.WriteTxn(index)
  1565  	defer txn.Abort()
  1566  
  1567  	err := s.DeleteJobTxn(index, namespace, jobID, txn)
  1568  	if err == nil {
  1569  		return txn.Commit()
  1570  	}
  1571  	return err
  1572  }
  1573  
  1574  // DeleteJobTxn is used to deregister a job, like DeleteJob,
  1575  // but in a transaction. Useful when making multiple modifications atomically
  1576  func (s *StateStore) DeleteJobTxn(index uint64, namespace, jobID string, txn Txn) error {
  1577  	// Lookup the job
  1578  	existing, err := txn.First("jobs", "id", namespace, jobID)
  1579  	if err != nil {
  1580  		return fmt.Errorf("job lookup failed: %v", err)
  1581  	}
  1582  	if existing == nil {
  1583  		return fmt.Errorf("job not found")
  1584  	}
  1585  
  1586  	// Check if we should update a parent job summary
  1587  	job := existing.(*structs.Job)
  1588  	if job.ParentID != "" {
  1589  		summaryRaw, err := txn.First("job_summary", "id", namespace, job.ParentID)
  1590  		if err != nil {
  1591  			return fmt.Errorf("unable to retrieve summary for parent job: %v", err)
  1592  		}
  1593  
  1594  		// Only continue if the summary exists. It may not exist if the
  1595  		// parent job was removed
  1596  		if summaryRaw != nil {
  1597  			existing := summaryRaw.(*structs.JobSummary)
  1598  			pSummary := existing.Copy()
  1599  			if pSummary.Children != nil {
  1600  
  1601  				modified := false
  1602  				switch job.Status {
  1603  				case structs.JobStatusPending:
  1604  					pSummary.Children.Pending--
  1605  					pSummary.Children.Dead++
  1606  					modified = true
  1607  				case structs.JobStatusRunning:
  1608  					pSummary.Children.Running--
  1609  					pSummary.Children.Dead++
  1610  					modified = true
  1611  				case structs.JobStatusDead:
  1612  				default:
  1613  					return fmt.Errorf("unknown old job status %q", job.Status)
  1614  				}
  1615  
  1616  				if modified {
  1617  					// Update the modify index
  1618  					pSummary.ModifyIndex = index
  1619  
  1620  					// Insert the summary
  1621  					if err := txn.Insert("job_summary", pSummary); err != nil {
  1622  						return fmt.Errorf("job summary insert failed: %v", err)
  1623  					}
  1624  					if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil {
  1625  						return fmt.Errorf("index update failed: %v", err)
  1626  					}
  1627  				}
  1628  			}
  1629  		}
  1630  	}
  1631  
  1632  	// Delete the job
  1633  	if err := txn.Delete("jobs", existing); err != nil {
  1634  		return fmt.Errorf("job delete failed: %v", err)
  1635  	}
  1636  	if err := txn.Insert("index", &IndexEntry{"jobs", index}); err != nil {
  1637  		return fmt.Errorf("index update failed: %v", err)
  1638  	}
  1639  
  1640  	// Delete the job versions
  1641  	if err := s.deleteJobVersions(index, job, txn); err != nil {
  1642  		return err
  1643  	}
  1644  
  1645  	// Cleanup plugins registered by this job, before we delete the summary
  1646  	err = s.deleteJobFromPlugins(index, txn, job)
  1647  	if err != nil {
  1648  		return fmt.Errorf("deleting job from plugin: %v", err)
  1649  	}
  1650  
  1651  	// Delete the job summary
  1652  	if _, err = txn.DeleteAll("job_summary", "id", namespace, jobID); err != nil {
  1653  		return fmt.Errorf("deleting job summary failed: %v", err)
  1654  	}
  1655  	if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil {
  1656  		return fmt.Errorf("index update failed: %v", err)
  1657  	}
  1658  
  1659  	// Delete any remaining job scaling policies
  1660  	if err := s.deleteJobScalingPolicies(index, job, txn); err != nil {
  1661  		return fmt.Errorf("deleting job scaling policies failed: %v", err)
  1662  	}
  1663  
  1664  	// Delete any job recommendations
  1665  	if err := s.deleteRecommendationsByJob(index, txn, job); err != nil {
  1666  		return fmt.Errorf("deleting job recommendatons failed: %v", err)
  1667  	}
  1668  
  1669  	// Delete the scaling events
  1670  	if _, err = txn.DeleteAll("scaling_event", "id", namespace, jobID); err != nil {
  1671  		return fmt.Errorf("deleting job scaling events failed: %v", err)
  1672  	}
  1673  	if err := txn.Insert("index", &IndexEntry{"scaling_event", index}); err != nil {
  1674  		return fmt.Errorf("index update failed: %v", err)
  1675  	}
  1676  
  1677  	return nil
  1678  }
  1679  
  1680  // deleteJobScalingPolicies deletes any scaling policies associated with the job
  1681  func (s *StateStore) deleteJobScalingPolicies(index uint64, job *structs.Job, txn *txn) error {
  1682  	iter, err := s.ScalingPoliciesByJobTxn(nil, job.Namespace, job.ID, txn)
  1683  	if err != nil {
  1684  		return fmt.Errorf("getting job scaling policies for deletion failed: %v", err)
  1685  	}
  1686  
  1687  	// Put them into a slice so there are no safety concerns while actually
  1688  	// performing the deletes
  1689  	policies := []interface{}{}
  1690  	for {
  1691  		raw := iter.Next()
  1692  		if raw == nil {
  1693  			break
  1694  		}
  1695  		policies = append(policies, raw)
  1696  	}
  1697  
  1698  	// Do the deletes
  1699  	for _, p := range policies {
  1700  		if err := txn.Delete("scaling_policy", p); err != nil {
  1701  			return fmt.Errorf("deleting scaling policy failed: %v", err)
  1702  		}
  1703  	}
  1704  
  1705  	if len(policies) > 0 {
  1706  		if err := txn.Insert("index", &IndexEntry{"scaling_policy", index}); err != nil {
  1707  			return fmt.Errorf("index update failed: %v", err)
  1708  		}
  1709  	}
  1710  	return nil
  1711  }
  1712  
  1713  // deleteJobVersions deletes all versions of the given job.
  1714  func (s *StateStore) deleteJobVersions(index uint64, job *structs.Job, txn *txn) error {
  1715  	iter, err := txn.Get("job_version", "id_prefix", job.Namespace, job.ID)
  1716  	if err != nil {
  1717  		return err
  1718  	}
  1719  
  1720  	// Put them into a slice so there are no safety concerns while actually
  1721  	// performing the deletes
  1722  	jobs := []*structs.Job{}
  1723  	for {
  1724  		raw := iter.Next()
  1725  		if raw == nil {
  1726  			break
  1727  		}
  1728  
  1729  		// Ensure the ID is an exact match
  1730  		j := raw.(*structs.Job)
  1731  		if j.ID != job.ID {
  1732  			continue
  1733  		}
  1734  
  1735  		jobs = append(jobs, j)
  1736  	}
  1737  
  1738  	// Do the deletes
  1739  	for _, j := range jobs {
  1740  		if err := txn.Delete("job_version", j); err != nil {
  1741  			return fmt.Errorf("deleting job versions failed: %v", err)
  1742  		}
  1743  	}
  1744  
  1745  	if err := txn.Insert("index", &IndexEntry{"job_version", index}); err != nil {
  1746  		return fmt.Errorf("index update failed: %v", err)
  1747  	}
  1748  
  1749  	return nil
  1750  }
  1751  
  1752  // upsertJobVersion inserts a job into its historic version table and limits the
  1753  // number of job versions that are tracked.
  1754  func (s *StateStore) upsertJobVersion(index uint64, job *structs.Job, txn *txn) error {
  1755  	// Insert the job
  1756  	if err := txn.Insert("job_version", job); err != nil {
  1757  		return fmt.Errorf("failed to insert job into job_version table: %v", err)
  1758  	}
  1759  
  1760  	if err := txn.Insert("index", &IndexEntry{"job_version", index}); err != nil {
  1761  		return fmt.Errorf("index update failed: %v", err)
  1762  	}
  1763  
  1764  	// Get all the historic jobs for this ID
  1765  	all, err := s.jobVersionByID(txn, nil, job.Namespace, job.ID)
  1766  	if err != nil {
  1767  		return fmt.Errorf("failed to look up job versions for %q: %v", job.ID, err)
  1768  	}
  1769  
  1770  	// If we are below the limit there is no GCing to be done
  1771  	if len(all) <= structs.JobTrackedVersions {
  1772  		return nil
  1773  	}
  1774  
  1775  	// We have to delete a historic job to make room.
  1776  	// Find index of the highest versioned stable job
  1777  	stableIdx := -1
  1778  	for i, j := range all {
  1779  		if j.Stable {
  1780  			stableIdx = i
  1781  			break
  1782  		}
  1783  	}
  1784  
  1785  	// If the stable job is the oldest version, do a swap to bring it into the
  1786  	// keep set.
  1787  	max := structs.JobTrackedVersions
  1788  	if stableIdx == max {
  1789  		all[max-1], all[max] = all[max], all[max-1]
  1790  	}
  1791  
  1792  	// Delete the job outside of the set that are being kept.
  1793  	d := all[max]
  1794  	if err := txn.Delete("job_version", d); err != nil {
  1795  		return fmt.Errorf("failed to delete job %v (%d) from job_version", d.ID, d.Version)
  1796  	}
  1797  
  1798  	return nil
  1799  }
  1800  
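// To make the version GC above concrete (assuming structs.JobTrackedVersions
// is 6, its value at the time of writing): once a 7th version is inserted,
// the versions are sorted newest-first and the entry at index 6 falls outside
// the keep set and is deleted. If that entry is the highest-versioned stable
// job, it is first swapped with index 5 so the stable version survives and
// the unstable one is deleted instead.
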
  1801  // JobByID is used to lookup a job by its ID. JobByID returns the current/latest job
  1802  // version.
  1803  func (s *StateStore) JobByID(ws memdb.WatchSet, namespace, id string) (*structs.Job, error) {
  1804  	txn := s.db.ReadTxn()
  1805  	return s.JobByIDTxn(ws, namespace, id, txn)
  1806  }
  1807  
  1808  // JobByIDTxn is used to lookup a job by its ID, like JobByID, but returns
  1809  // the job version visible within the given transaction
  1810  func (s *StateStore) JobByIDTxn(ws memdb.WatchSet, namespace, id string, txn Txn) (*structs.Job, error) {
  1811  	watchCh, existing, err := txn.FirstWatch("jobs", "id", namespace, id)
  1812  	if err != nil {
  1813  		return nil, fmt.Errorf("job lookup failed: %v", err)
  1814  	}
  1815  	ws.Add(watchCh)
  1816  
  1817  	if existing != nil {
  1818  		return existing.(*structs.Job), nil
  1819  	}
  1820  	return nil, nil
  1821  }
  1822  
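// Illustrative sketch (hypothetical helper): blocking on job changes with a
// memdb.WatchSet. WatchCtx unblocks when any watched channel fires or the
// context is cancelled.
func exampleWaitForJob(ctx context.Context, s *StateStore, namespace, id string) (*structs.Job, error) {
	for {
		ws := memdb.NewWatchSet()
		job, err := s.JobByID(ws, namespace, id)
		if err != nil {
			return nil, err
		}
		if job != nil {
			return job, nil
		}
		// Block until the watched jobs-table entry changes.
		if err := ws.WatchCtx(ctx); err != nil {
			return nil, err
		}
	}
}
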
  1823  // JobsByIDPrefix is used to lookup a job by prefix
  1824  func (s *StateStore) JobsByIDPrefix(ws memdb.WatchSet, namespace, id string) (memdb.ResultIterator, error) {
  1825  	txn := s.db.ReadTxn()
  1826  
  1827  	iter, err := txn.Get("jobs", "id_prefix", namespace, id)
  1828  	if err != nil {
  1829  		return nil, fmt.Errorf("job lookup failed: %v", err)
  1830  	}
  1831  
  1832  	ws.Add(iter.WatchCh())
  1833  
  1834  	return iter, nil
  1835  }
  1836  
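// Illustrative sketch (hypothetical helper): draining the prefix iterator.
// Next returns nil once the iterator is exhausted, and a nil WatchSet is
// safe because WatchSet.Add is a no-op on nil.
func exampleJobIDsByPrefix(s *StateStore, namespace, prefix string) ([]string, error) {
	iter, err := s.JobsByIDPrefix(nil, namespace, prefix)
	if err != nil {
		return nil, err
	}
	var ids []string
	for raw := iter.Next(); raw != nil; raw = iter.Next() {
		ids = append(ids, raw.(*structs.Job).ID)
	}
	return ids, nil
}
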
  1837  // JobVersionsByID returns all the tracked versions of a job.
  1838  func (s *StateStore) JobVersionsByID(ws memdb.WatchSet, namespace, id string) ([]*structs.Job, error) {
  1839  	txn := s.db.ReadTxn()
  1840  
  1841  	return s.jobVersionByID(txn, ws, namespace, id)
  1842  }
  1843  
  1844  // jobVersionByID is the underlying implementation for retrieving all tracked
  1845  // versions of a job and is called under an existing transaction. A watch set
  1846  // can optionally be passed in to add the job histories to the watch set.
  1847  func (s *StateStore) jobVersionByID(txn *txn, ws memdb.WatchSet, namespace, id string) ([]*structs.Job, error) {
  1848  	// Get all the historic jobs for this ID
  1849  	iter, err := txn.Get("job_version", "id_prefix", namespace, id)
  1850  	if err != nil {
  1851  		return nil, err
  1852  	}
  1853  
  1854  	ws.Add(iter.WatchCh())
  1855  
  1856  	var all []*structs.Job
  1857  	for {
  1858  		raw := iter.Next()
  1859  		if raw == nil {
  1860  			break
  1861  		}
  1862  
  1863  		// Ensure the ID is an exact match
  1864  		j := raw.(*structs.Job)
  1865  		if j.ID != id {
  1866  			continue
  1867  		}
  1868  
  1869  		all = append(all, j)
  1870  	}
  1871  
  1872  	// Sort in reverse order so that the highest version is first
  1873  	sort.Slice(all, func(i, j int) bool {
  1874  		return all[i].Version > all[j].Version
  1875  	})
  1876  
  1877  	return all, nil
  1878  }
  1879  
  1880  // JobByIDAndVersion returns the job identified by its ID and Version. The
  1881  // passed watchset may be nil.
  1882  func (s *StateStore) JobByIDAndVersion(ws memdb.WatchSet, namespace, id string, version uint64) (*structs.Job, error) {
  1883  	txn := s.db.ReadTxn()
  1884  	return s.jobByIDAndVersionImpl(ws, namespace, id, version, txn)
  1885  }
  1886  
  1887  // jobByIDAndVersionImpl returns the job identified by its ID and Version. The
  1888  // passed watchset may be nil.
  1889  func (s *StateStore) jobByIDAndVersionImpl(ws memdb.WatchSet, namespace, id string,
  1890  	version uint64, txn *txn) (*structs.Job, error) {
  1891  
  1892  	watchCh, existing, err := txn.FirstWatch("job_version", "id", namespace, id, version)
  1893  	if err != nil {
  1894  		return nil, err
  1895  	}
  1896  
  1897  	ws.Add(watchCh)
  1898  
  1899  	if existing != nil {
  1900  		job := existing.(*structs.Job)
  1901  		return job, nil
  1902  	}
  1903  
  1904  	return nil, nil
  1905  }
  1906  
  1907  func (s *StateStore) JobVersions(ws memdb.WatchSet) (memdb.ResultIterator, error) {
  1908  	txn := s.db.ReadTxn()
  1909  
  1910  	// Walk the entire job_version table
  1911  	iter, err := txn.Get("job_version", "id")
  1912  	if err != nil {
  1913  		return nil, err
  1914  	}
  1915  
  1916  	ws.Add(iter.WatchCh())
  1917  	return iter, nil
  1918  }
  1919  
  1920  // Jobs returns an iterator over all the jobs
  1921  func (s *StateStore) Jobs(ws memdb.WatchSet) (memdb.ResultIterator, error) {
  1922  	txn := s.db.ReadTxn()
  1923  
  1924  	// Walk the entire jobs table
  1925  	iter, err := txn.Get("jobs", "id")
  1926  	if err != nil {
  1927  		return nil, err
  1928  	}
  1929  
  1930  	ws.Add(iter.WatchCh())
  1931  
  1932  	return iter, nil
  1933  }
  1934  
  1935  // JobsByNamespace returns an iterator over all the jobs for the given namespace
  1936  func (s *StateStore) JobsByNamespace(ws memdb.WatchSet, namespace string) (memdb.ResultIterator, error) {
  1937  	txn := s.db.ReadTxn()
  1938  	return s.jobsByNamespaceImpl(ws, namespace, txn)
  1939  }
  1940  
  1941  // jobsByNamespaceImpl returns an iterator over all the jobs for the given namespace
  1942  func (s *StateStore) jobsByNamespaceImpl(ws memdb.WatchSet, namespace string, txn *txn) (memdb.ResultIterator, error) {
  1943  	// Walk the entire jobs table
  1944  	iter, err := txn.Get("jobs", "id_prefix", namespace, "")
  1945  	if err != nil {
  1946  		return nil, err
  1947  	}
  1948  
  1949  	ws.Add(iter.WatchCh())
  1950  
  1951  	return iter, nil
  1952  }
  1953  
  1954  // JobsByPeriodic returns an iterator over all the periodic or non-periodic jobs.
  1955  func (s *StateStore) JobsByPeriodic(ws memdb.WatchSet, periodic bool) (memdb.ResultIterator, error) {
  1956  	txn := s.db.ReadTxn()
  1957  
  1958  	iter, err := txn.Get("jobs", "periodic", periodic)
  1959  	if err != nil {
  1960  		return nil, err
  1961  	}
  1962  
  1963  	ws.Add(iter.WatchCh())
  1964  
  1965  	return iter, nil
  1966  }
  1967  
  1968  // JobsByScheduler returns an iterator over all the jobs with the specific
  1969  // scheduler type.
  1970  func (s *StateStore) JobsByScheduler(ws memdb.WatchSet, schedulerType string) (memdb.ResultIterator, error) {
  1971  	txn := s.db.ReadTxn()
  1972  
  1973  	// Return an iterator for jobs with the specific type.
  1974  	iter, err := txn.Get("jobs", "type", schedulerType)
  1975  	if err != nil {
  1976  		return nil, err
  1977  	}
  1978  
  1979  	ws.Add(iter.WatchCh())
  1980  
  1981  	return iter, nil
  1982  }
  1983  
  1984  // JobsByGC returns an iterator over all jobs eligible or ineligible for garbage
  1985  // collection.
  1986  func (s *StateStore) JobsByGC(ws memdb.WatchSet, gc bool) (memdb.ResultIterator, error) {
  1987  	txn := s.db.ReadTxn()
  1988  
  1989  	iter, err := txn.Get("jobs", "gc", gc)
  1990  	if err != nil {
  1991  		return nil, err
  1992  	}
  1993  
  1994  	ws.Add(iter.WatchCh())
  1995  
  1996  	return iter, nil
  1997  }
  1998  
  1999  // JobSummaryByID returns a job summary object which matches a specific id.
  2000  func (s *StateStore) JobSummaryByID(ws memdb.WatchSet, namespace, jobID string) (*structs.JobSummary, error) {
  2001  	txn := s.db.ReadTxn()
  2002  
  2003  	watchCh, existing, err := txn.FirstWatch("job_summary", "id", namespace, jobID)
  2004  	if err != nil {
  2005  		return nil, err
  2006  	}
  2007  
  2008  	ws.Add(watchCh)
  2009  
  2010  	if existing != nil {
  2011  		summary := existing.(*structs.JobSummary)
  2012  		return summary, nil
  2013  	}
  2014  
  2015  	return nil, nil
  2016  }
  2017  
  2018  // JobSummaries walks the entire job summary table and returns all the job
  2019  // summary objects
  2020  func (s *StateStore) JobSummaries(ws memdb.WatchSet) (memdb.ResultIterator, error) {
  2021  	txn := s.db.ReadTxn()
  2022  
  2023  	iter, err := txn.Get("job_summary", "id")
  2024  	if err != nil {
  2025  		return nil, err
  2026  	}
  2027  
  2028  	ws.Add(iter.WatchCh())
  2029  
  2030  	return iter, nil
  2031  }
  2032  
  2033  // JobSummaryByPrefix is used to look up Job Summary by id prefix
  2034  func (s *StateStore) JobSummaryByPrefix(ws memdb.WatchSet, namespace, id string) (memdb.ResultIterator, error) {
  2035  	txn := s.db.ReadTxn()
  2036  
  2037  	iter, err := txn.Get("job_summary", "id_prefix", namespace, id)
  2038  	if err != nil {
  2039  		return nil, fmt.Errorf("job_summary lookup failed: %v", err)
  2040  	}
  2041  
  2042  	ws.Add(iter.WatchCh())
  2043  
  2044  	return iter, nil
  2045  }
  2046  
  2047  // CSIVolumeRegister adds a volume to the server store, failing if it already
  2048  // exists and cannot safely be updated in place
  2048  func (s *StateStore) CSIVolumeRegister(index uint64, volumes []*structs.CSIVolume) error {
  2049  	txn := s.db.WriteTxn(index)
  2050  	defer txn.Abort()
  2051  
  2052  	for _, v := range volumes {
  2053  		if exists, err := s.namespaceExists(txn, v.Namespace); err != nil {
  2054  			return err
  2055  		} else if !exists {
  2056  			return fmt.Errorf("volume %s is in nonexistent namespace %s", v.ID, v.Namespace)
  2057  		}
  2058  
  2059  		// Check for volume existence
  2060  		obj, err := txn.First("csi_volumes", "id", v.Namespace, v.ID)
  2061  		if err != nil {
  2062  			return fmt.Errorf("volume existence check error: %v", err)
  2063  		}
  2064  		if obj != nil {
  2065  			// Allow some properties of a volume to be updated in place, but
  2066  			// prevent accidentally overwriting important properties, or
  2067  			// overwriting a volume in use
  2068  			old, ok := obj.(*structs.CSIVolume)
  2069  			if ok &&
  2070  				old.InUse() ||
  2071  				old.ExternalID != v.ExternalID ||
  2072  				old.PluginID != v.PluginID ||
  2073  				old.Provider != v.Provider {
  2074  				return fmt.Errorf("volume exists: %s", v.ID)
  2075  			}
  2076  		}
  2077  
  2078  		if v.CreateIndex == 0 {
  2079  			v.CreateIndex = index
  2080  			v.ModifyIndex = index
  2081  		}
  2082  
  2083  		// Allocations are copy on write, so we want to keep the Allocation ID
  2084  		// but we need to clear the pointer so that we don't store it when we
  2085  		// write the volume to the state store. We'll get it from the db in
  2086  		// denormalize.
  2087  		for allocID := range v.ReadAllocs {
  2088  			v.ReadAllocs[allocID] = nil
  2089  		}
  2090  		for allocID := range v.WriteAllocs {
  2091  			v.WriteAllocs[allocID] = nil
  2092  		}
  2093  
  2094  		err = txn.Insert("csi_volumes", v)
  2095  		if err != nil {
  2096  			return fmt.Errorf("volume insert: %v", err)
  2097  		}
  2098  	}
  2099  
  2100  	if err := txn.Insert("index", &IndexEntry{"csi_volumes", index}); err != nil {
  2101  		return fmt.Errorf("index update failed: %v", err)
  2102  	}
  2103  
  2104  	return txn.Commit()
  2105  }
  2106  
  2107  // CSIVolumes returns the unfiltered list of all volumes. Caller should
  2108  // snapshot if it wants to also denormalize the plugins.
  2109  func (s *StateStore) CSIVolumes(ws memdb.WatchSet) (memdb.ResultIterator, error) {
  2110  	txn := s.db.ReadTxn()
  2111  	defer txn.Abort()
  2112  
  2113  	iter, err := txn.Get("csi_volumes", "id")
  2114  	if err != nil {
  2115  		return nil, fmt.Errorf("csi_volumes lookup failed: %v", err)
  2116  	}
  2117  
  2118  	ws.Add(iter.WatchCh())
  2119  
  2120  	return iter, nil
  2121  }
  2122  
  2123  // CSIVolumeByID is used to lookup a single volume. Returns a copy of the
  2124  // volume because its plugins and allocations are denormalized to provide
  2125  // accurate Health.
  2126  func (s *StateStore) CSIVolumeByID(ws memdb.WatchSet, namespace, id string) (*structs.CSIVolume, error) {
  2127  	txn := s.db.ReadTxn()
  2128  
  2129  	watchCh, obj, err := txn.FirstWatch("csi_volumes", "id_prefix", namespace, id)
  2130  	if err != nil {
  2131  		return nil, fmt.Errorf("volume lookup failed: %s %v", id, err)
  2132  	}
  2133  
  2134  	ws.Add(watchCh)
  2135  
  2136  	if obj == nil {
  2137  		return nil, nil
  2138  	}
  2139  
  2140  	// we return the volume with the plugins denormalized by default,
  2141  	// because the scheduler needs them for feasibility checking
  2142  	vol := obj.(*structs.CSIVolume)
  2143  	return s.CSIVolumeDenormalizePluginsTxn(txn, vol.Copy())
  2144  }
  2145  
  2146  // CSIVolumesByPluginID looks up csi_volumes by pluginID. Caller should snapshot if it
  2147  // wants to also denormalize the plugins.
  2148  func (s *StateStore) CSIVolumesByPluginID(ws memdb.WatchSet, namespace, pluginID string) (memdb.ResultIterator, error) {
  2149  	txn := s.db.ReadTxn()
  2150  
  2151  	iter, err := txn.Get("csi_volumes", "plugin_id", pluginID)
  2152  	if err != nil {
  2153  		return nil, fmt.Errorf("volume lookup failed: %v", err)
  2154  	}
  2155  	ws.Add(iter.WatchCh())
  2156  	// Filter out volumes in other namespaces; FilterIterator drops entries for which f returns true
  2157  	f := func(raw interface{}) bool {
  2158  		v, ok := raw.(*structs.CSIVolume)
  2159  		if !ok {
  2160  			return false
  2161  		}
  2162  		return v.Namespace != namespace
  2163  	}
  2164  
  2165  	wrap := memdb.NewFilterIterator(iter, f)
  2166  	return wrap, nil
  2167  }
  2168  
  2169  // CSIVolumesByIDPrefix supports search. Caller should snapshot if it wants to
  2170  // also denormalize the plugins.
  2171  func (s *StateStore) CSIVolumesByIDPrefix(ws memdb.WatchSet, namespace, volumeID string) (memdb.ResultIterator, error) {
  2172  	txn := s.db.ReadTxn()
  2173  
  2174  	iter, err := txn.Get("csi_volumes", "id_prefix", namespace, volumeID)
  2175  	if err != nil {
  2176  		return nil, err
  2177  	}
  2178  
  2179  	ws.Add(iter.WatchCh())
  2180  
  2181  	return iter, nil
  2182  }
  2183  
  2184  // CSIVolumesByNodeID looks up CSIVolumes in use on a node. Caller should
  2185  // snapshot if it wants to also denormalize the plugins.
  2186  func (s *StateStore) CSIVolumesByNodeID(ws memdb.WatchSet, nodeID string) (memdb.ResultIterator, error) {
  2187  	allocs, err := s.AllocsByNode(ws, nodeID)
  2188  	if err != nil {
  2189  		return nil, fmt.Errorf("alloc lookup failed: %v", err)
  2190  	}
  2191  
  2192  	// Find volume ids for CSI volumes in running allocs, or allocs that we desire to run
  2193  	ids := map[string]string{} // Map volumeID to Namespace
  2194  	for _, a := range allocs {
  2195  		tg := a.Job.LookupTaskGroup(a.TaskGroup)
  2196  
  2197  		if !(a.DesiredStatus == structs.AllocDesiredStatusRun ||
  2198  			a.ClientStatus == structs.AllocClientStatusRunning) ||
  2199  			len(tg.Volumes) == 0 {
  2200  			continue
  2201  		}
  2202  
  2203  		for _, v := range tg.Volumes {
  2204  			if v.Type != structs.VolumeTypeCSI {
  2205  				continue
  2206  			}
  2207  			ids[v.Source] = a.Namespace
  2208  		}
  2209  	}
  2210  
  2211  	// Lookup the raw CSIVolumes to match the other list interfaces
  2212  	iter := NewSliceIterator()
  2213  	txn := s.db.ReadTxn()
  2214  	for id, namespace := range ids {
  2215  		raw, err := txn.First("csi_volumes", "id", namespace, id)
  2216  		if err != nil {
  2217  			return nil, fmt.Errorf("volume lookup failed: %s %v", id, err)
  2218  		}
  2219  		iter.Add(raw)
  2220  	}
  2221  
  2222  	ws.Add(iter.WatchCh())
  2223  
  2224  	return iter, nil
  2225  }
  2226  
  2227  // CSIVolumesByNamespace returns an iterator over all the volumes in the namespace
  2228  func (s *StateStore) CSIVolumesByNamespace(ws memdb.WatchSet, namespace string) (memdb.ResultIterator, error) {
  2229  	txn := s.db.ReadTxn()
  2230  
  2231  	iter, err := txn.Get("csi_volumes", "id_prefix", namespace, "")
  2232  	if err != nil {
  2233  		return nil, fmt.Errorf("volume lookup failed: %v", err)
  2234  	}
  2235  
  2236  	ws.Add(iter.WatchCh())
  2237  
  2238  	return iter, nil
  2239  }
  2240  
  2241  // CSIVolumeClaim updates the volume's claim count and allocation list
  2242  func (s *StateStore) CSIVolumeClaim(index uint64, namespace, id string, claim *structs.CSIVolumeClaim) error {
  2243  	txn := s.db.WriteTxn(index)
  2244  	defer txn.Abort()
  2245  
  2246  	row, err := txn.First("csi_volumes", "id", namespace, id)
  2247  	if err != nil {
  2248  		return fmt.Errorf("volume lookup failed: %s: %v", id, err)
  2249  	}
  2250  	if row == nil {
  2251  		return fmt.Errorf("volume not found: %s", id)
  2252  	}
  2253  
  2254  	orig, ok := row.(*structs.CSIVolume)
  2255  	if !ok {
  2256  		return fmt.Errorf("volume row conversion error")
  2257  	}
  2258  
  2259  	var alloc *structs.Allocation
  2260  	if claim.State == structs.CSIVolumeClaimStateTaken {
  2261  		alloc, err = s.allocByIDImpl(txn, nil, claim.AllocationID)
  2262  		if err != nil {
  2263  			s.logger.Error("AllocByID failed", "error", err)
  2264  			return fmt.Errorf(structs.ErrUnknownAllocationPrefix)
  2265  		}
  2266  		if alloc == nil {
  2267  			s.logger.Error("AllocByID failed to find alloc", "alloc_id", claim.AllocationID)
  2268  			return fmt.Errorf(structs.ErrUnknownAllocationPrefix)
  2271  		}
  2272  	}
  2273  
  2274  	volume, err := s.CSIVolumeDenormalizePluginsTxn(txn, orig.Copy())
  2275  	if err != nil {
  2276  		return err
  2277  	}
  2278  	volume, err = s.CSIVolumeDenormalizeTxn(txn, nil, volume)
  2279  	if err != nil {
  2280  		return err
  2281  	}
  2282  
  2283  	// in the case of a job deregistration, there will be no allocation ID
  2284  	// for the claim but we still want to write an updated index to the volume
  2285  	// so that volume reaping is triggered
  2286  	if claim.AllocationID != "" {
  2287  		err = volume.Claim(claim, alloc)
  2288  		if err != nil {
  2289  			return err
  2290  		}
  2291  	}
  2292  
  2293  	volume.ModifyIndex = index
  2294  
  2295  	// Allocations are copy on write, so we want to keep the Allocation ID
  2296  	// but we need to clear the pointer so that we don't store it when we
  2297  	// write the volume to the state store. We'll get it from the db in
  2298  	// denormalize.
  2299  	for allocID := range volume.ReadAllocs {
  2300  		volume.ReadAllocs[allocID] = nil
  2301  	}
  2302  	for allocID := range volume.WriteAllocs {
  2303  		volume.WriteAllocs[allocID] = nil
  2304  	}
  2305  
  2306  	if err = txn.Insert("csi_volumes", volume); err != nil {
  2307  		return fmt.Errorf("volume update failed: %s: %v", id, err)
  2308  	}
  2309  
  2310  	if err = txn.Insert("index", &IndexEntry{"csi_volumes", index}); err != nil {
  2311  		return fmt.Errorf("index update failed: %v", err)
  2312  	}
  2313  
  2314  	return txn.Commit()
  2315  }
  2316  
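// An illustrative claim (hypothetical IDs), mirroring the claim fields used
// elsewhere in this file:
//
//	claim := &structs.CSIVolumeClaim{
//		AllocationID: allocID,
//		NodeID:       nodeID,
//		Mode:         structs.CSIVolumeClaimWrite,
//		State:        structs.CSIVolumeClaimStateTaken,
//	}
//	err := s.CSIVolumeClaim(index, namespace, volID, claim)
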
  2317  // CSIVolumeDeregister removes the volume from the server
  2318  func (s *StateStore) CSIVolumeDeregister(index uint64, namespace string, ids []string, force bool) error {
  2319  	txn := s.db.WriteTxn(index)
  2320  	defer txn.Abort()
  2321  
  2322  	for _, id := range ids {
  2323  		existing, err := txn.First("csi_volumes", "id_prefix", namespace, id)
  2324  		if err != nil {
  2325  			return fmt.Errorf("volume lookup failed: %s: %v", id, err)
  2326  		}
  2327  
  2328  		if existing == nil {
  2329  			return fmt.Errorf("volume not found: %s", id)
  2330  		}
  2331  
  2332  		vol, ok := existing.(*structs.CSIVolume)
  2333  		if !ok {
  2334  			return fmt.Errorf("volume row conversion error: %s", id)
  2335  		}
  2336  
  2337  		// The common case for a volume deregister is when the volume is
  2338  		// unused, but we can also let an operator intervene in the case where
  2339  		// allocations have been stopped but claims can't be freed because
  2340  		// ex. the plugins have all been removed.
  2341  		if vol.InUse() {
  2342  			if !force || !s.volSafeToForce(txn, vol) {
  2343  				return fmt.Errorf("volume in use: %s", id)
  2344  			}
  2345  		}
  2346  
  2347  		if err = txn.Delete("csi_volumes", existing); err != nil {
  2348  			return fmt.Errorf("volume delete failed: %s: %v", id, err)
  2349  		}
  2350  	}
  2351  
  2352  	if err := txn.Insert("index", &IndexEntry{"csi_volumes", index}); err != nil {
  2353  		return fmt.Errorf("index update failed: %v", err)
  2354  	}
  2355  
  2356  	return txn.Commit()
  2357  }
  2358  
  2359  // volSafeToForce checks whether any of the remaining allocations
  2360  // are in a non-terminal state.
  2361  func (s *StateStore) volSafeToForce(txn Txn, v *structs.CSIVolume) bool {
  2362  	vol, err := s.CSIVolumeDenormalizeTxn(txn, nil, v)
  2363  	if err != nil {
  2364  		return false
  2365  	}
  2366  
  2367  	for _, alloc := range vol.ReadAllocs {
  2368  		if alloc != nil && !alloc.TerminalStatus() {
  2369  			return false
  2370  		}
  2371  	}
  2372  	for _, alloc := range vol.WriteAllocs {
  2373  		if alloc != nil && !alloc.TerminalStatus() {
  2374  			return false
  2375  		}
  2376  	}
  2377  	return true
  2378  }
  2379  
  2380  // CSIVolumeDenormalizePlugins returns a CSIVolume with current health and
  2381  // plugins, but without allocations.
  2382  // Use this for current volume metadata, handling lists of volumes.
  2383  // Use CSIVolumeDenormalize for volumes containing both health and current
  2384  // allocations.
  2385  func (s *StateStore) CSIVolumeDenormalizePlugins(ws memdb.WatchSet, vol *structs.CSIVolume) (*structs.CSIVolume, error) {
  2386  	if vol == nil {
  2387  		return nil, nil
  2388  	}
  2389  	txn := s.db.ReadTxn()
  2390  	defer txn.Abort()
  2391  	return s.CSIVolumeDenormalizePluginsTxn(txn, vol)
  2392  }
  2393  
  2394  // CSIVolumeDenormalizePluginsTxn returns a CSIVolume with current health and
  2395  // plugins, but without allocations.
  2396  // Use this for current volume metadata, handling lists of volumes.
  2397  // Use CSIVolumeDenormalize for volumes containing both health and current
  2398  // allocations.
  2399  func (s *StateStore) CSIVolumeDenormalizePluginsTxn(txn Txn, vol *structs.CSIVolume) (*structs.CSIVolume, error) {
  2400  	if vol == nil {
  2401  		return nil, nil
  2402  	}
  2403  	plug, err := s.CSIPluginByIDTxn(txn, nil, vol.PluginID)
  2404  	if err != nil {
  2405  		return nil, fmt.Errorf("plugin lookup error: %s %v", vol.PluginID, err)
  2406  	}
  2407  	if plug == nil {
  2408  		vol.ControllersHealthy = 0
  2409  		vol.NodesHealthy = 0
  2410  		vol.Schedulable = false
  2411  		return vol, nil
  2412  	}
  2413  
  2414  	vol.Provider = plug.Provider
  2415  	vol.ProviderVersion = plug.Version
  2416  	vol.ControllerRequired = plug.ControllerRequired
  2417  	vol.ControllersHealthy = plug.ControllersHealthy
  2418  	vol.NodesHealthy = plug.NodesHealthy
  2419  
  2420  	// This value may be stale, but stale is ok
  2421  	vol.ControllersExpected = plug.ControllersExpected
  2422  	vol.NodesExpected = plug.NodesExpected
  2423  
  2424  	vol.Schedulable = vol.NodesHealthy > 0
  2425  	if vol.ControllerRequired {
  2426  		vol.Schedulable = vol.ControllersHealthy > 0 && vol.Schedulable
  2427  	}
  2428  
  2429  	return vol, nil
  2430  }
  2431  
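// To make the schedulability rule above concrete (hypothetical counts):
//
//	ControllerRequired: true,  ControllersHealthy: 0, NodesHealthy: 2 -> not schedulable
//	ControllerRequired: true,  ControllersHealthy: 1, NodesHealthy: 2 -> schedulable
//	ControllerRequired: false, ControllersHealthy: 0, NodesHealthy: 1 -> schedulable
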
  2432  // CSIVolumeDenormalize returns a CSIVolume with allocations
  2433  func (s *StateStore) CSIVolumeDenormalize(ws memdb.WatchSet, vol *structs.CSIVolume) (*structs.CSIVolume, error) {
  2434  	txn := s.db.ReadTxn()
  2435  	return s.CSIVolumeDenormalizeTxn(txn, ws, vol)
  2436  }
  2437  
  2438  // CSIVolumeDenormalizeTxn populates a CSIVolume with allocations
  2439  func (s *StateStore) CSIVolumeDenormalizeTxn(txn Txn, ws memdb.WatchSet, vol *structs.CSIVolume) (*structs.CSIVolume, error) {
  2440  	if vol == nil {
  2441  		return nil, nil
  2442  	}
  2443  	for id := range vol.ReadAllocs {
  2444  		a, err := s.allocByIDImpl(txn, ws, id)
  2445  		if err != nil {
  2446  			return nil, err
  2447  		}
  2448  		if a != nil {
  2449  			vol.ReadAllocs[id] = a
  2450  			// COMPAT(1.0): the CSIVolumeClaim fields were added
  2451  			// after 0.11.1, so claims made before that may be
  2452  			// missing this value. (same for WriteAlloc below)
  2453  			if _, ok := vol.ReadClaims[id]; !ok {
  2454  				vol.ReadClaims[id] = &structs.CSIVolumeClaim{
  2455  					AllocationID: a.ID,
  2456  					NodeID:       a.NodeID,
  2457  					Mode:         structs.CSIVolumeClaimRead,
  2458  					State:        structs.CSIVolumeClaimStateTaken,
  2459  				}
  2460  			}
  2461  		}
  2462  	}
  2463  
  2464  	for id := range vol.WriteAllocs {
  2465  		a, err := s.allocByIDImpl(txn, ws, id)
  2466  		if err != nil {
  2467  			return nil, err
  2468  		}
  2469  		if a != nil {
  2470  			vol.WriteAllocs[id] = a
  2471  			if _, ok := vol.WriteClaims[id]; !ok {
  2472  				vol.WriteClaims[id] = &structs.CSIVolumeClaim{
  2473  					AllocationID: a.ID,
  2474  					NodeID:       a.NodeID,
  2475  					Mode:         structs.CSIVolumeClaimWrite,
  2476  					State:        structs.CSIVolumeClaimStateTaken,
  2477  				}
  2478  			}
  2479  		}
  2480  	}
  2481  
  2482  	return vol, nil
  2483  }
  2484  
  2485  // CSIPlugins returns the unfiltered list of all plugin health status
  2486  func (s *StateStore) CSIPlugins(ws memdb.WatchSet) (memdb.ResultIterator, error) {
  2487  	txn := s.db.ReadTxn()
  2488  	defer txn.Abort()
  2489  
  2490  	iter, err := txn.Get("csi_plugins", "id")
  2491  	if err != nil {
  2492  		return nil, fmt.Errorf("csi_plugins lookup failed: %v", err)
  2493  	}
  2494  
  2495  	ws.Add(iter.WatchCh())
  2496  
  2497  	return iter, nil
  2498  }
  2499  
  2500  // CSIPluginsByIDPrefix supports search
  2501  func (s *StateStore) CSIPluginsByIDPrefix(ws memdb.WatchSet, pluginID string) (memdb.ResultIterator, error) {
  2502  	txn := s.db.ReadTxn()
  2503  
  2504  	iter, err := txn.Get("csi_plugins", "id_prefix", pluginID)
  2505  	if err != nil {
  2506  		return nil, err
  2507  	}
  2508  
  2509  	ws.Add(iter.WatchCh())
  2510  
  2511  	return iter, nil
  2512  }
  2513  
  2514  // CSIPluginByID returns a named CSIPlugin. This method creates a new
  2515  // transaction so you should not call it from within another transaction.
  2516  func (s *StateStore) CSIPluginByID(ws memdb.WatchSet, id string) (*structs.CSIPlugin, error) {
  2517  	txn := s.db.ReadTxn()
  2518  	plugin, err := s.CSIPluginByIDTxn(txn, ws, id)
  2519  	if err != nil {
  2520  		return nil, err
  2521  	}
  2522  	return plugin, nil
  2523  }
  2524  
  2525  // CSIPluginByIDTxn returns a named CSIPlugin
  2526  func (s *StateStore) CSIPluginByIDTxn(txn Txn, ws memdb.WatchSet, id string) (*structs.CSIPlugin, error) {
  2527  
  2528  	watchCh, obj, err := txn.FirstWatch("csi_plugins", "id_prefix", id)
  2529  	if err != nil {
  2530  		return nil, fmt.Errorf("csi_plugin lookup failed: %s %v", id, err)
  2531  	}
  2532  
  2533  	ws.Add(watchCh)
  2534  
  2535  	if obj != nil {
  2536  		return obj.(*structs.CSIPlugin), nil
  2537  	}
  2538  	return nil, nil
  2539  }
  2540  
  2541  // CSIPluginDenormalize returns a CSIPlugin with allocation details. Always called on a copy of the plugin.
  2542  func (s *StateStore) CSIPluginDenormalize(ws memdb.WatchSet, plug *structs.CSIPlugin) (*structs.CSIPlugin, error) {
  2543  	txn := s.db.ReadTxn()
  2544  	return s.CSIPluginDenormalizeTxn(txn, ws, plug)
  2545  }
  2546  
  2547  func (s *StateStore) CSIPluginDenormalizeTxn(txn Txn, ws memdb.WatchSet, plug *structs.CSIPlugin) (*structs.CSIPlugin, error) {
  2548  	if plug == nil {
  2549  		return nil, nil
  2550  	}
  2551  
  2552  	// Get the unique list of allocation ids
  2553  	ids := map[string]struct{}{}
  2554  	for _, info := range plug.Controllers {
  2555  		ids[info.AllocID] = struct{}{}
  2556  	}
  2557  	for _, info := range plug.Nodes {
  2558  		ids[info.AllocID] = struct{}{}
  2559  	}
  2560  
  2561  	for id := range ids {
  2562  		alloc, err := s.allocByIDImpl(txn, ws, id)
  2563  		if err != nil {
  2564  			return nil, err
  2565  		}
  2566  		if alloc == nil {
  2567  			continue
  2568  		}
  2569  		plug.Allocations = append(plug.Allocations, alloc.Stub(nil))
  2570  	}
  2571  
  2572  	return plug, nil
  2573  }
  2574  
  2575  // UpsertCSIPlugin writes the plugin to the state store. Note: there
  2576  // is currently no raft message for this, as it's intended to support
  2577  // testing use cases.
  2578  func (s *StateStore) UpsertCSIPlugin(index uint64, plug *structs.CSIPlugin) error {
  2579  	txn := s.db.WriteTxn(index)
  2580  	defer txn.Abort()
  2581  
  2582  	existing, err := txn.First("csi_plugins", "id", plug.ID)
  2583  	if err != nil {
  2584  		return fmt.Errorf("csi_plugin lookup error: %s %v", plug.ID, err)
  2585  	}
  2586  
  2587  	plug.ModifyIndex = index
  2588  	if existing != nil {
  2589  		plug.CreateIndex = existing.(*structs.CSIPlugin).CreateIndex
  2590  	}
  2591  
  2592  	err = txn.Insert("csi_plugins", plug)
  2593  	if err != nil {
  2594  		return fmt.Errorf("csi_plugins insert error: %v", err)
  2595  	}
  2596  	if err := txn.Insert("index", &IndexEntry{"csi_plugins", index}); err != nil {
  2597  		return fmt.Errorf("index update failed: %v", err)
  2598  	}
  2599  	return txn.Commit()
  2600  }
  2601  
  2602  // DeleteCSIPlugin deletes the plugin if it's not in use.
  2603  func (s *StateStore) DeleteCSIPlugin(index uint64, id string) error {
  2604  	txn := s.db.WriteTxn(index)
  2605  	defer txn.Abort()
  2606  
  2607  	plug, err := s.CSIPluginByIDTxn(txn, nil, id)
  2608  	if err != nil {
  2609  		return err
  2610  	}
  2611  
  2612  	if plug == nil {
  2613  		return nil
  2614  	}
  2615  
  2616  	plug, err = s.CSIPluginDenormalizeTxn(txn, nil, plug.Copy())
  2617  	if err != nil {
  2618  		return err
  2619  	}
  2620  	if !plug.IsEmpty() {
  2621  		return fmt.Errorf("plugin in use")
  2622  	}
  2623  
  2624  	err = txn.Delete("csi_plugins", plug)
  2625  	if err != nil {
  2626  		return fmt.Errorf("csi_plugins delete error: %v", err)
  2627  	}
  2628  	return txn.Commit()
  2629  }
  2630  
  2631  // UpsertPeriodicLaunch is used to register a launch or update it.
  2632  func (s *StateStore) UpsertPeriodicLaunch(index uint64, launch *structs.PeriodicLaunch) error {
  2633  	txn := s.db.WriteTxn(index)
  2634  	defer txn.Abort()
  2635  
  2636  	// Check if the launch already exists
  2637  	existing, err := txn.First("periodic_launch", "id", launch.Namespace, launch.ID)
  2638  	if err != nil {
  2639  		return fmt.Errorf("periodic launch lookup failed: %v", err)
  2640  	}
  2641  
  2642  	// Setup the indexes correctly
  2643  	if existing != nil {
  2644  		launch.CreateIndex = existing.(*structs.PeriodicLaunch).CreateIndex
  2645  		launch.ModifyIndex = index
  2646  	} else {
  2647  		launch.CreateIndex = index
  2648  		launch.ModifyIndex = index
  2649  	}
  2650  
  2651  	// Insert the launch
  2652  	if err := txn.Insert("periodic_launch", launch); err != nil {
  2653  		return fmt.Errorf("launch insert failed: %v", err)
  2654  	}
  2655  	if err := txn.Insert("index", &IndexEntry{"periodic_launch", index}); err != nil {
  2656  		return fmt.Errorf("index update failed: %v", err)
  2657  	}
  2658  
  2659  	return txn.Commit()
  2660  }
  2661  
  2662  // DeletePeriodicLaunch is used to delete the periodic launch
  2663  func (s *StateStore) DeletePeriodicLaunch(index uint64, namespace, jobID string) error {
  2664  	txn := s.db.WriteTxn(index)
  2665  	defer txn.Abort()
  2666  
  2667  	err := s.DeletePeriodicLaunchTxn(index, namespace, jobID, txn)
  2668  	if err == nil {
  2669  		return txn.Commit()
  2670  	}
  2671  	return err
  2672  }
  2673  
  2674  // DeletePeriodicLaunchTxn is used to delete the periodic launch, like DeletePeriodicLaunch,
  2675  // but in a transaction. Useful when making multiple modifications atomically
  2676  func (s *StateStore) DeletePeriodicLaunchTxn(index uint64, namespace, jobID string, txn Txn) error {
  2677  	// Lookup the launch
  2678  	existing, err := txn.First("periodic_launch", "id", namespace, jobID)
  2679  	if err != nil {
  2680  		return fmt.Errorf("launch lookup failed: %v", err)
  2681  	}
  2682  	if existing == nil {
  2683  		return fmt.Errorf("launch not found")
  2684  	}
  2685  
  2686  	// Delete the launch
  2687  	if err := txn.Delete("periodic_launch", existing); err != nil {
  2688  		return fmt.Errorf("launch delete failed: %v", err)
  2689  	}
  2690  	if err := txn.Insert("index", &IndexEntry{"periodic_launch", index}); err != nil {
  2691  		return fmt.Errorf("index update failed: %v", err)
  2692  	}
  2693  
  2694  	return nil
  2695  }
  2696  
  2697  // PeriodicLaunchByID is used to lookup a periodic launch by the periodic job
  2698  // ID.
  2699  func (s *StateStore) PeriodicLaunchByID(ws memdb.WatchSet, namespace, id string) (*structs.PeriodicLaunch, error) {
  2700  	txn := s.db.ReadTxn()
  2701  
  2702  	watchCh, existing, err := txn.FirstWatch("periodic_launch", "id", namespace, id)
  2703  	if err != nil {
  2704  		return nil, fmt.Errorf("periodic launch lookup failed: %v", err)
  2705  	}
  2706  
  2707  	ws.Add(watchCh)
  2708  
  2709  	if existing != nil {
  2710  		return existing.(*structs.PeriodicLaunch), nil
  2711  	}
  2712  	return nil, nil
  2713  }
  2714  
  2715  // PeriodicLaunches returns an iterator over all the periodic launches
  2716  func (s *StateStore) PeriodicLaunches(ws memdb.WatchSet) (memdb.ResultIterator, error) {
  2717  	txn := s.db.ReadTxn()
  2718  
  2719  	// Walk the entire table
  2720  	iter, err := txn.Get("periodic_launch", "id")
  2721  	if err != nil {
  2722  		return nil, err
  2723  	}
  2724  
  2725  	ws.Add(iter.WatchCh())
  2726  
  2727  	return iter, nil
  2728  }
  2729  
  2730  // UpsertEvals is used to upsert a set of evaluations
  2731  func (s *StateStore) UpsertEvals(msgType structs.MessageType, index uint64, evals []*structs.Evaluation) error {
  2732  	txn := s.db.WriteTxnMsgT(msgType, index)
  2733  	defer txn.Abort()
  2734  
  2735  	err := s.UpsertEvalsTxn(index, evals, txn)
  2736  	if err == nil {
  2737  		return txn.Commit()
  2738  	}
  2739  	return err
  2740  }
  2741  
  2742  // UpsertEvalsTxn is used to upsert a set of evaluations, like UpsertEvals,
  2743  // but in a transaction. Useful when making multiple modifications atomically
  2744  func (s *StateStore) UpsertEvalsTxn(index uint64, evals []*structs.Evaluation, txn Txn) error {
  2745  	// Do a nested upsert
  2746  	jobs := make(map[structs.NamespacedID]string, len(evals))
  2747  	for _, eval := range evals {
  2748  		if err := s.nestedUpsertEval(txn, index, eval); err != nil {
  2749  			return err
  2750  		}
  2751  
  2752  		tuple := structs.NamespacedID{
  2753  			ID:        eval.JobID,
  2754  			Namespace: eval.Namespace,
  2755  		}
  2756  		jobs[tuple] = ""
  2757  	}
  2758  
  2759  	// Set the job's status
  2760  	if err := s.setJobStatuses(index, txn, jobs, false); err != nil {
  2761  		return fmt.Errorf("setting job status failed: %v", err)
  2762  	}
  2763  
  2764  	return nil
  2765  }
  2766  
  2767  // nestedUpsertEval is used to nest an evaluation upsert within a transaction
  2768  func (s *StateStore) nestedUpsertEval(txn *txn, index uint64, eval *structs.Evaluation) error {
  2769  	// Lookup the evaluation
  2770  	existing, err := txn.First("evals", "id", eval.ID)
  2771  	if err != nil {
  2772  		return fmt.Errorf("eval lookup failed: %v", err)
  2773  	}
  2774  
  2775  	// Update the indexes
  2776  	if existing != nil {
  2777  		eval.CreateIndex = existing.(*structs.Evaluation).CreateIndex
  2778  		eval.ModifyIndex = index
  2779  	} else {
  2780  		eval.CreateIndex = index
  2781  		eval.ModifyIndex = index
  2782  	}
  2783  
  2784  	// Update the job summary
  2785  	summaryRaw, err := txn.First("job_summary", "id", eval.Namespace, eval.JobID)
  2786  	if err != nil {
  2787  		return fmt.Errorf("job summary lookup failed: %v", err)
  2788  	}
  2789  	if summaryRaw != nil {
  2790  		js := summaryRaw.(*structs.JobSummary).Copy()
  2791  		hasSummaryChanged := false
  2792  		for tg, num := range eval.QueuedAllocations {
  2793  			if summary, ok := js.Summary[tg]; ok {
  2794  				if summary.Queued != num {
  2795  					summary.Queued = num
  2796  					js.Summary[tg] = summary
  2797  					hasSummaryChanged = true
  2798  				}
  2799  			} else {
  2800  				s.logger.Error("unable to update queued for job and task group", "job_id", eval.JobID, "task_group", tg, "namespace", eval.Namespace)
  2801  			}
  2802  		}
  2803  
  2804  		// Insert the job summary
  2805  		if hasSummaryChanged {
  2806  			js.ModifyIndex = index
  2807  			if err := txn.Insert("job_summary", js); err != nil {
  2808  				return fmt.Errorf("job summary insert failed: %v", err)
  2809  			}
  2810  			if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil {
  2811  				return fmt.Errorf("index update failed: %v", err)
  2812  			}
  2813  		}
  2814  	}
  2815  
  2816  	// Check if the job has any blocked evaluations and cancel them
  2817  	if eval.Status == structs.EvalStatusComplete && len(eval.FailedTGAllocs) == 0 {
  2818  		// Get the blocked evaluation for a job if it exists
  2819  		iter, err := txn.Get("evals", "job", eval.Namespace, eval.JobID, structs.EvalStatusBlocked)
  2820  		if err != nil {
  2821  			return fmt.Errorf("failed to get blocked evals for job %q in namespace %q: %v", eval.JobID, eval.Namespace, err)
  2822  		}
  2823  
  2824  		var blocked []*structs.Evaluation
  2825  		for {
  2826  			raw := iter.Next()
  2827  			if raw == nil {
  2828  				break
  2829  			}
  2830  			blocked = append(blocked, raw.(*structs.Evaluation))
  2831  		}
  2832  
  2833  		// Go through and update the evals
  2834  		for _, eval := range blocked {
  2835  			newEval := eval.Copy()
  2836  			newEval.Status = structs.EvalStatusCancelled
  2837  			newEval.StatusDescription = fmt.Sprintf("evaluation %q successful", newEval.ID)
  2838  			newEval.ModifyIndex = index
  2839  
  2840  			if err := txn.Insert("evals", newEval); err != nil {
  2841  				return fmt.Errorf("eval insert failed: %v", err)
  2842  			}
  2843  		}
  2844  	}
  2845  
  2846  	// Insert the eval
  2847  	if err := txn.Insert("evals", eval); err != nil {
  2848  		return fmt.Errorf("eval insert failed: %v", err)
  2849  	}
  2850  	if err := txn.Insert("index", &IndexEntry{"evals", index}); err != nil {
  2851  		return fmt.Errorf("index update failed: %v", err)
  2852  	}
  2853  	return nil
  2854  }
  2855  
  2856  // updateEvalModifyIndex is used to update the modify index of an evaluation that has been
  2857  // through a scheduler pass. This is done as part of plan apply. It ensures that when subsequent
  2858  // scheduler workers process a re-queued evaluation they see any partial updates from the plan apply.
  2859  func (s *StateStore) updateEvalModifyIndex(txn *txn, index uint64, evalID string) error {
  2860  	// Lookup the evaluation
  2861  	existing, err := txn.First("evals", "id", evalID)
  2862  	if err != nil {
  2863  		return fmt.Errorf("eval lookup failed: %v", err)
  2864  	}
  2865  	if existing == nil {
  2866  		s.logger.Error("unable to find eval", "eval_id", evalID)
  2867  		return fmt.Errorf("unable to find eval id %q", evalID)
  2868  	}
  2869  	eval := existing.(*structs.Evaluation).Copy()
  2870  	// Update the indexes
  2871  	eval.ModifyIndex = index
  2872  
  2873  	// Insert the eval
  2874  	if err := txn.Insert("evals", eval); err != nil {
  2875  		return fmt.Errorf("eval insert failed: %v", err)
  2876  	}
  2877  	if err := txn.Insert("index", &IndexEntry{"evals", index}); err != nil {
  2878  		return fmt.Errorf("index update failed: %v", err)
  2879  	}
  2880  	return nil
  2881  }
  2882  
  2883  // DeleteEval is used to delete a set of evaluations along with their allocations
  2884  func (s *StateStore) DeleteEval(index uint64, evals []string, allocs []string) error {
  2885  	txn := s.db.WriteTxn(index)
  2886  	defer txn.Abort()
  2887  
  2888  	jobs := make(map[structs.NamespacedID]string, len(evals))
  2889  	for _, eval := range evals {
  2890  		existing, err := txn.First("evals", "id", eval)
  2891  		if err != nil {
  2892  			return fmt.Errorf("eval lookup failed: %v", err)
  2893  		}
  2894  		if existing == nil {
  2895  			continue
  2896  		}
  2897  		if err := txn.Delete("evals", existing); err != nil {
  2898  			return fmt.Errorf("eval delete failed: %v", err)
  2899  		}
  2900  		eval := existing.(*structs.Evaluation)
  2901  
  2902  		tuple := structs.NamespacedID{
  2903  			ID:        eval.JobID,
  2904  			Namespace: eval.Namespace,
  2905  		}
  2906  		jobs[tuple] = ""
  2907  	}
  2908  
  2909  	for _, alloc := range allocs {
  2910  		raw, err := txn.First("allocs", "id", alloc)
  2911  		if err != nil {
  2912  			return fmt.Errorf("alloc lookup failed: %v", err)
  2913  		}
  2914  		if raw == nil {
  2915  			continue
  2916  		}
  2917  		if err := txn.Delete("allocs", raw); err != nil {
  2918  			return fmt.Errorf("alloc delete failed: %v", err)
  2919  		}
  2920  	}
  2921  
  2922  	// Update the indexes
  2923  	if err := txn.Insert("index", &IndexEntry{"evals", index}); err != nil {
  2924  		return fmt.Errorf("index update failed: %v", err)
  2925  	}
  2926  	if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil {
  2927  		return fmt.Errorf("index update failed: %v", err)
  2928  	}
  2929  
  2930  	// Set the job's status
  2931  	if err := s.setJobStatuses(index, txn, jobs, true); err != nil {
  2932  		return fmt.Errorf("setting job status failed: %v", err)
  2933  	}
  2934  
  2935  	return txn.Commit()
  2936  }
  2937  
  2938  // EvalByID is used to lookup an eval by its ID
  2939  func (s *StateStore) EvalByID(ws memdb.WatchSet, id string) (*structs.Evaluation, error) {
  2940  	txn := s.db.ReadTxn()
  2941  
  2942  	watchCh, existing, err := txn.FirstWatch("evals", "id", id)
  2943  	if err != nil {
  2944  		return nil, fmt.Errorf("eval lookup failed: %v", err)
  2945  	}
  2946  
  2947  	ws.Add(watchCh)
  2948  
  2949  	if existing != nil {
  2950  		return existing.(*structs.Evaluation), nil
  2951  	}
  2952  	return nil, nil
  2953  }
  2954  
  2955  // EvalsByIDPrefix is used to lookup evaluations by prefix in a particular
  2956  // namespace
  2957  func (s *StateStore) EvalsByIDPrefix(ws memdb.WatchSet, namespace, id string) (memdb.ResultIterator, error) {
  2958  	txn := s.db.ReadTxn()
  2959  
  2960  	// Get an iterator over all evals by the id prefix
  2961  	iter, err := txn.Get("evals", "id_prefix", id)
  2962  	if err != nil {
  2963  		return nil, fmt.Errorf("eval lookup failed: %v", err)
  2964  	}
  2965  
  2966  	ws.Add(iter.WatchCh())
  2967  
  2968  	// Wrap the iterator in a filter
  2969  	wrap := memdb.NewFilterIterator(iter, evalNamespaceFilter(namespace))
  2970  	return wrap, nil
  2971  }
  2972  
  2973  // evalNamespaceFilter returns a filter function that filters all evaluations
  2974  // not in the given namespace.
  2975  func evalNamespaceFilter(namespace string) func(interface{}) bool {
  2976  	return func(raw interface{}) bool {
  2977  		eval, ok := raw.(*structs.Evaluation)
  2978  		if !ok {
  2979  			return true
  2980  		}
  2981  
  2982  		return eval.Namespace != namespace
  2983  	}
  2984  }
  2985  
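// Note that memdb.NewFilterIterator drops every element for which the filter
// function returns true, so evalNamespaceFilter keeps only evaluations whose
// namespace matches. The same pattern works for any predicate, e.g. keeping
// only blocked evaluations (sketch, not used by this file):
//
//	f := func(raw interface{}) bool {
//		eval, ok := raw.(*structs.Evaluation)
//		return !ok || eval.Status != structs.EvalStatusBlocked
//	}
//	wrap := memdb.NewFilterIterator(iter, f)
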
  2986  // EvalsByJob returns all the evaluations by job id
  2987  func (s *StateStore) EvalsByJob(ws memdb.WatchSet, namespace, jobID string) ([]*structs.Evaluation, error) {
  2988  	txn := s.db.ReadTxn()
  2989  
  2990  	// Get an iterator over the evals for the job
  2991  	iter, err := txn.Get("evals", "job_prefix", namespace, jobID)
  2992  	if err != nil {
  2993  		return nil, err
  2994  	}
  2995  
  2996  	ws.Add(iter.WatchCh())
  2997  
  2998  	var out []*structs.Evaluation
  2999  	for {
  3000  		raw := iter.Next()
  3001  		if raw == nil {
  3002  			break
  3003  		}
  3004  
  3005  		e := raw.(*structs.Evaluation)
  3006  
  3007  		// Filter non-exact matches
  3008  		if e.JobID != jobID {
  3009  			continue
  3010  		}
  3011  
  3012  		out = append(out, e)
  3013  	}
  3014  	return out, nil
  3015  }
  3016  
  3017  // Evals returns an iterator over all the evaluations
  3018  func (s *StateStore) Evals(ws memdb.WatchSet) (memdb.ResultIterator, error) {
  3019  	txn := s.db.ReadTxn()
  3020  
  3021  	// Walk the entire table
  3022  	iter, err := txn.Get("evals", "id")
  3023  	if err != nil {
  3024  		return nil, err
  3025  	}
  3026  
  3027  	ws.Add(iter.WatchCh())
  3028  
  3029  	return iter, nil
  3030  }
  3031  
  3032  // EvalsByNamespace returns an iterator over all the evaluations in the given
  3033  // namespace
  3034  func (s *StateStore) EvalsByNamespace(ws memdb.WatchSet, namespace string) (memdb.ResultIterator, error) {
  3035  	txn := s.db.ReadTxn()
  3036  
  3037  	// Walk the entire table
  3038  	iter, err := txn.Get("evals", "namespace", namespace)
  3039  	if err != nil {
  3040  		return nil, err
  3041  	}
  3042  
  3043  	ws.Add(iter.WatchCh())
  3044  
  3045  	return iter, nil
  3046  }
  3047  
  3048  // UpdateAllocsFromClient is used to update an allocation based on input
  3049  // from a client. While the schedulers are the authority on the allocation for
  3050  // most things, some updates are authoritative from the client. Specifically,
  3051  // the desired state comes from the schedulers, while the actual state comes
  3052  // from clients.
  3053  func (s *StateStore) UpdateAllocsFromClient(msgType structs.MessageType, index uint64, allocs []*structs.Allocation) error {
  3054  	txn := s.db.WriteTxnMsgT(msgType, index)
  3055  	defer txn.Abort()
  3056  
  3057  	// Handle each of the updated allocations
  3058  	for _, alloc := range allocs {
  3059  		if err := s.nestedUpdateAllocFromClient(txn, index, alloc); err != nil {
  3060  			return err
  3061  		}
  3062  	}
  3063  
  3064  	// Update the indexes
  3065  	if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil {
  3066  		return fmt.Errorf("index update failed: %v", err)
  3067  	}
  3068  
  3069  	return txn.Commit()
  3070  }
  3071  
  3072  // nestedUpdateAllocFromClient is used to nest an update of an allocation with client status
  3073  func (s *StateStore) nestedUpdateAllocFromClient(txn *txn, index uint64, alloc *structs.Allocation) error {
  3074  	// Look for existing alloc
  3075  	existing, err := txn.First("allocs", "id", alloc.ID)
  3076  	if err != nil {
  3077  		return fmt.Errorf("alloc lookup failed: %v", err)
  3078  	}
  3079  
  3080  	// Nothing to do if this does not exist
  3081  	if existing == nil {
  3082  		return nil
  3083  	}
  3084  	exist := existing.(*structs.Allocation)
  3085  
  3086  	// Copy everything from the existing allocation
  3087  	copyAlloc := exist.Copy()
  3088  
  3089  	// Pull in anything the client is the authority on
  3090  	copyAlloc.ClientStatus = alloc.ClientStatus
  3091  	copyAlloc.ClientDescription = alloc.ClientDescription
  3092  	copyAlloc.TaskStates = alloc.TaskStates
  3093  	copyAlloc.NetworkStatus = alloc.NetworkStatus
  3094  
  3095  	// The client can only set its deployment health and timestamp, so just take
  3096  	// those
  3097  	if copyAlloc.DeploymentStatus != nil && alloc.DeploymentStatus != nil {
  3098  		oldHasHealthy := copyAlloc.DeploymentStatus.HasHealth()
  3099  		newHasHealthy := alloc.DeploymentStatus.HasHealth()
  3100  
  3101  		// We got new health information from the client
  3102  		if newHasHealthy && (!oldHasHealthy || *copyAlloc.DeploymentStatus.Healthy != *alloc.DeploymentStatus.Healthy) {
  3103  			// Update the deployment health and timestamp
  3104  			copyAlloc.DeploymentStatus.Healthy = helper.BoolToPtr(*alloc.DeploymentStatus.Healthy)
  3105  			copyAlloc.DeploymentStatus.Timestamp = alloc.DeploymentStatus.Timestamp
  3106  			copyAlloc.DeploymentStatus.ModifyIndex = index
  3107  		}
  3108  	} else if alloc.DeploymentStatus != nil {
  3109  		// First time getting a deployment status so copy everything and just
  3110  		// set the index
  3111  		copyAlloc.DeploymentStatus = alloc.DeploymentStatus.Copy()
  3112  		copyAlloc.DeploymentStatus.ModifyIndex = index
  3113  	}
  3114  
  3115  	// Update the modify index
  3116  	copyAlloc.ModifyIndex = index
  3117  
  3118  	// Update the modify time
  3119  	copyAlloc.ModifyTime = alloc.ModifyTime
  3120  
  3121  	if err := s.updateDeploymentWithAlloc(index, copyAlloc, exist, txn); err != nil {
  3122  		return fmt.Errorf("error updating deployment: %v", err)
  3123  	}
  3124  
  3125  	if err := s.updateSummaryWithAlloc(index, copyAlloc, exist, txn); err != nil {
  3126  		return fmt.Errorf("error updating job summary: %v", err)
  3127  	}
  3128  
  3129  	if err := s.updateEntWithAlloc(index, copyAlloc, exist, txn); err != nil {
  3130  		return err
  3131  	}
  3132  
  3133  	if err := s.updatePluginWithAlloc(index, copyAlloc, txn); err != nil {
  3134  		return err
  3135  	}
  3136  
  3137  	// Update the allocation
  3138  	if err := txn.Insert("allocs", copyAlloc); err != nil {
  3139  		return fmt.Errorf("alloc insert failed: %v", err)
  3140  	}
  3141  
  3142  	// Set the job's status
  3143  	forceStatus := ""
  3144  	if !copyAlloc.TerminalStatus() {
  3145  		forceStatus = structs.JobStatusRunning
  3146  	}
  3147  
  3148  	tuple := structs.NamespacedID{
  3149  		ID:        exist.JobID,
  3150  		Namespace: exist.Namespace,
  3151  	}
  3152  	jobs := map[structs.NamespacedID]string{tuple: forceStatus}
  3153  
  3154  	if err := s.setJobStatuses(index, txn, jobs, false); err != nil {
  3155  		return fmt.Errorf("setting job status failed: %v", err)
  3156  	}
  3157  	return nil
  3158  }
  3159  
  3160  // UpsertAllocs is used to evict a set of allocations and allocate new ones at
  3161  // the same time.
  3162  func (s *StateStore) UpsertAllocs(msgType structs.MessageType, index uint64, allocs []*structs.Allocation) error {
  3163  	txn := s.db.WriteTxn(index)
  3164  	defer txn.Abort()
  3165  	if err := s.upsertAllocsImpl(index, allocs, txn); err != nil {
  3166  		return err
  3167  	}
  3168  	return txn.Commit()
  3169  }
  3170  
  3171  // upsertAllocsImpl is the actual implementation of UpsertAllocs so that it
  3172  // may be used with an existing transaction.
  3173  func (s *StateStore) upsertAllocsImpl(index uint64, allocs []*structs.Allocation, txn *txn) error {
  3174  	// Handle the allocations
  3175  	jobs := make(map[structs.NamespacedID]string, 1)
  3176  	for _, alloc := range allocs {
  3177  		existing, err := txn.First("allocs", "id", alloc.ID)
  3178  		if err != nil {
  3179  			return fmt.Errorf("alloc lookup failed: %v", err)
  3180  		}
  3181  		exist, _ := existing.(*structs.Allocation)
  3182  
  3183  		if exist == nil {
  3184  			alloc.CreateIndex = index
  3185  			alloc.ModifyIndex = index
  3186  			alloc.AllocModifyIndex = index
  3187  			if alloc.DeploymentStatus != nil {
  3188  				alloc.DeploymentStatus.ModifyIndex = index
  3189  			}
  3190  
  3191  			// Issue https://github.com/hashicorp/nomad/issues/2583 uncovered
  3192  			// a race between a forced garbage collection and the scheduler
  3193  			// marking an allocation as terminal. The issue is that the
  3194  			// allocation from the scheduler has its job normalized and the FSM
  3195  			// will only denormalize if the allocation is not terminal. However,
  3196  			// if the allocation is garbage collected, that will result in an
  3197  			// allocation being upserted for the first time without a job
  3198  			// attached. Returning an error here causes the FSM to error,
  3199  			// causing plan_apply to error and thus causing the evaluation to
  3200  			// be failed. This forces an index refresh that should solve this
  3201  			// issue.
  3202  			if alloc.Job == nil {
  3203  				return fmt.Errorf("attempting to upsert allocation %q without a job", alloc.ID)
  3204  			}
  3205  		} else {
  3206  			alloc.CreateIndex = exist.CreateIndex
  3207  			alloc.ModifyIndex = index
  3208  			alloc.AllocModifyIndex = index
  3209  
  3210  			// Keep the clients task states
  3211  			alloc.TaskStates = exist.TaskStates
  3212  
  3213  			// If the scheduler is marking this allocation as lost we do not
  3214  			// want to reuse the status of the existing allocation.
  3215  			if alloc.ClientStatus != structs.AllocClientStatusLost {
  3216  				alloc.ClientStatus = exist.ClientStatus
  3217  				alloc.ClientDescription = exist.ClientDescription
  3218  			}
  3219  
  3220  			// The job has been denormalized so re-attach the original job
  3221  			if alloc.Job == nil {
  3222  				alloc.Job = exist.Job
  3223  			}
  3224  		}
  3225  
  3226  		// OPTIMIZATION:
  3227  		// These helpers should be given a map of new to old allocations and
  3228  		// the updates should be done once for all changes. The current
  3229  		// implementation causes O(n) lookups/copies/insertions rather than O(1).
  3230  		if err := s.updateDeploymentWithAlloc(index, alloc, exist, txn); err != nil {
  3231  			return fmt.Errorf("error updating deployment: %v", err)
  3232  		}
  3233  
  3234  		if err := s.updateSummaryWithAlloc(index, alloc, exist, txn); err != nil {
  3235  			return fmt.Errorf("error updating job summary: %v", err)
  3236  		}
  3237  
  3238  		if err := s.updateEntWithAlloc(index, alloc, exist, txn); err != nil {
  3239  			return err
  3240  		}
  3241  
  3242  		if err := s.updatePluginWithAlloc(index, alloc, txn); err != nil {
  3243  			return err
  3244  		}
  3245  
  3246  		if err := txn.Insert("allocs", alloc); err != nil {
  3247  			return fmt.Errorf("alloc insert failed: %v", err)
  3248  		}
  3249  
  3250  		if alloc.PreviousAllocation != "" {
  3251  			prevAlloc, err := txn.First("allocs", "id", alloc.PreviousAllocation)
  3252  			if err != nil {
  3253  				return fmt.Errorf("alloc lookup failed: %v", err)
  3254  			}
  3255  			existingPrevAlloc, _ := prevAlloc.(*structs.Allocation)
  3256  			if existingPrevAlloc != nil {
  3257  				prevAllocCopy := existingPrevAlloc.Copy()
  3258  				prevAllocCopy.NextAllocation = alloc.ID
  3259  				prevAllocCopy.ModifyIndex = index
  3260  				if err := txn.Insert("allocs", prevAllocCopy); err != nil {
  3261  					return fmt.Errorf("alloc insert failed: %v", err)
  3262  				}
  3263  			}
  3264  		}
  3265  
  3266  		// If the allocation is running, force the job to running status.
  3267  		forceStatus := ""
  3268  		if !alloc.TerminalStatus() {
  3269  			forceStatus = structs.JobStatusRunning
  3270  		}
  3271  
  3272  		tuple := structs.NamespacedID{
  3273  			ID:        alloc.JobID,
  3274  			Namespace: alloc.Namespace,
  3275  		}
  3276  		jobs[tuple] = forceStatus
  3277  	}
  3278  
  3279  	// Update the indexes
  3280  	if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil {
  3281  		return fmt.Errorf("index update failed: %v", err)
  3282  	}
  3283  
  3284  	// Set the job's status
  3285  	if err := s.setJobStatuses(index, txn, jobs, false); err != nil {
  3286  		return fmt.Errorf("setting job status failed: %v", err)
  3287  	}
  3288  
  3289  	return nil
  3290  }
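
        // Illustrative sketch (assumed values, not part of the original file):
        // because upsertAllocsImpl rewrites the previous allocation's
        // NextAllocation, the replacement chain stays walkable in both
        // directions:
        //
        //	replacement.PreviousAllocation = oldAlloc.ID
        //	err := store.UpsertAllocs(structs.ApplyPlanResultsRequestType, idx,
        //		[]*structs.Allocation{replacement})
        //	// Re-reading oldAlloc from state now shows
        //	// NextAllocation == replacement.ID.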
  3291  
  3292  // UpdateAllocsDesiredTransitions is used to update the desired transitions
  3293  // of a set of allocations.
  3294  func (s *StateStore) UpdateAllocsDesiredTransitions(msgType structs.MessageType, index uint64, allocs map[string]*structs.DesiredTransition,
  3295  	evals []*structs.Evaluation) error {
  3296  
  3297  	txn := s.db.WriteTxnMsgT(msgType, index)
  3298  	defer txn.Abort()
  3299  
  3300  	// Handle each of the updated allocations
  3301  	for id, transition := range allocs {
  3302  		if err := s.nestedUpdateAllocDesiredTransition(txn, index, id, transition); err != nil {
  3303  			return err
  3304  		}
  3305  	}
  3306  
  3307  	for _, eval := range evals {
  3308  		if err := s.nestedUpsertEval(txn, index, eval); err != nil {
  3309  			return err
  3310  		}
  3311  	}
  3312  
  3313  	// Update the indexes
  3314  	if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil {
  3315  		return fmt.Errorf("index update failed: %v", err)
  3316  	}
  3317  
  3318  	return txn.Commit()
  3319  }
  3320  
  3321  // nestedUpdateAllocDesiredTransition is used to nest an update of an
  3322  // allocation's desired transition
  3323  func (s *StateStore) nestedUpdateAllocDesiredTransition(
  3324  	txn *txn, index uint64, allocID string,
  3325  	transition *structs.DesiredTransition) error {
  3326  
  3327  	// Look for existing alloc
  3328  	existing, err := txn.First("allocs", "id", allocID)
  3329  	if err != nil {
  3330  		return fmt.Errorf("alloc lookup failed: %v", err)
  3331  	}
  3332  
  3333  	// Nothing to do if this does not exist
  3334  	if existing == nil {
  3335  		return nil
  3336  	}
  3337  	exist := existing.(*structs.Allocation)
  3338  
  3339  	// Copy everything from the existing allocation
  3340  	copyAlloc := exist.Copy()
  3341  
  3342  	// Merge the desired transitions
  3343  	copyAlloc.DesiredTransition.Merge(transition)
  3344  
  3345  	// Update the modify index
  3346  	copyAlloc.ModifyIndex = index
  3347  
  3348  	// Update the allocation
  3349  	if err := txn.Insert("allocs", copyAlloc); err != nil {
  3350  		return fmt.Errorf("alloc insert failed: %v", err)
  3351  	}
  3352  
  3353  	return nil
  3354  }
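
        // Illustrative sketch (assumed IDs, not part of the original file):
        // DesiredTransition.Merge only overwrites fields that are set on the
        // incoming transition, so a migrate flag can be layered on top of an
        // earlier transition without clearing it:
        //
        //	transitions := map[string]*structs.DesiredTransition{
        //		allocID: {Migrate: helper.BoolToPtr(true)},
        //	}
        //	err := store.UpdateAllocsDesiredTransitions(
        //		structs.AllocUpdateDesiredTransitionRequestType, idx,
        //		transitions, nil)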
  3355  
  3356  // AllocByID is used to lookup an allocation by its ID
  3357  func (s *StateStore) AllocByID(ws memdb.WatchSet, id string) (*structs.Allocation, error) {
  3358  	txn := s.db.ReadTxn()
  3359  	return s.allocByIDImpl(txn, ws, id)
  3360  }
  3361  
  3362  // allocByIDImpl retrieves an allocation and is called under an existing
  3363  // transaction. An optional watch set can be passed in, to which the
  3364  // allocation's watch channel is added.
  3365  func (s *StateStore) allocByIDImpl(txn Txn, ws memdb.WatchSet, id string) (*structs.Allocation, error) {
  3366  	watchCh, raw, err := txn.FirstWatch("allocs", "id", id)
  3367  	if err != nil {
  3368  		return nil, fmt.Errorf("alloc lookup failed: %v", err)
  3369  	}
  3370  
  3371  	ws.Add(watchCh)
  3372  
  3373  	if raw == nil {
  3374  		return nil, nil
  3375  	}
  3376  	alloc := raw.(*structs.Allocation)
  3377  	return alloc, nil
  3378  }
  3379  
  3380  // AllocsByIDPrefix is used to lookup allocs by prefix
  3381  func (s *StateStore) AllocsByIDPrefix(ws memdb.WatchSet, namespace, id string) (memdb.ResultIterator, error) {
  3382  	txn := s.db.ReadTxn()
  3383  
  3384  	iter, err := txn.Get("allocs", "id_prefix", id)
  3385  	if err != nil {
  3386  		return nil, fmt.Errorf("alloc lookup failed: %v", err)
  3387  	}
  3388  
  3389  	ws.Add(iter.WatchCh())
  3390  
  3391  	// Wrap the iterator in a filter
  3392  	wrap := memdb.NewFilterIterator(iter, allocNamespaceFilter(namespace))
  3393  	return wrap, nil
  3394  }
  3395  
  3396  // allocNamespaceFilter returns a filter function that filters out all
  3397  // allocations that are not in the given namespace.
  3398  func allocNamespaceFilter(namespace string) func(interface{}) bool {
  3399  	return func(raw interface{}) bool {
  3400  		alloc, ok := raw.(*structs.Allocation)
  3401  		if !ok {
  3402  			return true
  3403  		}
  3404  
  3405  		return alloc.Namespace != namespace
  3406  	}
  3407  }
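
        // Note (added for clarity): memdb.NewFilterIterator drops any element
        // for which the filter function returns true, so the filter above keeps
        // exactly the allocations in the requested namespace.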
  3408  
  3409  // AllocsByIDPrefixAllNSs is used to lookup allocs by prefix across all namespaces
  3410  func (s *StateStore) AllocsByIDPrefixAllNSs(ws memdb.WatchSet, prefix string) (memdb.ResultIterator, error) {
  3411  	txn := s.db.ReadTxn()
  3412  
  3413  	iter, err := txn.Get("allocs", "id_prefix", prefix)
  3414  	if err != nil {
  3415  		return nil, fmt.Errorf("alloc lookup failed: %v", err)
  3416  	}
  3417  
  3418  	ws.Add(iter.WatchCh())
  3419  
  3420  	return iter, nil
  3421  }
  3422  
  3423  // AllocsByNode returns all the allocations by node
  3424  func (s *StateStore) AllocsByNode(ws memdb.WatchSet, node string) ([]*structs.Allocation, error) {
  3425  	txn := s.db.ReadTxn()
  3426  
  3427  	return allocsByNodeTxn(txn, ws, node)
  3428  }
  3429  
  3430  func allocsByNodeTxn(txn ReadTxn, ws memdb.WatchSet, node string) ([]*structs.Allocation, error) {
  3431  	// Get an iterator over the node allocations, using only the
  3432  	// node prefix which ignores the terminal status
  3433  	iter, err := txn.Get("allocs", "node_prefix", node)
  3434  	if err != nil {
  3435  		return nil, err
  3436  	}
  3437  
  3438  	ws.Add(iter.WatchCh())
  3439  
  3440  	var out []*structs.Allocation
  3441  	for {
  3442  		raw := iter.Next()
  3443  		if raw == nil {
  3444  			break
  3445  		}
  3446  		out = append(out, raw.(*structs.Allocation))
  3447  	}
  3448  	return out, nil
  3449  }
  3450  
  3451  // AllocsByNodeTerminal returns all the allocations by node and terminal status
  3452  func (s *StateStore) AllocsByNodeTerminal(ws memdb.WatchSet, node string, terminal bool) ([]*structs.Allocation, error) {
  3453  	txn := s.db.ReadTxn()
  3454  
  3455  	// Get an iterator over the node allocations
  3456  	iter, err := txn.Get("allocs", "node", node, terminal)
  3457  	if err != nil {
  3458  		return nil, err
  3459  	}
  3460  
  3461  	ws.Add(iter.WatchCh())
  3462  
  3463  	var out []*structs.Allocation
  3464  	for {
  3465  		raw := iter.Next()
  3466  		if raw == nil {
  3467  			break
  3468  		}
  3469  		out = append(out, raw.(*structs.Allocation))
  3470  	}
  3471  	return out, nil
  3472  }
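
        // Illustrative sketch (assumed node ID, not part of the original file):
        // the "node" index is compound over (node, terminal), so both halves
        // must be supplied:
        //
        //	stopped, err := store.AllocsByNodeTerminal(ws, "node-1234", true)
        //	running, err := store.AllocsByNodeTerminal(ws, "node-1234", false)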
  3473  
  3474  // AllocsByJob returns allocations by job id
  3475  func (s *StateStore) AllocsByJob(ws memdb.WatchSet, namespace, jobID string, anyCreateIndex bool) ([]*structs.Allocation, error) {
  3476  	txn := s.db.ReadTxn()
  3477  
  3478  	// Get the job
  3479  	var job *structs.Job
  3480  	rawJob, err := txn.First("jobs", "id", namespace, jobID)
  3481  	if err != nil {
  3482  		return nil, err
  3483  	}
  3484  	if rawJob != nil {
  3485  		job = rawJob.(*structs.Job)
  3486  	}
  3487  
  3488  	// Get an iterator over the node allocations
  3489  	iter, err := txn.Get("allocs", "job", namespace, jobID)
  3490  	if err != nil {
  3491  		return nil, err
  3492  	}
  3493  
  3494  	ws.Add(iter.WatchCh())
  3495  
  3496  	var out []*structs.Allocation
  3497  	for {
  3498  		raw := iter.Next()
  3499  		if raw == nil {
  3500  			break
  3501  		}
  3502  
  3503  		alloc := raw.(*structs.Allocation)
  3504  		// Skip the allocation if it belongs to a job with the same ID but a
  3505  		// different create index (i.e. an older instance of the job) and the
  3506  		// caller did not ask for allocations across all create indexes.
  3507  		if !anyCreateIndex && job != nil && alloc.Job.CreateIndex != job.CreateIndex {
  3508  			continue
  3509  		}
  3510  		out = append(out, alloc)
  3511  	}
  3512  	return out, nil
  3513  }
  3514  
  3515  // AllocsByEval returns all the allocations by eval ID
  3516  func (s *StateStore) AllocsByEval(ws memdb.WatchSet, evalID string) ([]*structs.Allocation, error) {
  3517  	txn := s.db.ReadTxn()
  3518  
  3519  	// Get an iterator over the eval allocations
  3520  	iter, err := txn.Get("allocs", "eval", evalID)
  3521  	if err != nil {
  3522  		return nil, err
  3523  	}
  3524  
  3525  	ws.Add(iter.WatchCh())
  3526  
  3527  	var out []*structs.Allocation
  3528  	for {
  3529  		raw := iter.Next()
  3530  		if raw == nil {
  3531  			break
  3532  		}
  3533  		out = append(out, raw.(*structs.Allocation))
  3534  	}
  3535  	return out, nil
  3536  }
  3537  
  3538  // AllocsByDeployment returns all the allocations by deployment ID
  3539  func (s *StateStore) AllocsByDeployment(ws memdb.WatchSet, deploymentID string) ([]*structs.Allocation, error) {
  3540  	txn := s.db.ReadTxn()
  3541  
  3542  	// Get an iterator over the deployments allocations
  3543  	iter, err := txn.Get("allocs", "deployment", deploymentID)
  3544  	if err != nil {
  3545  		return nil, err
  3546  	}
  3547  
  3548  	ws.Add(iter.WatchCh())
  3549  
  3550  	var out []*structs.Allocation
  3551  	for {
  3552  		raw := iter.Next()
  3553  		if raw == nil {
  3554  			break
  3555  		}
  3556  		out = append(out, raw.(*structs.Allocation))
  3557  	}
  3558  	return out, nil
  3559  }
  3560  
  3561  // Allocs returns an iterator over all the allocations
  3562  func (s *StateStore) Allocs(ws memdb.WatchSet) (memdb.ResultIterator, error) {
  3563  	txn := s.db.ReadTxn()
  3564  
  3565  	// Walk the entire table
  3566  	iter, err := txn.Get("allocs", "id")
  3567  	if err != nil {
  3568  		return nil, err
  3569  	}
  3570  
  3571  	ws.Add(iter.WatchCh())
  3572  
  3573  	return iter, nil
  3574  }
  3575  
  3576  // AllocsByNamespace returns an iterator over all the allocations in the
  3577  // namespace
  3578  func (s *StateStore) AllocsByNamespace(ws memdb.WatchSet, namespace string) (memdb.ResultIterator, error) {
  3579  	txn := s.db.ReadTxn()
  3580  	return s.allocsByNamespaceImpl(ws, txn, namespace)
  3581  }
  3582  
  3583  // allocsByNamespaceImpl returns an iterator over all the allocations in the
  3584  // namespace
  3585  func (s *StateStore) allocsByNamespaceImpl(ws memdb.WatchSet, txn *txn, namespace string) (memdb.ResultIterator, error) {
  3586  	// Walk the entire table
  3587  	iter, err := txn.Get("allocs", "namespace", namespace)
  3588  	if err != nil {
  3589  		return nil, err
  3590  	}
  3591  
  3592  	ws.Add(iter.WatchCh())
  3593  
  3594  	return iter, nil
  3595  }
  3596  
  3597  // UpsertVaultAccessor is used to register a set of Vault accessors
  3598  func (s *StateStore) UpsertVaultAccessor(index uint64, accessors []*structs.VaultAccessor) error {
  3599  	txn := s.db.WriteTxn(index)
  3600  	defer txn.Abort()
  3601  
  3602  	for _, accessor := range accessors {
  3603  		// Set the create index
  3604  		accessor.CreateIndex = index
  3605  
  3606  		// Insert the accessor
  3607  		if err := txn.Insert("vault_accessors", accessor); err != nil {
  3608  			return fmt.Errorf("accessor insert failed: %v", err)
  3609  		}
  3610  	}
  3611  
  3612  	if err := txn.Insert("index", &IndexEntry{"vault_accessors", index}); err != nil {
  3613  		return fmt.Errorf("index update failed: %v", err)
  3614  	}
  3615  
  3616  	return txn.Commit()
  3617  }
  3618  
  3619  // DeleteVaultAccessors is used to delete a set of Vault Accessors
  3620  func (s *StateStore) DeleteVaultAccessors(index uint64, accessors []*structs.VaultAccessor) error {
  3621  	txn := s.db.WriteTxn(index)
  3622  	defer txn.Abort()
  3623  
  3624  	// Iterate over the accessors
  3625  	for _, accessor := range accessors {
  3626  		// Delete the accessor
  3627  		if err := txn.Delete("vault_accessors", accessor); err != nil {
  3628  			return fmt.Errorf("accessor delete failed: %v", err)
  3629  		}
  3630  	}
  3631  
  3632  	if err := txn.Insert("index", &IndexEntry{"vault_accessors", index}); err != nil {
  3633  		return fmt.Errorf("index update failed: %v", err)
  3634  	}
  3635  
  3636  	return txn.Commit()
  3637  }
  3638  
  3639  // VaultAccessor returns the given Vault accessor
  3640  func (s *StateStore) VaultAccessor(ws memdb.WatchSet, accessor string) (*structs.VaultAccessor, error) {
  3641  	txn := s.db.ReadTxn()
  3642  
  3643  	watchCh, existing, err := txn.FirstWatch("vault_accessors", "id", accessor)
  3644  	if err != nil {
  3645  		return nil, fmt.Errorf("accessor lookup failed: %v", err)
  3646  	}
  3647  
  3648  	ws.Add(watchCh)
  3649  
  3650  	if existing != nil {
  3651  		return existing.(*structs.VaultAccessor), nil
  3652  	}
  3653  
  3654  	return nil, nil
  3655  }
  3656  
  3657  // VaultAccessors returns an iterator of Vault accessors.
  3658  func (s *StateStore) VaultAccessors(ws memdb.WatchSet) (memdb.ResultIterator, error) {
  3659  	txn := s.db.ReadTxn()
  3660  
  3661  	iter, err := txn.Get("vault_accessors", "id")
  3662  	if err != nil {
  3663  		return nil, err
  3664  	}
  3665  
  3666  	ws.Add(iter.WatchCh())
  3667  
  3668  	return iter, nil
  3669  }
  3670  
  3671  // VaultAccessorsByAlloc returns all the Vault accessors by alloc ID
  3672  func (s *StateStore) VaultAccessorsByAlloc(ws memdb.WatchSet, allocID string) ([]*structs.VaultAccessor, error) {
  3673  	txn := s.db.ReadTxn()
  3674  
  3675  	// Get an iterator over the accessors
  3676  	iter, err := txn.Get("vault_accessors", "alloc_id", allocID)
  3677  	if err != nil {
  3678  		return nil, err
  3679  	}
  3680  
  3681  	ws.Add(iter.WatchCh())
  3682  
  3683  	var out []*structs.VaultAccessor
  3684  	for {
  3685  		raw := iter.Next()
  3686  		if raw == nil {
  3687  			break
  3688  		}
  3689  		out = append(out, raw.(*structs.VaultAccessor))
  3690  	}
  3691  	return out, nil
  3692  }
  3693  
  3694  // VaultAccessorsByNode returns all the Vault accessors by node ID
  3695  func (s *StateStore) VaultAccessorsByNode(ws memdb.WatchSet, nodeID string) ([]*structs.VaultAccessor, error) {
  3696  	txn := s.db.ReadTxn()
  3697  
  3698  	// Get an iterator over the accessors
  3699  	iter, err := txn.Get("vault_accessors", "node_id", nodeID)
  3700  	if err != nil {
  3701  		return nil, err
  3702  	}
  3703  
  3704  	ws.Add(iter.WatchCh())
  3705  
  3706  	var out []*structs.VaultAccessor
  3707  	for {
  3708  		raw := iter.Next()
  3709  		if raw == nil {
  3710  			break
  3711  		}
  3712  		out = append(out, raw.(*structs.VaultAccessor))
  3713  	}
  3714  	return out, nil
  3715  }
  3716  
  3717  func indexEntry(table string, index uint64) *IndexEntry {
  3718  	return &IndexEntry{
  3719  		Key:   table,
  3720  		Value: index,
  3721  	}
  3722  }
  3723  
  3724  const siTokenAccessorTable = "si_token_accessors"
  3725  
  3726  // UpsertSITokenAccessors is used to register a set of Service Identity token accessors.
  3727  func (s *StateStore) UpsertSITokenAccessors(index uint64, accessors []*structs.SITokenAccessor) error {
  3728  	txn := s.db.WriteTxn(index)
  3729  	defer txn.Abort()
  3730  
  3731  	for _, accessor := range accessors {
  3732  		// set the create index
  3733  		accessor.CreateIndex = index
  3734  
  3735  		// insert the accessor
  3736  		if err := txn.Insert(siTokenAccessorTable, accessor); err != nil {
  3737  			return errors.Wrap(err, "accessor insert failed")
  3738  		}
  3739  	}
  3740  
  3741  	// update the index for this table
  3742  	if err := txn.Insert("index", indexEntry(siTokenAccessorTable, index)); err != nil {
  3743  		return errors.Wrap(err, "index update failed")
  3744  	}
  3745  
  3746  	return txn.Commit()
  3747  }
  3748  
  3749  // DeleteSITokenAccessors is used to delete a set of Service Identity token accessors.
  3750  func (s *StateStore) DeleteSITokenAccessors(index uint64, accessors []*structs.SITokenAccessor) error {
  3751  	txn := s.db.WriteTxn(index)
  3752  	defer txn.Abort()
  3753  
  3754  	// Iterate over the accessors
  3755  	for _, accessor := range accessors {
  3756  		// Delete the accessor
  3757  		if err := txn.Delete(siTokenAccessorTable, accessor); err != nil {
  3758  			return errors.Wrap(err, "accessor delete failed")
  3759  		}
  3760  	}
  3761  
  3762  	// update the index for this table
  3763  	if err := txn.Insert("index", indexEntry(siTokenAccessorTable, index)); err != nil {
  3764  		return errors.Wrap(err, "index update failed")
  3765  	}
  3766  
  3767  	return txn.Commit()
  3768  }
  3769  
  3770  // SITokenAccessor returns the given Service Identity token accessor.
  3771  func (s *StateStore) SITokenAccessor(ws memdb.WatchSet, accessorID string) (*structs.SITokenAccessor, error) {
  3772  	txn := s.db.ReadTxn()
  3773  	defer txn.Abort()
  3774  
  3775  	watchCh, existing, err := txn.FirstWatch(siTokenAccessorTable, "id", accessorID)
  3776  	if err != nil {
  3777  		return nil, errors.Wrap(err, "accessor lookup failed")
  3778  	}
  3779  
  3780  	ws.Add(watchCh)
  3781  
  3782  	if existing != nil {
  3783  		return existing.(*structs.SITokenAccessor), nil
  3784  	}
  3785  
  3786  	return nil, nil
  3787  }
  3788  
  3789  // SITokenAccessors returns an iterator of Service Identity token accessors.
  3790  func (s *StateStore) SITokenAccessors(ws memdb.WatchSet) (memdb.ResultIterator, error) {
  3791  	txn := s.db.ReadTxn()
  3792  	defer txn.Abort()
  3793  
  3794  	iter, err := txn.Get(siTokenAccessorTable, "id")
  3795  	if err != nil {
  3796  		return nil, err
  3797  	}
  3798  
  3799  	ws.Add(iter.WatchCh())
  3800  
  3801  	return iter, nil
  3802  }
  3803  
  3804  // SITokenAccessorsByAlloc returns all the Service Identity token accessors by alloc ID.
  3805  func (s *StateStore) SITokenAccessorsByAlloc(ws memdb.WatchSet, allocID string) ([]*structs.SITokenAccessor, error) {
  3806  	txn := s.db.ReadTxn()
  3807  	defer txn.Abort()
  3808  
  3809  	// Get an iterator over the accessors
  3810  	iter, err := txn.Get(siTokenAccessorTable, "alloc_id", allocID)
  3811  	if err != nil {
  3812  		return nil, err
  3813  	}
  3814  
  3815  	ws.Add(iter.WatchCh())
  3816  
  3817  	var result []*structs.SITokenAccessor
  3818  	for raw := iter.Next(); raw != nil; raw = iter.Next() {
  3819  		result = append(result, raw.(*structs.SITokenAccessor))
  3820  	}
  3821  
  3822  	return result, nil
  3823  }
  3824  
  3825  // SITokenAccessorsByNode returns all the Service Identity token accessors by node ID.
  3826  func (s *StateStore) SITokenAccessorsByNode(ws memdb.WatchSet, nodeID string) ([]*structs.SITokenAccessor, error) {
  3827  	txn := s.db.ReadTxn()
  3828  	defer txn.Abort()
  3829  
  3830  	// Get an iterator over the accessors
  3831  	iter, err := txn.Get(siTokenAccessorTable, "node_id", nodeID)
  3832  	if err != nil {
  3833  		return nil, err
  3834  	}
  3835  
  3836  	ws.Add(iter.WatchCh())
  3837  
  3838  	var result []*structs.SITokenAccessor
  3839  	for raw := iter.Next(); raw != nil; raw = iter.Next() {
  3840  		result = append(result, raw.(*structs.SITokenAccessor))
  3841  	}
  3842  
  3843  	return result, nil
  3844  }
  3845  
  3846  // UpdateDeploymentStatus is used to make deployment status updates and
  3847  // potentially create an evaluation
  3848  func (s *StateStore) UpdateDeploymentStatus(msgType structs.MessageType, index uint64, req *structs.DeploymentStatusUpdateRequest) error {
  3849  	txn := s.db.WriteTxnMsgT(msgType, index)
  3850  	defer txn.Abort()
  3851  
  3852  	if err := s.updateDeploymentStatusImpl(index, req.DeploymentUpdate, txn); err != nil {
  3853  		return err
  3854  	}
  3855  
  3856  	// Upsert the job if necessary
  3857  	if req.Job != nil {
  3858  		if err := s.upsertJobImpl(index, req.Job, false, txn); err != nil {
  3859  			return err
  3860  		}
  3861  	}
  3862  
  3863  	// Upsert the optional eval
  3864  	if req.Eval != nil {
  3865  		if err := s.nestedUpsertEval(txn, index, req.Eval); err != nil {
  3866  			return err
  3867  		}
  3868  	}
  3869  
  3870  	return txn.Commit()
  3871  }
  3872  
  3873  // updateDeploymentStatusImpl is used to make deployment status updates
  3874  func (s *StateStore) updateDeploymentStatusImpl(index uint64, u *structs.DeploymentStatusUpdate, txn *txn) error {
  3875  	// Retrieve deployment
  3876  	ws := memdb.NewWatchSet()
  3877  	deployment, err := s.deploymentByIDImpl(ws, u.DeploymentID, txn)
  3878  	if err != nil {
  3879  		return err
  3880  	} else if deployment == nil {
  3881  		return fmt.Errorf("Deployment ID %q couldn't be updated as it does not exist", u.DeploymentID)
  3882  	} else if !deployment.Active() {
  3883  		return fmt.Errorf("Deployment %q has terminal status %q:", deployment.ID, deployment.Status)
  3884  	}
  3885  
  3886  	// Apply the new status
  3887  	copy := deployment.Copy()
  3888  	copy.Status = u.Status
  3889  	copy.StatusDescription = u.StatusDescription
  3890  	copy.ModifyIndex = index
  3891  
  3892  	// Insert the deployment
  3893  	if err := txn.Insert("deployment", copy); err != nil {
  3894  		return err
  3895  	}
  3896  
  3897  	// Update the index
  3898  	if err := txn.Insert("index", &IndexEntry{"deployment", index}); err != nil {
  3899  		return fmt.Errorf("index update failed: %v", err)
  3900  	}
  3901  
  3902  	// If the deployment is being marked as complete, set the job to stable.
  3903  	if copy.Status == structs.DeploymentStatusSuccessful {
  3904  		if err := s.updateJobStabilityImpl(index, copy.Namespace, copy.JobID, copy.JobVersion, true, txn); err != nil {
  3905  			return fmt.Errorf("failed to update job stability: %v", err)
  3906  		}
  3907  	}
  3908  
  3909  	return nil
  3910  }
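
        // Illustrative sketch (assumed IDs, not part of the original file): a
        // hypothetical failure update routed through UpdateDeploymentStatus:
        //
        //	req := &structs.DeploymentStatusUpdateRequest{
        //		DeploymentUpdate: &structs.DeploymentStatusUpdate{
        //			DeploymentID:      deploymentID,
        //			Status:            structs.DeploymentStatusFailed,
        //			StatusDescription: structs.DeploymentStatusDescriptionFailedAllocations,
        //		},
        //	}
        //	err := store.UpdateDeploymentStatus(
        //		structs.DeploymentStatusUpdateRequestType, idx, req)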
  3911  
  3912  // UpdateJobStability updates the stability of the given job and version to the
  3913  // desired status.
  3914  func (s *StateStore) UpdateJobStability(index uint64, namespace, jobID string, jobVersion uint64, stable bool) error {
  3915  	txn := s.db.WriteTxn(index)
  3916  	defer txn.Abort()
  3917  
  3918  	if err := s.updateJobStabilityImpl(index, namespace, jobID, jobVersion, stable, txn); err != nil {
  3919  		return err
  3920  	}
  3921  
  3922  	return txn.Commit()
  3923  }
  3924  
  3925  // updateJobStabilityImpl updates the stability of the given job and version
  3926  func (s *StateStore) updateJobStabilityImpl(index uint64, namespace, jobID string, jobVersion uint64, stable bool, txn *txn) error {
  3927  	// Get the job that is referenced
  3928  	job, err := s.jobByIDAndVersionImpl(nil, namespace, jobID, jobVersion, txn)
  3929  	if err != nil {
  3930  		return err
  3931  	}
  3932  
  3933  	// Has already been cleared, nothing to do
  3934  	if job == nil {
  3935  		return nil
  3936  	}
  3937  
  3938  	// If the job already has the desired stability, nothing to do
  3939  	if job.Stable == stable {
  3940  		return nil
  3941  	}
  3942  
  3943  	copy := job.Copy()
  3944  	copy.Stable = stable
  3945  	return s.upsertJobImpl(index, copy, true, txn)
  3946  }
  3947  
  3948  // UpdateDeploymentPromotion is used to promote canaries in a deployment and
  3949  // potentially create an evaluation
  3950  func (s *StateStore) UpdateDeploymentPromotion(msgType structs.MessageType, index uint64, req *structs.ApplyDeploymentPromoteRequest) error {
  3951  	txn := s.db.WriteTxnMsgT(msgType, index)
  3952  	defer txn.Abort()
  3953  
  3954  	// Retrieve the deployment and ensure it is active
  3955  	ws := memdb.NewWatchSet()
  3956  	deployment, err := s.deploymentByIDImpl(ws, req.DeploymentID, txn)
  3957  	if err != nil {
  3958  		return err
  3959  	} else if deployment == nil {
  3960  		return fmt.Errorf("Deployment ID %q couldn't be updated as it does not exist", req.DeploymentID)
  3961  	} else if !deployment.Active() {
  3962  		return fmt.Errorf("Deployment %q has terminal status %q:", deployment.ID, deployment.Status)
  3963  	}
  3964  
  3965  	// Retrieve affected allocations
  3966  	iter, err := txn.Get("allocs", "deployment", req.DeploymentID)
  3967  	if err != nil {
  3968  		return err
  3969  	}
  3970  
  3971  	// groupIndex is a map of groups being promoted
  3972  	groupIndex := make(map[string]struct{}, len(req.Groups))
  3973  	for _, g := range req.Groups {
  3974  		groupIndex[g] = struct{}{}
  3975  	}
  3976  
  3977  	// canaryIndex is the set of placed canaries in the deployment
  3978  	canaryIndex := make(map[string]struct{}, len(deployment.TaskGroups))
  3979  	for _, dstate := range deployment.TaskGroups {
  3980  		for _, c := range dstate.PlacedCanaries {
  3981  			canaryIndex[c] = struct{}{}
  3982  		}
  3983  	}
  3984  
  3985  	// healthyCounts is a mapping of group to the number of healthy canaries
  3986  	healthyCounts := make(map[string]int, len(deployment.TaskGroups))
  3987  
  3988  	// promotable is the set of allocations that we can move from canary to
  3989  	// non-canary
  3990  	var promotable []*structs.Allocation
  3991  
  3992  	for {
  3993  		raw := iter.Next()
  3994  		if raw == nil {
  3995  			break
  3996  		}
  3997  
  3998  		alloc := raw.(*structs.Allocation)
  3999  
  4000  		// Check that the alloc is a canary
  4001  		if _, ok := canaryIndex[alloc.ID]; !ok {
  4002  			continue
  4003  		}
  4004  
  4005  		// Check that the canary is part of a group being promoted
  4006  		if _, ok := groupIndex[alloc.TaskGroup]; !req.All && !ok {
  4007  			continue
  4008  		}
  4009  
  4010  		// Ensure the canaries are healthy
  4011  		if alloc.TerminalStatus() || !alloc.DeploymentStatus.IsHealthy() {
  4012  			continue
  4013  		}
  4014  
  4015  		healthyCounts[alloc.TaskGroup]++
  4016  		promotable = append(promotable, alloc)
  4017  	}
  4018  
  4019  	// Determine if we have enough healthy allocations
  4020  	var unhealthyErr multierror.Error
  4021  	for tg, dstate := range deployment.TaskGroups {
  4022  		if _, ok := groupIndex[tg]; !req.All && !ok {
  4023  			continue
  4024  		}
  4025  
  4026  		need := dstate.DesiredCanaries
  4027  		if need == 0 {
  4028  			continue
  4029  		}
  4030  
  4031  		if have := healthyCounts[tg]; have < need {
  4032  			multierror.Append(&unhealthyErr, fmt.Errorf("Task group %q has %d/%d healthy allocations", tg, have, need))
  4033  		}
  4034  	}
  4035  
  4036  	if err := unhealthyErr.ErrorOrNil(); err != nil {
  4037  		return err
  4038  	}
  4039  
  4040  	// Update deployment
  4041  	copy := deployment.Copy()
  4042  	copy.ModifyIndex = index
  4043  	for tg, status := range copy.TaskGroups {
  4044  		_, ok := groupIndex[tg]
  4045  		if !req.All && !ok {
  4046  			continue
  4047  		}
  4048  
  4049  		// reset the progress deadline
  4050  		if status.ProgressDeadline > 0 && !status.RequireProgressBy.IsZero() {
  4051  			status.RequireProgressBy = time.Now().Add(status.ProgressDeadline)
  4052  		}
  4053  		status.Promoted = true
  4054  	}
  4055  
  4056  	// If the deployment no longer needs promotion, update its status
  4057  	if !copy.RequiresPromotion() && copy.Status == structs.DeploymentStatusRunning {
  4058  		copy.StatusDescription = structs.DeploymentStatusDescriptionRunning
  4059  	}
  4060  
  4061  	// Insert the deployment
  4062  	if err := s.upsertDeploymentImpl(index, copy, txn); err != nil {
  4063  		return err
  4064  	}
  4065  
  4066  	// Upsert the optional eval
  4067  	if req.Eval != nil {
  4068  		if err := s.nestedUpsertEval(txn, index, req.Eval); err != nil {
  4069  			return err
  4070  		}
  4071  	}
  4072  
  4073  	// For each promotable allocation, remove the canary field
  4074  	for _, alloc := range promotable {
  4075  		promoted := alloc.Copy()
  4076  		promoted.DeploymentStatus.Canary = false
  4077  		promoted.DeploymentStatus.ModifyIndex = index
  4078  		promoted.ModifyIndex = index
  4079  		promoted.AllocModifyIndex = index
  4080  
  4081  		if err := txn.Insert("allocs", promoted); err != nil {
  4082  			return fmt.Errorf("alloc insert failed: %v", err)
  4083  		}
  4084  	}
  4085  
  4086  	// Update the alloc index
  4087  	if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil {
  4088  		return fmt.Errorf("index update failed: %v", err)
  4089  	}
  4090  
  4091  	return txn.Commit()
  4092  }
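
        // Illustrative sketch (assumed IDs, not part of the original file):
        // promoting all canary groups in a deployment at once:
        //
        //	req := &structs.ApplyDeploymentPromoteRequest{
        //		DeploymentPromoteRequest: structs.DeploymentPromoteRequest{
        //			DeploymentID: deploymentID,
        //			All:          true,
        //		},
        //	}
        //	err := store.UpdateDeploymentPromotion(
        //		structs.DeploymentPromoteRequestType, idx, req)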
  4093  
  4094  // UpdateDeploymentAllocHealth is used to update the health of allocations as
  4095  // part of the deployment and potentially create an evaluation
  4096  func (s *StateStore) UpdateDeploymentAllocHealth(msgType structs.MessageType, index uint64, req *structs.ApplyDeploymentAllocHealthRequest) error {
  4097  	txn := s.db.WriteTxnMsgT(msgType, index)
  4098  	defer txn.Abort()
  4099  
  4100  	// Retrieve the deployment and ensure it is active
  4101  	ws := memdb.NewWatchSet()
  4102  	deployment, err := s.deploymentByIDImpl(ws, req.DeploymentID, txn)
  4103  	if err != nil {
  4104  		return err
  4105  	} else if deployment == nil {
  4106  		return fmt.Errorf("Deployment ID %q couldn't be updated as it does not exist", req.DeploymentID)
  4107  	} else if !deployment.Active() {
  4108  		return fmt.Errorf("Deployment %q has terminal status %q:", deployment.ID, deployment.Status)
  4109  	}
  4110  
  4111  	// Update the health status of each allocation
  4112  	if total := len(req.HealthyAllocationIDs) + len(req.UnhealthyAllocationIDs); total != 0 {
  4113  		setAllocHealth := func(id string, healthy bool, ts time.Time) error {
  4114  			existing, err := txn.First("allocs", "id", id)
  4115  			if err != nil {
  4116  				return fmt.Errorf("alloc %q lookup failed: %v", id, err)
  4117  			}
  4118  			if existing == nil {
  4119  				return fmt.Errorf("unknown alloc %q", id)
  4120  			}
  4121  
  4122  			old := existing.(*structs.Allocation)
  4123  			if old.DeploymentID != req.DeploymentID {
  4124  				return fmt.Errorf("alloc %q is not part of deployment %q", id, req.DeploymentID)
  4125  			}
  4126  
  4127  			// Set the health
  4128  			copy := old.Copy()
  4129  			if copy.DeploymentStatus == nil {
  4130  				copy.DeploymentStatus = &structs.AllocDeploymentStatus{}
  4131  			}
  4132  			copy.DeploymentStatus.Healthy = helper.BoolToPtr(healthy)
  4133  			copy.DeploymentStatus.Timestamp = ts
  4134  			copy.DeploymentStatus.ModifyIndex = index
  4135  			copy.ModifyIndex = index
  4136  
  4137  			if err := s.updateDeploymentWithAlloc(index, copy, old, txn); err != nil {
  4138  				return fmt.Errorf("error updating deployment: %v", err)
  4139  			}
  4140  
  4141  			if err := txn.Insert("allocs", copy); err != nil {
  4142  				return fmt.Errorf("alloc insert failed: %v", err)
  4143  			}
  4144  
  4145  			return nil
  4146  		}
  4147  
  4148  		for _, id := range req.HealthyAllocationIDs {
  4149  			if err := setAllocHealth(id, true, req.Timestamp); err != nil {
  4150  				return err
  4151  			}
  4152  		}
  4153  		for _, id := range req.UnhealthyAllocationIDs {
  4154  			if err := setAllocHealth(id, false, req.Timestamp); err != nil {
  4155  				return err
  4156  			}
  4157  		}
  4158  
  4159  		// Update the indexes
  4160  		if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil {
  4161  			return fmt.Errorf("index update failed: %v", err)
  4162  		}
  4163  	}
  4164  
  4165  	// Update the deployment status as needed.
  4166  	if req.DeploymentUpdate != nil {
  4167  		if err := s.updateDeploymentStatusImpl(index, req.DeploymentUpdate, txn); err != nil {
  4168  			return err
  4169  		}
  4170  	}
  4171  
  4172  	// Upsert the job if necessary
  4173  	if req.Job != nil {
  4174  		if err := s.upsertJobImpl(index, req.Job, false, txn); err != nil {
  4175  			return err
  4176  		}
  4177  	}
  4178  
  4179  	// Upsert the optional eval
  4180  	if req.Eval != nil {
  4181  		if err := s.nestedUpsertEval(txn, index, req.Eval); err != nil {
  4182  			return err
  4183  		}
  4184  	}
  4185  
  4186  	return txn.Commit()
  4187  }
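
        // Illustrative sketch (assumed IDs, not part of the original file):
        // marking a single allocation healthy within its deployment:
        //
        //	req := &structs.ApplyDeploymentAllocHealthRequest{
        //		DeploymentAllocHealthRequest: structs.DeploymentAllocHealthRequest{
        //			DeploymentID:         deploymentID,
        //			HealthyAllocationIDs: []string{allocID},
        //		},
        //		Timestamp: time.Now(),
        //	}
        //	err := store.UpdateDeploymentAllocHealth(
        //		structs.DeploymentAllocHealthRequestType, idx, req)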
  4188  
  4189  // LatestIndex returns the greatest index value across all indexes
  4190  func (s *StateStore) LatestIndex() (uint64, error) {
  4191  	indexes, err := s.Indexes()
  4192  	if err != nil {
  4193  		return 0, err
  4194  	}
  4195  
  4196  	var max uint64
  4197  	for {
  4198  		raw := indexes.Next()
  4199  		if raw == nil {
  4200  			break
  4201  		}
  4202  
  4203  		// Type assert the index entry
  4204  		idx := raw.(*IndexEntry)
  4205  
  4206  		// Determine the max
  4207  		if idx.Value > max {
  4208  			max = idx.Value
  4209  		}
  4210  	}
  4211  
  4212  	return max, nil
  4213  }
  4214  
  4215  // Index finds the matching index value
  4216  func (s *StateStore) Index(name string) (uint64, error) {
  4217  	txn := s.db.ReadTxn()
  4218  
  4219  	// Lookup the first matching index
  4220  	out, err := txn.First("index", "id", name)
  4221  	if err != nil {
  4222  		return 0, err
  4223  	}
  4224  	if out == nil {
  4225  		return 0, nil
  4226  	}
  4227  	return out.(*IndexEntry).Value, nil
  4228  }
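
        // Illustrative sketch (not part of the original file): per-table Raft
        // indexes come straight from the "index" table:
        //
        //	allocIdx, err := store.Index("allocs") // 0 if no entry exists yet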
  4229  
  4230  // Indexes returns an iterator over all the indexes
  4231  func (s *StateStore) Indexes() (memdb.ResultIterator, error) {
  4232  	txn := s.db.ReadTxn()
  4233  
  4234  	// Walk the entire index table
  4235  	iter, err := txn.Get("index", "id")
  4236  	if err != nil {
  4237  		return nil, err
  4238  	}
  4239  	return iter, nil
  4240  }
  4241  
  4242  // ReconcileJobSummaries re-creates summaries for all jobs present in the state
  4243  // store
  4244  func (s *StateStore) ReconcileJobSummaries(index uint64) error {
  4245  	txn := s.db.WriteTxn(index)
  4246  	defer txn.Abort()
  4247  
  4248  	// Get all the jobs
  4249  	iter, err := txn.Get("jobs", "id")
  4250  	if err != nil {
  4251  		return err
  4252  	}
  4253  	// COMPAT: Remove after 0.11
  4254  	// Iterate over jobs to build a list of parent jobs and their children
  4255  	parentMap := make(map[string][]*structs.Job)
  4256  	for {
  4257  		rawJob := iter.Next()
  4258  		if rawJob == nil {
  4259  			break
  4260  		}
  4261  		job := rawJob.(*structs.Job)
  4262  		if job.ParentID != "" {
  4263  			children := parentMap[job.ParentID]
  4264  			children = append(children, job)
  4265  			parentMap[job.ParentID] = children
  4266  		}
  4267  	}
  4268  
  4269  	// Get all the jobs again
  4270  	iter, err = txn.Get("jobs", "id")
  4271  	if err != nil {
  4272  		return err
  4273  	}
  4274  
  4275  	for {
  4276  		rawJob := iter.Next()
  4277  		if rawJob == nil {
  4278  			break
  4279  		}
  4280  		job := rawJob.(*structs.Job)
  4281  
  4282  		if job.IsParameterized() || job.IsPeriodic() {
  4283  			// COMPAT: Remove after 0.11
  4284  
  4285  			// The following block of code fixes incorrect child summaries due to a bug
  4286  			// See https://github.com/hashicorp/nomad/issues/3886 for details
  4287  			rawSummary, err := txn.First("job_summary", "id", job.Namespace, job.ID)
  4288  			if err != nil {
  4289  				return err
  4290  			}
  4291  			if rawSummary == nil {
  4292  				continue
  4293  			}
  4294  
  4295  			oldSummary := rawSummary.(*structs.JobSummary)
  4296  
  4297  			// Create an empty summary
  4298  			summary := &structs.JobSummary{
  4299  				JobID:     job.ID,
  4300  				Namespace: job.Namespace,
  4301  				Summary:   make(map[string]structs.TaskGroupSummary),
  4302  				Children:  &structs.JobChildrenSummary{},
  4303  			}
  4304  
  4305  			// Iterate over the children of this job, if any, to fix summary counts
  4306  			children := parentMap[job.ID]
  4307  			for _, childJob := range children {
  4308  				switch childJob.Status {
  4309  				case structs.JobStatusPending:
  4310  					summary.Children.Pending++
  4311  				case structs.JobStatusDead:
  4312  					summary.Children.Dead++
  4313  				case structs.JobStatusRunning:
  4314  					summary.Children.Running++
  4315  				}
  4316  			}
  4317  
  4318  			// Insert the job summary if it's different
  4319  			if !reflect.DeepEqual(summary, oldSummary) {
  4320  				// Set the create index of the summary same as the job's create index
  4321  				// and the modify index to the current index
  4322  				summary.CreateIndex = job.CreateIndex
  4323  				summary.ModifyIndex = index
  4324  
  4325  				if err := txn.Insert("job_summary", summary); err != nil {
  4326  					return fmt.Errorf("error inserting job summary: %v", err)
  4327  				}
  4328  			}
  4329  
  4330  			// Done with handling a parent job, continue to next
  4331  			continue
  4332  		}
  4333  
  4334  		// Create a job summary for the job
  4335  		summary := &structs.JobSummary{
  4336  			JobID:     job.ID,
  4337  			Namespace: job.Namespace,
  4338  			Summary:   make(map[string]structs.TaskGroupSummary),
  4339  		}
  4340  		for _, tg := range job.TaskGroups {
  4341  			summary.Summary[tg.Name] = structs.TaskGroupSummary{}
  4342  		}
  4343  
  4344  		// Find all the allocations for the jobs
  4345  		iterAllocs, err := txn.Get("allocs", "job", job.Namespace, job.ID)
  4346  		if err != nil {
  4347  			return err
  4348  		}
  4349  
  4350  		// Calculate the summary for the job
  4351  		for {
  4352  			rawAlloc := iterAllocs.Next()
  4353  			if rawAlloc == nil {
  4354  				break
  4355  			}
  4356  			alloc := rawAlloc.(*structs.Allocation)
  4357  
  4358  			// Ignore the allocation if it doesn't belong to the currently
  4359  			// registered job. The allocation is checked because of issue #2304
  4360  			if alloc.Job == nil || alloc.Job.CreateIndex != job.CreateIndex {
  4361  				continue
  4362  			}
  4363  
  4364  			tg := summary.Summary[alloc.TaskGroup]
  4365  			switch alloc.ClientStatus {
  4366  			case structs.AllocClientStatusFailed:
  4367  				tg.Failed++
  4368  			case structs.AllocClientStatusLost:
  4369  				tg.Lost++
  4370  			case structs.AllocClientStatusComplete:
  4371  				tg.Complete++
  4372  			case structs.AllocClientStatusRunning:
  4373  				tg.Running++
  4374  			case structs.AllocClientStatusPending:
  4375  				tg.Starting++
  4376  			default:
  4377  				s.logger.Error("invalid client status set on allocation", "client_status", alloc.ClientStatus, "alloc_id", alloc.ID)
  4378  			}
  4379  			summary.Summary[alloc.TaskGroup] = tg
  4380  		}
  4381  
  4382  		// Set the create index of the summary same as the job's create index
  4383  		// and the modify index to the current index
  4384  		summary.CreateIndex = job.CreateIndex
  4385  		summary.ModifyIndex = index
  4386  
  4387  		// Insert the job summary
  4388  		if err := txn.Insert("job_summary", summary); err != nil {
  4389  			return fmt.Errorf("error inserting job summary: %v", err)
  4390  		}
  4391  	}
  4392  
  4393  	// Update the indexes table for job summary
  4394  	if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil {
  4395  		return fmt.Errorf("index update failed: %v", err)
  4396  	}
  4397  	return txn.Commit()
  4398  }
  4399  
  4400  // setJobStatuses is a helper for calling setJobStatus on multiple jobs by ID.
  4401  // It takes a map of job IDs to an optional forceStatus string. It returns an
  4402  // error if the job doesn't exist or setJobStatus fails.
  4403  func (s *StateStore) setJobStatuses(index uint64, txn *txn,
  4404  	jobs map[structs.NamespacedID]string, evalDelete bool) error {
  4405  	for tuple, forceStatus := range jobs {
  4406  
  4407  		existing, err := txn.First("jobs", "id", tuple.Namespace, tuple.ID)
  4408  		if err != nil {
  4409  			return fmt.Errorf("job lookup failed: %v", err)
  4410  		}
  4411  
  4412  		if existing == nil {
  4413  			continue
  4414  		}
  4415  
  4416  		if err := s.setJobStatus(index, txn, existing.(*structs.Job), evalDelete, forceStatus); err != nil {
  4417  			return err
  4418  		}
  4419  
  4420  	}
  4421  
  4422  	return nil
  4423  }
  4424  
  4425  // setJobStatus sets the status of the job by looking up associated evaluations
  4426  // and allocations. evalDelete should be set to true if setJobStatus is being
  4427  // called because an evaluation is being deleted (potentially because of garbage
  4428  // collection). If forceStatus is non-empty, the job's status will be set to the
  4429  // passed status.
  4430  func (s *StateStore) setJobStatus(index uint64, txn *txn,
  4431  	job *structs.Job, evalDelete bool, forceStatus string) error {
  4432  
  4433  	// Capture the current status so we can check if there is a change
  4434  	oldStatus := job.Status
  4435  	newStatus := forceStatus
  4436  
  4437  	// If forceStatus is not set, compute the job's status.
  4438  	if forceStatus == "" {
  4439  		var err error
  4440  		newStatus, err = s.getJobStatus(txn, job, evalDelete)
  4441  		if err != nil {
  4442  			return err
  4443  		}
  4444  	}
  4445  
  4446  	// Fast-path if the job has not changed.
  4447  	if oldStatus == newStatus {
  4448  		return nil
  4449  	}
  4450  
  4451  	// Copy and update the existing job
  4452  	updated := job.Copy()
  4453  	updated.Status = newStatus
  4454  	updated.ModifyIndex = index
  4455  
  4456  	// Insert the job
  4457  	if err := txn.Insert("jobs", updated); err != nil {
  4458  		return fmt.Errorf("job insert failed: %v", err)
  4459  	}
  4460  	if err := txn.Insert("index", &IndexEntry{"jobs", index}); err != nil {
  4461  		return fmt.Errorf("index update failed: %v", err)
  4462  	}
  4463  
  4464  	// Update the children summary
  4465  	if err := s.setJobSummary(txn, updated, index, oldStatus, newStatus); err != nil {
  4466  		return fmt.Errorf("job summary update failed %w", err)
  4467  	}
  4468  	return nil
  4469  }
  4470  
  4471  func (s *StateStore) setJobSummary(txn *txn, updated *structs.Job, index uint64, oldStatus, newStatus string) error {
  4472  	if updated.ParentID == "" {
  4473  		return nil
  4474  	}
  4475  
  4476  	// Try to update the summary of the parent job summary
  4477  	summaryRaw, err := txn.First("job_summary", "id", updated.Namespace, updated.ParentID)
  4478  	if err != nil {
  4479  		return fmt.Errorf("unable to retrieve summary for parent job: %v", err)
  4480  	}
  4481  
  4482  	// Only continue if the summary exists. It may not exist if the parent
  4483  	// job was removed.
  4484  	if summaryRaw != nil {
  4485  		existing := summaryRaw.(*structs.JobSummary)
  4486  		pSummary := existing.Copy()
  4487  		if pSummary.Children == nil {
  4488  			pSummary.Children = new(structs.JobChildrenSummary)
  4489  		}
  4490  
  4491  		// Determine the transition and update the correct fields
  4492  		children := pSummary.Children
  4493  
  4494  		// Decrement old status
  4495  		if oldStatus != "" {
  4496  			switch oldStatus {
  4497  			case structs.JobStatusPending:
  4498  				children.Pending--
  4499  			case structs.JobStatusRunning:
  4500  				children.Running--
  4501  			case structs.JobStatusDead:
  4502  				children.Dead--
  4503  			default:
  4504  				return fmt.Errorf("unknown old job status %q", oldStatus)
  4505  			}
  4506  		}
  4507  
  4508  		// Increment new status
  4509  		switch newStatus {
  4510  		case structs.JobStatusPending:
  4511  			children.Pending++
  4512  		case structs.JobStatusRunning:
  4513  			children.Running++
  4514  		case structs.JobStatusDead:
  4515  			children.Dead++
  4516  		default:
  4517  			return fmt.Errorf("unknown new job status %q", newStatus)
  4518  		}
  4519  
  4520  		// Update the index
  4521  		pSummary.ModifyIndex = index
  4522  
  4523  		// Insert the summary
  4524  		if err := txn.Insert("job_summary", pSummary); err != nil {
  4525  			return fmt.Errorf("job summary insert failed: %v", err)
  4526  		}
  4527  		if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil {
  4528  			return fmt.Errorf("index update failed: %v", err)
  4529  		}
  4530  	}
  4531  	return nil
  4532  }
  4533  
  4534  func (s *StateStore) getJobStatus(txn *txn, job *structs.Job, evalDelete bool) (string, error) {
  4535  	// System, Periodic and Parameterized jobs are running until explicitly
  4536  	// stopped
  4537  	if job.Type == structs.JobTypeSystem || job.IsParameterized() || job.IsPeriodic() {
  4538  		if job.Stop {
  4539  			return structs.JobStatusDead, nil
  4540  		}
  4541  
  4542  		return structs.JobStatusRunning, nil
  4543  	}
  4544  
  4545  	allocs, err := txn.Get("allocs", "job", job.Namespace, job.ID)
  4546  	if err != nil {
  4547  		return "", err
  4548  	}
  4549  
  4550  	// If there is a non-terminal allocation, the job is running.
  4551  	hasAlloc := false
  4552  	for alloc := allocs.Next(); alloc != nil; alloc = allocs.Next() {
  4553  		hasAlloc = true
  4554  		if !alloc.(*structs.Allocation).TerminalStatus() {
  4555  			return structs.JobStatusRunning, nil
  4556  		}
  4557  	}
  4558  
  4559  	evals, err := txn.Get("evals", "job_prefix", job.Namespace, job.ID)
  4560  	if err != nil {
  4561  		return "", err
  4562  	}
  4563  
  4564  	hasEval := false
  4565  	for raw := evals.Next(); raw != nil; raw = evals.Next() {
  4566  		e := raw.(*structs.Evaluation)
  4567  
  4568  		// Filter non-exact matches
  4569  		if e.JobID != job.ID {
  4570  			continue
  4571  		}
  4572  
  4573  		hasEval = true
  4574  		if !e.TerminalStatus() {
  4575  			return structs.JobStatusPending, nil
  4576  		}
  4577  	}
  4578  
  4579  	// The job is dead if all the allocations and evals are terminal or if there
  4580  	// are no evals because of garbage collection.
  4581  	if evalDelete || hasEval || hasAlloc {
  4582  		return structs.JobStatusDead, nil
  4583  	}
  4584  
  4585  	return structs.JobStatusPending, nil
  4586  }
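
        // Summary of the decision order above (added for clarity):
        //
        //	system/periodic/parameterized  -> dead if stopped, else running
        //	any non-terminal allocation    -> running
        //	any non-terminal exact eval    -> pending
        //	terminal allocs/evals, or the
        //	eval was garbage collected     -> dead
        //	no allocs, no evals, no GC     -> pending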
  4587  
  4588  // updateSummaryWithJob creates or updates job summaries when new jobs are
  4589  // upserted or existing ones are updated
  4590  func (s *StateStore) updateSummaryWithJob(index uint64, job *structs.Job,
  4591  	txn *txn) error {
  4592  
  4593  	// Update the job summary
  4594  	summaryRaw, err := txn.First("job_summary", "id", job.Namespace, job.ID)
  4595  	if err != nil {
  4596  		return fmt.Errorf("job summary lookup failed: %v", err)
  4597  	}
  4598  
  4599  	// Get the summary or create if necessary
  4600  	var summary *structs.JobSummary
  4601  	hasSummaryChanged := false
  4602  	if summaryRaw != nil {
  4603  		summary = summaryRaw.(*structs.JobSummary).Copy()
  4604  	} else {
  4605  		summary = &structs.JobSummary{
  4606  			JobID:       job.ID,
  4607  			Namespace:   job.Namespace,
  4608  			Summary:     make(map[string]structs.TaskGroupSummary),
  4609  			Children:    new(structs.JobChildrenSummary),
  4610  			CreateIndex: index,
  4611  		}
  4612  		hasSummaryChanged = true
  4613  	}
  4614  
  4615  	for _, tg := range job.TaskGroups {
  4616  		if _, ok := summary.Summary[tg.Name]; !ok {
  4617  			newSummary := structs.TaskGroupSummary{
  4618  				Complete: 0,
  4619  				Failed:   0,
  4620  				Running:  0,
  4621  				Starting: 0,
  4622  			}
  4623  			summary.Summary[tg.Name] = newSummary
  4624  			hasSummaryChanged = true
  4625  		}
  4626  	}
  4627  
  4628  	// If the job summary has changed, update the modify index and persist it.
  4629  	if hasSummaryChanged {
  4630  		summary.ModifyIndex = index
  4631  
  4632  		// Update the indexes table for job summary
  4633  		if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil {
  4634  			return fmt.Errorf("index update failed: %v", err)
  4635  		}
  4636  		if err := txn.Insert("job_summary", summary); err != nil {
  4637  			return err
  4638  		}
  4639  	}
  4640  
  4641  	return nil
  4642  }
  4643  
  4644  // updateJobScalingPolicies upserts any scaling policies contained in the job
  4645  // and deletes any existing policies that are no longer present in the job
  4646  func (s *StateStore) updateJobScalingPolicies(index uint64, job *structs.Job, txn *txn) error {
  4647  
  4648  	ws := memdb.NewWatchSet()
  4649  
  4650  	scalingPolicies := job.GetScalingPolicies()
  4651  	newTargets := map[string]bool{}
  4652  	for _, p := range scalingPolicies {
  4653  		newTargets[p.JobKey()] = true
  4654  	}
  4655  	// find existing policies that need to be deleted
  4656  	deletedPolicies := []string{}
  4657  	iter, err := s.ScalingPoliciesByJobTxn(ws, job.Namespace, job.ID, txn)
  4658  	if err != nil {
  4659  		return fmt.Errorf("ScalingPoliciesByJob lookup failed: %v", err)
  4660  	}
  4661  	for raw := iter.Next(); raw != nil; raw = iter.Next() {
  4662  		oldPolicy := raw.(*structs.ScalingPolicy)
  4663  		if !newTargets[oldPolicy.JobKey()] {
  4664  			deletedPolicies = append(deletedPolicies, oldPolicy.ID)
  4665  		}
  4666  	}
  4667  	err = s.DeleteScalingPoliciesTxn(index, deletedPolicies, txn)
  4668  	if err != nil {
  4669  		return fmt.Errorf("DeleteScalingPolicies of removed policies failed: %v", err)
  4670  	}
  4671  
  4672  	err = s.UpsertScalingPoliciesTxn(index, scalingPolicies, txn)
  4673  	if err != nil {
  4674  		return fmt.Errorf("UpsertScalingPolicies of policies failed: %v", err)
  4675  	}
  4676  
  4677  	return nil
  4678  }
  4679  
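// Illustrative sketch (not part of the original source): the reconcile above
// is a plain desired-vs-actual set difference. The same idiom, generalized,
// with hypothetical names:
func diffRemovedKeys(desired map[string]bool, actual []string) []string {
	var toDelete []string
	for _, k := range actual {
		if !desired[k] {
			toDelete = append(toDelete, k)
		}
	}
	return toDelete
}
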
  4680  // updateJobCSIPlugins runs on job update and indexes the job in each CSI plugin the job's tasks use
  4681  func (s *StateStore) updateJobCSIPlugins(index uint64, job, prev *structs.Job, txn *txn) error {
  4682  	plugIns := make(map[string]*structs.CSIPlugin)
  4683  
  4684  	loop := func(job *structs.Job, delete bool) error {
  4685  		for _, tg := range job.TaskGroups {
  4686  			for _, t := range tg.Tasks {
  4687  				if t.CSIPluginConfig == nil {
  4688  					continue
  4689  				}
  4690  
  4691  				plugIn, ok := plugIns[t.CSIPluginConfig.ID]
  4692  				if !ok {
  4693  					p, err := s.CSIPluginByIDTxn(txn, nil, t.CSIPluginConfig.ID)
  4694  					if err != nil {
  4695  						return err
  4696  					}
  4697  					if p == nil {
  4698  						plugIn = structs.NewCSIPlugin(t.CSIPluginConfig.ID, index)
  4699  					} else {
  4700  						plugIn = p.Copy()
  4701  						plugIn.ModifyIndex = index
  4702  					}
  4703  					plugIns[plugIn.ID] = plugIn
  4704  				}
  4705  
  4706  				if delete {
  4707  					plugIn.DeleteJob(job, nil)
  4708  				} else {
  4709  					plugIn.AddJob(job, nil)
  4710  				}
  4711  			}
  4712  		}
  4713  
  4714  		return nil
  4715  	}
  4716  
  4717  	if prev != nil {
  4718  		err := loop(prev, true)
  4719  		if err != nil {
  4720  			return err
  4721  		}
  4722  	}
  4723  
  4724  	err := loop(job, false)
  4725  	if err != nil {
  4726  		return err
  4727  	}
  4728  
  4729  	for _, plugIn := range plugIns {
  4730  		err = txn.Insert("csi_plugins", plugIn)
  4731  		if err != nil {
  4732  			return fmt.Errorf("csi_plugins insert error: %v", err)
  4733  		}
  4734  	}
  4735  
  4736  	if err := txn.Insert("index", &IndexEntry{"csi_plugins", index}); err != nil {
  4737  		return fmt.Errorf("index update failed: %v", err)
  4738  	}
  4739  
  4740  	return nil
  4741  }
  4742  
  4743  // updateDeploymentWithAlloc is used to update the deployment state associated
  4744  // with the given allocation. The passed alloc may be updated if the deployment
  4745  // status has changed to capture the modify index at which it has changed.
  4746  func (s *StateStore) updateDeploymentWithAlloc(index uint64, alloc, existing *structs.Allocation, txn *txn) error {
  4747  	// Nothing to do if the allocation is not associated with a deployment
  4748  	if alloc.DeploymentID == "" {
  4749  		return nil
  4750  	}
  4751  
  4752  	// Get the deployment
  4753  	ws := memdb.NewWatchSet()
  4754  	deployment, err := s.deploymentByIDImpl(ws, alloc.DeploymentID, txn)
  4755  	if err != nil {
  4756  		return err
  4757  	}
  4758  	if deployment == nil {
  4759  		return nil
  4760  	}
  4761  
  4762  	// Retrieve the deployment state object
  4763  	_, ok := deployment.TaskGroups[alloc.TaskGroup]
  4764  	if !ok {
  4765  		// If the task group isn't part of the deployment, the task group wasn't
  4766  		// part of a rolling update so nothing to do
  4767  		return nil
  4768  	}
  4769  
  4770  	// Do not modify in-place. Instead keep track of what must be done
  4771  	placed := 0
  4772  	healthy := 0
  4773  	unhealthy := 0
  4774  
  4775  	// If there was no existing allocation, this is a placement and we increment
  4776  	// the placement
  4777  	existingHealthSet := existing != nil && existing.DeploymentStatus.HasHealth()
  4778  	allocHealthSet := alloc.DeploymentStatus.HasHealth()
  4779  	if existing == nil || existing.DeploymentID != alloc.DeploymentID {
  4780  		placed++
  4781  	} else if !existingHealthSet && allocHealthSet {
  4782  		if *alloc.DeploymentStatus.Healthy {
  4783  			healthy++
  4784  		} else {
  4785  			unhealthy++
  4786  		}
  4787  	} else if existingHealthSet && allocHealthSet {
  4788  		// See if it has gone from healthy to unhealthy
  4789  		if *existing.DeploymentStatus.Healthy && !*alloc.DeploymentStatus.Healthy {
  4790  			healthy--
  4791  			unhealthy++
  4792  		}
  4793  	}
  4794  
  4795  	// Nothing to do
  4796  	if placed == 0 && healthy == 0 && unhealthy == 0 {
  4797  		return nil
  4798  	}
  4799  
  4800  	// Update the allocation's deployment status modify index
  4801  	if alloc.DeploymentStatus != nil && healthy+unhealthy != 0 {
  4802  		alloc.DeploymentStatus.ModifyIndex = index
  4803  	}
  4804  
  4805  	// Create a copy of the deployment object
  4806  	deploymentCopy := deployment.Copy()
  4807  	deploymentCopy.ModifyIndex = index
  4808  
  4809  	dstate := deploymentCopy.TaskGroups[alloc.TaskGroup]
  4810  	dstate.PlacedAllocs += placed
  4811  	dstate.HealthyAllocs += healthy
  4812  	dstate.UnhealthyAllocs += unhealthy
  4813  
  4814  	// Ensure PlacedCanaries accurately reflects the alloc canary status
  4815  	if alloc.DeploymentStatus != nil && alloc.DeploymentStatus.Canary {
  4816  		found := false
  4817  		for _, canary := range dstate.PlacedCanaries {
  4818  			if alloc.ID == canary {
  4819  				found = true
  4820  				break
  4821  			}
  4822  		}
  4823  		if !found {
  4824  			dstate.PlacedCanaries = append(dstate.PlacedCanaries, alloc.ID)
  4825  		}
  4826  	}
  4827  
  4828  	// Update the progress deadline
  4829  	if pd := dstate.ProgressDeadline; pd != 0 {
  4830  		// If we are the first placed allocation for the deployment start the progress deadline.
  4831  		if placed != 0 && dstate.RequireProgressBy.IsZero() {
  4832  			// Use modify time instead of create time because we may in-place
  4833  			// update the allocation to be part of a new deployment.
  4834  			dstate.RequireProgressBy = time.Unix(0, alloc.ModifyTime).Add(pd)
  4835  		} else if healthy != 0 {
  4836  			if d := alloc.DeploymentStatus.Timestamp.Add(pd); d.After(dstate.RequireProgressBy) {
  4837  				dstate.RequireProgressBy = d
  4838  			}
  4839  		}
  4840  	}
  4841  
  4842  	// Upsert the deployment
  4843  	if err := s.upsertDeploymentImpl(index, deploymentCopy, txn); err != nil {
  4844  		return err
  4845  	}
  4846  
  4847  	return nil
  4848  }
  4849  
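// The health accounting above reduces to a small transition table
// (illustrative summary, not part of the original source):
//
//	existing alloc state       new alloc state         effect
//	-----------------------    --------------------    ----------------------
//	none / other deployment    any                     placed++
//	health unset               Healthy == true         healthy++
//	health unset               Healthy == false        unhealthy++
//	Healthy == true            Healthy == false        healthy--, unhealthy++
//
// The remaining combinations, including unhealthy back to healthy, fall
// through with no counter change in this code path.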
  4850  // updateSummaryWithAlloc updates the job summary when allocations are updated
  4851  // or inserted
  4852  func (s *StateStore) updateSummaryWithAlloc(index uint64, alloc *structs.Allocation,
  4853  	existingAlloc *structs.Allocation, txn *txn) error {
  4854  
  4855  	// We don't have to update the summary if the job is missing
  4856  	if alloc.Job == nil {
  4857  		return nil
  4858  	}
  4859  
  4860  	summaryRaw, err := txn.First("job_summary", "id", alloc.Namespace, alloc.JobID)
  4861  	if err != nil {
  4862  		return fmt.Errorf("unable to lookup job summary for job id %q in namespace %q: %v", alloc.JobID, alloc.Namespace, err)
  4863  	}
  4864  
  4865  	if summaryRaw == nil {
  4866  		// Check if the job is de-registered
  4867  		rawJob, err := txn.First("jobs", "id", alloc.Namespace, alloc.JobID)
  4868  		if err != nil {
  4869  			return fmt.Errorf("unable to query job: %v", err)
  4870  		}
  4871  
  4872  		// If the job is de-registered then we skip updating its summary
  4873  		if rawJob == nil {
  4874  			return nil
  4875  		}
  4876  
  4877  		return fmt.Errorf("job summary for job %q in namespace %q is not present", alloc.JobID, alloc.Namespace)
  4878  	}
  4879  
  4880  	// Get a copy of the existing summary
  4881  	jobSummary := summaryRaw.(*structs.JobSummary).Copy()
  4882  
  4883  	// Not updating the job summary because the allocation doesn't belong to the
  4884  	// currently registered job
  4885  	if jobSummary.CreateIndex != alloc.Job.CreateIndex {
  4886  		return nil
  4887  	}
  4888  
  4889  	tgSummary, ok := jobSummary.Summary[alloc.TaskGroup]
  4890  	if !ok {
  4891  		return fmt.Errorf("unable to find task group in the job summary: %v", alloc.TaskGroup)
  4892  	}
  4893  
  4894  	summaryChanged := false
  4895  	if existingAlloc == nil {
  4896  		switch alloc.DesiredStatus {
  4897  		case structs.AllocDesiredStatusStop, structs.AllocDesiredStatusEvict:
  4898  			s.logger.Error("new allocation inserted into state store with bad desired status",
  4899  				"alloc_id", alloc.ID, "desired_status", alloc.DesiredStatus)
  4900  		}
  4901  		switch alloc.ClientStatus {
  4902  		case structs.AllocClientStatusPending:
  4903  			tgSummary.Starting += 1
  4904  			if tgSummary.Queued > 0 {
  4905  				tgSummary.Queued -= 1
  4906  			}
  4907  			summaryChanged = true
  4908  		case structs.AllocClientStatusRunning, structs.AllocClientStatusFailed,
  4909  			structs.AllocClientStatusComplete:
  4910  			s.logger.Error("new allocation inserted into state store with bad client status",
  4911  				"alloc_id", alloc.ID, "client_status", alloc.ClientStatus)
  4912  		}
  4913  	} else if existingAlloc.ClientStatus != alloc.ClientStatus {
  4914  		// Incrementing the count of the bin of the current state
  4915  		switch alloc.ClientStatus {
  4916  		case structs.AllocClientStatusRunning:
  4917  			tgSummary.Running += 1
  4918  		case structs.AllocClientStatusFailed:
  4919  			tgSummary.Failed += 1
  4920  		case structs.AllocClientStatusPending:
  4921  			tgSummary.Starting += 1
  4922  		case structs.AllocClientStatusComplete:
  4923  			tgSummary.Complete += 1
  4924  		case structs.AllocClientStatusLost:
  4925  			tgSummary.Lost += 1
  4926  		}
  4927  
  4928  		// Decrementing the count of the bin of the last state
  4929  		switch existingAlloc.ClientStatus {
  4930  		case structs.AllocClientStatusRunning:
  4931  			if tgSummary.Running > 0 {
  4932  				tgSummary.Running -= 1
  4933  			}
  4934  		case structs.AllocClientStatusPending:
  4935  			if tgSummary.Starting > 0 {
  4936  				tgSummary.Starting -= 1
  4937  			}
  4938  		case structs.AllocClientStatusLost:
  4939  			if tgSummary.Lost > 0 {
  4940  				tgSummary.Lost -= 1
  4941  			}
  4942  		case structs.AllocClientStatusFailed, structs.AllocClientStatusComplete:
  4943  		default:
  4944  			s.logger.Error("invalid old client status for allocation",
  4945  				"alloc_id", existingAlloc.ID, "client_status", existingAlloc.ClientStatus)
  4946  		}
  4947  		summaryChanged = true
  4948  	}
  4949  	jobSummary.Summary[alloc.TaskGroup] = tgSummary
  4950  
  4951  	if summaryChanged {
  4952  		jobSummary.ModifyIndex = index
  4953  
  4954  		if err := s.updatePluginWithJobSummary(index, jobSummary, alloc, txn); err != nil {
        			return fmt.Errorf("job summary plugin update failed: %v", err)
        		}
  4955  
  4956  		// Update the indexes table for job summary
  4957  		if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil {
  4958  			return fmt.Errorf("index update failed: %v", err)
  4959  		}
  4960  
  4961  		if err := txn.Insert("job_summary", jobSummary); err != nil {
  4962  			return fmt.Errorf("updating job summary failed: %v", err)
  4963  		}
  4964  	}
  4965  
  4966  	return nil
  4967  }
  4968  
  4969  // updatePluginWithAlloc updates the CSI plugins for an alloc when the
  4970  // allocation is updated or inserted with a terminal server status.
  4971  func (s *StateStore) updatePluginWithAlloc(index uint64, alloc *structs.Allocation,
  4972  	txn *txn) error {
  4973  	if !alloc.ServerTerminalStatus() {
  4974  		return nil
  4975  	}
  4976  
  4977  	tg := alloc.Job.LookupTaskGroup(alloc.TaskGroup)
        	if tg == nil {
        		return nil
        	}
  4978  	for _, t := range tg.Tasks {
  4979  		if t.CSIPluginConfig != nil {
  4980  			pluginID := t.CSIPluginConfig.ID
  4981  			plug, err := s.CSIPluginByIDTxn(txn, nil, pluginID)
  4982  			if err != nil {
  4983  				return err
  4984  			}
  4985  			if plug == nil {
  4986  				// plugin may not have been created because it never
  4987  				// became healthy; skip this task and check the others
  4988  				continue
  4989  			}
  4990  			plug = plug.Copy()
  4991  			err = plug.DeleteAlloc(alloc.ID, alloc.NodeID)
  4992  			if err != nil {
  4993  				return err
  4994  			}
  4995  			err = updateOrGCPlugin(index, txn, plug)
  4996  			if err != nil {
  4997  				return err
  4998  			}
  4999  		}
  5000  	}
  5001  
  5002  	return nil
  5003  }
  5004  
  5005  // updatePluginWithJobSummary updates the CSI plugins for a job when the
  5006  // job summary is updated by an alloc
  5007  func (s *StateStore) updatePluginWithJobSummary(index uint64, summary *structs.JobSummary, alloc *structs.Allocation,
  5008  	txn *txn) error {
  5009  
  5010  	tg := alloc.Job.LookupTaskGroup(alloc.TaskGroup)
  5011  	if tg == nil {
  5012  		return nil
  5013  	}
  5014  
  5015  	for _, t := range tg.Tasks {
  5016  		if t.CSIPluginConfig != nil {
  5017  			pluginID := t.CSIPluginConfig.ID
  5018  			plug, err := s.CSIPluginByIDTxn(txn, nil, pluginID)
  5019  			if err != nil {
  5020  				return err
  5021  			}
  5022  			if plug == nil {
  5023  				plug = structs.NewCSIPlugin(pluginID, index)
  5024  			} else {
  5025  				plug = plug.Copy()
  5026  			}
  5027  
  5028  			plug.UpdateExpectedWithJob(alloc.Job, summary, alloc.ServerTerminalStatus())
  5029  			err = updateOrGCPlugin(index, txn, plug)
  5030  			if err != nil {
  5031  				return err
  5032  			}
  5033  		}
  5034  	}
  5035  
  5036  	return nil
  5037  }
  5038  
  5039  // UpsertACLPolicies is used to create or update a set of ACL policies
  5040  func (s *StateStore) UpsertACLPolicies(msgType structs.MessageType, index uint64, policies []*structs.ACLPolicy) error {
  5041  	txn := s.db.WriteTxnMsgT(msgType, index)
  5042  	defer txn.Abort()
  5043  
  5044  	for _, policy := range policies {
  5045  		// Ensure the policy hash is non-nil. This should be done outside the state store
  5046  		// for performance reasons, but we check here for defense in depth.
  5047  		if len(policy.Hash) == 0 {
  5048  			policy.SetHash()
  5049  		}
  5050  
  5051  		// Check if the policy already exists
  5052  		existing, err := txn.First("acl_policy", "id", policy.Name)
  5053  		if err != nil {
  5054  			return fmt.Errorf("policy lookup failed: %v", err)
  5055  		}
  5056  
  5057  		// Update all the indexes
  5058  		if existing != nil {
  5059  			policy.CreateIndex = existing.(*structs.ACLPolicy).CreateIndex
  5060  			policy.ModifyIndex = index
  5061  		} else {
  5062  			policy.CreateIndex = index
  5063  			policy.ModifyIndex = index
  5064  		}
  5065  
  5066  		// Update the policy
  5067  		if err := txn.Insert("acl_policy", policy); err != nil {
  5068  			return fmt.Errorf("upserting policy failed: %v", err)
  5069  		}
  5070  	}
  5071  
  5072  	// Update the indexes table
  5073  	if err := txn.Insert("index", &IndexEntry{"acl_policy", index}); err != nil {
  5074  		return fmt.Errorf("index update failed: %v", err)
  5075  	}
  5076  
  5077  	return txn.Commit()
  5078  }
  5079  
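// Example (illustrative sketch, not part of the original source): a minimal
// round-trip through UpsertACLPolicies and ACLPolicyByName. The Raft index and
// rules text are arbitrary, and structs.MsgTypeTestSetup is the message type
// Nomad's own state store tests pass when no real Raft message is in play.
func exampleACLPolicyRoundTrip(s *StateStore) error {
	policy := &structs.ACLPolicy{
		Name:        "readonly",
		Description: "grants read-only access",
		Rules:       `namespace "default" { policy = "read" }`,
	}
	policy.SetHash()

	if err := s.UpsertACLPolicies(structs.MsgTypeTestSetup, 1000, []*structs.ACLPolicy{policy}); err != nil {
		return err
	}

	// A nil WatchSet is accepted when change notification isn't needed.
	out, err := s.ACLPolicyByName(nil, "readonly")
	if err != nil {
		return err
	}
	if out == nil || out.CreateIndex != 1000 {
		return fmt.Errorf("unexpected policy state: %#v", out)
	}
	return nil
}
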
  5080  // DeleteACLPolicies deletes the policies with the given names
  5081  func (s *StateStore) DeleteACLPolicies(msgType structs.MessageType, index uint64, names []string) error {
  5082  	txn := s.db.WriteTxnMsgT(msgType, index)
  5083  	defer txn.Abort()
  5084  
  5085  	// Delete the policies
  5086  	for _, name := range names {
  5087  		if _, err := txn.DeleteAll("acl_policy", "id", name); err != nil {
  5088  			return fmt.Errorf("deleting acl policy failed: %v", err)
  5089  		}
  5090  	}
  5091  	if err := txn.Insert("index", &IndexEntry{"acl_policy", index}); err != nil {
  5092  		return fmt.Errorf("index update failed: %v", err)
  5093  	}
  5094  	return txn.Commit()
  5095  }
  5096  
  5097  // ACLPolicyByName is used to lookup a policy by name
  5098  func (s *StateStore) ACLPolicyByName(ws memdb.WatchSet, name string) (*structs.ACLPolicy, error) {
  5099  	txn := s.db.ReadTxn()
  5100  
  5101  	watchCh, existing, err := txn.FirstWatch("acl_policy", "id", name)
  5102  	if err != nil {
  5103  		return nil, fmt.Errorf("acl policy lookup failed: %v", err)
  5104  	}
  5105  	ws.Add(watchCh)
  5106  
  5107  	if existing != nil {
  5108  		return existing.(*structs.ACLPolicy), nil
  5109  	}
  5110  	return nil, nil
  5111  }
  5112  
  5113  // ACLPolicyByNamePrefix is used to lookup policies by prefix
  5114  func (s *StateStore) ACLPolicyByNamePrefix(ws memdb.WatchSet, prefix string) (memdb.ResultIterator, error) {
  5115  	txn := s.db.ReadTxn()
  5116  
  5117  	iter, err := txn.Get("acl_policy", "id_prefix", prefix)
  5118  	if err != nil {
  5119  		return nil, fmt.Errorf("acl policy lookup failed: %v", err)
  5120  	}
  5121  	ws.Add(iter.WatchCh())
  5122  
  5123  	return iter, nil
  5124  }
  5125  
  5126  // ACLPolicies returns an iterator over all the acl policies
  5127  func (s *StateStore) ACLPolicies(ws memdb.WatchSet) (memdb.ResultIterator, error) {
  5128  	txn := s.db.ReadTxn()
  5129  
  5130  	// Walk the entire table
  5131  	iter, err := txn.Get("acl_policy", "id")
  5132  	if err != nil {
  5133  		return nil, err
  5134  	}
  5135  	ws.Add(iter.WatchCh())
  5136  	return iter, nil
  5137  }
  5138  
  5139  // UpsertACLTokens is used to create or update a set of ACL tokens
  5140  func (s *StateStore) UpsertACLTokens(msgType structs.MessageType, index uint64, tokens []*structs.ACLToken) error {
  5141  	txn := s.db.WriteTxnMsgT(msgType, index)
  5142  	defer txn.Abort()
  5143  
  5144  	for _, token := range tokens {
  5145  		// Ensure the token hash is non-nil. This should be done outside the state store
  5146  		// for performance reasons, but we check here for defense in depth.
  5147  		if len(token.Hash) == 0 {
  5148  			token.SetHash()
  5149  		}
  5150  
  5151  		// Check if the token already exists
  5152  		existing, err := txn.First("acl_token", "id", token.AccessorID)
  5153  		if err != nil {
  5154  			return fmt.Errorf("token lookup failed: %v", err)
  5155  		}
  5156  
  5157  		// Update all the indexes
  5158  		if existing != nil {
  5159  			existTK := existing.(*structs.ACLToken)
  5160  			token.CreateIndex = existTK.CreateIndex
  5161  			token.ModifyIndex = index
  5162  
  5163  			// Do not allow SecretID or create time to change
  5164  			token.SecretID = existTK.SecretID
  5165  			token.CreateTime = existTK.CreateTime
  5166  
  5167  		} else {
  5168  			token.CreateIndex = index
  5169  			token.ModifyIndex = index
  5170  		}
  5171  
  5172  		// Update the token
  5173  		if err := txn.Insert("acl_token", token); err != nil {
  5174  			return fmt.Errorf("upserting token failed: %v", err)
  5175  		}
  5176  	}
  5177  
  5178  	// Update the indexes table
  5179  	if err := txn.Insert("index", &IndexEntry{"acl_token", index}); err != nil {
  5180  		return fmt.Errorf("index update failed: %v", err)
  5181  	}
  5182  	return txn.Commit()
  5183  }
  5184  
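// Example (illustrative sketch, not part of the original source): updating an
// existing token. As enforced above, the SecretID and CreateTime supplied by
// the caller are discarded in favor of the stored values, so only the mutable
// fields (name, type, policies, global) actually change.
func exampleACLTokenUpdate(s *StateStore, index uint64, updated *structs.ACLToken) (*structs.ACLToken, error) {
	if err := s.UpsertACLTokens(structs.MsgTypeTestSetup, index, []*structs.ACLToken{updated}); err != nil {
		return nil, err
	}
	// Read back by accessor ID: SecretID matches the original insert, not
	// whatever the caller set on `updated`.
	return s.ACLTokenByAccessorID(nil, updated.AccessorID)
}
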
  5185  // DeleteACLTokens deletes the tokens with the given accessor ids
  5186  func (s *StateStore) DeleteACLTokens(msgType structs.MessageType, index uint64, ids []string) error {
  5187  	txn := s.db.WriteTxnMsgT(msgType, index)
  5188  	defer txn.Abort()
  5189  
  5190  	// Delete the tokens
  5191  	for _, id := range ids {
  5192  		if _, err := txn.DeleteAll("acl_token", "id", id); err != nil {
  5193  			return fmt.Errorf("deleting acl token failed: %v", err)
  5194  		}
  5195  	}
  5196  	if err := txn.Insert("index", &IndexEntry{"acl_token", index}); err != nil {
  5197  		return fmt.Errorf("index update failed: %v", err)
  5198  	}
  5199  	return txn.Commit()
  5200  }
  5201  
  5202  // ACLTokenByAccessorID is used to lookup a token by accessor ID
  5203  func (s *StateStore) ACLTokenByAccessorID(ws memdb.WatchSet, id string) (*structs.ACLToken, error) {
  5204  	if id == "" {
  5205  		return nil, fmt.Errorf("acl token lookup failed: missing accessor id")
  5206  	}
  5207  
  5208  	txn := s.db.ReadTxn()
  5209  
  5210  	watchCh, existing, err := txn.FirstWatch("acl_token", "id", id)
  5211  	if err != nil {
  5212  		return nil, fmt.Errorf("acl token lookup failed: %v", err)
  5213  	}
  5214  	ws.Add(watchCh)
  5215  
  5216  	if existing != nil {
  5217  		return existing.(*structs.ACLToken), nil
  5218  	}
  5219  	return nil, nil
  5220  }
  5221  
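// Example (illustrative sketch, not part of the original source): the blocking
// lookup pattern the WatchSet enables. WatchCtx is go-memdb's context-aware
// wait; the loop re-runs the lookup each time a watched channel fires.
func exampleWaitForToken(ctx context.Context, s *StateStore, accessorID string) (*structs.ACLToken, error) {
	for {
		ws := memdb.NewWatchSet()
		token, err := s.ACLTokenByAccessorID(ws, accessorID)
		if err != nil {
			return nil, err
		}
		if token != nil {
			return token, nil
		}
		// Not present yet: block until the table entry is touched or the
		// context is cancelled, then retry the lookup.
		if err := ws.WatchCtx(ctx); err != nil {
			return nil, err
		}
	}
}
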
  5222  // ACLTokenBySecretID is used to lookup a token by secret ID
  5223  func (s *StateStore) ACLTokenBySecretID(ws memdb.WatchSet, secretID string) (*structs.ACLToken, error) {
  5224  	if secretID == "" {
  5225  		return nil, fmt.Errorf("acl token lookup failed: missing secret id")
  5226  	}
  5227  
  5228  	txn := s.db.ReadTxn()
  5229  
  5230  	watchCh, existing, err := txn.FirstWatch("acl_token", "secret", secretID)
  5231  	if err != nil {
  5232  		return nil, fmt.Errorf("acl token lookup failed: %v", err)
  5233  	}
  5234  	ws.Add(watchCh)
  5235  
  5236  	if existing != nil {
  5237  		return existing.(*structs.ACLToken), nil
  5238  	}
  5239  	return nil, nil
  5240  }
  5241  
  5242  // ACLTokenByAccessorIDPrefix is used to lookup tokens by prefix
  5243  func (s *StateStore) ACLTokenByAccessorIDPrefix(ws memdb.WatchSet, prefix string) (memdb.ResultIterator, error) {
  5244  	txn := s.db.ReadTxn()
  5245  
  5246  	iter, err := txn.Get("acl_token", "id_prefix", prefix)
  5247  	if err != nil {
  5248  		return nil, fmt.Errorf("acl token lookup failed: %v", err)
  5249  	}
  5250  	ws.Add(iter.WatchCh())
  5251  	return iter, nil
  5252  }
  5253  
  5254  // ACLTokens returns an iterator over all the tokens
  5255  func (s *StateStore) ACLTokens(ws memdb.WatchSet) (memdb.ResultIterator, error) {
  5256  	txn := s.db.ReadTxn()
  5257  
  5258  	// Walk the entire table
  5259  	iter, err := txn.Get("acl_token", "id")
  5260  	if err != nil {
  5261  		return nil, err
  5262  	}
  5263  	ws.Add(iter.WatchCh())
  5264  	return iter, nil
  5265  }
  5266  
  5267  // ACLTokensByGlobal returns an iterator over all the tokens filtered by global value
  5268  func (s *StateStore) ACLTokensByGlobal(ws memdb.WatchSet, globalVal bool) (memdb.ResultIterator, error) {
  5269  	txn := s.db.ReadTxn()
  5270  
  5271  	// Walk the entire table
  5272  	iter, err := txn.Get("acl_token", "global", globalVal)
  5273  	if err != nil {
  5274  		return nil, err
  5275  	}
  5276  	ws.Add(iter.WatchCh())
  5277  	return iter, nil
  5278  }
  5279  
  5280  // CanBootstrapACLToken checks if bootstrapping is possible and returns the reset index
  5281  func (s *StateStore) CanBootstrapACLToken() (bool, uint64, error) {
  5282  	txn := s.db.ReadTxn()
  5283  
  5284  	// Lookup the bootstrap sentinel
  5285  	out, err := txn.First("index", "id", "acl_token_bootstrap")
  5286  	if err != nil {
  5287  		return false, 0, err
  5288  	}
  5289  
  5290  	// No entry, we haven't bootstrapped yet
  5291  	if out == nil {
  5292  		return true, 0, nil
  5293  	}
  5294  
  5295  	// Return the reset index if we've already bootstrapped
  5296  	return false, out.(*IndexEntry).Value, nil
  5297  }
  5298  
  5299  // BootstrapACLTokens is used to create an initial ACL token
  5300  func (s *StateStore) BootstrapACLTokens(msgType structs.MessageType, index uint64, resetIndex uint64, token *structs.ACLToken) error {
  5301  	txn := s.db.WriteTxnMsgT(msgType, index)
  5302  	defer txn.Abort()
  5303  
  5304  	// Check if we have already done a bootstrap
  5305  	existing, err := txn.First("index", "id", "acl_token_bootstrap")
  5306  	if err != nil {
  5307  		return fmt.Errorf("bootstrap check failed: %v", err)
  5308  	}
  5309  	if existing != nil {
  5310  		if resetIndex == 0 {
  5311  			return fmt.Errorf("ACL bootstrap already done")
  5312  		} else if resetIndex != existing.(*IndexEntry).Value {
  5313  			return fmt.Errorf("Invalid reset index for ACL bootstrap")
  5314  		}
  5315  	}
  5316  
  5317  	// Update the Create/Modify time
  5318  	token.CreateIndex = index
  5319  	token.ModifyIndex = index
  5320  
  5321  	// Insert the token
  5322  	if err := txn.Insert("acl_token", token); err != nil {
  5323  		return fmt.Errorf("upserting token failed: %v", err)
  5324  	}
  5325  
  5326  	// Update the indexes table; this prevents future bootstraps until reset
  5327  	if err := txn.Insert("index", &IndexEntry{"acl_token", index}); err != nil {
  5328  		return fmt.Errorf("index update failed: %v", err)
  5329  	}
  5330  	if err := txn.Insert("index", &IndexEntry{"acl_token_bootstrap", index}); err != nil {
  5331  		return fmt.Errorf("index update failed: %v", err)
  5332  	}
  5333  	return txn.Commit()
  5334  }
  5335  
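// Example (illustrative sketch, not part of the original source): the
// bootstrap handshake as an ACL endpoint might drive it. A first bootstrap
// needs no reset index; once the sentinel exists, only the recorded reset
// index (returned by CanBootstrapACLToken) allows a re-bootstrap.
func exampleBootstrap(s *StateStore, index uint64, token *structs.ACLToken) error {
	ok, resetIdx, err := s.CanBootstrapACLToken()
	if err != nil {
		return err
	}
	if ok {
		return s.BootstrapACLTokens(structs.MsgTypeTestSetup, index, 0, token)
	}
	return s.BootstrapACLTokens(structs.MsgTypeTestSetup, index, resetIdx, token)
}
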
  5336  // SchedulerConfig is used to get the current Scheduler configuration.
  5337  func (s *StateStore) SchedulerConfig() (uint64, *structs.SchedulerConfiguration, error) {
  5338  	tx := s.db.ReadTxn()
  5339  	defer tx.Abort()
  5340  
  5341  	// Get the scheduler config
  5342  	c, err := tx.First("scheduler_config", "id")
  5343  	if err != nil {
  5344  		return 0, nil, fmt.Errorf("failed scheduler config lookup: %s", err)
  5345  	}
  5346  
  5347  	config, ok := c.(*structs.SchedulerConfiguration)
  5348  	if !ok {
  5349  		return 0, nil, nil
  5350  	}
  5351  
  5352  	return config.ModifyIndex, config, nil
  5353  }
  5354  
  5355  // SchedulerSetConfig is used to set the current Scheduler configuration.
  5356  func (s *StateStore) SchedulerSetConfig(index uint64, config *structs.SchedulerConfiguration) error {
  5357  	tx := s.db.WriteTxn(index)
  5358  	defer tx.Abort()
  5359  
  5360  	if err := s.schedulerSetConfigTxn(index, tx, config); err != nil {
        		return err
        	}
  5361  
  5362  	return tx.Commit()
  5363  }
  5364  
  5365  func (s *StateStore) ClusterMetadata(ws memdb.WatchSet) (*structs.ClusterMetadata, error) {
  5366  	txn := s.db.ReadTxn()
  5367  	defer txn.Abort()
  5368  
  5369  	// Get the cluster metadata
  5370  	watchCh, m, err := txn.FirstWatch("cluster_meta", "id")
  5371  	if err != nil {
  5372  		return nil, errors.Wrap(err, "failed cluster metadata lookup")
  5373  	}
  5374  	ws.Add(watchCh)
  5375  
  5376  	if m != nil {
  5377  		return m.(*structs.ClusterMetadata), nil
  5378  	}
  5379  
  5380  	return nil, nil
  5381  }
  5382  
  5383  func (s *StateStore) ClusterSetMetadata(index uint64, meta *structs.ClusterMetadata) error {
  5384  	txn := s.db.WriteTxn(index)
  5385  	defer txn.Abort()
  5386  
  5387  	if err := s.setClusterMetadata(txn, meta); err != nil {
  5388  		return errors.Wrap(err, "set cluster metadata failed")
  5389  	}
  5390  
  5391  	return txn.Commit()
  5392  }
  5393  
  5394  // WithWriteTransaction executes the passed function within a write transaction,
  5395  // and returns its result.  If the invocation returns no error, the transaction
  5396  // is committed; otherwise, it's aborted.
  5397  func (s *StateStore) WithWriteTransaction(msgType structs.MessageType, index uint64, fn func(Txn) error) error {
  5398  	tx := s.db.WriteTxnMsgT(msgType, index)
  5399  	defer tx.Abort()
  5400  
  5401  	err := fn(tx)
  5402  	if err == nil {
  5403  		return tx.Commit()
  5404  	}
  5405  	return err
  5406  }
  5407  
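// Example (illustrative sketch, not part of the original source): composing
// the transactional helpers under a single commit. Both operations below see
// the same transaction, so a failure in either aborts the whole batch.
func exampleReplacePolicies(s *StateStore, index uint64, deleteIDs []string,
	policies []*structs.ScalingPolicy) error {

	return s.WithWriteTransaction(structs.MsgTypeTestSetup, index, func(tx Txn) error {
		if err := s.DeleteScalingPoliciesTxn(index, deleteIDs, tx); err != nil {
			return err
		}
		return s.UpsertScalingPoliciesTxn(index, policies, tx)
	})
}
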
  5408  // SchedulerCASConfig is used to update the scheduler configuration with a
  5409  // given Raft index. If the CAS index specified is not equal to the last observed index
  5410  // for the config, then the call is a noop.
  5411  func (s *StateStore) SchedulerCASConfig(index, cidx uint64, config *structs.SchedulerConfiguration) (bool, error) {
  5412  	tx := s.db.WriteTxn(index)
  5413  	defer tx.Abort()
  5414  
  5415  	// Check for an existing config
  5416  	existing, err := tx.First("scheduler_config", "id")
  5417  	if err != nil {
  5418  		return false, fmt.Errorf("failed scheduler config lookup: %s", err)
  5419  	}
  5420  
  5421  	// If the existing index does not match the provided CAS
  5422  	// index arg, then we shouldn't update anything and can safely
  5423  	// return early here.
  5424  	e, ok := existing.(*structs.SchedulerConfiguration)
  5425  	if !ok || (e != nil && e.ModifyIndex != cidx) {
  5426  		return false, nil
  5427  	}
  5428  
  5429  	if err := s.schedulerSetConfigTxn(index, tx, config); err != nil {
        		return false, err
        	}
  5430  
  5431  	if err := tx.Commit(); err != nil {
  5432  		return false, err
  5433  	}
  5434  	return true, nil
  5435  }
  5436  
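// Example (illustrative sketch, not part of the original source): the
// read-modify-CAS loop SchedulerCASConfig expects. SchedulerConfig returns the
// ModifyIndex that must be echoed back as cidx; note that on a fresh store
// with no stored config the type assertion above fails and the CAS is a no-op.
func exampleToggleSystemPreemption(s *StateStore, raftIndex uint64) error {
	cidx, config, err := s.SchedulerConfig()
	if err != nil {
		return err
	}
	if config == nil {
		return fmt.Errorf("no scheduler config to update")
	}

	// Never mutate an object returned by the state store; work on a copy.
	updated := *config
	updated.PreemptionConfig.SystemSchedulerEnabled = true

	applied, err := s.SchedulerCASConfig(raftIndex, cidx, &updated)
	if err != nil {
		return err
	}
	if !applied {
		return fmt.Errorf("scheduler config changed since index %d; re-read and retry", cidx)
	}
	return nil
}
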
  5437  func (s *StateStore) schedulerSetConfigTxn(idx uint64, tx *txn, config *structs.SchedulerConfiguration) error {
  5438  	// Check for an existing config
  5439  	existing, err := tx.First("scheduler_config", "id")
  5440  	if err != nil {
  5441  		return fmt.Errorf("failed scheduler config lookup: %s", err)
  5442  	}
  5443  
  5444  	// Set the indexes.
  5445  	if existing != nil {
  5446  		config.CreateIndex = existing.(*structs.SchedulerConfiguration).CreateIndex
  5447  	} else {
  5448  		config.CreateIndex = idx
  5449  	}
  5450  	config.ModifyIndex = idx
  5451  
  5452  	if err := tx.Insert("scheduler_config", config); err != nil {
  5453  		return fmt.Errorf("failed updating scheduler config: %s", err)
  5454  	}
  5455  	return nil
  5456  }
  5457  
  5458  func (s *StateStore) setClusterMetadata(txn *txn, meta *structs.ClusterMetadata) error {
  5459  	// Check for existing metadata; if it exists, sanity check that the cluster ID matches
  5460  	existing, err := txn.First("cluster_meta", "id")
  5461  	if err != nil {
  5462  		return fmt.Errorf("failed cluster meta lookup: %v", err)
  5463  	}
  5464  
  5465  	if existing != nil {
  5466  		existingClusterID := existing.(*structs.ClusterMetadata).ClusterID
  5467  		if meta.ClusterID != existingClusterID && existingClusterID != "" {
  5468  			// there is a bug in cluster ID detection
  5469  			return fmt.Errorf("refusing to set new cluster id, previous: %s, new: %s", existingClusterID, meta.ClusterID)
  5470  		}
  5471  	}
  5472  
  5473  	// update is technically a noop, unless someday we add more / mutable fields
  5474  	if err := txn.Insert("cluster_meta", meta); err != nil {
  5475  		return fmt.Errorf("set cluster metadata failed: %v", err)
  5476  	}
  5477  
  5478  	return nil
  5479  }
  5480  
  5481  // UpsertScalingPolicies is used to insert a set of scaling policies.
  5482  func (s *StateStore) UpsertScalingPolicies(index uint64, scalingPolicies []*structs.ScalingPolicy) error {
  5483  	txn := s.db.WriteTxn(index)
  5484  	defer txn.Abort()
  5485  
  5486  	if err := s.UpsertScalingPoliciesTxn(index, scalingPolicies, txn); err != nil {
  5487  		return err
  5488  	}
  5489  
  5490  	return txn.Commit()
  5491  }
  5492  
  5493  // UpsertScalingPoliciesTxn is used to insert a set of scaling policies within an existing transaction.
  5494  func (s *StateStore) UpsertScalingPoliciesTxn(index uint64, scalingPolicies []*structs.ScalingPolicy,
  5495  	txn *txn) error {
  5496  
  5497  	hadUpdates := false
  5498  
  5499  	for _, policy := range scalingPolicies {
  5500  		// Check if the scaling policy already exists
  5501  		// Policy uniqueness is based on target and type
  5502  		it, err := txn.Get("scaling_policy", "target",
  5503  			policy.Target[structs.ScalingTargetNamespace],
  5504  			policy.Target[structs.ScalingTargetJob],
  5505  			policy.Target[structs.ScalingTargetGroup],
  5506  			policy.Target[structs.ScalingTargetTask],
  5507  		)
  5508  		if err != nil {
  5509  			return fmt.Errorf("scaling policy lookup failed: %v", err)
  5510  		}
  5511  
  5512  		// Check if type matches
  5513  		var existing *structs.ScalingPolicy
  5514  		for raw := it.Next(); raw != nil; raw = it.Next() {
  5515  			p := raw.(*structs.ScalingPolicy)
  5516  			if p.Type == policy.Type {
  5517  				existing = p
  5518  				break
  5519  			}
  5520  		}
  5521  
  5522  		// Setup the indexes correctly
  5523  		if existing != nil {
  5524  			if !existing.Diff(policy) {
  5525  				continue
  5526  			}
  5527  			policy.ID = existing.ID
  5528  			policy.CreateIndex = existing.CreateIndex
  5529  		} else {
  5530  			// policy.ID must have been set already in Job.Register before log apply
  5531  			policy.CreateIndex = index
  5532  		}
  5533  		policy.ModifyIndex = index
  5534  
  5535  		// Insert the scaling policy
  5536  		hadUpdates = true
  5537  		if err := txn.Insert("scaling_policy", policy); err != nil {
  5538  			return err
  5539  		}
  5540  	}
  5541  
  5542  	// Update the indexes table for scaling policy if we updated any policies
  5543  	if hadUpdates {
  5544  		if err := txn.Insert("index", &IndexEntry{"scaling_policy", index}); err != nil {
  5545  			return fmt.Errorf("index update failed: %v", err)
  5546  		}
  5547  	}
  5548  
  5549  	return nil
  5550  }
  5551  
  5552  // NamespaceByName is used to lookup a namespace by name
  5553  func (s *StateStore) NamespaceByName(ws memdb.WatchSet, name string) (*structs.Namespace, error) {
  5554  	txn := s.db.ReadTxn()
  5555  	return s.namespaceByNameImpl(ws, txn, name)
  5556  }
  5557  
  5558  // namespaceByNameImpl is used to lookup a namespace by name
  5559  func (s *StateStore) namespaceByNameImpl(ws memdb.WatchSet, txn *txn, name string) (*structs.Namespace, error) {
  5560  	watchCh, existing, err := txn.FirstWatch(TableNamespaces, "id", name)
  5561  	if err != nil {
  5562  		return nil, fmt.Errorf("namespace lookup failed: %v", err)
  5563  	}
  5564  	ws.Add(watchCh)
  5565  
  5566  	if existing != nil {
  5567  		return existing.(*structs.Namespace), nil
  5568  	}
  5569  	return nil, nil
  5570  }
  5571  
  5572  // namespaceExists returns whether a namespace exists
  5573  func (s *StateStore) namespaceExists(txn *txn, namespace string) (bool, error) {
  5574  	if namespace == structs.DefaultNamespace {
  5575  		return true, nil
  5576  	}
  5577  
  5578  	existing, err := txn.First(TableNamespaces, "id", namespace)
  5579  	if err != nil {
  5580  		return false, fmt.Errorf("namespace lookup failed: %v", err)
  5581  	}
  5582  
  5583  	return existing != nil, nil
  5584  }
  5585  
  5586  // NamespacesByNamePrefix is used to lookup namespaces by prefix
  5587  func (s *StateStore) NamespacesByNamePrefix(ws memdb.WatchSet, namePrefix string) (memdb.ResultIterator, error) {
  5588  	txn := s.db.ReadTxn()
  5589  
  5590  	iter, err := txn.Get(TableNamespaces, "id_prefix", namePrefix)
  5591  	if err != nil {
  5592  		return nil, fmt.Errorf("namespaces lookup failed: %v", err)
  5593  	}
  5594  	ws.Add(iter.WatchCh())
  5595  
  5596  	return iter, nil
  5597  }
  5598  
  5599  // Namespaces returns an iterator over all the namespaces
  5600  func (s *StateStore) Namespaces(ws memdb.WatchSet) (memdb.ResultIterator, error) {
  5601  	txn := s.db.ReadTxn()
  5602  
  5603  	// Walk the entire namespace table
  5604  	iter, err := txn.Get(TableNamespaces, "id")
  5605  	if err != nil {
  5606  		return nil, err
  5607  	}
  5608  	ws.Add(iter.WatchCh())
  5609  	return iter, nil
  5610  }
  5611  
  5612  func (s *StateStore) NamespaceNames() ([]string, error) {
  5613  	it, err := s.Namespaces(nil)
  5614  	if err != nil {
  5615  		return nil, err
  5616  	}
  5617  
  5618  	nses := []string{}
  5619  	for {
  5620  		next := it.Next()
  5621  		if next == nil {
  5622  			break
  5623  		}
  5624  		ns := next.(*structs.Namespace)
  5625  		nses = append(nses, ns.Name)
  5626  	}
  5627  
  5628  	return nses, nil
  5629  }
  5630  
  5631  // UpsertNamespaces is used to register or update a set of namespaces
  5632  func (s *StateStore) UpsertNamespaces(index uint64, namespaces []*structs.Namespace) error {
  5633  	txn := s.db.WriteTxn(index)
  5634  	defer txn.Abort()
  5635  
  5636  	for _, ns := range namespaces {
  5637  		if err := s.upsertNamespaceImpl(index, txn, ns); err != nil {
  5638  			return err
  5639  		}
  5640  	}
  5641  
  5642  	if err := txn.Insert("index", &IndexEntry{TableNamespaces, index}); err != nil {
  5643  		return fmt.Errorf("index update failed: %v", err)
  5644  	}
  5645  
  5646  	return txn.Commit()
  5647  }
  5648  
  5649  // upsertNamespaceImpl is used to upsert a namespace
  5650  func (s *StateStore) upsertNamespaceImpl(index uint64, txn *txn, namespace *structs.Namespace) error {
  5651  	// Ensure the namespace hash is non-nil. This should be done outside the state store
  5652  	// for performance reasons, but we check here for defense in depth.
  5653  	ns := namespace
  5654  	if len(ns.Hash) == 0 {
  5655  		ns.SetHash()
  5656  	}
  5657  
  5658  	// Check if the namespace already exists
  5659  	existing, err := txn.First(TableNamespaces, "id", ns.Name)
  5660  	if err != nil {
  5661  		return fmt.Errorf("namespace lookup failed: %v", err)
  5662  	}
  5663  
  5664  	// Setup the indexes correctly and determine which quotas need to be
  5665  	// reconciled
  5666  	var oldQuota string
  5667  	if existing != nil {
  5668  		exist := existing.(*structs.Namespace)
  5669  		ns.CreateIndex = exist.CreateIndex
  5670  		ns.ModifyIndex = index
  5671  
  5672  		// Grab the old quota on the namespace
  5673  		oldQuota = exist.Quota
  5674  	} else {
  5675  		ns.CreateIndex = index
  5676  		ns.ModifyIndex = index
  5677  	}
  5678  
  5679  	// Validate that the quota on the new namespace exists
  5680  	if ns.Quota != "" {
  5681  		exists, err := s.quotaSpecExists(txn, ns.Quota)
  5682  		if err != nil {
  5683  			return fmt.Errorf("looking up namespace quota %q failed: %v", ns.Quota, err)
  5684  		} else if !exists {
  5685  			return fmt.Errorf("namespace %q using non-existent quota %q", ns.Name, ns.Quota)
  5686  		}
  5687  	}
  5688  
  5689  	// Insert the namespace
  5690  	if err := txn.Insert(TableNamespaces, ns); err != nil {
  5691  		return fmt.Errorf("namespace insert failed: %v", err)
  5692  	}
  5693  
  5694  	// Reconcile changed quotas
  5695  	return s.quotaReconcile(index, txn, ns.Quota, oldQuota)
  5696  }
  5697  
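// Example (illustrative sketch, not part of the original source): registering
// a namespace and reading it back. The quota field is left empty so the
// quotaSpecExists validation above is not exercised.
func exampleUpsertNamespace(s *StateStore, index uint64) (*structs.Namespace, error) {
	ns := &structs.Namespace{
		Name:        "staging",
		Description: "pre-production jobs",
	}
	ns.SetHash()

	if err := s.UpsertNamespaces(index, []*structs.Namespace{ns}); err != nil {
		return nil, err
	}
	return s.NamespaceByName(nil, "staging")
}
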
  5698  // DeleteNamespaces is used to remove a set of namespaces
  5699  func (s *StateStore) DeleteNamespaces(index uint64, names []string) error {
  5700  	txn := s.db.WriteTxn(index)
  5701  	defer txn.Abort()
  5702  
  5703  	for _, name := range names {
  5704  		// Lookup the namespace
  5705  		existing, err := txn.First(TableNamespaces, "id", name)
  5706  		if err != nil {
  5707  			return fmt.Errorf("namespace lookup failed: %v", err)
  5708  		}
  5709  		if existing == nil {
  5710  			return fmt.Errorf("namespace not found")
  5711  		}
  5712  
  5713  		ns := existing.(*structs.Namespace)
  5714  		if ns.Name == structs.DefaultNamespace {
  5715  			return fmt.Errorf("default namespace can not be deleted")
  5716  		}
  5717  
  5718  		// Ensure that the namespace doesn't have any non-terminal jobs
  5719  		iter, err := s.jobsByNamespaceImpl(nil, name, txn)
  5720  		if err != nil {
  5721  			return err
  5722  		}
  5723  
  5724  		for {
  5725  			raw := iter.Next()
  5726  			if raw == nil {
  5727  				break
  5728  			}
  5729  			job := raw.(*structs.Job)
  5730  
  5731  			if job.Status != structs.JobStatusDead {
  5732  				return fmt.Errorf("namespace %q contains at least one non-terminal job %q. "+
  5733  					"All jobs must be terminal in namespace before it can be deleted", name, job.ID)
  5734  			}
  5735  		}
  5736  
  5737  		// Delete the namespace
  5738  		if err := txn.Delete(TableNamespaces, existing); err != nil {
  5739  			return fmt.Errorf("namespace deletion failed: %v", err)
  5740  		}
  5741  	}
  5742  
  5743  	if err := txn.Insert("index", &IndexEntry{TableNamespaces, index}); err != nil {
  5744  		return fmt.Errorf("index update failed: %v", err)
  5745  	}
  5746  
  5747  	return txn.Commit()
  5748  }
  5749  
  5750  func (s *StateStore) DeleteScalingPolicies(index uint64, ids []string) error {
  5751  	txn := s.db.WriteTxn(index)
  5752  	defer txn.Abort()
  5753  
  5754  	err := s.DeleteScalingPoliciesTxn(index, ids, txn)
  5755  	if err == nil {
  5756  		return txn.Commit()
  5757  	}
  5758  
  5759  	return err
  5760  }
  5761  
  5762  // DeleteScalingPoliciesTxn is used to delete a set of scaling policies by ID within an existing transaction
  5763  func (s *StateStore) DeleteScalingPoliciesTxn(index uint64, ids []string, txn *txn) error {
  5764  	if len(ids) == 0 {
  5765  		return nil
  5766  	}
  5767  
  5768  	for _, id := range ids {
  5769  		// Lookup the scaling policy
  5770  		existing, err := txn.First("scaling_policy", "id", id)
  5771  		if err != nil {
  5772  			return fmt.Errorf("scaling policy lookup failed: %v", err)
  5773  		}
  5774  		if existing == nil {
  5775  			return fmt.Errorf("scaling policy not found")
  5776  		}
  5777  
  5778  		// Delete the scaling policy
  5779  		if err := txn.Delete("scaling_policy", existing); err != nil {
  5780  			return fmt.Errorf("scaling policy delete failed: %v", err)
  5781  		}
  5782  	}
  5783  
  5784  	if err := txn.Insert("index", &IndexEntry{"scaling_policy", index}); err != nil {
  5785  		return fmt.Errorf("index update failed: %v", err)
  5786  	}
  5787  
  5788  	return nil
  5789  }
  5790  
  5791  // ScalingPolicies returns an iterator over all the scaling policies
  5792  func (s *StateStore) ScalingPolicies(ws memdb.WatchSet) (memdb.ResultIterator, error) {
  5793  	txn := s.db.ReadTxn()
  5794  
  5795  	// Walk the entire scaling_policy table
  5796  	iter, err := txn.Get("scaling_policy", "id")
  5797  	if err != nil {
  5798  		return nil, err
  5799  	}
  5800  
  5801  	ws.Add(iter.WatchCh())
  5802  
  5803  	return iter, nil
  5804  }
  5805  
  5806  // ScalingPoliciesByTypePrefix returns an iterator over scaling policies with a certain type prefix.
  5807  func (s *StateStore) ScalingPoliciesByTypePrefix(ws memdb.WatchSet, t string) (memdb.ResultIterator, error) {
  5808  	txn := s.db.ReadTxn()
  5809  
  5810  	iter, err := txn.Get("scaling_policy", "type_prefix", t)
  5811  	if err != nil {
  5812  		return nil, err
  5813  	}
  5814  
  5815  	ws.Add(iter.WatchCh())
  5816  	return iter, nil
  5817  }
  5818  
  5819  func (s *StateStore) ScalingPoliciesByNamespace(ws memdb.WatchSet, namespace, typ string) (memdb.ResultIterator, error) {
  5820  	txn := s.db.ReadTxn()
  5821  
  5822  	iter, err := txn.Get("scaling_policy", "target_prefix", namespace)
  5823  	if err != nil {
  5824  		return nil, err
  5825  	}
  5826  
  5827  	ws.Add(iter.WatchCh())
  5828  
  5829  	// Wrap the iterator in a filter to exact match the namespace
  5830  	iter = memdb.NewFilterIterator(iter, scalingPolicyNamespaceFilter(namespace))
  5831  
  5832  	// If policy type is specified as well, wrap again
  5833  	if typ != "" {
  5834  		iter = memdb.NewFilterIterator(iter, func(raw interface{}) bool {
  5835  			p, ok := raw.(*structs.ScalingPolicy)
  5836  			if !ok {
  5837  				return true
  5838  			}
  5839  			return !strings.HasPrefix(p.Type, typ)
  5840  		})
  5841  	}
  5842  
  5843  	return iter, nil
  5844  }
  5845  
  5846  func (s *StateStore) ScalingPoliciesByJob(ws memdb.WatchSet, namespace, jobID, policyType string) (memdb.ResultIterator,
  5847  	error) {
  5848  	txn := s.db.ReadTxn()
  5849  	iter, err := s.ScalingPoliciesByJobTxn(ws, namespace, jobID, txn)
  5850  	if err != nil {
  5851  		return nil, err
  5852  	}
  5853  
  5854  	if policyType == "" {
  5855  		return iter, nil
  5856  	}
  5857  
  5858  	filter := func(raw interface{}) bool {
  5859  		p, ok := raw.(*structs.ScalingPolicy)
  5860  		if !ok {
  5861  			return true
  5862  		}
  5863  		return policyType != p.Type
  5864  	}
  5865  
  5866  	return memdb.NewFilterIterator(iter, filter), nil
  5867  }
  5868  
  5869  func (s *StateStore) ScalingPoliciesByJobTxn(ws memdb.WatchSet, namespace, jobID string,
  5870  	txn *txn) (memdb.ResultIterator, error) {
  5871  
  5872  	iter, err := txn.Get("scaling_policy", "target_prefix", namespace, jobID)
  5873  	if err != nil {
  5874  		return nil, err
  5875  	}
  5876  
  5877  	ws.Add(iter.WatchCh())
  5878  
  5879  	filter := func(raw interface{}) bool {
  5880  		d, ok := raw.(*structs.ScalingPolicy)
  5881  		if !ok {
  5882  			return true
  5883  		}
  5884  
  5885  		return d.Target[structs.ScalingTargetJob] != jobID
  5886  	}
  5887  
  5888  	// Wrap the iterator in a filter
  5889  	wrap := memdb.NewFilterIterator(iter, filter)
  5890  	return wrap, nil
  5891  }
  5892  
  5893  func (s *StateStore) ScalingPolicyByID(ws memdb.WatchSet, id string) (*structs.ScalingPolicy, error) {
  5894  	txn := s.db.ReadTxn()
  5895  
  5896  	watchCh, existing, err := txn.FirstWatch("scaling_policy", "id", id)
  5897  	if err != nil {
  5898  		return nil, fmt.Errorf("scaling_policy lookup failed: %v", err)
  5899  	}
  5900  	ws.Add(watchCh)
  5901  
  5902  	if existing != nil {
  5903  		return existing.(*structs.ScalingPolicy), nil
  5904  	}
  5905  
  5906  	return nil, nil
  5907  }
  5908  
  5909  // ScalingPolicyByTargetAndType returns a fully-qualified policy against a target and policy type,
  5910  // or nil if it does not exist. This method does not honor the watchset on the policy type, just the target.
  5911  func (s *StateStore) ScalingPolicyByTargetAndType(ws memdb.WatchSet, target map[string]string, typ string) (*structs.ScalingPolicy,
  5912  	error) {
  5913  	txn := s.db.ReadTxn()
  5914  
  5915  	namespace := target[structs.ScalingTargetNamespace]
  5916  	job := target[structs.ScalingTargetJob]
  5917  	group := target[structs.ScalingTargetGroup]
  5918  	task := target[structs.ScalingTargetTask]
  5919  
  5920  	it, err := txn.Get("scaling_policy", "target", namespace, job, group, task)
  5921  	if err != nil {
  5922  		return nil, fmt.Errorf("scaling_policy lookup failed: %v", err)
  5923  	}
  5924  
  5925  	ws.Add(it.WatchCh())
  5926  
  5927  	// Check for type
  5928  	var existing *structs.ScalingPolicy
  5929  	for raw := it.Next(); raw != nil; raw = it.Next() {
  5930  		p := raw.(*structs.ScalingPolicy)
  5931  		if p.Type == typ {
  5932  			existing = p
  5933  			break
  5934  		}
  5935  	}
  5936  
  5937  	if existing != nil {
  5938  		return existing, nil
  5939  	}
  5940  
  5941  	return nil, nil
  5942  }
  5943  
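// Example (illustrative sketch, not part of the original source): querying a
// group-level policy by its fully-qualified target. The key constants are the
// ones indexed above; structs.ScalingPolicyTypeHorizontal is assumed to be the
// standard horizontal policy type constant.
func exampleGroupPolicy(s *StateStore, ns, job, group string) (*structs.ScalingPolicy, error) {
	target := map[string]string{
		structs.ScalingTargetNamespace: ns,
		structs.ScalingTargetJob:       job,
		structs.ScalingTargetGroup:     group,
		// Task is left empty for a group-level policy.
	}
	return s.ScalingPolicyByTargetAndType(nil, target, structs.ScalingPolicyTypeHorizontal)
}
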
  5944  func (s *StateStore) ScalingPoliciesByIDPrefix(ws memdb.WatchSet, namespace string, prefix string) (memdb.ResultIterator, error) {
  5945  	txn := s.db.ReadTxn()
  5946  
  5947  	iter, err := txn.Get("scaling_policy", "id_prefix", prefix)
  5948  	if err != nil {
  5949  		return nil, fmt.Errorf("scaling policy lookup failed: %v", err)
  5950  	}
  5951  
  5952  	ws.Add(iter.WatchCh())
  5953  
  5954  	iter = memdb.NewFilterIterator(iter, scalingPolicyNamespaceFilter(namespace))
  5955  
  5956  	return iter, nil
  5957  }
  5958  
  5959  // scalingPolicyNamespaceFilter returns a filter function that filters all
  5960  // scaling policies not targeting the given namespace.
  5961  func scalingPolicyNamespaceFilter(namespace string) func(interface{}) bool {
  5962  	return func(raw interface{}) bool {
  5963  		p, ok := raw.(*structs.ScalingPolicy)
  5964  		if !ok {
  5965  			return true
  5966  		}
  5967  
  5968  		return p.Target[structs.ScalingTargetNamespace] != namespace
  5969  	}
  5970  }
  5971  
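// Note the inverted semantics: memdb.NewFilterIterator drops any object for
// which the filter returns true, so these predicates return true to exclude.
// Illustrative sketch (not part of the original source) composing the
// namespace filter with an exact-match type filter, as ScalingPoliciesByJob
// does:
func exampleFilterPolicies(s *StateStore, namespace, typ string) (memdb.ResultIterator, error) {
	iter, err := s.ScalingPolicies(nil)
	if err != nil {
		return nil, err
	}
	iter = memdb.NewFilterIterator(iter, scalingPolicyNamespaceFilter(namespace))
	iter = memdb.NewFilterIterator(iter, func(raw interface{}) bool {
		p, ok := raw.(*structs.ScalingPolicy)
		return !ok || p.Type != typ // true drops the object
	})
	return iter, nil
}
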
  5972  // StateSnapshot is used to provide a point-in-time snapshot
  5973  type StateSnapshot struct {
  5974  	StateStore
  5975  }
  5976  
  5977  // DenormalizeAllocationsMap takes in a map of nodes to allocations, and queries the
  5978  // Allocation for each of the Allocation diffs and merges the updated attributes with
  5979  // the existing Allocation, and attaches the Job provided
  5980  func (s *StateSnapshot) DenormalizeAllocationsMap(nodeAllocations map[string][]*structs.Allocation) error {
  5981  	for nodeID, allocs := range nodeAllocations {
  5982  		denormalizedAllocs, err := s.DenormalizeAllocationSlice(allocs)
  5983  		if err != nil {
  5984  			return err
  5985  		}
  5986  
  5987  		nodeAllocations[nodeID] = denormalizedAllocs
  5988  	}
  5989  	return nil
  5990  }
  5991  
  5992  // DenormalizeAllocationSlice queries the Allocation for each allocation diff
  5993  // represented as an Allocation and merges the updated attributes with the existing
  5994  // Allocation, and attaches the Job provided.
  5995  //
  5996  // This should only be called on terminal allocs, particularly stopped or preempted allocs
  5997  func (s *StateSnapshot) DenormalizeAllocationSlice(allocs []*structs.Allocation) ([]*structs.Allocation, error) {
  5998  	allocDiffs := make([]*structs.AllocationDiff, len(allocs))
  5999  	for i, alloc := range allocs {
  6000  		allocDiffs[i] = alloc.AllocationDiff()
  6001  	}
  6002  
  6003  	return s.DenormalizeAllocationDiffSlice(allocDiffs)
  6004  }
  6005  
  6006  // DenormalizeAllocationDiffSlice queries the Allocation for each AllocationDiff and merges
  6007  // the updated attributes with the existing Allocation, and attaches the Job provided.
  6008  //
  6009  // This should only be called on terminal allocs, particularly stopped or preempted allocs
  6010  func (s *StateSnapshot) DenormalizeAllocationDiffSlice(allocDiffs []*structs.AllocationDiff) ([]*structs.Allocation, error) {
  6011  	// Output index for denormalized Allocations
  6012  	j := 0
  6013  
  6014  	denormalizedAllocs := make([]*structs.Allocation, len(allocDiffs))
  6015  	for _, allocDiff := range allocDiffs {
  6016  		alloc, err := s.AllocByID(nil, allocDiff.ID)
  6017  		if err != nil {
  6018  			return nil, fmt.Errorf("alloc lookup failed: %v", err)
  6019  		}
  6020  		if alloc == nil {
  6021  			return nil, fmt.Errorf("alloc %v doesn't exist", allocDiff.ID)
  6022  		}
  6023  
  6024  		// Merge the updates into the Allocation. Don't update alloc.Job for terminal
  6025  		// allocs, so the alloc keeps the last Job view from before destruction and handlers stay simple
  6026  		allocCopy := alloc.Copy()
  6027  
  6028  		if allocDiff.PreemptedByAllocation != "" {
  6029  			allocCopy.PreemptedByAllocation = allocDiff.PreemptedByAllocation
  6030  			allocCopy.DesiredDescription = getPreemptedAllocDesiredDescription(allocDiff.PreemptedByAllocation)
  6031  			allocCopy.DesiredStatus = structs.AllocDesiredStatusEvict
  6032  		} else {
  6033  			// If alloc is a stopped alloc
  6034  			allocCopy.DesiredDescription = allocDiff.DesiredDescription
  6035  			allocCopy.DesiredStatus = structs.AllocDesiredStatusStop
  6036  			if allocDiff.ClientStatus != "" {
  6037  				allocCopy.ClientStatus = allocDiff.ClientStatus
  6038  			}
  6039  			if allocDiff.FollowupEvalID != "" {
  6040  				allocCopy.FollowupEvalID = allocDiff.FollowupEvalID
  6041  			}
  6042  		}
  6043  		if allocDiff.ModifyTime != 0 {
  6044  			allocCopy.ModifyTime = allocDiff.ModifyTime
  6045  		}
  6046  
  6047  		// Add the denormalized alloc to the output slice
  6048  		denormalizedAllocs[j] = allocCopy
  6049  		j++
  6050  	}
  6051  	// Retain only the denormalized Allocations in the slice
  6052  	denormalizedAllocs = denormalizedAllocs[:j]
  6053  	return denormalizedAllocs, nil
  6054  }
  6055  
  6056  func getPreemptedAllocDesiredDescription(preemptedByAllocID string) string {
  6057  	return fmt.Sprintf("Preempted by alloc ID %v", preemptedByAllocID)
  6058  }
  6059  
  6060  // StateRestore is used to optimize the performance when
  6061  // restoring state by only using a single large transaction
  6062  // instead of thousands of sub transactions
  6063  type StateRestore struct {
  6064  	txn *txn
  6065  }
  6066  
  6067  // Abort is used to abort the restore operation
  6068  func (s *StateRestore) Abort() {
  6069  	s.txn.Abort()
  6070  }
  6071  
  6072  // Commit is used to commit the restore operation
  6073  func (s *StateRestore) Commit() error {
  6074  	return s.txn.Commit()
  6075  }
  6076  
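// Example (illustrative sketch, not part of the original source): the restore
// flow as the FSM's snapshot-restore path drives it, assuming the Restore
// constructor defined earlier in this file. All inserts share one large
// transaction, which is the whole point of StateRestore.
func exampleRestoreNodes(s *StateStore, nodes []*structs.Node) error {
	restore, err := s.Restore()
	if err != nil {
		return err
	}
	for _, node := range nodes {
		if err := restore.NodeRestore(node); err != nil {
			restore.Abort()
			return err
		}
	}
	return restore.Commit()
}
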
  6077  // NodeRestore is used to restore a node
  6078  func (r *StateRestore) NodeRestore(node *structs.Node) error {
  6079  	if err := r.txn.Insert("nodes", node); err != nil {
  6080  		return fmt.Errorf("node insert failed: %v", err)
  6081  	}
  6082  	return nil
  6083  }
  6084  
  6085  // JobRestore is used to restore a job
  6086  func (r *StateRestore) JobRestore(job *structs.Job) error {
  6087  	if err := r.txn.Insert("jobs", job); err != nil {
  6088  		return fmt.Errorf("job insert failed: %v", err)
  6089  	}
  6090  	return nil
  6091  }
  6092  
  6093  // EvalRestore is used to restore an evaluation
  6094  func (r *StateRestore) EvalRestore(eval *structs.Evaluation) error {
  6095  	if err := r.txn.Insert("evals", eval); err != nil {
  6096  		return fmt.Errorf("eval insert failed: %v", err)
  6097  	}
  6098  	return nil
  6099  }
  6100  
  6101  // AllocRestore is used to restore an allocation
  6102  func (r *StateRestore) AllocRestore(alloc *structs.Allocation) error {
  6103  	if err := r.txn.Insert("allocs", alloc); err != nil {
  6104  		return fmt.Errorf("alloc insert failed: %v", err)
  6105  	}
  6106  	return nil
  6107  }
  6108  
  6109  // IndexRestore is used to restore an index
  6110  func (r *StateRestore) IndexRestore(idx *IndexEntry) error {
  6111  	if err := r.txn.Insert("index", idx); err != nil {
  6112  		return fmt.Errorf("index insert failed: %v", err)
  6113  	}
  6114  	return nil
  6115  }
  6116  
  6117  // PeriodicLaunchRestore is used to restore a periodic launch.
  6118  func (r *StateRestore) PeriodicLaunchRestore(launch *structs.PeriodicLaunch) error {
  6119  	if err := r.txn.Insert("periodic_launch", launch); err != nil {
  6120  		return fmt.Errorf("periodic launch insert failed: %v", err)
  6121  	}
  6122  	return nil
  6123  }
  6124  
  6125  // JobSummaryRestore is used to restore a job summary
  6126  func (r *StateRestore) JobSummaryRestore(jobSummary *structs.JobSummary) error {
  6127  	if err := r.txn.Insert("job_summary", jobSummary); err != nil {
  6128  		return fmt.Errorf("job summary insert failed: %v", err)
  6129  	}
  6130  	return nil
  6131  }
  6132  
  6133  // JobVersionRestore is used to restore a job version
  6134  func (r *StateRestore) JobVersionRestore(version *structs.Job) error {
  6135  	if err := r.txn.Insert("job_version", version); err != nil {
  6136  		return fmt.Errorf("job version insert failed: %v", err)
  6137  	}
  6138  	return nil
  6139  }
  6140  
  6141  // DeploymentRestore is used to restore a deployment
  6142  func (r *StateRestore) DeploymentRestore(deployment *structs.Deployment) error {
  6143  	if err := r.txn.Insert("deployment", deployment); err != nil {
  6144  		return fmt.Errorf("deployment insert failed: %v", err)
  6145  	}
  6146  	return nil
  6147  }
  6148  
  6149  // VaultAccessorRestore is used to restore a vault accessor
  6150  func (r *StateRestore) VaultAccessorRestore(accessor *structs.VaultAccessor) error {
  6151  	if err := r.txn.Insert("vault_accessors", accessor); err != nil {
  6152  		return fmt.Errorf("vault accessor insert failed: %v", err)
  6153  	}
  6154  	return nil
  6155  }
  6156  
  6157  // SITokenAccessorRestore is used to restore an SI token accessor
  6158  func (r *StateRestore) SITokenAccessorRestore(accessor *structs.SITokenAccessor) error {
  6159  	if err := r.txn.Insert(siTokenAccessorTable, accessor); err != nil {
  6160  		return errors.Wrap(err, "si token accessor insert failed")
  6161  	}
  6162  	return nil
  6163  }
  6164  
  6165  // ACLPolicyRestore is used to restore an ACL policy
  6166  func (r *StateRestore) ACLPolicyRestore(policy *structs.ACLPolicy) error {
  6167  	if err := r.txn.Insert("acl_policy", policy); err != nil {
  6168  		return fmt.Errorf("inserting acl policy failed: %v", err)
  6169  	}
  6170  	return nil
  6171  }
  6172  
  6173  // ACLTokenRestore is used to restore an ACL token
  6174  func (r *StateRestore) ACLTokenRestore(token *structs.ACLToken) error {
  6175  	if err := r.txn.Insert("acl_token", token); err != nil {
  6176  		return fmt.Errorf("inserting acl token failed: %v", err)
  6177  	}
  6178  	return nil
  6179  }
  6180  
  6181  func (r *StateRestore) SchedulerConfigRestore(schedConfig *structs.SchedulerConfiguration) error {
  6182  	if err := r.txn.Insert("scheduler_config", schedConfig); err != nil {
  6183  		return fmt.Errorf("inserting scheduler config failed: %s", err)
  6184  	}
  6185  	return nil
  6186  }
  6187  
  6188  func (r *StateRestore) ClusterMetadataRestore(meta *structs.ClusterMetadata) error {
  6189  	if err := r.txn.Insert("cluster_meta", meta); err != nil {
  6190  		return fmt.Errorf("inserting cluster meta failed: %v", err)
  6191  	}
  6192  	return nil
  6193  }
  6194  
  6195  // ScalingPolicyRestore is used to restore a scaling policy
  6196  func (r *StateRestore) ScalingPolicyRestore(scalingPolicy *structs.ScalingPolicy) error {
  6197  	if err := r.txn.Insert("scaling_policy", scalingPolicy); err != nil {
  6198  		return fmt.Errorf("scaling policy insert failed: %v", err)
  6199  	}
  6200  	return nil
  6201  }
  6202  
  6203  // CSIPluginRestore is used to restore a CSI plugin
  6204  func (r *StateRestore) CSIPluginRestore(plugin *structs.CSIPlugin) error {
  6205  	if err := r.txn.Insert("csi_plugins", plugin); err != nil {
  6206  		return fmt.Errorf("csi plugin insert failed: %v", err)
  6207  	}
  6208  	return nil
  6209  }
  6210  
  6211  // CSIVolumeRestore is used to restore a CSI volume
  6212  func (r *StateRestore) CSIVolumeRestore(volume *structs.CSIVolume) error {
  6213  	if err := r.txn.Insert("csi_volumes", volume); err != nil {
  6214  		return fmt.Errorf("csi volume insert failed: %v", err)
  6215  	}
  6216  	return nil
  6217  }
  6218  
  6219  // ScalingEventsRestore is used to restore scaling events for a job
  6220  func (r *StateRestore) ScalingEventsRestore(jobEvents *structs.JobScalingEvents) error {
  6221  	if err := r.txn.Insert("scaling_event", jobEvents); err != nil {
  6222  		return fmt.Errorf("scaling event insert failed: %v", err)
  6223  	}
  6224  	return nil
  6225  }
  6226  
  6227  // NamespaceRestore is used to restore a namespace
  6228  func (r *StateRestore) NamespaceRestore(ns *structs.Namespace) error {
  6229  	if err := r.txn.Insert(TableNamespaces, ns); err != nil {
  6230  		return fmt.Errorf("namespace insert failed: %v", err)
  6231  	}
  6232  	return nil
  6233  }