github.com/bigcommerce/nomad@v0.9.3-bc/nomad/state/state_store.go (about)

     1  package state
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"sort"
     7  	"time"
     8  
     9  	"reflect"
    10  
    11  	log "github.com/hashicorp/go-hclog"
    12  	memdb "github.com/hashicorp/go-memdb"
    13  	multierror "github.com/hashicorp/go-multierror"
    14  	"github.com/hashicorp/nomad/helper"
    15  	"github.com/hashicorp/nomad/nomad/structs"
    16  )
    17  
    18  // Txn is a transaction against a state store.
    19  // This can be a read or write transaction.
    20  type Txn = *memdb.Txn
    21  
    22  const (
    23  	// NodeRegisterEventReregistered is the message used when the node becomes
    24  	// reregistered.
    25  	NodeRegisterEventRegistered = "Node registered"
    26  
    27  	// NodeRegisterEventReregistered is the message used when the node becomes
    28  	// reregistered.
    29  	NodeRegisterEventReregistered = "Node re-registered"
    30  )
    31  
    32  // IndexEntry is used with the "index" table
    33  // for managing the latest Raft index affecting a table.
    34  type IndexEntry struct {
    35  	Key   string
    36  	Value uint64
    37  }
    38  
    39  // StateStoreConfig is used to configure a new state store
    40  type StateStoreConfig struct {
    41  	// Logger is used to output the state store's logs
    42  	Logger log.Logger
    43  
    44  	// Region is the region of the server embedding the state store.
    45  	Region string
    46  }
    47  
    48  // The StateStore is responsible for maintaining all the Nomad
    49  // state. It is manipulated by the FSM which maintains consistency
    50  // through the use of Raft. The goals of the StateStore are to provide
    51  // high concurrency for read operations without blocking writes, and
    52  // to provide write availability in the face of reads. EVERY object
    53  // returned as a result of a read against the state store should be
    54  // considered a constant and NEVER modified in place.
    55  type StateStore struct {
    56  	logger log.Logger
    57  	db     *memdb.MemDB
    58  
    59  	// config is the passed in configuration
    60  	config *StateStoreConfig
    61  
    62  	// abandonCh is used to signal watchers that this state store has been
    63  	// abandoned (usually during a restore). This is only ever closed.
    64  	abandonCh chan struct{}
    65  }
    66  
    67  // NewStateStore is used to create a new state store
    68  func NewStateStore(config *StateStoreConfig) (*StateStore, error) {
    69  	// Create the MemDB
    70  	db, err := memdb.NewMemDB(stateStoreSchema())
    71  	if err != nil {
    72  		return nil, fmt.Errorf("state store setup failed: %v", err)
    73  	}
    74  
    75  	// Create the state store
    76  	s := &StateStore{
    77  		logger:    config.Logger.Named("state_store"),
    78  		db:        db,
    79  		config:    config,
    80  		abandonCh: make(chan struct{}),
    81  	}
    82  	return s, nil
    83  }
    84  
    85  // Config returns the state store configuration.
    86  func (s *StateStore) Config() *StateStoreConfig {
    87  	return s.config
    88  }
    89  
    90  // Snapshot is used to create a point in time snapshot. Because
    91  // we use MemDB, we just need to snapshot the state of the underlying
    92  // database.
    93  func (s *StateStore) Snapshot() (*StateSnapshot, error) {
    94  	snap := &StateSnapshot{
    95  		StateStore: StateStore{
    96  			logger: s.logger,
    97  			config: s.config,
    98  			db:     s.db.Snapshot(),
    99  		},
   100  	}
   101  	return snap, nil
   102  }
   103  
   104  // SnapshotAfter is used to create a point in time snapshot where the index is
   105  // guaranteed to be greater than or equal to the index parameter.
   106  //
   107  // Some server operations (such as scheduling) exchange objects via RPC
   108  // concurrent with Raft log application, so they must ensure the state store
   109  // snapshot they are operating on is at or after the index the objects
   110  // retrieved via RPC were applied to the Raft log at.
   111  //
   112  // Callers should maintain their own timer metric as the time this method
   113  // blocks indicates Raft log application latency relative to scheduling.
   114  func (s *StateStore) SnapshotAfter(ctx context.Context, index uint64) (*StateSnapshot, error) {
   115  	// Ported from work.go:waitForIndex prior to 0.9
   116  
   117  	const backoffBase = 20 * time.Millisecond
   118  	const backoffLimit = 1 * time.Second
   119  	var retries uint
   120  	var retryTimer *time.Timer
   121  
   122  	// XXX: Potential optimization is to set up a watch on the state
   123  	// store's index table and only unblock via a trigger rather than
   124  	// polling.
   125  	for {
   126  		// Get the states current index
   127  		snapshotIndex, err := s.LatestIndex()
   128  		if err != nil {
   129  			return nil, fmt.Errorf("failed to determine state store's index: %v", err)
   130  		}
   131  
   132  		// We only need the FSM state to be as recent as the given index
   133  		if snapshotIndex >= index {
   134  			return s.Snapshot()
   135  		}
   136  
   137  		// Exponential back off
   138  		retries++
   139  		if retryTimer == nil {
   140  			// First retry, start at baseline
   141  			retryTimer = time.NewTimer(backoffBase)
   142  		} else {
   143  			// Subsequent retry, reset timer
   144  			deadline := 1 << (2 * retries) * backoffBase
   145  			if deadline > backoffLimit {
   146  				deadline = backoffLimit
   147  			}
   148  			retryTimer.Reset(deadline)
   149  		}
   150  
   151  		select {
   152  		case <-ctx.Done():
   153  			return nil, ctx.Err()
   154  		case <-retryTimer.C:
   155  		}
   156  	}
   157  }
   158  
   159  // Restore is used to optimize the efficiency of rebuilding
   160  // state by minimizing the number of transactions and checking
   161  // overhead.
   162  func (s *StateStore) Restore() (*StateRestore, error) {
   163  	txn := s.db.Txn(true)
   164  	r := &StateRestore{
   165  		txn: txn,
   166  	}
   167  	return r, nil
   168  }
   169  
   170  // AbandonCh returns a channel you can wait on to know if the state store was
   171  // abandoned.
   172  func (s *StateStore) AbandonCh() <-chan struct{} {
   173  	return s.abandonCh
   174  }
   175  
   176  // Abandon is used to signal that the given state store has been abandoned.
   177  // Calling this more than one time will panic.
   178  func (s *StateStore) Abandon() {
   179  	close(s.abandonCh)
   180  }
   181  
   182  // QueryFn is the definition of a function that can be used to implement a basic
   183  // blocking query against the state store.
   184  type QueryFn func(memdb.WatchSet, *StateStore) (resp interface{}, index uint64, err error)
   185  
   186  // BlockingQuery takes a query function and runs the function until the minimum
   187  // query index is met or until the passed context is cancelled.
   188  func (s *StateStore) BlockingQuery(query QueryFn, minIndex uint64, ctx context.Context) (
   189  	resp interface{}, index uint64, err error) {
   190  
   191  RUN_QUERY:
   192  	// We capture the state store and its abandon channel but pass a snapshot to
   193  	// the blocking query function. We operate on the snapshot to allow separate
   194  	// calls to the state store not all wrapped within the same transaction.
   195  	abandonCh := s.AbandonCh()
   196  	snap, _ := s.Snapshot()
   197  	stateSnap := &snap.StateStore
   198  
   199  	// We can skip all watch tracking if this isn't a blocking query.
   200  	var ws memdb.WatchSet
   201  	if minIndex > 0 {
   202  		ws = memdb.NewWatchSet()
   203  
   204  		// This channel will be closed if a snapshot is restored and the
   205  		// whole state store is abandoned.
   206  		ws.Add(abandonCh)
   207  	}
   208  
   209  	resp, index, err = query(ws, stateSnap)
   210  	if err != nil {
   211  		return nil, index, err
   212  	}
   213  
   214  	// We haven't reached the min-index yet.
   215  	if minIndex > 0 && index <= minIndex {
   216  		if err := ws.WatchCtx(ctx); err != nil {
   217  			return nil, index, err
   218  		}
   219  
   220  		goto RUN_QUERY
   221  	}
   222  
   223  	return resp, index, nil
   224  }
   225  
   226  // UpsertPlanResults is used to upsert the results of a plan.
   227  func (s *StateStore) UpsertPlanResults(index uint64, results *structs.ApplyPlanResultsRequest) error {
   228  	snapshot, err := s.Snapshot()
   229  	if err != nil {
   230  		return err
   231  	}
   232  
   233  	allocsStopped, err := snapshot.DenormalizeAllocationDiffSlice(results.AllocsStopped)
   234  	if err != nil {
   235  		return err
   236  	}
   237  
   238  	allocsPreempted, err := snapshot.DenormalizeAllocationDiffSlice(results.AllocsPreempted)
   239  	if err != nil {
   240  		return err
   241  	}
   242  
   243  	// COMPAT 0.11: Remove this denormalization when NodePreemptions is removed
   244  	results.NodePreemptions, err = snapshot.DenormalizeAllocationSlice(results.NodePreemptions)
   245  	if err != nil {
   246  		return err
   247  	}
   248  
   249  	txn := s.db.Txn(true)
   250  	defer txn.Abort()
   251  
   252  	// Upsert the newly created or updated deployment
   253  	if results.Deployment != nil {
   254  		if err := s.upsertDeploymentImpl(index, results.Deployment, txn); err != nil {
   255  			return err
   256  		}
   257  	}
   258  
   259  	// Update the status of deployments effected by the plan.
   260  	if len(results.DeploymentUpdates) != 0 {
   261  		s.upsertDeploymentUpdates(index, results.DeploymentUpdates, txn)
   262  	}
   263  
   264  	// COMPAT: Nomad versions before 0.7.1 did not include the eval ID when
   265  	// applying the plan. Thus while we are upgrading, we ignore updating the
   266  	// modify index of evaluations from older plans.
   267  	if results.EvalID != "" {
   268  		// Update the modify index of the eval id
   269  		if err := s.updateEvalModifyIndex(txn, index, results.EvalID); err != nil {
   270  			return err
   271  		}
   272  	}
   273  
   274  	numAllocs := 0
   275  	if len(results.Alloc) > 0 || len(results.NodePreemptions) > 0 {
   276  		// COMPAT 0.11: This branch will be removed, when Alloc is removed
   277  		// Attach the job to all the allocations. It is pulled out in the payload to
   278  		// avoid the redundancy of encoding, but should be denormalized prior to
   279  		// being inserted into MemDB.
   280  		addComputedAllocAttrs(results.Alloc, results.Job)
   281  		numAllocs = len(results.Alloc) + len(results.NodePreemptions)
   282  	} else {
   283  		// Attach the job to all the allocations. It is pulled out in the payload to
   284  		// avoid the redundancy of encoding, but should be denormalized prior to
   285  		// being inserted into MemDB.
   286  		addComputedAllocAttrs(results.AllocsUpdated, results.Job)
   287  		numAllocs = len(allocsStopped) + len(results.AllocsUpdated) + len(allocsPreempted)
   288  	}
   289  
   290  	allocsToUpsert := make([]*structs.Allocation, 0, numAllocs)
   291  
   292  	// COMPAT 0.11: Both these appends should be removed when Alloc and NodePreemptions are removed
   293  	allocsToUpsert = append(allocsToUpsert, results.Alloc...)
   294  	allocsToUpsert = append(allocsToUpsert, results.NodePreemptions...)
   295  
   296  	allocsToUpsert = append(allocsToUpsert, allocsStopped...)
   297  	allocsToUpsert = append(allocsToUpsert, results.AllocsUpdated...)
   298  	allocsToUpsert = append(allocsToUpsert, allocsPreempted...)
   299  
   300  	if err := s.upsertAllocsImpl(index, allocsToUpsert, txn); err != nil {
   301  		return err
   302  	}
   303  
   304  	// Upsert followup evals for allocs that were preempted
   305  	for _, eval := range results.PreemptionEvals {
   306  		if err := s.nestedUpsertEval(txn, index, eval); err != nil {
   307  			return err
   308  		}
   309  	}
   310  
   311  	txn.Commit()
   312  	return nil
   313  }
   314  
   315  // addComputedAllocAttrs adds the computed/derived attributes to the allocation.
   316  // This method is used when an allocation is being denormalized.
   317  func addComputedAllocAttrs(allocs []*structs.Allocation, job *structs.Job) {
   318  	structs.DenormalizeAllocationJobs(job, allocs)
   319  
   320  	// COMPAT(0.11): Remove in 0.11
   321  	// Calculate the total resources of allocations. It is pulled out in the
   322  	// payload to avoid encoding something that can be computed, but should be
   323  	// denormalized prior to being inserted into MemDB.
   324  	for _, alloc := range allocs {
   325  		if alloc.Resources != nil {
   326  			continue
   327  		}
   328  
   329  		alloc.Resources = new(structs.Resources)
   330  		for _, task := range alloc.TaskResources {
   331  			alloc.Resources.Add(task)
   332  		}
   333  
   334  		// Add the shared resources
   335  		alloc.Resources.Add(alloc.SharedResources)
   336  	}
   337  }
   338  
   339  // upsertDeploymentUpdates updates the deployments given the passed status
   340  // updates.
   341  func (s *StateStore) upsertDeploymentUpdates(index uint64, updates []*structs.DeploymentStatusUpdate, txn *memdb.Txn) error {
   342  	for _, u := range updates {
   343  		if err := s.updateDeploymentStatusImpl(index, u, txn); err != nil {
   344  			return err
   345  		}
   346  	}
   347  
   348  	return nil
   349  }
   350  
   351  // UpsertJobSummary upserts a job summary into the state store.
   352  func (s *StateStore) UpsertJobSummary(index uint64, jobSummary *structs.JobSummary) error {
   353  	txn := s.db.Txn(true)
   354  	defer txn.Abort()
   355  
   356  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
   357  	if jobSummary.Namespace == "" {
   358  		jobSummary.Namespace = structs.DefaultNamespace
   359  	}
   360  
   361  	// Check if the job summary already exists
   362  	existing, err := txn.First("job_summary", "id", jobSummary.Namespace, jobSummary.JobID)
   363  	if err != nil {
   364  		return fmt.Errorf("job summary lookup failed: %v", err)
   365  	}
   366  
   367  	// Setup the indexes correctly
   368  	if existing != nil {
   369  		jobSummary.CreateIndex = existing.(*structs.JobSummary).CreateIndex
   370  		jobSummary.ModifyIndex = index
   371  	} else {
   372  		jobSummary.CreateIndex = index
   373  		jobSummary.ModifyIndex = index
   374  	}
   375  
   376  	// Update the index
   377  	if err := txn.Insert("job_summary", jobSummary); err != nil {
   378  		return err
   379  	}
   380  
   381  	// Update the indexes table for job summary
   382  	if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil {
   383  		return fmt.Errorf("index update failed: %v", err)
   384  	}
   385  
   386  	txn.Commit()
   387  	return nil
   388  }
   389  
   390  // DeleteJobSummary deletes the job summary with the given ID. This is for
   391  // testing purposes only.
   392  func (s *StateStore) DeleteJobSummary(index uint64, namespace, id string) error {
   393  	txn := s.db.Txn(true)
   394  	defer txn.Abort()
   395  
   396  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
   397  	if namespace == "" {
   398  		namespace = structs.DefaultNamespace
   399  	}
   400  
   401  	// Delete the job summary
   402  	if _, err := txn.DeleteAll("job_summary", "id", namespace, id); err != nil {
   403  		return fmt.Errorf("deleting job summary failed: %v", err)
   404  	}
   405  	if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil {
   406  		return fmt.Errorf("index update failed: %v", err)
   407  	}
   408  	txn.Commit()
   409  	return nil
   410  }
   411  
   412  // UpsertDeployment is used to insert a new deployment. If cancelPrior is set to
   413  // true, all prior deployments for the same job will be cancelled.
   414  func (s *StateStore) UpsertDeployment(index uint64, deployment *structs.Deployment) error {
   415  	txn := s.db.Txn(true)
   416  	defer txn.Abort()
   417  	if err := s.upsertDeploymentImpl(index, deployment, txn); err != nil {
   418  		return err
   419  	}
   420  	txn.Commit()
   421  	return nil
   422  }
   423  
   424  func (s *StateStore) upsertDeploymentImpl(index uint64, deployment *structs.Deployment, txn *memdb.Txn) error {
   425  	// Check if the deployment already exists
   426  	existing, err := txn.First("deployment", "id", deployment.ID)
   427  	if err != nil {
   428  		return fmt.Errorf("deployment lookup failed: %v", err)
   429  	}
   430  
   431  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
   432  	if deployment.Namespace == "" {
   433  		deployment.Namespace = structs.DefaultNamespace
   434  	}
   435  
   436  	// Setup the indexes correctly
   437  	if existing != nil {
   438  		deployment.CreateIndex = existing.(*structs.Deployment).CreateIndex
   439  		deployment.ModifyIndex = index
   440  	} else {
   441  		deployment.CreateIndex = index
   442  		deployment.ModifyIndex = index
   443  	}
   444  
   445  	// Insert the deployment
   446  	if err := txn.Insert("deployment", deployment); err != nil {
   447  		return err
   448  	}
   449  
   450  	// Update the indexes table for deployment
   451  	if err := txn.Insert("index", &IndexEntry{"deployment", index}); err != nil {
   452  		return fmt.Errorf("index update failed: %v", err)
   453  	}
   454  
   455  	// If the deployment is being marked as complete, set the job to stable.
   456  	if deployment.Status == structs.DeploymentStatusSuccessful {
   457  		if err := s.updateJobStabilityImpl(index, deployment.Namespace, deployment.JobID, deployment.JobVersion, true, txn); err != nil {
   458  			return fmt.Errorf("failed to update job stability: %v", err)
   459  		}
   460  	}
   461  
   462  	return nil
   463  }
   464  
   465  func (s *StateStore) Deployments(ws memdb.WatchSet) (memdb.ResultIterator, error) {
   466  	txn := s.db.Txn(false)
   467  
   468  	// Walk the entire deployments table
   469  	iter, err := txn.Get("deployment", "id")
   470  	if err != nil {
   471  		return nil, err
   472  	}
   473  
   474  	ws.Add(iter.WatchCh())
   475  	return iter, nil
   476  }
   477  
   478  func (s *StateStore) DeploymentsByNamespace(ws memdb.WatchSet, namespace string) (memdb.ResultIterator, error) {
   479  	txn := s.db.Txn(false)
   480  
   481  	// Walk the entire deployments table
   482  	iter, err := txn.Get("deployment", "namespace", namespace)
   483  	if err != nil {
   484  		return nil, err
   485  	}
   486  
   487  	ws.Add(iter.WatchCh())
   488  	return iter, nil
   489  }
   490  
   491  func (s *StateStore) DeploymentsByIDPrefix(ws memdb.WatchSet, namespace, deploymentID string) (memdb.ResultIterator, error) {
   492  	txn := s.db.Txn(false)
   493  
   494  	// Walk the entire deployments table
   495  	iter, err := txn.Get("deployment", "id_prefix", deploymentID)
   496  	if err != nil {
   497  		return nil, err
   498  	}
   499  
   500  	ws.Add(iter.WatchCh())
   501  
   502  	// Wrap the iterator in a filter
   503  	wrap := memdb.NewFilterIterator(iter, deploymentNamespaceFilter(namespace))
   504  	return wrap, nil
   505  }
   506  
   507  // deploymentNamespaceFilter returns a filter function that filters all
   508  // deployment not in the given namespace.
   509  func deploymentNamespaceFilter(namespace string) func(interface{}) bool {
   510  	return func(raw interface{}) bool {
   511  		d, ok := raw.(*structs.Deployment)
   512  		if !ok {
   513  			return true
   514  		}
   515  
   516  		return d.Namespace != namespace
   517  	}
   518  }
   519  
   520  func (s *StateStore) DeploymentByID(ws memdb.WatchSet, deploymentID string) (*structs.Deployment, error) {
   521  	txn := s.db.Txn(false)
   522  	return s.deploymentByIDImpl(ws, deploymentID, txn)
   523  }
   524  
   525  func (s *StateStore) deploymentByIDImpl(ws memdb.WatchSet, deploymentID string, txn *memdb.Txn) (*structs.Deployment, error) {
   526  	watchCh, existing, err := txn.FirstWatch("deployment", "id", deploymentID)
   527  	if err != nil {
   528  		return nil, fmt.Errorf("deployment lookup failed: %v", err)
   529  	}
   530  	ws.Add(watchCh)
   531  
   532  	if existing != nil {
   533  		return existing.(*structs.Deployment), nil
   534  	}
   535  
   536  	return nil, nil
   537  }
   538  
   539  func (s *StateStore) DeploymentsByJobID(ws memdb.WatchSet, namespace, jobID string, all bool) ([]*structs.Deployment, error) {
   540  	txn := s.db.Txn(false)
   541  
   542  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
   543  	if namespace == "" {
   544  		namespace = structs.DefaultNamespace
   545  	}
   546  
   547  	var job *structs.Job
   548  	// Read job from state store
   549  	_, existing, err := txn.FirstWatch("jobs", "id", namespace, jobID)
   550  	if err != nil {
   551  		return nil, fmt.Errorf("job lookup failed: %v", err)
   552  	}
   553  	if existing != nil {
   554  		job = existing.(*structs.Job)
   555  	}
   556  
   557  	// Get an iterator over the deployments
   558  	iter, err := txn.Get("deployment", "job", namespace, jobID)
   559  	if err != nil {
   560  		return nil, err
   561  	}
   562  
   563  	ws.Add(iter.WatchCh())
   564  
   565  	var out []*structs.Deployment
   566  	for {
   567  		raw := iter.Next()
   568  		if raw == nil {
   569  			break
   570  		}
   571  		d := raw.(*structs.Deployment)
   572  
   573  		// If the allocation belongs to a job with the same ID but a different
   574  		// create index and we are not getting all the allocations whose Jobs
   575  		// matches the same Job ID then we skip it
   576  		if !all && job != nil && d.JobCreateIndex != job.CreateIndex {
   577  			continue
   578  		}
   579  		out = append(out, d)
   580  	}
   581  
   582  	return out, nil
   583  }
   584  
   585  // LatestDeploymentByJobID returns the latest deployment for the given job. The
   586  // latest is determined strictly by CreateIndex.
   587  func (s *StateStore) LatestDeploymentByJobID(ws memdb.WatchSet, namespace, jobID string) (*structs.Deployment, error) {
   588  	txn := s.db.Txn(false)
   589  
   590  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
   591  	if namespace == "" {
   592  		namespace = structs.DefaultNamespace
   593  	}
   594  
   595  	// Get an iterator over the deployments
   596  	iter, err := txn.Get("deployment", "job", namespace, jobID)
   597  	if err != nil {
   598  		return nil, err
   599  	}
   600  
   601  	ws.Add(iter.WatchCh())
   602  
   603  	var out *structs.Deployment
   604  	for {
   605  		raw := iter.Next()
   606  		if raw == nil {
   607  			break
   608  		}
   609  
   610  		d := raw.(*structs.Deployment)
   611  		if out == nil || out.CreateIndex < d.CreateIndex {
   612  			out = d
   613  		}
   614  	}
   615  
   616  	return out, nil
   617  }
   618  
   619  // DeleteDeployment is used to delete a set of deployments by ID
   620  func (s *StateStore) DeleteDeployment(index uint64, deploymentIDs []string) error {
   621  	txn := s.db.Txn(true)
   622  	defer txn.Abort()
   623  
   624  	if len(deploymentIDs) == 0 {
   625  		return nil
   626  	}
   627  
   628  	for _, deploymentID := range deploymentIDs {
   629  		// Lookup the deployment
   630  		existing, err := txn.First("deployment", "id", deploymentID)
   631  		if err != nil {
   632  			return fmt.Errorf("deployment lookup failed: %v", err)
   633  		}
   634  		if existing == nil {
   635  			return fmt.Errorf("deployment not found")
   636  		}
   637  
   638  		// Delete the deployment
   639  		if err := txn.Delete("deployment", existing); err != nil {
   640  			return fmt.Errorf("deployment delete failed: %v", err)
   641  		}
   642  	}
   643  
   644  	if err := txn.Insert("index", &IndexEntry{"deployment", index}); err != nil {
   645  		return fmt.Errorf("index update failed: %v", err)
   646  	}
   647  
   648  	txn.Commit()
   649  	return nil
   650  }
   651  
   652  // UpsertNode is used to register a node or update a node definition
   653  // This is assumed to be triggered by the client, so we retain the value
   654  // of drain/eligibility which is set by the scheduler.
   655  func (s *StateStore) UpsertNode(index uint64, node *structs.Node) error {
   656  	txn := s.db.Txn(true)
   657  	defer txn.Abort()
   658  
   659  	// Check if the node already exists
   660  	existing, err := txn.First("nodes", "id", node.ID)
   661  	if err != nil {
   662  		return fmt.Errorf("node lookup failed: %v", err)
   663  	}
   664  
   665  	// Setup the indexes correctly
   666  	if existing != nil {
   667  		exist := existing.(*structs.Node)
   668  		node.CreateIndex = exist.CreateIndex
   669  		node.ModifyIndex = index
   670  
   671  		// Retain node events that have already been set on the node
   672  		node.Events = exist.Events
   673  
   674  		// If we are transitioning from down, record the re-registration
   675  		if exist.Status == structs.NodeStatusDown && node.Status != structs.NodeStatusDown {
   676  			appendNodeEvents(index, node, []*structs.NodeEvent{
   677  				structs.NewNodeEvent().SetSubsystem(structs.NodeEventSubsystemCluster).
   678  					SetMessage(NodeRegisterEventReregistered).
   679  					SetTimestamp(time.Unix(node.StatusUpdatedAt, 0))})
   680  		}
   681  
   682  		node.Drain = exist.Drain                                 // Retain the drain mode
   683  		node.SchedulingEligibility = exist.SchedulingEligibility // Retain the eligibility
   684  		node.DrainStrategy = exist.DrainStrategy                 // Retain the drain strategy
   685  	} else {
   686  		// Because this is the first time the node is being registered, we should
   687  		// also create a node registration event
   688  		nodeEvent := structs.NewNodeEvent().SetSubsystem(structs.NodeEventSubsystemCluster).
   689  			SetMessage(NodeRegisterEventRegistered).
   690  			SetTimestamp(time.Unix(node.StatusUpdatedAt, 0))
   691  		node.Events = []*structs.NodeEvent{nodeEvent}
   692  		node.CreateIndex = index
   693  		node.ModifyIndex = index
   694  	}
   695  
   696  	// Insert the node
   697  	if err := txn.Insert("nodes", node); err != nil {
   698  		return fmt.Errorf("node insert failed: %v", err)
   699  	}
   700  	if err := txn.Insert("index", &IndexEntry{"nodes", index}); err != nil {
   701  		return fmt.Errorf("index update failed: %v", err)
   702  	}
   703  
   704  	txn.Commit()
   705  	return nil
   706  }
   707  
   708  // DeleteNode is used to deregister a node
   709  func (s *StateStore) DeleteNode(index uint64, nodeID string) error {
   710  	txn := s.db.Txn(true)
   711  	defer txn.Abort()
   712  
   713  	// Lookup the node
   714  	existing, err := txn.First("nodes", "id", nodeID)
   715  	if err != nil {
   716  		return fmt.Errorf("node lookup failed: %v", err)
   717  	}
   718  	if existing == nil {
   719  		return fmt.Errorf("node not found")
   720  	}
   721  
   722  	// Delete the node
   723  	if err := txn.Delete("nodes", existing); err != nil {
   724  		return fmt.Errorf("node delete failed: %v", err)
   725  	}
   726  	if err := txn.Insert("index", &IndexEntry{"nodes", index}); err != nil {
   727  		return fmt.Errorf("index update failed: %v", err)
   728  	}
   729  
   730  	txn.Commit()
   731  	return nil
   732  }
   733  
   734  // UpdateNodeStatus is used to update the status of a node
   735  func (s *StateStore) UpdateNodeStatus(index uint64, nodeID, status string, updatedAt int64, event *structs.NodeEvent) error {
   736  	txn := s.db.Txn(true)
   737  	defer txn.Abort()
   738  
   739  	// Lookup the node
   740  	existing, err := txn.First("nodes", "id", nodeID)
   741  	if err != nil {
   742  		return fmt.Errorf("node lookup failed: %v", err)
   743  	}
   744  	if existing == nil {
   745  		return fmt.Errorf("node not found")
   746  	}
   747  
   748  	// Copy the existing node
   749  	existingNode := existing.(*structs.Node)
   750  	copyNode := existingNode.Copy()
   751  	copyNode.StatusUpdatedAt = updatedAt
   752  
   753  	// Add the event if given
   754  	if event != nil {
   755  		appendNodeEvents(index, copyNode, []*structs.NodeEvent{event})
   756  	}
   757  
   758  	// Update the status in the copy
   759  	copyNode.Status = status
   760  	copyNode.ModifyIndex = index
   761  
   762  	// Insert the node
   763  	if err := txn.Insert("nodes", copyNode); err != nil {
   764  		return fmt.Errorf("node update failed: %v", err)
   765  	}
   766  	if err := txn.Insert("index", &IndexEntry{"nodes", index}); err != nil {
   767  		return fmt.Errorf("index update failed: %v", err)
   768  	}
   769  
   770  	txn.Commit()
   771  	return nil
   772  }
   773  
   774  // BatchUpdateNodeDrain is used to update the drain of a node set of nodes
   775  func (s *StateStore) BatchUpdateNodeDrain(index uint64, updatedAt int64, updates map[string]*structs.DrainUpdate, events map[string]*structs.NodeEvent) error {
   776  	txn := s.db.Txn(true)
   777  	defer txn.Abort()
   778  	for node, update := range updates {
   779  		if err := s.updateNodeDrainImpl(txn, index, node, update.DrainStrategy, update.MarkEligible, updatedAt, events[node]); err != nil {
   780  			return err
   781  		}
   782  	}
   783  	txn.Commit()
   784  	return nil
   785  }
   786  
   787  // UpdateNodeDrain is used to update the drain of a node
   788  func (s *StateStore) UpdateNodeDrain(index uint64, nodeID string,
   789  	drain *structs.DrainStrategy, markEligible bool, updatedAt int64, event *structs.NodeEvent) error {
   790  
   791  	txn := s.db.Txn(true)
   792  	defer txn.Abort()
   793  	if err := s.updateNodeDrainImpl(txn, index, nodeID, drain, markEligible, updatedAt, event); err != nil {
   794  		return err
   795  	}
   796  	txn.Commit()
   797  	return nil
   798  }
   799  
   800  func (s *StateStore) updateNodeDrainImpl(txn *memdb.Txn, index uint64, nodeID string,
   801  	drain *structs.DrainStrategy, markEligible bool, updatedAt int64, event *structs.NodeEvent) error {
   802  
   803  	// Lookup the node
   804  	existing, err := txn.First("nodes", "id", nodeID)
   805  	if err != nil {
   806  		return fmt.Errorf("node lookup failed: %v", err)
   807  	}
   808  	if existing == nil {
   809  		return fmt.Errorf("node not found")
   810  	}
   811  
   812  	// Copy the existing node
   813  	existingNode := existing.(*structs.Node)
   814  	copyNode := existingNode.Copy()
   815  	copyNode.StatusUpdatedAt = updatedAt
   816  
   817  	// Add the event if given
   818  	if event != nil {
   819  		appendNodeEvents(index, copyNode, []*structs.NodeEvent{event})
   820  	}
   821  
   822  	// Update the drain in the copy
   823  	copyNode.Drain = drain != nil // COMPAT: Remove in Nomad 0.9
   824  	copyNode.DrainStrategy = drain
   825  	if drain != nil {
   826  		copyNode.SchedulingEligibility = structs.NodeSchedulingIneligible
   827  	} else if markEligible {
   828  		copyNode.SchedulingEligibility = structs.NodeSchedulingEligible
   829  	}
   830  
   831  	copyNode.ModifyIndex = index
   832  
   833  	// Insert the node
   834  	if err := txn.Insert("nodes", copyNode); err != nil {
   835  		return fmt.Errorf("node update failed: %v", err)
   836  	}
   837  	if err := txn.Insert("index", &IndexEntry{"nodes", index}); err != nil {
   838  		return fmt.Errorf("index update failed: %v", err)
   839  	}
   840  
   841  	return nil
   842  }
   843  
   844  // UpdateNodeEligibility is used to update the scheduling eligibility of a node
   845  func (s *StateStore) UpdateNodeEligibility(index uint64, nodeID string, eligibility string, updatedAt int64, event *structs.NodeEvent) error {
   846  
   847  	txn := s.db.Txn(true)
   848  	defer txn.Abort()
   849  
   850  	// Lookup the node
   851  	existing, err := txn.First("nodes", "id", nodeID)
   852  	if err != nil {
   853  		return fmt.Errorf("node lookup failed: %v", err)
   854  	}
   855  	if existing == nil {
   856  		return fmt.Errorf("node not found")
   857  	}
   858  
   859  	// Copy the existing node
   860  	existingNode := existing.(*structs.Node)
   861  	copyNode := existingNode.Copy()
   862  	copyNode.StatusUpdatedAt = updatedAt
   863  
   864  	// Add the event if given
   865  	if event != nil {
   866  		appendNodeEvents(index, copyNode, []*structs.NodeEvent{event})
   867  	}
   868  
   869  	// Check if this is a valid action
   870  	if copyNode.DrainStrategy != nil && eligibility == structs.NodeSchedulingEligible {
   871  		return fmt.Errorf("can not set node's scheduling eligibility to eligible while it is draining")
   872  	}
   873  
   874  	// Update the eligibility in the copy
   875  	copyNode.SchedulingEligibility = eligibility
   876  	copyNode.ModifyIndex = index
   877  
   878  	// Insert the node
   879  	if err := txn.Insert("nodes", copyNode); err != nil {
   880  		return fmt.Errorf("node update failed: %v", err)
   881  	}
   882  	if err := txn.Insert("index", &IndexEntry{"nodes", index}); err != nil {
   883  		return fmt.Errorf("index update failed: %v", err)
   884  	}
   885  
   886  	txn.Commit()
   887  	return nil
   888  }
   889  
   890  // UpsertNodeEvents adds the node events to the nodes, rotating events as
   891  // necessary.
   892  func (s *StateStore) UpsertNodeEvents(index uint64, nodeEvents map[string][]*structs.NodeEvent) error {
   893  	txn := s.db.Txn(true)
   894  	defer txn.Abort()
   895  
   896  	for nodeID, events := range nodeEvents {
   897  		if err := s.upsertNodeEvents(index, nodeID, events, txn); err != nil {
   898  			return err
   899  		}
   900  	}
   901  
   902  	txn.Commit()
   903  	return nil
   904  }
   905  
   906  // upsertNodeEvent upserts a node event for a respective node. It also maintains
   907  // that a fixed number of node events are ever stored simultaneously, deleting
   908  // older events once this bound has been reached.
   909  func (s *StateStore) upsertNodeEvents(index uint64, nodeID string, events []*structs.NodeEvent, txn *memdb.Txn) error {
   910  	// Lookup the node
   911  	existing, err := txn.First("nodes", "id", nodeID)
   912  	if err != nil {
   913  		return fmt.Errorf("node lookup failed: %v", err)
   914  	}
   915  	if existing == nil {
   916  		return fmt.Errorf("node not found")
   917  	}
   918  
   919  	// Copy the existing node
   920  	existingNode := existing.(*structs.Node)
   921  	copyNode := existingNode.Copy()
   922  	appendNodeEvents(index, copyNode, events)
   923  
   924  	// Insert the node
   925  	if err := txn.Insert("nodes", copyNode); err != nil {
   926  		return fmt.Errorf("node update failed: %v", err)
   927  	}
   928  	if err := txn.Insert("index", &IndexEntry{"nodes", index}); err != nil {
   929  		return fmt.Errorf("index update failed: %v", err)
   930  	}
   931  
   932  	return nil
   933  }
   934  
   935  // appendNodeEvents is a helper that takes a node and new events and appends
   936  // them, pruning older events as needed.
   937  func appendNodeEvents(index uint64, node *structs.Node, events []*structs.NodeEvent) {
   938  	// Add the events, updating the indexes
   939  	for _, e := range events {
   940  		e.CreateIndex = index
   941  		node.Events = append(node.Events, e)
   942  	}
   943  
   944  	// Keep node events pruned to not exceed the max allowed
   945  	if l := len(node.Events); l > structs.MaxRetainedNodeEvents {
   946  		delta := l - structs.MaxRetainedNodeEvents
   947  		node.Events = node.Events[delta:]
   948  	}
   949  }
   950  
   951  // NodeByID is used to lookup a node by ID
   952  func (s *StateStore) NodeByID(ws memdb.WatchSet, nodeID string) (*structs.Node, error) {
   953  	txn := s.db.Txn(false)
   954  
   955  	watchCh, existing, err := txn.FirstWatch("nodes", "id", nodeID)
   956  	if err != nil {
   957  		return nil, fmt.Errorf("node lookup failed: %v", err)
   958  	}
   959  	ws.Add(watchCh)
   960  
   961  	if existing != nil {
   962  		return existing.(*structs.Node), nil
   963  	}
   964  	return nil, nil
   965  }
   966  
   967  // NodesByIDPrefix is used to lookup nodes by prefix
   968  func (s *StateStore) NodesByIDPrefix(ws memdb.WatchSet, nodeID string) (memdb.ResultIterator, error) {
   969  	txn := s.db.Txn(false)
   970  
   971  	iter, err := txn.Get("nodes", "id_prefix", nodeID)
   972  	if err != nil {
   973  		return nil, fmt.Errorf("node lookup failed: %v", err)
   974  	}
   975  	ws.Add(iter.WatchCh())
   976  
   977  	return iter, nil
   978  }
   979  
   980  // NodeBySecretID is used to lookup a node by SecretID
   981  func (s *StateStore) NodeBySecretID(ws memdb.WatchSet, secretID string) (*structs.Node, error) {
   982  	txn := s.db.Txn(false)
   983  
   984  	watchCh, existing, err := txn.FirstWatch("nodes", "secret_id", secretID)
   985  	if err != nil {
   986  		return nil, fmt.Errorf("node lookup by SecretID failed: %v", err)
   987  	}
   988  	ws.Add(watchCh)
   989  
   990  	if existing != nil {
   991  		return existing.(*structs.Node), nil
   992  	}
   993  	return nil, nil
   994  }
   995  
   996  // Nodes returns an iterator over all the nodes
   997  func (s *StateStore) Nodes(ws memdb.WatchSet) (memdb.ResultIterator, error) {
   998  	txn := s.db.Txn(false)
   999  
  1000  	// Walk the entire nodes table
  1001  	iter, err := txn.Get("nodes", "id")
  1002  	if err != nil {
  1003  		return nil, err
  1004  	}
  1005  	ws.Add(iter.WatchCh())
  1006  	return iter, nil
  1007  }
  1008  
  1009  // UpsertJob is used to register a job or update a job definition
  1010  func (s *StateStore) UpsertJob(index uint64, job *structs.Job) error {
  1011  	txn := s.db.Txn(true)
  1012  	defer txn.Abort()
  1013  	if err := s.upsertJobImpl(index, job, false, txn); err != nil {
  1014  		return err
  1015  	}
  1016  	txn.Commit()
  1017  	return nil
  1018  }
  1019  
  1020  // UpsertJobTxn is used to register a job or update a job definition, like UpsertJob,
  1021  // but in a transaction.  Useful for when making multiple modifications atomically
  1022  func (s *StateStore) UpsertJobTxn(index uint64, job *structs.Job, txn Txn) error {
  1023  	return s.upsertJobImpl(index, job, false, txn)
  1024  }
  1025  
  1026  // upsertJobImpl is the implementation for registering a job or updating a job definition
  1027  func (s *StateStore) upsertJobImpl(index uint64, job *structs.Job, keepVersion bool, txn *memdb.Txn) error {
  1028  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
  1029  	if job.Namespace == "" {
  1030  		job.Namespace = structs.DefaultNamespace
  1031  	}
  1032  
  1033  	// Assert the namespace exists
  1034  	if exists, err := s.namespaceExists(txn, job.Namespace); err != nil {
  1035  		return err
  1036  	} else if !exists {
  1037  		return fmt.Errorf("job %q is in nonexistent namespace %q", job.ID, job.Namespace)
  1038  	}
  1039  
  1040  	// Check if the job already exists
  1041  	existing, err := txn.First("jobs", "id", job.Namespace, job.ID)
  1042  	if err != nil {
  1043  		return fmt.Errorf("job lookup failed: %v", err)
  1044  	}
  1045  
  1046  	// Setup the indexes correctly
  1047  	if existing != nil {
  1048  		job.CreateIndex = existing.(*structs.Job).CreateIndex
  1049  		job.ModifyIndex = index
  1050  
  1051  		// Bump the version unless asked to keep it. This should only be done
  1052  		// when changing an internal field such as Stable. A spec change should
  1053  		// always come with a version bump
  1054  		if !keepVersion {
  1055  			job.JobModifyIndex = index
  1056  			job.Version = existing.(*structs.Job).Version + 1
  1057  		}
  1058  
  1059  		// Compute the job status
  1060  		var err error
  1061  		job.Status, err = s.getJobStatus(txn, job, false)
  1062  		if err != nil {
  1063  			return fmt.Errorf("setting job status for %q failed: %v", job.ID, err)
  1064  		}
  1065  	} else {
  1066  		job.CreateIndex = index
  1067  		job.ModifyIndex = index
  1068  		job.JobModifyIndex = index
  1069  		job.Version = 0
  1070  
  1071  		if err := s.setJobStatus(index, txn, job, false, ""); err != nil {
  1072  			return fmt.Errorf("setting job status for %q failed: %v", job.ID, err)
  1073  		}
  1074  
  1075  		// Have to get the job again since it could have been updated
  1076  		updated, err := txn.First("jobs", "id", job.Namespace, job.ID)
  1077  		if err != nil {
  1078  			return fmt.Errorf("job lookup failed: %v", err)
  1079  		}
  1080  		if updated != nil {
  1081  			job = updated.(*structs.Job)
  1082  		}
  1083  	}
  1084  
  1085  	if err := s.updateSummaryWithJob(index, job, txn); err != nil {
  1086  		return fmt.Errorf("unable to create job summary: %v", err)
  1087  	}
  1088  
  1089  	if err := s.upsertJobVersion(index, job, txn); err != nil {
  1090  		return fmt.Errorf("unable to upsert job into job_version table: %v", err)
  1091  	}
  1092  
  1093  	// Insert the job
  1094  	if err := txn.Insert("jobs", job); err != nil {
  1095  		return fmt.Errorf("job insert failed: %v", err)
  1096  	}
  1097  	if err := txn.Insert("index", &IndexEntry{"jobs", index}); err != nil {
  1098  		return fmt.Errorf("index update failed: %v", err)
  1099  	}
  1100  
  1101  	return nil
  1102  }
  1103  
  1104  // DeleteJob is used to deregister a job
  1105  func (s *StateStore) DeleteJob(index uint64, namespace, jobID string) error {
  1106  	txn := s.db.Txn(true)
  1107  	defer txn.Abort()
  1108  
  1109  	err := s.DeleteJobTxn(index, namespace, jobID, txn)
  1110  	if err == nil {
  1111  		txn.Commit()
  1112  	}
  1113  	return err
  1114  }
  1115  
  1116  // DeleteJobTxn is used to deregister a job, like DeleteJob,
  1117  // but in a transaction.  Useful for when making multiple modifications atomically
  1118  func (s *StateStore) DeleteJobTxn(index uint64, namespace, jobID string, txn Txn) error {
  1119  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
  1120  	if namespace == "" {
  1121  		namespace = structs.DefaultNamespace
  1122  	}
  1123  
  1124  	// Lookup the node
  1125  	existing, err := txn.First("jobs", "id", namespace, jobID)
  1126  	if err != nil {
  1127  		return fmt.Errorf("job lookup failed: %v", err)
  1128  	}
  1129  	if existing == nil {
  1130  		return fmt.Errorf("job not found")
  1131  	}
  1132  
  1133  	// Check if we should update a parent job summary
  1134  	job := existing.(*structs.Job)
  1135  	if job.ParentID != "" {
  1136  		summaryRaw, err := txn.First("job_summary", "id", namespace, job.ParentID)
  1137  		if err != nil {
  1138  			return fmt.Errorf("unable to retrieve summary for parent job: %v", err)
  1139  		}
  1140  
  1141  		// Only continue if the summary exists. It could not exist if the parent
  1142  		// job was removed
  1143  		if summaryRaw != nil {
  1144  			existing := summaryRaw.(*structs.JobSummary)
  1145  			pSummary := existing.Copy()
  1146  			if pSummary.Children != nil {
  1147  
  1148  				modified := false
  1149  				switch job.Status {
  1150  				case structs.JobStatusPending:
  1151  					pSummary.Children.Pending--
  1152  					pSummary.Children.Dead++
  1153  					modified = true
  1154  				case structs.JobStatusRunning:
  1155  					pSummary.Children.Running--
  1156  					pSummary.Children.Dead++
  1157  					modified = true
  1158  				case structs.JobStatusDead:
  1159  				default:
  1160  					return fmt.Errorf("unknown old job status %q", job.Status)
  1161  				}
  1162  
  1163  				if modified {
  1164  					// Update the modify index
  1165  					pSummary.ModifyIndex = index
  1166  
  1167  					// COMPAT 0.7: Upgrade old objects that do not have namespaces
  1168  					if pSummary.Namespace == "" {
  1169  						pSummary.Namespace = structs.DefaultNamespace
  1170  					}
  1171  
  1172  					// Insert the summary
  1173  					if err := txn.Insert("job_summary", pSummary); err != nil {
  1174  						return fmt.Errorf("job summary insert failed: %v", err)
  1175  					}
  1176  					if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil {
  1177  						return fmt.Errorf("index update failed: %v", err)
  1178  					}
  1179  				}
  1180  			}
  1181  		}
  1182  	}
  1183  
  1184  	// Delete the job
  1185  	if err := txn.Delete("jobs", existing); err != nil {
  1186  		return fmt.Errorf("job delete failed: %v", err)
  1187  	}
  1188  	if err := txn.Insert("index", &IndexEntry{"jobs", index}); err != nil {
  1189  		return fmt.Errorf("index update failed: %v", err)
  1190  	}
  1191  
  1192  	// Delete the job versions
  1193  	if err := s.deleteJobVersions(index, job, txn); err != nil {
  1194  		return err
  1195  	}
  1196  
  1197  	// Delete the job summary
  1198  	if _, err = txn.DeleteAll("job_summary", "id", namespace, jobID); err != nil {
  1199  		return fmt.Errorf("deleing job summary failed: %v", err)
  1200  	}
  1201  	if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil {
  1202  		return fmt.Errorf("index update failed: %v", err)
  1203  	}
  1204  
  1205  	return nil
  1206  }
  1207  
  1208  // deleteJobVersions deletes all versions of the given job.
  1209  func (s *StateStore) deleteJobVersions(index uint64, job *structs.Job, txn *memdb.Txn) error {
  1210  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
  1211  	if job.Namespace == "" {
  1212  		job.Namespace = structs.DefaultNamespace
  1213  	}
  1214  
  1215  	iter, err := txn.Get("job_version", "id_prefix", job.Namespace, job.ID)
  1216  	if err != nil {
  1217  		return err
  1218  	}
  1219  
  1220  	// Put them into a slice so there are no safety concerns while actually
  1221  	// performing the deletes
  1222  	jobs := []*structs.Job{}
  1223  	for {
  1224  		raw := iter.Next()
  1225  		if raw == nil {
  1226  			break
  1227  		}
  1228  
  1229  		// Ensure the ID is an exact match
  1230  		j := raw.(*structs.Job)
  1231  		if j.ID != job.ID {
  1232  			continue
  1233  		}
  1234  
  1235  		jobs = append(jobs, j)
  1236  	}
  1237  
  1238  	// Do the deletes
  1239  	for _, j := range jobs {
  1240  		if err := txn.Delete("job_version", j); err != nil {
  1241  			return fmt.Errorf("deleting job versions failed: %v", err)
  1242  		}
  1243  	}
  1244  
  1245  	if err := txn.Insert("index", &IndexEntry{"job_version", index}); err != nil {
  1246  		return fmt.Errorf("index update failed: %v", err)
  1247  	}
  1248  
  1249  	return nil
  1250  }
  1251  
  1252  // upsertJobVersion inserts a job into its historic version table and limits the
  1253  // number of job versions that are tracked.
  1254  func (s *StateStore) upsertJobVersion(index uint64, job *structs.Job, txn *memdb.Txn) error {
  1255  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
  1256  	if job.Namespace == "" {
  1257  		job.Namespace = structs.DefaultNamespace
  1258  	}
  1259  
  1260  	// Insert the job
  1261  	if err := txn.Insert("job_version", job); err != nil {
  1262  		return fmt.Errorf("failed to insert job into job_version table: %v", err)
  1263  	}
  1264  
  1265  	if err := txn.Insert("index", &IndexEntry{"job_version", index}); err != nil {
  1266  		return fmt.Errorf("index update failed: %v", err)
  1267  	}
  1268  
  1269  	// Get all the historic jobs for this ID
  1270  	all, err := s.jobVersionByID(txn, nil, job.Namespace, job.ID)
  1271  	if err != nil {
  1272  		return fmt.Errorf("failed to look up job versions for %q: %v", job.ID, err)
  1273  	}
  1274  
  1275  	// If we are below the limit there is no GCing to be done
  1276  	if len(all) <= structs.JobTrackedVersions {
  1277  		return nil
  1278  	}
  1279  
  1280  	// We have to delete a historic job to make room.
  1281  	// Find index of the highest versioned stable job
  1282  	stableIdx := -1
  1283  	for i, j := range all {
  1284  		if j.Stable {
  1285  			stableIdx = i
  1286  			break
  1287  		}
  1288  	}
  1289  
  1290  	// If the stable job is the oldest version, do a swap to bring it into the
  1291  	// keep set.
  1292  	max := structs.JobTrackedVersions
  1293  	if stableIdx == max {
  1294  		all[max-1], all[max] = all[max], all[max-1]
  1295  	}
  1296  
  1297  	// Delete the job outside of the set that are being kept.
  1298  	d := all[max]
  1299  	if err := txn.Delete("job_version", d); err != nil {
  1300  		return fmt.Errorf("failed to delete job %v (%d) from job_version", d.ID, d.Version)
  1301  	}
  1302  
  1303  	return nil
  1304  }
  1305  
  1306  // JobByID is used to lookup a job by its ID. JobByID returns the current/latest job
  1307  // version.
  1308  func (s *StateStore) JobByID(ws memdb.WatchSet, namespace, id string) (*structs.Job, error) {
  1309  	txn := s.db.Txn(false)
  1310  	return s.JobByIDTxn(ws, namespace, id, txn)
  1311  }
  1312  
  1313  // JobByIDTxn is used to lookup a job by its ID, like  JobByID. JobByID returns the job version
  1314  // accessible through in the transaction
  1315  func (s *StateStore) JobByIDTxn(ws memdb.WatchSet, namespace, id string, txn Txn) (*structs.Job, error) {
  1316  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
  1317  	if namespace == "" {
  1318  		namespace = structs.DefaultNamespace
  1319  	}
  1320  
  1321  	watchCh, existing, err := txn.FirstWatch("jobs", "id", namespace, id)
  1322  	if err != nil {
  1323  		return nil, fmt.Errorf("job lookup failed: %v", err)
  1324  	}
  1325  	ws.Add(watchCh)
  1326  
  1327  	if existing != nil {
  1328  		return existing.(*structs.Job), nil
  1329  	}
  1330  	return nil, nil
  1331  }
  1332  
  1333  // JobsByIDPrefix is used to lookup a job by prefix
  1334  func (s *StateStore) JobsByIDPrefix(ws memdb.WatchSet, namespace, id string) (memdb.ResultIterator, error) {
  1335  	txn := s.db.Txn(false)
  1336  
  1337  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
  1338  	if namespace == "" {
  1339  		namespace = structs.DefaultNamespace
  1340  	}
  1341  
  1342  	iter, err := txn.Get("jobs", "id_prefix", namespace, id)
  1343  	if err != nil {
  1344  		return nil, fmt.Errorf("job lookup failed: %v", err)
  1345  	}
  1346  
  1347  	ws.Add(iter.WatchCh())
  1348  
  1349  	return iter, nil
  1350  }
  1351  
  1352  // JobVersionsByID returns all the tracked versions of a job.
  1353  func (s *StateStore) JobVersionsByID(ws memdb.WatchSet, namespace, id string) ([]*structs.Job, error) {
  1354  	txn := s.db.Txn(false)
  1355  
  1356  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
  1357  	if namespace == "" {
  1358  		namespace = structs.DefaultNamespace
  1359  	}
  1360  
  1361  	return s.jobVersionByID(txn, &ws, namespace, id)
  1362  }
  1363  
  1364  // jobVersionByID is the underlying implementation for retrieving all tracked
  1365  // versions of a job and is called under an existing transaction. A watch set
  1366  // can optionally be passed in to add the job histories to the watch set.
  1367  func (s *StateStore) jobVersionByID(txn *memdb.Txn, ws *memdb.WatchSet, namespace, id string) ([]*structs.Job, error) {
  1368  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
  1369  	if namespace == "" {
  1370  		namespace = structs.DefaultNamespace
  1371  	}
  1372  
  1373  	// Get all the historic jobs for this ID
  1374  	iter, err := txn.Get("job_version", "id_prefix", namespace, id)
  1375  	if err != nil {
  1376  		return nil, err
  1377  	}
  1378  
  1379  	if ws != nil {
  1380  		ws.Add(iter.WatchCh())
  1381  	}
  1382  
  1383  	var all []*structs.Job
  1384  	for {
  1385  		raw := iter.Next()
  1386  		if raw == nil {
  1387  			break
  1388  		}
  1389  
  1390  		// Ensure the ID is an exact match
  1391  		j := raw.(*structs.Job)
  1392  		if j.ID != id {
  1393  			continue
  1394  		}
  1395  
  1396  		all = append(all, j)
  1397  	}
  1398  
  1399  	// Sort in reverse order so that the highest version is first
  1400  	sort.Slice(all, func(i, j int) bool {
  1401  		return all[i].Version > all[j].Version
  1402  	})
  1403  
  1404  	return all, nil
  1405  }
  1406  
  1407  // JobByIDAndVersion returns the job identified by its ID and Version. The
  1408  // passed watchset may be nil.
  1409  func (s *StateStore) JobByIDAndVersion(ws memdb.WatchSet, namespace, id string, version uint64) (*structs.Job, error) {
  1410  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
  1411  	if namespace == "" {
  1412  		namespace = structs.DefaultNamespace
  1413  	}
  1414  	txn := s.db.Txn(false)
  1415  	return s.jobByIDAndVersionImpl(ws, namespace, id, version, txn)
  1416  }
  1417  
  1418  // jobByIDAndVersionImpl returns the job identified by its ID and Version. The
  1419  // passed watchset may be nil.
  1420  func (s *StateStore) jobByIDAndVersionImpl(ws memdb.WatchSet, namespace, id string,
  1421  	version uint64, txn *memdb.Txn) (*structs.Job, error) {
  1422  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
  1423  	if namespace == "" {
  1424  		namespace = structs.DefaultNamespace
  1425  	}
  1426  
  1427  	watchCh, existing, err := txn.FirstWatch("job_version", "id", namespace, id, version)
  1428  	if err != nil {
  1429  		return nil, err
  1430  	}
  1431  
  1432  	if ws != nil {
  1433  		ws.Add(watchCh)
  1434  	}
  1435  
  1436  	if existing != nil {
  1437  		job := existing.(*structs.Job)
  1438  		return job, nil
  1439  	}
  1440  
  1441  	return nil, nil
  1442  }
  1443  
  1444  func (s *StateStore) JobVersions(ws memdb.WatchSet) (memdb.ResultIterator, error) {
  1445  	txn := s.db.Txn(false)
  1446  
  1447  	// Walk the entire deployments table
  1448  	iter, err := txn.Get("job_version", "id")
  1449  	if err != nil {
  1450  		return nil, err
  1451  	}
  1452  
  1453  	ws.Add(iter.WatchCh())
  1454  	return iter, nil
  1455  }
  1456  
  1457  // Jobs returns an iterator over all the jobs
  1458  func (s *StateStore) Jobs(ws memdb.WatchSet) (memdb.ResultIterator, error) {
  1459  	txn := s.db.Txn(false)
  1460  
  1461  	// Walk the entire jobs table
  1462  	iter, err := txn.Get("jobs", "id")
  1463  	if err != nil {
  1464  		return nil, err
  1465  	}
  1466  
  1467  	ws.Add(iter.WatchCh())
  1468  
  1469  	return iter, nil
  1470  }
  1471  
  1472  // JobsByNamespace returns an iterator over all the jobs for the given namespace
  1473  func (s *StateStore) JobsByNamespace(ws memdb.WatchSet, namespace string) (memdb.ResultIterator, error) {
  1474  	txn := s.db.Txn(false)
  1475  	return s.jobsByNamespaceImpl(ws, namespace, txn)
  1476  }
  1477  
  1478  // jobsByNamespaceImpl returns an iterator over all the jobs for the given namespace
  1479  func (s *StateStore) jobsByNamespaceImpl(ws memdb.WatchSet, namespace string, txn *memdb.Txn) (memdb.ResultIterator, error) {
  1480  	// Walk the entire jobs table
  1481  	iter, err := txn.Get("jobs", "id_prefix", namespace, "")
  1482  	if err != nil {
  1483  		return nil, err
  1484  	}
  1485  
  1486  	ws.Add(iter.WatchCh())
  1487  
  1488  	return iter, nil
  1489  }
  1490  
  1491  // JobsByPeriodic returns an iterator over all the periodic or non-periodic jobs.
  1492  func (s *StateStore) JobsByPeriodic(ws memdb.WatchSet, periodic bool) (memdb.ResultIterator, error) {
  1493  	txn := s.db.Txn(false)
  1494  
  1495  	iter, err := txn.Get("jobs", "periodic", periodic)
  1496  	if err != nil {
  1497  		return nil, err
  1498  	}
  1499  
  1500  	ws.Add(iter.WatchCh())
  1501  
  1502  	return iter, nil
  1503  }
  1504  
  1505  // JobsByScheduler returns an iterator over all the jobs with the specific
  1506  // scheduler type.
  1507  func (s *StateStore) JobsByScheduler(ws memdb.WatchSet, schedulerType string) (memdb.ResultIterator, error) {
  1508  	txn := s.db.Txn(false)
  1509  
  1510  	// Return an iterator for jobs with the specific type.
  1511  	iter, err := txn.Get("jobs", "type", schedulerType)
  1512  	if err != nil {
  1513  		return nil, err
  1514  	}
  1515  
  1516  	ws.Add(iter.WatchCh())
  1517  
  1518  	return iter, nil
  1519  }
  1520  
  1521  // JobsByGC returns an iterator over all jobs eligible or uneligible for garbage
  1522  // collection.
  1523  func (s *StateStore) JobsByGC(ws memdb.WatchSet, gc bool) (memdb.ResultIterator, error) {
  1524  	txn := s.db.Txn(false)
  1525  
  1526  	iter, err := txn.Get("jobs", "gc", gc)
  1527  	if err != nil {
  1528  		return nil, err
  1529  	}
  1530  
  1531  	ws.Add(iter.WatchCh())
  1532  
  1533  	return iter, nil
  1534  }
  1535  
  1536  // JobSummary returns a job summary object which matches a specific id.
  1537  func (s *StateStore) JobSummaryByID(ws memdb.WatchSet, namespace, jobID string) (*structs.JobSummary, error) {
  1538  	txn := s.db.Txn(false)
  1539  
  1540  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
  1541  	if namespace == "" {
  1542  		namespace = structs.DefaultNamespace
  1543  	}
  1544  
  1545  	watchCh, existing, err := txn.FirstWatch("job_summary", "id", namespace, jobID)
  1546  	if err != nil {
  1547  		return nil, err
  1548  	}
  1549  
  1550  	ws.Add(watchCh)
  1551  
  1552  	if existing != nil {
  1553  		summary := existing.(*structs.JobSummary)
  1554  		return summary, nil
  1555  	}
  1556  
  1557  	return nil, nil
  1558  }
  1559  
  1560  // JobSummaries walks the entire job summary table and returns all the job
  1561  // summary objects
  1562  func (s *StateStore) JobSummaries(ws memdb.WatchSet) (memdb.ResultIterator, error) {
  1563  	txn := s.db.Txn(false)
  1564  
  1565  	iter, err := txn.Get("job_summary", "id")
  1566  	if err != nil {
  1567  		return nil, err
  1568  	}
  1569  
  1570  	ws.Add(iter.WatchCh())
  1571  
  1572  	return iter, nil
  1573  }
  1574  
  1575  // JobSummaryByPrefix is used to look up Job Summary by id prefix
  1576  func (s *StateStore) JobSummaryByPrefix(ws memdb.WatchSet, namespace, id string) (memdb.ResultIterator, error) {
  1577  	txn := s.db.Txn(false)
  1578  
  1579  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
  1580  	if namespace == "" {
  1581  		namespace = structs.DefaultNamespace
  1582  	}
  1583  
  1584  	iter, err := txn.Get("job_summary", "id_prefix", namespace, id)
  1585  	if err != nil {
  1586  		return nil, fmt.Errorf("eval lookup failed: %v", err)
  1587  	}
  1588  
  1589  	ws.Add(iter.WatchCh())
  1590  
  1591  	return iter, nil
  1592  }
  1593  
  1594  // UpsertPeriodicLaunch is used to register a launch or update it.
  1595  func (s *StateStore) UpsertPeriodicLaunch(index uint64, launch *structs.PeriodicLaunch) error {
  1596  	txn := s.db.Txn(true)
  1597  	defer txn.Abort()
  1598  
  1599  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
  1600  	if launch.Namespace == "" {
  1601  		launch.Namespace = structs.DefaultNamespace
  1602  	}
  1603  
  1604  	// Check if the job already exists
  1605  	existing, err := txn.First("periodic_launch", "id", launch.Namespace, launch.ID)
  1606  	if err != nil {
  1607  		return fmt.Errorf("periodic launch lookup failed: %v", err)
  1608  	}
  1609  
  1610  	// Setup the indexes correctly
  1611  	if existing != nil {
  1612  		launch.CreateIndex = existing.(*structs.PeriodicLaunch).CreateIndex
  1613  		launch.ModifyIndex = index
  1614  	} else {
  1615  		launch.CreateIndex = index
  1616  		launch.ModifyIndex = index
  1617  	}
  1618  
  1619  	// Insert the job
  1620  	if err := txn.Insert("periodic_launch", launch); err != nil {
  1621  		return fmt.Errorf("launch insert failed: %v", err)
  1622  	}
  1623  	if err := txn.Insert("index", &IndexEntry{"periodic_launch", index}); err != nil {
  1624  		return fmt.Errorf("index update failed: %v", err)
  1625  	}
  1626  
  1627  	txn.Commit()
  1628  	return nil
  1629  }
  1630  
  1631  // DeletePeriodicLaunch is used to delete the periodic launch
  1632  func (s *StateStore) DeletePeriodicLaunch(index uint64, namespace, jobID string) error {
  1633  	txn := s.db.Txn(true)
  1634  	defer txn.Abort()
  1635  
  1636  	err := s.DeletePeriodicLaunchTxn(index, namespace, jobID, txn)
  1637  	if err == nil {
  1638  		txn.Commit()
  1639  	}
  1640  	return err
  1641  }
  1642  
  1643  // DeletePeriodicLaunchTxn is used to delete the periodic launch, like DeletePeriodicLaunch
  1644  // but in a transaction.  Useful for when making multiple modifications atomically
  1645  func (s *StateStore) DeletePeriodicLaunchTxn(index uint64, namespace, jobID string, txn Txn) error {
  1646  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
  1647  	if namespace == "" {
  1648  		namespace = structs.DefaultNamespace
  1649  	}
  1650  
  1651  	// Lookup the launch
  1652  	existing, err := txn.First("periodic_launch", "id", namespace, jobID)
  1653  	if err != nil {
  1654  		return fmt.Errorf("launch lookup failed: %v", err)
  1655  	}
  1656  	if existing == nil {
  1657  		return fmt.Errorf("launch not found")
  1658  	}
  1659  
  1660  	// Delete the launch
  1661  	if err := txn.Delete("periodic_launch", existing); err != nil {
  1662  		return fmt.Errorf("launch delete failed: %v", err)
  1663  	}
  1664  	if err := txn.Insert("index", &IndexEntry{"periodic_launch", index}); err != nil {
  1665  		return fmt.Errorf("index update failed: %v", err)
  1666  	}
  1667  
  1668  	return nil
  1669  }
  1670  
  1671  // PeriodicLaunchByID is used to lookup a periodic launch by the periodic job
  1672  // ID.
  1673  func (s *StateStore) PeriodicLaunchByID(ws memdb.WatchSet, namespace, id string) (*structs.PeriodicLaunch, error) {
  1674  	txn := s.db.Txn(false)
  1675  
  1676  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
  1677  	if namespace == "" {
  1678  		namespace = structs.DefaultNamespace
  1679  	}
  1680  
  1681  	watchCh, existing, err := txn.FirstWatch("periodic_launch", "id", namespace, id)
  1682  	if err != nil {
  1683  		return nil, fmt.Errorf("periodic launch lookup failed: %v", err)
  1684  	}
  1685  
  1686  	ws.Add(watchCh)
  1687  
  1688  	if existing != nil {
  1689  		return existing.(*structs.PeriodicLaunch), nil
  1690  	}
  1691  	return nil, nil
  1692  }
  1693  
  1694  // PeriodicLaunches returns an iterator over all the periodic launches
  1695  func (s *StateStore) PeriodicLaunches(ws memdb.WatchSet) (memdb.ResultIterator, error) {
  1696  	txn := s.db.Txn(false)
  1697  
  1698  	// Walk the entire table
  1699  	iter, err := txn.Get("periodic_launch", "id")
  1700  	if err != nil {
  1701  		return nil, err
  1702  	}
  1703  
  1704  	ws.Add(iter.WatchCh())
  1705  
  1706  	return iter, nil
  1707  }
  1708  
  1709  // UpsertEvals is used to upsert a set of evaluations
  1710  func (s *StateStore) UpsertEvals(index uint64, evals []*structs.Evaluation) error {
  1711  	txn := s.db.Txn(true)
  1712  	defer txn.Abort()
  1713  
  1714  	err := s.UpsertEvalsTxn(index, evals, txn)
  1715  	if err == nil {
  1716  		txn.Commit()
  1717  	}
  1718  	return err
  1719  }
  1720  
  1721  // UpsertEvals is used to upsert a set of evaluations, like UpsertEvals
  1722  // but in a transaction.  Useful for when making multiple modifications atomically
  1723  func (s *StateStore) UpsertEvalsTxn(index uint64, evals []*structs.Evaluation, txn Txn) error {
  1724  	// Do a nested upsert
  1725  	jobs := make(map[structs.NamespacedID]string, len(evals))
  1726  	for _, eval := range evals {
  1727  		if err := s.nestedUpsertEval(txn, index, eval); err != nil {
  1728  			return err
  1729  		}
  1730  
  1731  		tuple := structs.NamespacedID{
  1732  			ID:        eval.JobID,
  1733  			Namespace: eval.Namespace,
  1734  		}
  1735  		jobs[tuple] = ""
  1736  	}
  1737  
  1738  	// Set the job's status
  1739  	if err := s.setJobStatuses(index, txn, jobs, false); err != nil {
  1740  		return fmt.Errorf("setting job status failed: %v", err)
  1741  	}
  1742  
  1743  	return nil
  1744  }
  1745  
  1746  // nestedUpsertEvaluation is used to nest an evaluation upsert within a transaction
  1747  func (s *StateStore) nestedUpsertEval(txn *memdb.Txn, index uint64, eval *structs.Evaluation) error {
  1748  	// Lookup the evaluation
  1749  	existing, err := txn.First("evals", "id", eval.ID)
  1750  	if err != nil {
  1751  		return fmt.Errorf("eval lookup failed: %v", err)
  1752  	}
  1753  
  1754  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
  1755  	if eval.Namespace == "" {
  1756  		eval.Namespace = structs.DefaultNamespace
  1757  	}
  1758  
  1759  	// Update the indexes
  1760  	if existing != nil {
  1761  		eval.CreateIndex = existing.(*structs.Evaluation).CreateIndex
  1762  		eval.ModifyIndex = index
  1763  	} else {
  1764  		eval.CreateIndex = index
  1765  		eval.ModifyIndex = index
  1766  	}
  1767  
  1768  	// Update the job summary
  1769  	summaryRaw, err := txn.First("job_summary", "id", eval.Namespace, eval.JobID)
  1770  	if err != nil {
  1771  		return fmt.Errorf("job summary lookup failed: %v", err)
  1772  	}
  1773  	if summaryRaw != nil {
  1774  		js := summaryRaw.(*structs.JobSummary).Copy()
  1775  		hasSummaryChanged := false
  1776  		for tg, num := range eval.QueuedAllocations {
  1777  			if summary, ok := js.Summary[tg]; ok {
  1778  				if summary.Queued != num {
  1779  					summary.Queued = num
  1780  					js.Summary[tg] = summary
  1781  					hasSummaryChanged = true
  1782  				}
  1783  			} else {
  1784  				s.logger.Error("unable to update queued for job and task group", "job_id", eval.JobID, "task_group", tg, "namespace", eval.Namespace)
  1785  			}
  1786  		}
  1787  
  1788  		// Insert the job summary
  1789  		if hasSummaryChanged {
  1790  			// COMPAT 0.7: Upgrade old objects that do not have namespaces
  1791  			if js.Namespace == "" {
  1792  				js.Namespace = structs.DefaultNamespace
  1793  			}
  1794  
  1795  			js.ModifyIndex = index
  1796  			if err := txn.Insert("job_summary", js); err != nil {
  1797  				return fmt.Errorf("job summary insert failed: %v", err)
  1798  			}
  1799  			if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil {
  1800  				return fmt.Errorf("index update failed: %v", err)
  1801  			}
  1802  		}
  1803  	}
  1804  
  1805  	// Check if the job has any blocked evaluations and cancel them
  1806  	if eval.Status == structs.EvalStatusComplete && len(eval.FailedTGAllocs) == 0 {
  1807  		// Get the blocked evaluation for a job if it exists
  1808  		iter, err := txn.Get("evals", "job", eval.Namespace, eval.JobID, structs.EvalStatusBlocked)
  1809  		if err != nil {
  1810  			return fmt.Errorf("failed to get blocked evals for job %q in namespace %q: %v", eval.JobID, eval.Namespace, err)
  1811  		}
  1812  
  1813  		var blocked []*structs.Evaluation
  1814  		for {
  1815  			raw := iter.Next()
  1816  			if raw == nil {
  1817  				break
  1818  			}
  1819  			blocked = append(blocked, raw.(*structs.Evaluation))
  1820  		}
  1821  
  1822  		// Go through and update the evals
  1823  		for _, eval := range blocked {
  1824  			newEval := eval.Copy()
  1825  			newEval.Status = structs.EvalStatusCancelled
  1826  			newEval.StatusDescription = fmt.Sprintf("evaluation %q successful", newEval.ID)
  1827  			newEval.ModifyIndex = index
  1828  
  1829  			// COMPAT 0.7: Upgrade old objects that do not have namespaces
  1830  			if newEval.Namespace == "" {
  1831  				newEval.Namespace = structs.DefaultNamespace
  1832  			}
  1833  
  1834  			if err := txn.Insert("evals", newEval); err != nil {
  1835  				return fmt.Errorf("eval insert failed: %v", err)
  1836  			}
  1837  		}
  1838  	}
  1839  
  1840  	// Insert the eval
  1841  	if err := txn.Insert("evals", eval); err != nil {
  1842  		return fmt.Errorf("eval insert failed: %v", err)
  1843  	}
  1844  	if err := txn.Insert("index", &IndexEntry{"evals", index}); err != nil {
  1845  		return fmt.Errorf("index update failed: %v", err)
  1846  	}
  1847  	return nil
  1848  }
  1849  
  1850  // updateEvalModifyIndex is used to update the modify index of an evaluation that has been
  1851  // through a scheduler pass. This is done as part of plan apply. It ensures that when a subsequent
  1852  // scheduler workers process a re-queued evaluation it sees any partial updates from the plan apply.
  1853  func (s *StateStore) updateEvalModifyIndex(txn *memdb.Txn, index uint64, evalID string) error {
  1854  	// Lookup the evaluation
  1855  	existing, err := txn.First("evals", "id", evalID)
  1856  	if err != nil {
  1857  		return fmt.Errorf("eval lookup failed: %v", err)
  1858  	}
  1859  	if existing == nil {
  1860  		s.logger.Error("unable to find eval", "eval_id", evalID)
  1861  		return fmt.Errorf("unable to find eval id %q", evalID)
  1862  	}
  1863  	eval := existing.(*structs.Evaluation).Copy()
  1864  	// Update the indexes
  1865  	eval.ModifyIndex = index
  1866  
  1867  	// Insert the eval
  1868  	if err := txn.Insert("evals", eval); err != nil {
  1869  		return fmt.Errorf("eval insert failed: %v", err)
  1870  	}
  1871  	if err := txn.Insert("index", &IndexEntry{"evals", index}); err != nil {
  1872  		return fmt.Errorf("index update failed: %v", err)
  1873  	}
  1874  	return nil
  1875  }
  1876  
  1877  // DeleteEval is used to delete an evaluation
  1878  func (s *StateStore) DeleteEval(index uint64, evals []string, allocs []string) error {
  1879  	txn := s.db.Txn(true)
  1880  	defer txn.Abort()
  1881  
  1882  	jobs := make(map[structs.NamespacedID]string, len(evals))
  1883  	for _, eval := range evals {
  1884  		existing, err := txn.First("evals", "id", eval)
  1885  		if err != nil {
  1886  			return fmt.Errorf("eval lookup failed: %v", err)
  1887  		}
  1888  		if existing == nil {
  1889  			continue
  1890  		}
  1891  		if err := txn.Delete("evals", existing); err != nil {
  1892  			return fmt.Errorf("eval delete failed: %v", err)
  1893  		}
  1894  		eval := existing.(*structs.Evaluation)
  1895  
  1896  		tuple := structs.NamespacedID{
  1897  			ID:        eval.JobID,
  1898  			Namespace: eval.Namespace,
  1899  		}
  1900  		jobs[tuple] = ""
  1901  	}
  1902  
  1903  	for _, alloc := range allocs {
  1904  		raw, err := txn.First("allocs", "id", alloc)
  1905  		if err != nil {
  1906  			return fmt.Errorf("alloc lookup failed: %v", err)
  1907  		}
  1908  		if raw == nil {
  1909  			continue
  1910  		}
  1911  		if err := txn.Delete("allocs", raw); err != nil {
  1912  			return fmt.Errorf("alloc delete failed: %v", err)
  1913  		}
  1914  	}
  1915  
  1916  	// Update the indexes
  1917  	if err := txn.Insert("index", &IndexEntry{"evals", index}); err != nil {
  1918  		return fmt.Errorf("index update failed: %v", err)
  1919  	}
  1920  	if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil {
  1921  		return fmt.Errorf("index update failed: %v", err)
  1922  	}
  1923  
  1924  	// Set the job's status
  1925  	if err := s.setJobStatuses(index, txn, jobs, true); err != nil {
  1926  		return fmt.Errorf("setting job status failed: %v", err)
  1927  	}
  1928  
  1929  	txn.Commit()
  1930  	return nil
  1931  }
  1932  
  1933  // EvalByID is used to lookup an eval by its ID
  1934  func (s *StateStore) EvalByID(ws memdb.WatchSet, id string) (*structs.Evaluation, error) {
  1935  	txn := s.db.Txn(false)
  1936  
  1937  	watchCh, existing, err := txn.FirstWatch("evals", "id", id)
  1938  	if err != nil {
  1939  		return nil, fmt.Errorf("eval lookup failed: %v", err)
  1940  	}
  1941  
  1942  	ws.Add(watchCh)
  1943  
  1944  	if existing != nil {
  1945  		return existing.(*structs.Evaluation), nil
  1946  	}
  1947  	return nil, nil
  1948  }
  1949  
  1950  // EvalsByIDPrefix is used to lookup evaluations by prefix in a particular
  1951  // namespace
  1952  func (s *StateStore) EvalsByIDPrefix(ws memdb.WatchSet, namespace, id string) (memdb.ResultIterator, error) {
  1953  	txn := s.db.Txn(false)
  1954  
  1955  	// Get an iterator over all evals by the id prefix
  1956  	iter, err := txn.Get("evals", "id_prefix", id)
  1957  	if err != nil {
  1958  		return nil, fmt.Errorf("eval lookup failed: %v", err)
  1959  	}
  1960  
  1961  	ws.Add(iter.WatchCh())
  1962  
  1963  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
  1964  	if namespace == "" {
  1965  		namespace = structs.DefaultNamespace
  1966  	}
  1967  
  1968  	// Wrap the iterator in a filter
  1969  	wrap := memdb.NewFilterIterator(iter, evalNamespaceFilter(namespace))
  1970  	return wrap, nil
  1971  }
  1972  
  1973  // evalNamespaceFilter returns a filter function that filters all evaluations
  1974  // not in the given namespace.
  1975  func evalNamespaceFilter(namespace string) func(interface{}) bool {
  1976  	return func(raw interface{}) bool {
  1977  		eval, ok := raw.(*structs.Evaluation)
  1978  		if !ok {
  1979  			return true
  1980  		}
  1981  
  1982  		return eval.Namespace != namespace
  1983  	}
  1984  }
  1985  
  1986  // EvalsByJob returns all the evaluations by job id
  1987  func (s *StateStore) EvalsByJob(ws memdb.WatchSet, namespace, jobID string) ([]*structs.Evaluation, error) {
  1988  	txn := s.db.Txn(false)
  1989  
  1990  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
  1991  	if namespace == "" {
  1992  		namespace = structs.DefaultNamespace
  1993  	}
  1994  
  1995  	// Get an iterator over the node allocations
  1996  	iter, err := txn.Get("evals", "job_prefix", namespace, jobID)
  1997  	if err != nil {
  1998  		return nil, err
  1999  	}
  2000  
  2001  	ws.Add(iter.WatchCh())
  2002  
  2003  	var out []*structs.Evaluation
  2004  	for {
  2005  		raw := iter.Next()
  2006  		if raw == nil {
  2007  			break
  2008  		}
  2009  
  2010  		e := raw.(*structs.Evaluation)
  2011  
  2012  		// Filter non-exact matches
  2013  		if e.JobID != jobID {
  2014  			continue
  2015  		}
  2016  
  2017  		out = append(out, e)
  2018  	}
  2019  	return out, nil
  2020  }
  2021  
  2022  // Evals returns an iterator over all the evaluations
  2023  func (s *StateStore) Evals(ws memdb.WatchSet) (memdb.ResultIterator, error) {
  2024  	txn := s.db.Txn(false)
  2025  
  2026  	// Walk the entire table
  2027  	iter, err := txn.Get("evals", "id")
  2028  	if err != nil {
  2029  		return nil, err
  2030  	}
  2031  
  2032  	ws.Add(iter.WatchCh())
  2033  
  2034  	return iter, nil
  2035  }
  2036  
  2037  // EvalsByNamespace returns an iterator over all the evaluations in the given
  2038  // namespace
  2039  func (s *StateStore) EvalsByNamespace(ws memdb.WatchSet, namespace string) (memdb.ResultIterator, error) {
  2040  	txn := s.db.Txn(false)
  2041  
  2042  	// Walk the entire table
  2043  	iter, err := txn.Get("evals", "namespace", namespace)
  2044  	if err != nil {
  2045  		return nil, err
  2046  	}
  2047  
  2048  	ws.Add(iter.WatchCh())
  2049  
  2050  	return iter, nil
  2051  }
  2052  
  2053  // UpdateAllocsFromClient is used to update an allocation based on input
  2054  // from a client. While the schedulers are the authority on the allocation for
  2055  // most things, some updates are authoritative from the client. Specifically,
  2056  // the desired state comes from the schedulers, while the actual state comes
  2057  // from clients.
  2058  func (s *StateStore) UpdateAllocsFromClient(index uint64, allocs []*structs.Allocation) error {
  2059  	txn := s.db.Txn(true)
  2060  	defer txn.Abort()
  2061  
  2062  	// Handle each of the updated allocations
  2063  	for _, alloc := range allocs {
  2064  		if err := s.nestedUpdateAllocFromClient(txn, index, alloc); err != nil {
  2065  			return err
  2066  		}
  2067  	}
  2068  
  2069  	// Update the indexes
  2070  	if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil {
  2071  		return fmt.Errorf("index update failed: %v", err)
  2072  	}
  2073  
  2074  	txn.Commit()
  2075  	return nil
  2076  }
  2077  
  2078  // nestedUpdateAllocFromClient is used to nest an update of an allocation with client status
  2079  func (s *StateStore) nestedUpdateAllocFromClient(txn *memdb.Txn, index uint64, alloc *structs.Allocation) error {
  2080  	// Look for existing alloc
  2081  	existing, err := txn.First("allocs", "id", alloc.ID)
  2082  	if err != nil {
  2083  		return fmt.Errorf("alloc lookup failed: %v", err)
  2084  	}
  2085  
  2086  	// Nothing to do if this does not exist
  2087  	if existing == nil {
  2088  		return nil
  2089  	}
  2090  	exist := existing.(*structs.Allocation)
  2091  
  2092  	// Copy everything from the existing allocation
  2093  	copyAlloc := exist.Copy()
  2094  
  2095  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
  2096  	if copyAlloc.Namespace == "" {
  2097  		copyAlloc.Namespace = structs.DefaultNamespace
  2098  	}
  2099  
  2100  	// Pull in anything the client is the authority on
  2101  	copyAlloc.ClientStatus = alloc.ClientStatus
  2102  	copyAlloc.ClientDescription = alloc.ClientDescription
  2103  	copyAlloc.TaskStates = alloc.TaskStates
  2104  
  2105  	// The client can only set its deployment health and timestamp, so just take
  2106  	// those
  2107  	if copyAlloc.DeploymentStatus != nil && alloc.DeploymentStatus != nil {
  2108  		oldHasHealthy := copyAlloc.DeploymentStatus.HasHealth()
  2109  		newHasHealthy := alloc.DeploymentStatus.HasHealth()
  2110  
  2111  		// We got new health information from the client
  2112  		if newHasHealthy && (!oldHasHealthy || *copyAlloc.DeploymentStatus.Healthy != *alloc.DeploymentStatus.Healthy) {
  2113  			// Updated deployment health and timestamp
  2114  			copyAlloc.DeploymentStatus.Healthy = helper.BoolToPtr(*alloc.DeploymentStatus.Healthy)
  2115  			copyAlloc.DeploymentStatus.Timestamp = alloc.DeploymentStatus.Timestamp
  2116  			copyAlloc.DeploymentStatus.ModifyIndex = index
  2117  		}
  2118  	} else if alloc.DeploymentStatus != nil {
  2119  		// First time getting a deployment status so copy everything and just
  2120  		// set the index
  2121  		copyAlloc.DeploymentStatus = alloc.DeploymentStatus.Copy()
  2122  		copyAlloc.DeploymentStatus.ModifyIndex = index
  2123  	}
  2124  
  2125  	// Update the modify index
  2126  	copyAlloc.ModifyIndex = index
  2127  
  2128  	// Update the modify time
  2129  	copyAlloc.ModifyTime = alloc.ModifyTime
  2130  
  2131  	if err := s.updateDeploymentWithAlloc(index, copyAlloc, exist, txn); err != nil {
  2132  		return fmt.Errorf("error updating deployment: %v", err)
  2133  	}
  2134  
  2135  	if err := s.updateSummaryWithAlloc(index, copyAlloc, exist, txn); err != nil {
  2136  		return fmt.Errorf("error updating job summary: %v", err)
  2137  	}
  2138  
  2139  	if err := s.updateEntWithAlloc(index, copyAlloc, exist, txn); err != nil {
  2140  		return err
  2141  	}
  2142  
  2143  	// Update the allocation
  2144  	if err := txn.Insert("allocs", copyAlloc); err != nil {
  2145  		return fmt.Errorf("alloc insert failed: %v", err)
  2146  	}
  2147  
  2148  	// Set the job's status
  2149  	forceStatus := ""
  2150  	if !copyAlloc.TerminalStatus() {
  2151  		forceStatus = structs.JobStatusRunning
  2152  	}
  2153  
  2154  	tuple := structs.NamespacedID{
  2155  		ID:        exist.JobID,
  2156  		Namespace: exist.Namespace,
  2157  	}
  2158  	jobs := map[structs.NamespacedID]string{tuple: forceStatus}
  2159  
  2160  	if err := s.setJobStatuses(index, txn, jobs, false); err != nil {
  2161  		return fmt.Errorf("setting job status failed: %v", err)
  2162  	}
  2163  	return nil
  2164  }
  2165  
  2166  // UpsertAllocs is used to evict a set of allocations and allocate new ones at
  2167  // the same time.
  2168  func (s *StateStore) UpsertAllocs(index uint64, allocs []*structs.Allocation) error {
  2169  	txn := s.db.Txn(true)
  2170  	defer txn.Abort()
  2171  	if err := s.upsertAllocsImpl(index, allocs, txn); err != nil {
  2172  		return err
  2173  	}
  2174  	txn.Commit()
  2175  	return nil
  2176  }
  2177  
  2178  // upsertAllocs is the actual implementation of UpsertAllocs so that it may be
  2179  // used with an existing transaction.
  2180  func (s *StateStore) upsertAllocsImpl(index uint64, allocs []*structs.Allocation, txn *memdb.Txn) error {
  2181  	// Handle the allocations
  2182  	jobs := make(map[structs.NamespacedID]string, 1)
  2183  	for _, alloc := range allocs {
  2184  		existing, err := txn.First("allocs", "id", alloc.ID)
  2185  		if err != nil {
  2186  			return fmt.Errorf("alloc lookup failed: %v", err)
  2187  		}
  2188  		exist, _ := existing.(*structs.Allocation)
  2189  
  2190  		if exist == nil {
  2191  			alloc.CreateIndex = index
  2192  			alloc.ModifyIndex = index
  2193  			alloc.AllocModifyIndex = index
  2194  			if alloc.DeploymentStatus != nil {
  2195  				alloc.DeploymentStatus.ModifyIndex = index
  2196  			}
  2197  
  2198  			// Issue https://github.com/hashicorp/nomad/issues/2583 uncovered
  2199  			// the a race between a forced garbage collection and the scheduler
  2200  			// marking an allocation as terminal. The issue is that the
  2201  			// allocation from the scheduler has its job normalized and the FSM
  2202  			// will only denormalize if the allocation is not terminal.  However
  2203  			// if the allocation is garbage collected, that will result in a
  2204  			// allocation being upserted for the first time without a job
  2205  			// attached. By returning an error here, it will cause the FSM to
  2206  			// error, causing the plan_apply to error and thus causing the
  2207  			// evaluation to be failed. This will force an index refresh that
  2208  			// should solve this issue.
  2209  			if alloc.Job == nil {
  2210  				return fmt.Errorf("attempting to upsert allocation %q without a job", alloc.ID)
  2211  			}
  2212  		} else {
  2213  			alloc.CreateIndex = exist.CreateIndex
  2214  			alloc.ModifyIndex = index
  2215  			alloc.AllocModifyIndex = index
  2216  
  2217  			// Keep the clients task states
  2218  			alloc.TaskStates = exist.TaskStates
  2219  
  2220  			// If the scheduler is marking this allocation as lost we do not
  2221  			// want to reuse the status of the existing allocation.
  2222  			if alloc.ClientStatus != structs.AllocClientStatusLost {
  2223  				alloc.ClientStatus = exist.ClientStatus
  2224  				alloc.ClientDescription = exist.ClientDescription
  2225  			}
  2226  
  2227  			// The job has been denormalized so re-attach the original job
  2228  			if alloc.Job == nil {
  2229  				alloc.Job = exist.Job
  2230  			}
  2231  		}
  2232  
  2233  		// COMPAT 0.7: Upgrade old objects that do not have namespaces
  2234  		if alloc.Namespace == "" {
  2235  			alloc.Namespace = structs.DefaultNamespace
  2236  		}
  2237  
  2238  		// OPTIMIZATION:
  2239  		// These should be given a map of new to old allocation and the updates
  2240  		// should be one on all changes. The current implementation causes O(n)
  2241  		// lookups/copies/insertions rather than O(1)
  2242  		if err := s.updateDeploymentWithAlloc(index, alloc, exist, txn); err != nil {
  2243  			return fmt.Errorf("error updating deployment: %v", err)
  2244  		}
  2245  
  2246  		if err := s.updateSummaryWithAlloc(index, alloc, exist, txn); err != nil {
  2247  			return fmt.Errorf("error updating job summary: %v", err)
  2248  		}
  2249  
  2250  		if err := s.updateEntWithAlloc(index, alloc, exist, txn); err != nil {
  2251  			return err
  2252  		}
  2253  
  2254  		if err := txn.Insert("allocs", alloc); err != nil {
  2255  			return fmt.Errorf("alloc insert failed: %v", err)
  2256  		}
  2257  
  2258  		if alloc.PreviousAllocation != "" {
  2259  			prevAlloc, err := txn.First("allocs", "id", alloc.PreviousAllocation)
  2260  			if err != nil {
  2261  				return fmt.Errorf("alloc lookup failed: %v", err)
  2262  			}
  2263  			existingPrevAlloc, _ := prevAlloc.(*structs.Allocation)
  2264  			if existingPrevAlloc != nil {
  2265  				prevAllocCopy := existingPrevAlloc.Copy()
  2266  				prevAllocCopy.NextAllocation = alloc.ID
  2267  				prevAllocCopy.ModifyIndex = index
  2268  				if err := txn.Insert("allocs", prevAllocCopy); err != nil {
  2269  					return fmt.Errorf("alloc insert failed: %v", err)
  2270  				}
  2271  			}
  2272  		}
  2273  
  2274  		// If the allocation is running, force the job to running status.
  2275  		forceStatus := ""
  2276  		if !alloc.TerminalStatus() {
  2277  			forceStatus = structs.JobStatusRunning
  2278  		}
  2279  
  2280  		tuple := structs.NamespacedID{
  2281  			ID:        alloc.JobID,
  2282  			Namespace: alloc.Namespace,
  2283  		}
  2284  		jobs[tuple] = forceStatus
  2285  	}
  2286  
  2287  	// Update the indexes
  2288  	if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil {
  2289  		return fmt.Errorf("index update failed: %v", err)
  2290  	}
  2291  
  2292  	// Set the job's status
  2293  	if err := s.setJobStatuses(index, txn, jobs, false); err != nil {
  2294  		return fmt.Errorf("setting job status failed: %v", err)
  2295  	}
  2296  
  2297  	return nil
  2298  }
  2299  
  2300  // UpdateAllocsDesiredTransitions is used to update a set of allocations
  2301  // desired transitions.
  2302  func (s *StateStore) UpdateAllocsDesiredTransitions(index uint64, allocs map[string]*structs.DesiredTransition,
  2303  	evals []*structs.Evaluation) error {
  2304  
  2305  	txn := s.db.Txn(true)
  2306  	defer txn.Abort()
  2307  
  2308  	// Handle each of the updated allocations
  2309  	for id, transition := range allocs {
  2310  		if err := s.nestedUpdateAllocDesiredTransition(txn, index, id, transition); err != nil {
  2311  			return err
  2312  		}
  2313  	}
  2314  
  2315  	for _, eval := range evals {
  2316  		if err := s.nestedUpsertEval(txn, index, eval); err != nil {
  2317  			return err
  2318  		}
  2319  	}
  2320  
  2321  	// Update the indexes
  2322  	if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil {
  2323  		return fmt.Errorf("index update failed: %v", err)
  2324  	}
  2325  
  2326  	txn.Commit()
  2327  	return nil
  2328  }
  2329  
  2330  // nestedUpdateAllocDesiredTransition is used to nest an update of an
  2331  // allocations desired transition
  2332  func (s *StateStore) nestedUpdateAllocDesiredTransition(
  2333  	txn *memdb.Txn, index uint64, allocID string,
  2334  	transition *structs.DesiredTransition) error {
  2335  
  2336  	// Look for existing alloc
  2337  	existing, err := txn.First("allocs", "id", allocID)
  2338  	if err != nil {
  2339  		return fmt.Errorf("alloc lookup failed: %v", err)
  2340  	}
  2341  
  2342  	// Nothing to do if this does not exist
  2343  	if existing == nil {
  2344  		return nil
  2345  	}
  2346  	exist := existing.(*structs.Allocation)
  2347  
  2348  	// Copy everything from the existing allocation
  2349  	copyAlloc := exist.Copy()
  2350  
  2351  	// Merge the desired transitions
  2352  	copyAlloc.DesiredTransition.Merge(transition)
  2353  
  2354  	// Update the modify index
  2355  	copyAlloc.ModifyIndex = index
  2356  
  2357  	// Update the allocation
  2358  	if err := txn.Insert("allocs", copyAlloc); err != nil {
  2359  		return fmt.Errorf("alloc insert failed: %v", err)
  2360  	}
  2361  
  2362  	return nil
  2363  }
  2364  
  2365  // AllocByID is used to lookup an allocation by its ID
  2366  func (s *StateStore) AllocByID(ws memdb.WatchSet, id string) (*structs.Allocation, error) {
  2367  	txn := s.db.Txn(false)
  2368  
  2369  	watchCh, existing, err := txn.FirstWatch("allocs", "id", id)
  2370  	if err != nil {
  2371  		return nil, fmt.Errorf("alloc lookup failed: %v", err)
  2372  	}
  2373  
  2374  	ws.Add(watchCh)
  2375  
  2376  	if existing != nil {
  2377  		return existing.(*structs.Allocation), nil
  2378  	}
  2379  	return nil, nil
  2380  }
  2381  
  2382  // AllocsByIDPrefix is used to lookup allocs by prefix
  2383  func (s *StateStore) AllocsByIDPrefix(ws memdb.WatchSet, namespace, id string) (memdb.ResultIterator, error) {
  2384  	txn := s.db.Txn(false)
  2385  
  2386  	iter, err := txn.Get("allocs", "id_prefix", id)
  2387  	if err != nil {
  2388  		return nil, fmt.Errorf("alloc lookup failed: %v", err)
  2389  	}
  2390  
  2391  	ws.Add(iter.WatchCh())
  2392  
  2393  	// Wrap the iterator in a filter
  2394  	wrap := memdb.NewFilterIterator(iter, allocNamespaceFilter(namespace))
  2395  	return wrap, nil
  2396  }
  2397  
  2398  // allocNamespaceFilter returns a filter function that filters all allocations
  2399  // not in the given namespace.
  2400  func allocNamespaceFilter(namespace string) func(interface{}) bool {
  2401  	return func(raw interface{}) bool {
  2402  		alloc, ok := raw.(*structs.Allocation)
  2403  		if !ok {
  2404  			return true
  2405  		}
  2406  
  2407  		return alloc.Namespace != namespace
  2408  	}
  2409  }
  2410  
  2411  // AllocsByNode returns all the allocations by node
  2412  func (s *StateStore) AllocsByNode(ws memdb.WatchSet, node string) ([]*structs.Allocation, error) {
  2413  	txn := s.db.Txn(false)
  2414  
  2415  	// Get an iterator over the node allocations, using only the
  2416  	// node prefix which ignores the terminal status
  2417  	iter, err := txn.Get("allocs", "node_prefix", node)
  2418  	if err != nil {
  2419  		return nil, err
  2420  	}
  2421  
  2422  	ws.Add(iter.WatchCh())
  2423  
  2424  	var out []*structs.Allocation
  2425  	for {
  2426  		raw := iter.Next()
  2427  		if raw == nil {
  2428  			break
  2429  		}
  2430  		out = append(out, raw.(*structs.Allocation))
  2431  	}
  2432  	return out, nil
  2433  }
  2434  
  2435  // AllocsByNode returns all the allocations by node and terminal status
  2436  func (s *StateStore) AllocsByNodeTerminal(ws memdb.WatchSet, node string, terminal bool) ([]*structs.Allocation, error) {
  2437  	txn := s.db.Txn(false)
  2438  
  2439  	// Get an iterator over the node allocations
  2440  	iter, err := txn.Get("allocs", "node", node, terminal)
  2441  	if err != nil {
  2442  		return nil, err
  2443  	}
  2444  
  2445  	ws.Add(iter.WatchCh())
  2446  
  2447  	var out []*structs.Allocation
  2448  	for {
  2449  		raw := iter.Next()
  2450  		if raw == nil {
  2451  			break
  2452  		}
  2453  		out = append(out, raw.(*structs.Allocation))
  2454  	}
  2455  	return out, nil
  2456  }
  2457  
  2458  // AllocsByJob returns all the allocations by job id
  2459  func (s *StateStore) AllocsByJob(ws memdb.WatchSet, namespace, jobID string, all bool) ([]*structs.Allocation, error) {
  2460  	txn := s.db.Txn(false)
  2461  
  2462  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
  2463  	if namespace == "" {
  2464  		namespace = structs.DefaultNamespace
  2465  	}
  2466  
  2467  	// Get the job
  2468  	var job *structs.Job
  2469  	rawJob, err := txn.First("jobs", "id", namespace, jobID)
  2470  	if err != nil {
  2471  		return nil, err
  2472  	}
  2473  	if rawJob != nil {
  2474  		job = rawJob.(*structs.Job)
  2475  	}
  2476  
  2477  	// Get an iterator over the node allocations
  2478  	iter, err := txn.Get("allocs", "job", namespace, jobID)
  2479  	if err != nil {
  2480  		return nil, err
  2481  	}
  2482  
  2483  	ws.Add(iter.WatchCh())
  2484  
  2485  	var out []*structs.Allocation
  2486  	for {
  2487  		raw := iter.Next()
  2488  		if raw == nil {
  2489  			break
  2490  		}
  2491  
  2492  		alloc := raw.(*structs.Allocation)
  2493  		// If the allocation belongs to a job with the same ID but a different
  2494  		// create index and we are not getting all the allocations whose Jobs
  2495  		// matches the same Job ID then we skip it
  2496  		if !all && job != nil && alloc.Job.CreateIndex != job.CreateIndex {
  2497  			continue
  2498  		}
  2499  		out = append(out, raw.(*structs.Allocation))
  2500  	}
  2501  	return out, nil
  2502  }
  2503  
  2504  // AllocsByEval returns all the allocations by eval id
  2505  func (s *StateStore) AllocsByEval(ws memdb.WatchSet, evalID string) ([]*structs.Allocation, error) {
  2506  	txn := s.db.Txn(false)
  2507  
  2508  	// Get an iterator over the eval allocations
  2509  	iter, err := txn.Get("allocs", "eval", evalID)
  2510  	if err != nil {
  2511  		return nil, err
  2512  	}
  2513  
  2514  	ws.Add(iter.WatchCh())
  2515  
  2516  	var out []*structs.Allocation
  2517  	for {
  2518  		raw := iter.Next()
  2519  		if raw == nil {
  2520  			break
  2521  		}
  2522  		out = append(out, raw.(*structs.Allocation))
  2523  	}
  2524  	return out, nil
  2525  }
  2526  
  2527  // AllocsByDeployment returns all the allocations by deployment id
  2528  func (s *StateStore) AllocsByDeployment(ws memdb.WatchSet, deploymentID string) ([]*structs.Allocation, error) {
  2529  	txn := s.db.Txn(false)
  2530  
  2531  	// Get an iterator over the deployments allocations
  2532  	iter, err := txn.Get("allocs", "deployment", deploymentID)
  2533  	if err != nil {
  2534  		return nil, err
  2535  	}
  2536  
  2537  	ws.Add(iter.WatchCh())
  2538  
  2539  	var out []*structs.Allocation
  2540  	for {
  2541  		raw := iter.Next()
  2542  		if raw == nil {
  2543  			break
  2544  		}
  2545  		out = append(out, raw.(*structs.Allocation))
  2546  	}
  2547  	return out, nil
  2548  }
  2549  
  2550  // Allocs returns an iterator over all the evaluations
  2551  func (s *StateStore) Allocs(ws memdb.WatchSet) (memdb.ResultIterator, error) {
  2552  	txn := s.db.Txn(false)
  2553  
  2554  	// Walk the entire table
  2555  	iter, err := txn.Get("allocs", "id")
  2556  	if err != nil {
  2557  		return nil, err
  2558  	}
  2559  
  2560  	ws.Add(iter.WatchCh())
  2561  
  2562  	return iter, nil
  2563  }
  2564  
  2565  // AllocsByNamespace returns an iterator over all the allocations in the
  2566  // namespace
  2567  func (s *StateStore) AllocsByNamespace(ws memdb.WatchSet, namespace string) (memdb.ResultIterator, error) {
  2568  	txn := s.db.Txn(false)
  2569  	return s.allocsByNamespaceImpl(ws, txn, namespace)
  2570  }
  2571  
  2572  // allocsByNamespaceImpl returns an iterator over all the allocations in the
  2573  // namespace
  2574  func (s *StateStore) allocsByNamespaceImpl(ws memdb.WatchSet, txn *memdb.Txn, namespace string) (memdb.ResultIterator, error) {
  2575  	// Walk the entire table
  2576  	iter, err := txn.Get("allocs", "namespace", namespace)
  2577  	if err != nil {
  2578  		return nil, err
  2579  	}
  2580  
  2581  	ws.Add(iter.WatchCh())
  2582  
  2583  	return iter, nil
  2584  }
  2585  
  2586  // UpsertVaultAccessors is used to register a set of Vault Accessors
  2587  func (s *StateStore) UpsertVaultAccessor(index uint64, accessors []*structs.VaultAccessor) error {
  2588  	txn := s.db.Txn(true)
  2589  	defer txn.Abort()
  2590  
  2591  	for _, accessor := range accessors {
  2592  		// Set the create index
  2593  		accessor.CreateIndex = index
  2594  
  2595  		// Insert the accessor
  2596  		if err := txn.Insert("vault_accessors", accessor); err != nil {
  2597  			return fmt.Errorf("accessor insert failed: %v", err)
  2598  		}
  2599  	}
  2600  
  2601  	if err := txn.Insert("index", &IndexEntry{"vault_accessors", index}); err != nil {
  2602  		return fmt.Errorf("index update failed: %v", err)
  2603  	}
  2604  
  2605  	txn.Commit()
  2606  	return nil
  2607  }
  2608  
  2609  // DeleteVaultAccessors is used to delete a set of Vault Accessors
  2610  func (s *StateStore) DeleteVaultAccessors(index uint64, accessors []*structs.VaultAccessor) error {
  2611  	txn := s.db.Txn(true)
  2612  	defer txn.Abort()
  2613  
  2614  	// Lookup the accessor
  2615  	for _, accessor := range accessors {
  2616  		// Delete the accessor
  2617  		if err := txn.Delete("vault_accessors", accessor); err != nil {
  2618  			return fmt.Errorf("accessor delete failed: %v", err)
  2619  		}
  2620  	}
  2621  
  2622  	if err := txn.Insert("index", &IndexEntry{"vault_accessors", index}); err != nil {
  2623  		return fmt.Errorf("index update failed: %v", err)
  2624  	}
  2625  
  2626  	txn.Commit()
  2627  	return nil
  2628  }
  2629  
  2630  // VaultAccessor returns the given Vault accessor
  2631  func (s *StateStore) VaultAccessor(ws memdb.WatchSet, accessor string) (*structs.VaultAccessor, error) {
  2632  	txn := s.db.Txn(false)
  2633  
  2634  	watchCh, existing, err := txn.FirstWatch("vault_accessors", "id", accessor)
  2635  	if err != nil {
  2636  		return nil, fmt.Errorf("accessor lookup failed: %v", err)
  2637  	}
  2638  
  2639  	ws.Add(watchCh)
  2640  
  2641  	if existing != nil {
  2642  		return existing.(*structs.VaultAccessor), nil
  2643  	}
  2644  
  2645  	return nil, nil
  2646  }
  2647  
  2648  // VaultAccessors returns an iterator of Vault accessors.
  2649  func (s *StateStore) VaultAccessors(ws memdb.WatchSet) (memdb.ResultIterator, error) {
  2650  	txn := s.db.Txn(false)
  2651  
  2652  	iter, err := txn.Get("vault_accessors", "id")
  2653  	if err != nil {
  2654  		return nil, err
  2655  	}
  2656  
  2657  	ws.Add(iter.WatchCh())
  2658  
  2659  	return iter, nil
  2660  }
  2661  
  2662  // VaultAccessorsByAlloc returns all the Vault accessors by alloc id
  2663  func (s *StateStore) VaultAccessorsByAlloc(ws memdb.WatchSet, allocID string) ([]*structs.VaultAccessor, error) {
  2664  	txn := s.db.Txn(false)
  2665  
  2666  	// Get an iterator over the accessors
  2667  	iter, err := txn.Get("vault_accessors", "alloc_id", allocID)
  2668  	if err != nil {
  2669  		return nil, err
  2670  	}
  2671  
  2672  	ws.Add(iter.WatchCh())
  2673  
  2674  	var out []*structs.VaultAccessor
  2675  	for {
  2676  		raw := iter.Next()
  2677  		if raw == nil {
  2678  			break
  2679  		}
  2680  		out = append(out, raw.(*structs.VaultAccessor))
  2681  	}
  2682  	return out, nil
  2683  }
  2684  
  2685  // VaultAccessorsByNode returns all the Vault accessors by node id
  2686  func (s *StateStore) VaultAccessorsByNode(ws memdb.WatchSet, nodeID string) ([]*structs.VaultAccessor, error) {
  2687  	txn := s.db.Txn(false)
  2688  
  2689  	// Get an iterator over the accessors
  2690  	iter, err := txn.Get("vault_accessors", "node_id", nodeID)
  2691  	if err != nil {
  2692  		return nil, err
  2693  	}
  2694  
  2695  	ws.Add(iter.WatchCh())
  2696  
  2697  	var out []*structs.VaultAccessor
  2698  	for {
  2699  		raw := iter.Next()
  2700  		if raw == nil {
  2701  			break
  2702  		}
  2703  		out = append(out, raw.(*structs.VaultAccessor))
  2704  	}
  2705  	return out, nil
  2706  }
  2707  
  2708  // UpdateDeploymentStatus is used to make deployment status updates and
  2709  // potentially make a evaluation
  2710  func (s *StateStore) UpdateDeploymentStatus(index uint64, req *structs.DeploymentStatusUpdateRequest) error {
  2711  	txn := s.db.Txn(true)
  2712  	defer txn.Abort()
  2713  
  2714  	if err := s.updateDeploymentStatusImpl(index, req.DeploymentUpdate, txn); err != nil {
  2715  		return err
  2716  	}
  2717  
  2718  	// Upsert the job if necessary
  2719  	if req.Job != nil {
  2720  		if err := s.upsertJobImpl(index, req.Job, false, txn); err != nil {
  2721  			return err
  2722  		}
  2723  	}
  2724  
  2725  	// Upsert the optional eval
  2726  	if req.Eval != nil {
  2727  		if err := s.nestedUpsertEval(txn, index, req.Eval); err != nil {
  2728  			return err
  2729  		}
  2730  	}
  2731  
  2732  	txn.Commit()
  2733  	return nil
  2734  }
  2735  
  2736  // updateDeploymentStatusImpl is used to make deployment status updates
  2737  func (s *StateStore) updateDeploymentStatusImpl(index uint64, u *structs.DeploymentStatusUpdate, txn *memdb.Txn) error {
  2738  	// Retrieve deployment
  2739  	ws := memdb.NewWatchSet()
  2740  	deployment, err := s.deploymentByIDImpl(ws, u.DeploymentID, txn)
  2741  	if err != nil {
  2742  		return err
  2743  	} else if deployment == nil {
  2744  		return fmt.Errorf("Deployment ID %q couldn't be updated as it does not exist", u.DeploymentID)
  2745  	} else if !deployment.Active() {
  2746  		return fmt.Errorf("Deployment %q has terminal status %q:", deployment.ID, deployment.Status)
  2747  	}
  2748  
  2749  	// Apply the new status
  2750  	copy := deployment.Copy()
  2751  	copy.Status = u.Status
  2752  	copy.StatusDescription = u.StatusDescription
  2753  	copy.ModifyIndex = index
  2754  
  2755  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
  2756  	if copy.Namespace == "" {
  2757  		copy.Namespace = structs.DefaultNamespace
  2758  	}
  2759  
  2760  	// Insert the deployment
  2761  	if err := txn.Insert("deployment", copy); err != nil {
  2762  		return err
  2763  	}
  2764  
  2765  	// Update the index
  2766  	if err := txn.Insert("index", &IndexEntry{"deployment", index}); err != nil {
  2767  		return fmt.Errorf("index update failed: %v", err)
  2768  	}
  2769  
  2770  	// If the deployment is being marked as complete, set the job to stable.
  2771  	if copy.Status == structs.DeploymentStatusSuccessful {
  2772  		if err := s.updateJobStabilityImpl(index, copy.Namespace, copy.JobID, copy.JobVersion, true, txn); err != nil {
  2773  			return fmt.Errorf("failed to update job stability: %v", err)
  2774  		}
  2775  	}
  2776  
  2777  	return nil
  2778  }
  2779  
  2780  // UpdateJobStability updates the stability of the given job and version to the
  2781  // desired status.
  2782  func (s *StateStore) UpdateJobStability(index uint64, namespace, jobID string, jobVersion uint64, stable bool) error {
  2783  	txn := s.db.Txn(true)
  2784  	defer txn.Abort()
  2785  
  2786  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
  2787  	if namespace == "" {
  2788  		namespace = structs.DefaultNamespace
  2789  	}
  2790  
  2791  	if err := s.updateJobStabilityImpl(index, namespace, jobID, jobVersion, stable, txn); err != nil {
  2792  		return err
  2793  	}
  2794  
  2795  	txn.Commit()
  2796  	return nil
  2797  }
  2798  
  2799  // updateJobStabilityImpl updates the stability of the given job and version
  2800  func (s *StateStore) updateJobStabilityImpl(index uint64, namespace, jobID string, jobVersion uint64, stable bool, txn *memdb.Txn) error {
  2801  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
  2802  	if namespace == "" {
  2803  		namespace = structs.DefaultNamespace
  2804  	}
  2805  
  2806  	// Get the job that is referenced
  2807  	job, err := s.jobByIDAndVersionImpl(nil, namespace, jobID, jobVersion, txn)
  2808  	if err != nil {
  2809  		return err
  2810  	}
  2811  
  2812  	// Has already been cleared, nothing to do
  2813  	if job == nil {
  2814  		return nil
  2815  	}
  2816  
  2817  	// If the job already has the desired stability, nothing to do
  2818  	if job.Stable == stable {
  2819  		return nil
  2820  	}
  2821  
  2822  	copy := job.Copy()
  2823  	copy.Stable = stable
  2824  	return s.upsertJobImpl(index, copy, true, txn)
  2825  }
  2826  
// UpdateDeploymentPromotion is used to promote canaries in a deployment and
// potentially make an evaluation.
//
// The request selects either all task groups (req.All) or the groups listed in
// req.Groups. Every selected group with a non-zero DesiredCanaries must have at
// least that many healthy, non-terminal placed canaries; otherwise an error
// listing each short group is returned and nothing is committed. On success
// the selected groups are marked as promoted, the optional eval is upserted,
// and each healthy canary has its Canary flag cleared.
func (s *StateStore) UpdateDeploymentPromotion(index uint64, req *structs.ApplyDeploymentPromoteRequest) error {
	txn := s.db.Txn(true)
	defer txn.Abort()

	// Retrieve deployment and ensure it is not terminal and is active
	ws := memdb.NewWatchSet()
	deployment, err := s.deploymentByIDImpl(ws, req.DeploymentID, txn)
	if err != nil {
		return err
	} else if deployment == nil {
		return fmt.Errorf("Deployment ID %q couldn't be updated as it does not exist", req.DeploymentID)
	} else if !deployment.Active() {
		return fmt.Errorf("Deployment %q has terminal status %q:", deployment.ID, deployment.Status)
	}

	// Retrieve the allocations that belong to the deployment
	iter, err := txn.Get("allocs", "deployment", req.DeploymentID)
	if err != nil {
		return err
	}

	// groupIndex is the set of groups being promoted. It is only consulted
	// when req.All is false.
	groupIndex := make(map[string]struct{}, len(req.Groups))
	for _, g := range req.Groups {
		groupIndex[g] = struct{}{}
	}

	// canaryIndex is the set of placed canaries in the deployment
	canaryIndex := make(map[string]struct{}, len(deployment.TaskGroups))
	for _, state := range deployment.TaskGroups {
		for _, c := range state.PlacedCanaries {
			canaryIndex[c] = struct{}{}
		}
	}

	// healthyCounts is a mapping of group to the number of healthy canaries
	healthyCounts := make(map[string]int, len(deployment.TaskGroups))

	// promotable is the set of allocations that we can move from canary to
	// non-canary
	var promotable []*structs.Allocation

	for {
		raw := iter.Next()
		if raw == nil {
			break
		}

		alloc := raw.(*structs.Allocation)

		// Check that the alloc is a canary
		if _, ok := canaryIndex[alloc.ID]; !ok {
			continue
		}

		// Check that the canary is part of a group being promoted
		if _, ok := groupIndex[alloc.TaskGroup]; !req.All && !ok {
			continue
		}

		// Only healthy, non-terminal canaries count towards promotion
		if alloc.TerminalStatus() || !alloc.DeploymentStatus.IsHealthy() {
			continue
		}

		healthyCounts[alloc.TaskGroup]++
		promotable = append(promotable, alloc)
	}

	// Determine if we have enough healthy allocations. Every group that falls
	// short contributes one error so the caller sees all of them at once.
	var unhealthyErr multierror.Error
	for tg, state := range deployment.TaskGroups {
		if _, ok := groupIndex[tg]; !req.All && !ok {
			continue
		}

		// Groups that do not ask for canaries have nothing to verify
		need := state.DesiredCanaries
		if need == 0 {
			continue
		}

		if have := healthyCounts[tg]; have < need {
			multierror.Append(&unhealthyErr, fmt.Errorf("Task group %q has %d/%d healthy allocations", tg, have, need))
		}
	}

	if err := unhealthyErr.ErrorOrNil(); err != nil {
		return err
	}

	// Update a copy of the deployment, marking each selected group as promoted
	copy := deployment.Copy()
	copy.ModifyIndex = index
	for tg, status := range copy.TaskGroups {
		_, ok := groupIndex[tg]
		if !req.All && !ok {
			continue
		}

		status.Promoted = true
	}

	// If the deployment no longer needs promotion, update its status
	// description
	if !copy.RequiresPromotion() && copy.Status == structs.DeploymentStatusRunning {
		copy.StatusDescription = structs.DeploymentStatusDescriptionRunning
	}

	// Insert the deployment
	if err := s.upsertDeploymentImpl(index, copy, txn); err != nil {
		return err
	}

	// Upsert the optional eval
	if req.Eval != nil {
		if err := s.nestedUpsertEval(txn, index, req.Eval); err != nil {
			return err
		}
	}

	// For each promotable allocation clear the canary flag on a copy and
	// record the index at which it changed
	for _, alloc := range promotable {
		promoted := alloc.Copy()
		promoted.DeploymentStatus.Canary = false
		promoted.DeploymentStatus.ModifyIndex = index
		promoted.ModifyIndex = index
		promoted.AllocModifyIndex = index

		if err := txn.Insert("allocs", promoted); err != nil {
			return fmt.Errorf("alloc insert failed: %v", err)
		}
	}

	// Update the alloc index. This happens even when no allocation was
	// promoted.
	if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil {
		return fmt.Errorf("index update failed: %v", err)
	}

	txn.Commit()
	return nil
}
  2969  
// UpdateDeploymentAllocHealth is used to update the health of allocations as
// part of the deployment and potentially make an evaluation.
//
// The allocations named in req.HealthyAllocationIDs and
// req.UnhealthyAllocationIDs must exist and belong to req.DeploymentID. The
// optional deployment status update, job and eval carried by the request are
// applied in the same write transaction, which is committed only when every
// step succeeds.
func (s *StateStore) UpdateDeploymentAllocHealth(index uint64, req *structs.ApplyDeploymentAllocHealthRequest) error {
	txn := s.db.Txn(true)
	defer txn.Abort()

	// Retrieve deployment and ensure it is not terminal and is active
	ws := memdb.NewWatchSet()
	deployment, err := s.deploymentByIDImpl(ws, req.DeploymentID, txn)
	if err != nil {
		return err
	} else if deployment == nil {
		return fmt.Errorf("Deployment ID %q couldn't be updated as it does not exist", req.DeploymentID)
	} else if !deployment.Active() {
		return fmt.Errorf("Deployment %q has terminal status %q:", deployment.ID, deployment.Status)
	}

	// Update the health status of each allocation. The block, including the
	// allocs index update, is skipped when the request names no allocations.
	if total := len(req.HealthyAllocationIDs) + len(req.UnhealthyAllocationIDs); total != 0 {
		// setAllocHealth records the given health and timestamp on a copy of
		// the allocation with the given ID and stores that copy.
		setAllocHealth := func(id string, healthy bool, ts time.Time) error {
			existing, err := txn.First("allocs", "id", id)
			if err != nil {
				return fmt.Errorf("alloc %q lookup failed: %v", id, err)
			}
			if existing == nil {
				return fmt.Errorf("unknown alloc %q", id)
			}

			// Reject allocations that belong to a different deployment
			old := existing.(*structs.Allocation)
			if old.DeploymentID != req.DeploymentID {
				return fmt.Errorf("alloc %q is not part of deployment %q", id, req.DeploymentID)
			}

			// Set the health on a copy, creating the deployment status if the
			// allocation does not have one yet
			copy := old.Copy()
			if copy.DeploymentStatus == nil {
				copy.DeploymentStatus = &structs.AllocDeploymentStatus{}
			}
			copy.DeploymentStatus.Healthy = helper.BoolToPtr(healthy)
			copy.DeploymentStatus.Timestamp = ts
			copy.DeploymentStatus.ModifyIndex = index
			copy.ModifyIndex = index

			// Reflect the allocation's change in its deployment before the
			// allocation itself is stored
			if err := s.updateDeploymentWithAlloc(index, copy, old, txn); err != nil {
				return fmt.Errorf("error updating deployment: %v", err)
			}

			if err := txn.Insert("allocs", copy); err != nil {
				return fmt.Errorf("alloc insert failed: %v", err)
			}

			return nil
		}

		for _, id := range req.HealthyAllocationIDs {
			if err := setAllocHealth(id, true, req.Timestamp); err != nil {
				return err
			}
		}
		for _, id := range req.UnhealthyAllocationIDs {
			if err := setAllocHealth(id, false, req.Timestamp); err != nil {
				return err
			}
		}

		// Update the indexes
		if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil {
			return fmt.Errorf("index update failed: %v", err)
		}
	}

	// Update the deployment status as needed.
	if req.DeploymentUpdate != nil {
		if err := s.updateDeploymentStatusImpl(index, req.DeploymentUpdate, txn); err != nil {
			return err
		}
	}

	// Upsert the job if necessary
	if req.Job != nil {
		if err := s.upsertJobImpl(index, req.Job, false, txn); err != nil {
			return err
		}
	}

	// Upsert the optional eval
	if req.Eval != nil {
		if err := s.nestedUpsertEval(txn, index, req.Eval); err != nil {
			return err
		}
	}

	txn.Commit()
	return nil
}
  3065  
  3066  // LastIndex returns the greatest index value for all indexes
  3067  func (s *StateStore) LatestIndex() (uint64, error) {
  3068  	indexes, err := s.Indexes()
  3069  	if err != nil {
  3070  		return 0, err
  3071  	}
  3072  
  3073  	var max uint64 = 0
  3074  	for {
  3075  		raw := indexes.Next()
  3076  		if raw == nil {
  3077  			break
  3078  		}
  3079  
  3080  		// Prepare the request struct
  3081  		idx := raw.(*IndexEntry)
  3082  
  3083  		// Determine the max
  3084  		if idx.Value > max {
  3085  			max = idx.Value
  3086  		}
  3087  	}
  3088  
  3089  	return max, nil
  3090  }
  3091  
  3092  // Index finds the matching index value
  3093  func (s *StateStore) Index(name string) (uint64, error) {
  3094  	txn := s.db.Txn(false)
  3095  
  3096  	// Lookup the first matching index
  3097  	out, err := txn.First("index", "id", name)
  3098  	if err != nil {
  3099  		return 0, err
  3100  	}
  3101  	if out == nil {
  3102  		return 0, nil
  3103  	}
  3104  	return out.(*IndexEntry).Value, nil
  3105  }
  3106  
  3107  // RemoveIndex is a helper method to remove an index for testing purposes
  3108  func (s *StateStore) RemoveIndex(name string) error {
  3109  	txn := s.db.Txn(true)
  3110  	defer txn.Abort()
  3111  
  3112  	if _, err := txn.DeleteAll("index", "id", name); err != nil {
  3113  		return err
  3114  	}
  3115  
  3116  	txn.Commit()
  3117  	return nil
  3118  }
  3119  
  3120  // Indexes returns an iterator over all the indexes
  3121  func (s *StateStore) Indexes() (memdb.ResultIterator, error) {
  3122  	txn := s.db.Txn(false)
  3123  
  3124  	// Walk the entire nodes table
  3125  	iter, err := txn.Get("index", "id")
  3126  	if err != nil {
  3127  		return nil, err
  3128  	}
  3129  	return iter, nil
  3130  }
  3131  
// ReconcileJobSummaries re-creates summaries for all jobs present in the state
// store.
//
// Parameterized and periodic jobs get a summary holding only the counts of
// their children by status, and only when a summary already exists for them.
// All other jobs get a summary whose per-task-group counts are recomputed from
// their allocations. The job_summary index entry is set to index before the
// transaction is committed.
func (s *StateStore) ReconcileJobSummaries(index uint64) error {
	txn := s.db.Txn(true)
	defer txn.Abort()

	// Get all the jobs
	iter, err := txn.Get("jobs", "id")
	if err != nil {
		return err
	}
	// COMPAT: Remove after 0.11
	// Iterate over jobs to build a list of parent jobs and their children
	// NOTE(review): the map is keyed by ParentID alone, so children living in
	// different namespaces that share a parent ID end up in the same bucket —
	// confirm this is acceptable.
	parentMap := make(map[string][]*structs.Job)
	for {
		rawJob := iter.Next()
		if rawJob == nil {
			break
		}
		job := rawJob.(*structs.Job)
		if job.ParentID != "" {
			children := parentMap[job.ParentID]
			children = append(children, job)
			parentMap[job.ParentID] = children
		}
	}

	// Get all the jobs again
	iter, err = txn.Get("jobs", "id")
	if err != nil {
		return err
	}

	for {
		rawJob := iter.Next()
		if rawJob == nil {
			break
		}
		job := rawJob.(*structs.Job)

		if job.IsParameterized() || job.IsPeriodic() {
			// COMPAT: Remove after 0.11

			// The following block of code fixes incorrect child summaries due to a bug
			// See https://github.com/hashicorp/nomad/issues/3886 for details
			rawSummary, err := txn.First("job_summary", "id", job.Namespace, job.ID)
			if err != nil {
				return err
			}
			// Parent jobs without an existing summary are left alone
			if rawSummary == nil {
				continue
			}

			oldSummary := rawSummary.(*structs.JobSummary)

			// Create an empty summary
			summary := &structs.JobSummary{
				JobID:     job.ID,
				Namespace: job.Namespace,
				Summary:   make(map[string]structs.TaskGroupSummary),
				Children:  &structs.JobChildrenSummary{},
			}

			// Iterate over children of this job if any to fix summary counts.
			// Children with any other status are not counted.
			children := parentMap[job.ID]
			for _, childJob := range children {
				switch childJob.Status {
				case structs.JobStatusPending:
					summary.Children.Pending++
				case structs.JobStatusDead:
					summary.Children.Dead++
				case structs.JobStatusRunning:
					summary.Children.Running++
				}
			}

			// Insert the job summary if it is different
			// NOTE(review): the comparison runs before CreateIndex and
			// ModifyIndex are set on the new summary, so a stored summary with
			// non-zero indexes presumably always compares as different —
			// confirm.
			if !reflect.DeepEqual(summary, oldSummary) {
				// Set the create index of the summary same as the job's create index
				// and the modify index to the current index
				summary.CreateIndex = job.CreateIndex
				summary.ModifyIndex = index

				if err := txn.Insert("job_summary", summary); err != nil {
					return fmt.Errorf("error inserting job summary: %v", err)
				}
			}

			// Done with handling a parent job, continue to next
			continue
		}

		// Create a job summary for the job with an empty entry for each of its
		// task groups
		summary := &structs.JobSummary{
			JobID:     job.ID,
			Namespace: job.Namespace,
			Summary:   make(map[string]structs.TaskGroupSummary),
		}
		for _, tg := range job.TaskGroups {
			summary.Summary[tg.Name] = structs.TaskGroupSummary{}
		}

		// COMPAT 0.7: Upgrade old objects that do not have namespaces
		// NOTE(review): this writes to the job obtained from the iterator, and
		// it happens after summary.Namespace was copied from job.Namespace
		// above, so the summary keeps the empty namespace — confirm intent.
		if job.Namespace == "" {
			job.Namespace = structs.DefaultNamespace
		}

		// Find all the allocations for the jobs
		iterAllocs, err := txn.Get("allocs", "job", job.Namespace, job.ID)
		if err != nil {
			return err
		}

		// Calculate the summary for the job
		for {
			rawAlloc := iterAllocs.Next()
			if rawAlloc == nil {
				break
			}
			alloc := rawAlloc.(*structs.Allocation)

			// Ignore the allocation if it doesn't belong to the currently
			// registered job. The allocation is checked because of issue #2304
			if alloc.Job == nil || alloc.Job.CreateIndex != job.CreateIndex {
				continue
			}

			// Count the allocation under its task group by client status.
			// An unrecognized status is logged and not counted.
			tg := summary.Summary[alloc.TaskGroup]
			switch alloc.ClientStatus {
			case structs.AllocClientStatusFailed:
				tg.Failed += 1
			case structs.AllocClientStatusLost:
				tg.Lost += 1
			case structs.AllocClientStatusComplete:
				tg.Complete += 1
			case structs.AllocClientStatusRunning:
				tg.Running += 1
			case structs.AllocClientStatusPending:
				tg.Starting += 1
			default:
				s.logger.Error("invalid client status set on allocation", "client_status", alloc.ClientStatus, "alloc_id", alloc.ID)
			}
			summary.Summary[alloc.TaskGroup] = tg
		}

		// Set the create index of the summary same as the job's create index
		// and the modify index to the current index
		summary.CreateIndex = job.CreateIndex
		summary.ModifyIndex = index

		// Insert the job summary
		if err := txn.Insert("job_summary", summary); err != nil {
			return fmt.Errorf("error inserting job summary: %v", err)
		}
	}

	// Update the indexes table for job summary
	if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil {
		return fmt.Errorf("index update failed: %v", err)
	}
	txn.Commit()
	return nil
}
  3295  
  3296  // setJobStatuses is a helper for calling setJobStatus on multiple jobs by ID.
  3297  // It takes a map of job IDs to an optional forceStatus string. It returns an
  3298  // error if the job doesn't exist or setJobStatus fails.
  3299  func (s *StateStore) setJobStatuses(index uint64, txn *memdb.Txn,
  3300  	jobs map[structs.NamespacedID]string, evalDelete bool) error {
  3301  	for tuple, forceStatus := range jobs {
  3302  		// COMPAT 0.7: Upgrade old objects that do not have namespaces
  3303  		if tuple.Namespace == "" {
  3304  			tuple.Namespace = structs.DefaultNamespace
  3305  		}
  3306  
  3307  		existing, err := txn.First("jobs", "id", tuple.Namespace, tuple.ID)
  3308  		if err != nil {
  3309  			return fmt.Errorf("job lookup failed: %v", err)
  3310  		}
  3311  
  3312  		if existing == nil {
  3313  			continue
  3314  		}
  3315  
  3316  		if err := s.setJobStatus(index, txn, existing.(*structs.Job), evalDelete, forceStatus); err != nil {
  3317  			return err
  3318  		}
  3319  	}
  3320  
  3321  	return nil
  3322  }
  3323  
// setJobStatus sets the status of the job by looking up associated evaluations
// and allocations. evalDelete should be set to true if setJobStatus is being
// called because an evaluation is being deleted (potentially because of garbage
// collection). If forceStatus is non-empty, the job's status will be set to the
// passed status.
//
// When the status changes, an updated copy of the job is stored and, if the
// job has a parent whose summary exists, the parent summary's children counts
// are moved from the old status to the new one.
func (s *StateStore) setJobStatus(index uint64, txn *memdb.Txn,
	job *structs.Job, evalDelete bool, forceStatus string) error {

	// Capture the current status so we can check if there is a change. A job
	// whose CreateIndex equals index is treated as having no previous status.
	oldStatus := job.Status
	if index == job.CreateIndex {
		oldStatus = ""
	}
	newStatus := forceStatus

	// If forceStatus is not set, compute the jobs status.
	if forceStatus == "" {
		var err error
		newStatus, err = s.getJobStatus(txn, job, evalDelete)
		if err != nil {
			return err
		}
	}

	// Fast-path if nothing has changed.
	if oldStatus == newStatus {
		return nil
	}

	// Copy and update the existing job
	updated := job.Copy()
	updated.Status = newStatus
	updated.ModifyIndex = index

	// COMPAT 0.7: Upgrade old objects that do not have namespaces
	if updated.Namespace == "" {
		updated.Namespace = structs.DefaultNamespace
	}

	// Insert the job
	if err := txn.Insert("jobs", updated); err != nil {
		return fmt.Errorf("job insert failed: %v", err)
	}
	if err := txn.Insert("index", &IndexEntry{"jobs", index}); err != nil {
		return fmt.Errorf("index update failed: %v", err)
	}

	// Update the children summary
	if updated.ParentID != "" {
		// Try to update the summary of the parent job summary
		summaryRaw, err := txn.First("job_summary", "id", updated.Namespace, updated.ParentID)
		if err != nil {
			return fmt.Errorf("unable to retrieve summary for parent job: %v", err)
		}

		// Only continue if the summary exists. It could not exist if the parent
		// job was removed
		if summaryRaw != nil {
			// Work on a copy of the stored parent summary
			existing := summaryRaw.(*structs.JobSummary)
			pSummary := existing.Copy()
			if pSummary.Children == nil {
				pSummary.Children = new(structs.JobChildrenSummary)
			}

			// COMPAT 0.7: Upgrade old objects that do not have namespaces
			if pSummary.Namespace == "" {
				pSummary.Namespace = structs.DefaultNamespace
			}

			// Determine the transition and update the correct fields
			children := pSummary.Children

			// Decrement old status. There is nothing to decrement when the
			// job had no previous status.
			if oldStatus != "" {
				switch oldStatus {
				case structs.JobStatusPending:
					children.Pending--
				case structs.JobStatusRunning:
					children.Running--
				case structs.JobStatusDead:
					children.Dead--
				default:
					return fmt.Errorf("unknown old job status %q", oldStatus)
				}
			}

			// Increment new status
			switch newStatus {
			case structs.JobStatusPending:
				children.Pending++
			case structs.JobStatusRunning:
				children.Running++
			case structs.JobStatusDead:
				children.Dead++
			default:
				return fmt.Errorf("unknown new job status %q", newStatus)
			}

			// Update the index
			pSummary.ModifyIndex = index

			// Insert the summary
			if err := txn.Insert("job_summary", pSummary); err != nil {
				return fmt.Errorf("job summary insert failed: %v", err)
			}
			if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil {
				return fmt.Errorf("index update failed: %v", err)
			}
		}
	}

	return nil
}
  3437  
  3438  func (s *StateStore) getJobStatus(txn *memdb.Txn, job *structs.Job, evalDelete bool) (string, error) {
  3439  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
  3440  	if job.Namespace == "" {
  3441  		job.Namespace = structs.DefaultNamespace
  3442  	}
  3443  
  3444  	// System, Periodic and Parameterized jobs are running until explicitly
  3445  	// stopped
  3446  	if job.Type == structs.JobTypeSystem || job.IsParameterized() || job.IsPeriodic() {
  3447  		if job.Stop {
  3448  			return structs.JobStatusDead, nil
  3449  		}
  3450  
  3451  		return structs.JobStatusRunning, nil
  3452  	}
  3453  
  3454  	allocs, err := txn.Get("allocs", "job", job.Namespace, job.ID)
  3455  	if err != nil {
  3456  		return "", err
  3457  	}
  3458  
  3459  	// If there is a non-terminal allocation, the job is running.
  3460  	hasAlloc := false
  3461  	for alloc := allocs.Next(); alloc != nil; alloc = allocs.Next() {
  3462  		hasAlloc = true
  3463  		if !alloc.(*structs.Allocation).TerminalStatus() {
  3464  			return structs.JobStatusRunning, nil
  3465  		}
  3466  	}
  3467  
  3468  	evals, err := txn.Get("evals", "job_prefix", job.Namespace, job.ID)
  3469  	if err != nil {
  3470  		return "", err
  3471  	}
  3472  
  3473  	hasEval := false
  3474  	for raw := evals.Next(); raw != nil; raw = evals.Next() {
  3475  		e := raw.(*structs.Evaluation)
  3476  
  3477  		// Filter non-exact matches
  3478  		if e.JobID != job.ID {
  3479  			continue
  3480  		}
  3481  
  3482  		hasEval = true
  3483  		if !e.TerminalStatus() {
  3484  			return structs.JobStatusPending, nil
  3485  		}
  3486  	}
  3487  
  3488  	// The job is dead if all the allocations and evals are terminal or if there
  3489  	// are no evals because of garbage collection.
  3490  	if evalDelete || hasEval || hasAlloc {
  3491  		return structs.JobStatusDead, nil
  3492  	}
  3493  
  3494  	return structs.JobStatusPending, nil
  3495  }
  3496  
  3497  // updateSummaryWithJob creates or updates job summaries when new jobs are
  3498  // upserted or existing ones are updated
  3499  func (s *StateStore) updateSummaryWithJob(index uint64, job *structs.Job,
  3500  	txn *memdb.Txn) error {
  3501  
  3502  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
  3503  	if job.Namespace == "" {
  3504  		job.Namespace = structs.DefaultNamespace
  3505  	}
  3506  
  3507  	// Update the job summary
  3508  	summaryRaw, err := txn.First("job_summary", "id", job.Namespace, job.ID)
  3509  	if err != nil {
  3510  		return fmt.Errorf("job summary lookup failed: %v", err)
  3511  	}
  3512  
  3513  	// Get the summary or create if necessary
  3514  	var summary *structs.JobSummary
  3515  	hasSummaryChanged := false
  3516  	if summaryRaw != nil {
  3517  		summary = summaryRaw.(*structs.JobSummary).Copy()
  3518  	} else {
  3519  		summary = &structs.JobSummary{
  3520  			JobID:       job.ID,
  3521  			Namespace:   job.Namespace,
  3522  			Summary:     make(map[string]structs.TaskGroupSummary),
  3523  			Children:    new(structs.JobChildrenSummary),
  3524  			CreateIndex: index,
  3525  		}
  3526  		hasSummaryChanged = true
  3527  	}
  3528  
  3529  	for _, tg := range job.TaskGroups {
  3530  		if _, ok := summary.Summary[tg.Name]; !ok {
  3531  			newSummary := structs.TaskGroupSummary{
  3532  				Complete: 0,
  3533  				Failed:   0,
  3534  				Running:  0,
  3535  				Starting: 0,
  3536  			}
  3537  			summary.Summary[tg.Name] = newSummary
  3538  			hasSummaryChanged = true
  3539  		}
  3540  	}
  3541  
  3542  	// The job summary has changed, so update the modify index.
  3543  	if hasSummaryChanged {
  3544  		summary.ModifyIndex = index
  3545  
  3546  		// COMPAT 0.7: Upgrade old objects that do not have namespaces
  3547  		if summary.Namespace == "" {
  3548  			summary.Namespace = structs.DefaultNamespace
  3549  		}
  3550  
  3551  		// Update the indexes table for job summary
  3552  		if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil {
  3553  			return fmt.Errorf("index update failed: %v", err)
  3554  		}
  3555  		if err := txn.Insert("job_summary", summary); err != nil {
  3556  			return err
  3557  		}
  3558  	}
  3559  
  3560  	return nil
  3561  }
  3562  
  3563  // updateDeploymentWithAlloc is used to update the deployment state associated
  3564  // with the given allocation. The passed alloc may be updated if the deployment
  3565  // status has changed to capture the modify index at which it has changed.
  3566  func (s *StateStore) updateDeploymentWithAlloc(index uint64, alloc, existing *structs.Allocation, txn *memdb.Txn) error {
  3567  	// Nothing to do if the allocation is not associated with a deployment
  3568  	if alloc.DeploymentID == "" {
  3569  		return nil
  3570  	}
  3571  
  3572  	// Get the deployment
  3573  	ws := memdb.NewWatchSet()
  3574  	deployment, err := s.deploymentByIDImpl(ws, alloc.DeploymentID, txn)
  3575  	if err != nil {
  3576  		return err
  3577  	}
  3578  	if deployment == nil {
  3579  		return nil
  3580  	}
  3581  
  3582  	// Retrieve the deployment state object
  3583  	_, ok := deployment.TaskGroups[alloc.TaskGroup]
  3584  	if !ok {
  3585  		// If the task group isn't part of the deployment, the task group wasn't
  3586  		// part of a rolling update so nothing to do
  3587  		return nil
  3588  	}
  3589  
  3590  	// Do not modify in-place. Instead keep track of what must be done
  3591  	placed := 0
  3592  	healthy := 0
  3593  	unhealthy := 0
  3594  
  3595  	// If there was no existing allocation, this is a placement and we increment
  3596  	// the placement
  3597  	existingHealthSet := existing != nil && existing.DeploymentStatus.HasHealth()
  3598  	allocHealthSet := alloc.DeploymentStatus.HasHealth()
  3599  	if existing == nil || existing.DeploymentID != alloc.DeploymentID {
  3600  		placed++
  3601  	} else if !existingHealthSet && allocHealthSet {
  3602  		if *alloc.DeploymentStatus.Healthy {
  3603  			healthy++
  3604  		} else {
  3605  			unhealthy++
  3606  		}
  3607  	} else if existingHealthSet && allocHealthSet {
  3608  		// See if it has gone from healthy to unhealthy
  3609  		if *existing.DeploymentStatus.Healthy && !*alloc.DeploymentStatus.Healthy {
  3610  			healthy--
  3611  			unhealthy++
  3612  		}
  3613  	}
  3614  
  3615  	// Nothing to do
  3616  	if placed == 0 && healthy == 0 && unhealthy == 0 {
  3617  		return nil
  3618  	}
  3619  
  3620  	// Update the allocation's deployment status modify index
  3621  	if alloc.DeploymentStatus != nil && healthy+unhealthy != 0 {
  3622  		alloc.DeploymentStatus.ModifyIndex = index
  3623  	}
  3624  
  3625  	// Create a copy of the deployment object
  3626  	deploymentCopy := deployment.Copy()
  3627  	deploymentCopy.ModifyIndex = index
  3628  
  3629  	state := deploymentCopy.TaskGroups[alloc.TaskGroup]
  3630  	state.PlacedAllocs += placed
  3631  	state.HealthyAllocs += healthy
  3632  	state.UnhealthyAllocs += unhealthy
  3633  
  3634  	// Update the progress deadline
  3635  	if pd := state.ProgressDeadline; pd != 0 {
  3636  		// If we are the first placed allocation for the deployment start the progress deadline.
  3637  		if placed != 0 && state.RequireProgressBy.IsZero() {
  3638  			// Use modify time instead of create time because we may in-place
  3639  			// update the allocation to be part of a new deployment.
  3640  			state.RequireProgressBy = time.Unix(0, alloc.ModifyTime).Add(pd)
  3641  		} else if healthy != 0 {
  3642  			if d := alloc.DeploymentStatus.Timestamp.Add(pd); d.After(state.RequireProgressBy) {
  3643  				state.RequireProgressBy = d
  3644  			}
  3645  		}
  3646  	}
  3647  
  3648  	// Upsert the deployment
  3649  	if err := s.upsertDeploymentImpl(index, deploymentCopy, txn); err != nil {
  3650  		return err
  3651  	}
  3652  
  3653  	return nil
  3654  }
  3655  
  3656  // updateSummaryWithAlloc updates the job summary when allocations are updated
  3657  // or inserted
  3658  func (s *StateStore) updateSummaryWithAlloc(index uint64, alloc *structs.Allocation,
  3659  	existingAlloc *structs.Allocation, txn *memdb.Txn) error {
  3660  
  3661  	// We don't have to update the summary if the job is missing
  3662  	if alloc.Job == nil {
  3663  		return nil
  3664  	}
  3665  	// COMPAT 0.7: Upgrade old objects that do not have namespaces
  3666  	if alloc.Namespace == "" {
  3667  		alloc.Namespace = structs.DefaultNamespace
  3668  	}
  3669  
  3670  	summaryRaw, err := txn.First("job_summary", "id", alloc.Namespace, alloc.JobID)
  3671  	if err != nil {
  3672  		return fmt.Errorf("unable to lookup job summary for job id %q in namespace %q: %v", alloc.JobID, alloc.Namespace, err)
  3673  	}
  3674  
  3675  	if summaryRaw == nil {
  3676  		// Check if the job is de-registered
  3677  		rawJob, err := txn.First("jobs", "id", alloc.Namespace, alloc.JobID)
  3678  		if err != nil {
  3679  			return fmt.Errorf("unable to query job: %v", err)
  3680  		}
  3681  
  3682  		// If the job is de-registered then we skip updating it's summary
  3683  		if rawJob == nil {
  3684  			return nil
  3685  		}
  3686  
  3687  		return fmt.Errorf("job summary for job %q in namespace %q is not present", alloc.JobID, alloc.Namespace)
  3688  	}
  3689  
  3690  	// Get a copy of the existing summary
  3691  	jobSummary := summaryRaw.(*structs.JobSummary).Copy()
  3692  
  3693  	// Not updating the job summary because the allocation doesn't belong to the
  3694  	// currently registered job
  3695  	if jobSummary.CreateIndex != alloc.Job.CreateIndex {
  3696  		return nil
  3697  	}
  3698  
  3699  	tgSummary, ok := jobSummary.Summary[alloc.TaskGroup]
  3700  	if !ok {
  3701  		return fmt.Errorf("unable to find task group in the job summary: %v", alloc.TaskGroup)
  3702  	}
  3703  
  3704  	summaryChanged := false
  3705  	if existingAlloc == nil {
  3706  		switch alloc.DesiredStatus {
  3707  		case structs.AllocDesiredStatusStop, structs.AllocDesiredStatusEvict:
  3708  			s.logger.Error("new allocation inserted into state store with bad desired status",
  3709  				"alloc_id", alloc.ID, "desired_status", alloc.DesiredStatus)
  3710  		}
  3711  		switch alloc.ClientStatus {
  3712  		case structs.AllocClientStatusPending:
  3713  			tgSummary.Starting += 1
  3714  			if tgSummary.Queued > 0 {
  3715  				tgSummary.Queued -= 1
  3716  			}
  3717  			summaryChanged = true
  3718  		case structs.AllocClientStatusRunning, structs.AllocClientStatusFailed,
  3719  			structs.AllocClientStatusComplete:
  3720  			s.logger.Error("new allocation inserted into state store with bad client status",
  3721  				"alloc_id", alloc.ID, "client_status", alloc.ClientStatus)
  3722  		}
  3723  	} else if existingAlloc.ClientStatus != alloc.ClientStatus {
  3724  		// Incrementing the client of the bin of the current state
  3725  		switch alloc.ClientStatus {
  3726  		case structs.AllocClientStatusRunning:
  3727  			tgSummary.Running += 1
  3728  		case structs.AllocClientStatusFailed:
  3729  			tgSummary.Failed += 1
  3730  		case structs.AllocClientStatusPending:
  3731  			tgSummary.Starting += 1
  3732  		case structs.AllocClientStatusComplete:
  3733  			tgSummary.Complete += 1
  3734  		case structs.AllocClientStatusLost:
  3735  			tgSummary.Lost += 1
  3736  		}
  3737  
  3738  		// Decrementing the count of the bin of the last state
  3739  		switch existingAlloc.ClientStatus {
  3740  		case structs.AllocClientStatusRunning:
  3741  			if tgSummary.Running > 0 {
  3742  				tgSummary.Running -= 1
  3743  			}
  3744  		case structs.AllocClientStatusPending:
  3745  			if tgSummary.Starting > 0 {
  3746  				tgSummary.Starting -= 1
  3747  			}
  3748  		case structs.AllocClientStatusLost:
  3749  			if tgSummary.Lost > 0 {
  3750  				tgSummary.Lost -= 1
  3751  			}
  3752  		case structs.AllocClientStatusFailed, structs.AllocClientStatusComplete:
  3753  		default:
  3754  			s.logger.Error("invalid old client status for allocatio",
  3755  				"alloc_id", existingAlloc.ID, "client_status", existingAlloc.ClientStatus)
  3756  		}
  3757  		summaryChanged = true
  3758  	}
  3759  	jobSummary.Summary[alloc.TaskGroup] = tgSummary
  3760  
  3761  	if summaryChanged {
  3762  		jobSummary.ModifyIndex = index
  3763  
  3764  		// COMPAT 0.7: Upgrade old objects that do not have namespaces
  3765  		if jobSummary.Namespace == "" {
  3766  			jobSummary.Namespace = structs.DefaultNamespace
  3767  		}
  3768  
  3769  		// Update the indexes table for job summary
  3770  		if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil {
  3771  			return fmt.Errorf("index update failed: %v", err)
  3772  		}
  3773  
  3774  		if err := txn.Insert("job_summary", jobSummary); err != nil {
  3775  			return fmt.Errorf("updating job summary failed: %v", err)
  3776  		}
  3777  	}
  3778  
  3779  	return nil
  3780  }
  3781  
  3782  // addEphemeralDiskToTaskGroups adds missing EphemeralDisk objects to TaskGroups
  3783  func (s *StateStore) addEphemeralDiskToTaskGroups(job *structs.Job) {
  3784  	for _, tg := range job.TaskGroups {
  3785  		var diskMB int
  3786  		for _, task := range tg.Tasks {
  3787  			if task.Resources != nil {
  3788  				diskMB += task.Resources.DiskMB
  3789  				task.Resources.DiskMB = 0
  3790  			}
  3791  		}
  3792  		if tg.EphemeralDisk != nil {
  3793  			continue
  3794  		}
  3795  		tg.EphemeralDisk = &structs.EphemeralDisk{
  3796  			SizeMB: diskMB,
  3797  		}
  3798  	}
  3799  }
  3800  
  3801  // UpsertACLPolicies is used to create or update a set of ACL policies
  3802  func (s *StateStore) UpsertACLPolicies(index uint64, policies []*structs.ACLPolicy) error {
  3803  	txn := s.db.Txn(true)
  3804  	defer txn.Abort()
  3805  
  3806  	for _, policy := range policies {
  3807  		// Ensure the policy hash is non-nil. This should be done outside the state store
  3808  		// for performance reasons, but we check here for defense in depth.
  3809  		if len(policy.Hash) == 0 {
  3810  			policy.SetHash()
  3811  		}
  3812  
  3813  		// Check if the policy already exists
  3814  		existing, err := txn.First("acl_policy", "id", policy.Name)
  3815  		if err != nil {
  3816  			return fmt.Errorf("policy lookup failed: %v", err)
  3817  		}
  3818  
  3819  		// Update all the indexes
  3820  		if existing != nil {
  3821  			policy.CreateIndex = existing.(*structs.ACLPolicy).CreateIndex
  3822  			policy.ModifyIndex = index
  3823  		} else {
  3824  			policy.CreateIndex = index
  3825  			policy.ModifyIndex = index
  3826  		}
  3827  
  3828  		// Update the policy
  3829  		if err := txn.Insert("acl_policy", policy); err != nil {
  3830  			return fmt.Errorf("upserting policy failed: %v", err)
  3831  		}
  3832  	}
  3833  
  3834  	// Update the indexes tabl
  3835  	if err := txn.Insert("index", &IndexEntry{"acl_policy", index}); err != nil {
  3836  		return fmt.Errorf("index update failed: %v", err)
  3837  	}
  3838  
  3839  	txn.Commit()
  3840  	return nil
  3841  }
  3842  
  3843  // DeleteACLPolicies deletes the policies with the given names
  3844  func (s *StateStore) DeleteACLPolicies(index uint64, names []string) error {
  3845  	txn := s.db.Txn(true)
  3846  	defer txn.Abort()
  3847  
  3848  	// Delete the policy
  3849  	for _, name := range names {
  3850  		if _, err := txn.DeleteAll("acl_policy", "id", name); err != nil {
  3851  			return fmt.Errorf("deleting acl policy failed: %v", err)
  3852  		}
  3853  	}
  3854  	if err := txn.Insert("index", &IndexEntry{"acl_policy", index}); err != nil {
  3855  		return fmt.Errorf("index update failed: %v", err)
  3856  	}
  3857  	txn.Commit()
  3858  	return nil
  3859  }
  3860  
  3861  // ACLPolicyByName is used to lookup a policy by name
  3862  func (s *StateStore) ACLPolicyByName(ws memdb.WatchSet, name string) (*structs.ACLPolicy, error) {
  3863  	txn := s.db.Txn(false)
  3864  
  3865  	watchCh, existing, err := txn.FirstWatch("acl_policy", "id", name)
  3866  	if err != nil {
  3867  		return nil, fmt.Errorf("acl policy lookup failed: %v", err)
  3868  	}
  3869  	ws.Add(watchCh)
  3870  
  3871  	if existing != nil {
  3872  		return existing.(*structs.ACLPolicy), nil
  3873  	}
  3874  	return nil, nil
  3875  }
  3876  
  3877  // ACLPolicyByNamePrefix is used to lookup policies by prefix
  3878  func (s *StateStore) ACLPolicyByNamePrefix(ws memdb.WatchSet, prefix string) (memdb.ResultIterator, error) {
  3879  	txn := s.db.Txn(false)
  3880  
  3881  	iter, err := txn.Get("acl_policy", "id_prefix", prefix)
  3882  	if err != nil {
  3883  		return nil, fmt.Errorf("acl policy lookup failed: %v", err)
  3884  	}
  3885  	ws.Add(iter.WatchCh())
  3886  
  3887  	return iter, nil
  3888  }
  3889  
  3890  // ACLPolicies returns an iterator over all the acl policies
  3891  func (s *StateStore) ACLPolicies(ws memdb.WatchSet) (memdb.ResultIterator, error) {
  3892  	txn := s.db.Txn(false)
  3893  
  3894  	// Walk the entire table
  3895  	iter, err := txn.Get("acl_policy", "id")
  3896  	if err != nil {
  3897  		return nil, err
  3898  	}
  3899  	ws.Add(iter.WatchCh())
  3900  	return iter, nil
  3901  }
  3902  
  3903  // UpsertACLTokens is used to create or update a set of ACL tokens
  3904  func (s *StateStore) UpsertACLTokens(index uint64, tokens []*structs.ACLToken) error {
  3905  	txn := s.db.Txn(true)
  3906  	defer txn.Abort()
  3907  
  3908  	for _, token := range tokens {
  3909  		// Ensure the policy hash is non-nil. This should be done outside the state store
  3910  		// for performance reasons, but we check here for defense in depth.
  3911  		if len(token.Hash) == 0 {
  3912  			token.SetHash()
  3913  		}
  3914  
  3915  		// Check if the token already exists
  3916  		existing, err := txn.First("acl_token", "id", token.AccessorID)
  3917  		if err != nil {
  3918  			return fmt.Errorf("token lookup failed: %v", err)
  3919  		}
  3920  
  3921  		// Update all the indexes
  3922  		if existing != nil {
  3923  			existTK := existing.(*structs.ACLToken)
  3924  			token.CreateIndex = existTK.CreateIndex
  3925  			token.ModifyIndex = index
  3926  
  3927  			// Do not allow SecretID or create time to change
  3928  			token.SecretID = existTK.SecretID
  3929  			token.CreateTime = existTK.CreateTime
  3930  
  3931  		} else {
  3932  			token.CreateIndex = index
  3933  			token.ModifyIndex = index
  3934  		}
  3935  
  3936  		// Update the token
  3937  		if err := txn.Insert("acl_token", token); err != nil {
  3938  			return fmt.Errorf("upserting token failed: %v", err)
  3939  		}
  3940  	}
  3941  
  3942  	// Update the indexes table
  3943  	if err := txn.Insert("index", &IndexEntry{"acl_token", index}); err != nil {
  3944  		return fmt.Errorf("index update failed: %v", err)
  3945  	}
  3946  	txn.Commit()
  3947  	return nil
  3948  }
  3949  
  3950  // DeleteACLTokens deletes the tokens with the given accessor ids
  3951  func (s *StateStore) DeleteACLTokens(index uint64, ids []string) error {
  3952  	txn := s.db.Txn(true)
  3953  	defer txn.Abort()
  3954  
  3955  	// Delete the tokens
  3956  	for _, id := range ids {
  3957  		if _, err := txn.DeleteAll("acl_token", "id", id); err != nil {
  3958  			return fmt.Errorf("deleting acl token failed: %v", err)
  3959  		}
  3960  	}
  3961  	if err := txn.Insert("index", &IndexEntry{"acl_token", index}); err != nil {
  3962  		return fmt.Errorf("index update failed: %v", err)
  3963  	}
  3964  	txn.Commit()
  3965  	return nil
  3966  }
  3967  
  3968  // ACLTokenByAccessorID is used to lookup a token by accessor ID
  3969  func (s *StateStore) ACLTokenByAccessorID(ws memdb.WatchSet, id string) (*structs.ACLToken, error) {
  3970  	txn := s.db.Txn(false)
  3971  
  3972  	watchCh, existing, err := txn.FirstWatch("acl_token", "id", id)
  3973  	if err != nil {
  3974  		return nil, fmt.Errorf("acl token lookup failed: %v", err)
  3975  	}
  3976  	ws.Add(watchCh)
  3977  
  3978  	if existing != nil {
  3979  		return existing.(*structs.ACLToken), nil
  3980  	}
  3981  	return nil, nil
  3982  }
  3983  
  3984  // ACLTokenBySecretID is used to lookup a token by secret ID
  3985  func (s *StateStore) ACLTokenBySecretID(ws memdb.WatchSet, secretID string) (*structs.ACLToken, error) {
  3986  	txn := s.db.Txn(false)
  3987  
  3988  	watchCh, existing, err := txn.FirstWatch("acl_token", "secret", secretID)
  3989  	if err != nil {
  3990  		return nil, fmt.Errorf("acl token lookup failed: %v", err)
  3991  	}
  3992  	ws.Add(watchCh)
  3993  
  3994  	if existing != nil {
  3995  		return existing.(*structs.ACLToken), nil
  3996  	}
  3997  	return nil, nil
  3998  }
  3999  
  4000  // ACLTokenByAccessorIDPrefix is used to lookup tokens by prefix
  4001  func (s *StateStore) ACLTokenByAccessorIDPrefix(ws memdb.WatchSet, prefix string) (memdb.ResultIterator, error) {
  4002  	txn := s.db.Txn(false)
  4003  
  4004  	iter, err := txn.Get("acl_token", "id_prefix", prefix)
  4005  	if err != nil {
  4006  		return nil, fmt.Errorf("acl token lookup failed: %v", err)
  4007  	}
  4008  	ws.Add(iter.WatchCh())
  4009  	return iter, nil
  4010  }
  4011  
  4012  // ACLTokens returns an iterator over all the tokens
  4013  func (s *StateStore) ACLTokens(ws memdb.WatchSet) (memdb.ResultIterator, error) {
  4014  	txn := s.db.Txn(false)
  4015  
  4016  	// Walk the entire table
  4017  	iter, err := txn.Get("acl_token", "id")
  4018  	if err != nil {
  4019  		return nil, err
  4020  	}
  4021  	ws.Add(iter.WatchCh())
  4022  	return iter, nil
  4023  }
  4024  
  4025  // ACLTokensByGlobal returns an iterator over all the tokens filtered by global value
  4026  func (s *StateStore) ACLTokensByGlobal(ws memdb.WatchSet, globalVal bool) (memdb.ResultIterator, error) {
  4027  	txn := s.db.Txn(false)
  4028  
  4029  	// Walk the entire table
  4030  	iter, err := txn.Get("acl_token", "global", globalVal)
  4031  	if err != nil {
  4032  		return nil, err
  4033  	}
  4034  	ws.Add(iter.WatchCh())
  4035  	return iter, nil
  4036  }
  4037  
  4038  // CanBootstrapACLToken checks if bootstrapping is possible and returns the reset index
  4039  func (s *StateStore) CanBootstrapACLToken() (bool, uint64, error) {
  4040  	txn := s.db.Txn(false)
  4041  
  4042  	// Lookup the bootstrap sentinel
  4043  	out, err := txn.First("index", "id", "acl_token_bootstrap")
  4044  	if err != nil {
  4045  		return false, 0, err
  4046  	}
  4047  
  4048  	// No entry, we haven't bootstrapped yet
  4049  	if out == nil {
  4050  		return true, 0, nil
  4051  	}
  4052  
  4053  	// Return the reset index if we've already bootstrapped
  4054  	return false, out.(*IndexEntry).Value, nil
  4055  }
  4056  
  4057  // BootstrapACLToken is used to create an initial ACL token
  4058  func (s *StateStore) BootstrapACLTokens(index, resetIndex uint64, token *structs.ACLToken) error {
  4059  	txn := s.db.Txn(true)
  4060  	defer txn.Abort()
  4061  
  4062  	// Check if we have already done a bootstrap
  4063  	existing, err := txn.First("index", "id", "acl_token_bootstrap")
  4064  	if err != nil {
  4065  		return fmt.Errorf("bootstrap check failed: %v", err)
  4066  	}
  4067  	if existing != nil {
  4068  		if resetIndex == 0 {
  4069  			return fmt.Errorf("ACL bootstrap already done")
  4070  		} else if resetIndex != existing.(*IndexEntry).Value {
  4071  			return fmt.Errorf("Invalid reset index for ACL bootstrap")
  4072  		}
  4073  	}
  4074  
  4075  	// Update the Create/Modify time
  4076  	token.CreateIndex = index
  4077  	token.ModifyIndex = index
  4078  
  4079  	// Insert the token
  4080  	if err := txn.Insert("acl_token", token); err != nil {
  4081  		return fmt.Errorf("upserting token failed: %v", err)
  4082  	}
  4083  
  4084  	// Update the indexes table, prevents future bootstrap until reset
  4085  	if err := txn.Insert("index", &IndexEntry{"acl_token", index}); err != nil {
  4086  		return fmt.Errorf("index update failed: %v", err)
  4087  	}
  4088  	if err := txn.Insert("index", &IndexEntry{"acl_token_bootstrap", index}); err != nil {
  4089  		return fmt.Errorf("index update failed: %v", err)
  4090  	}
  4091  	txn.Commit()
  4092  	return nil
  4093  }
  4094  
  4095  // SchedulerConfig is used to get the current Scheduler configuration.
  4096  func (s *StateStore) SchedulerConfig() (uint64, *structs.SchedulerConfiguration, error) {
  4097  	tx := s.db.Txn(false)
  4098  	defer tx.Abort()
  4099  
  4100  	// Get the scheduler config
  4101  	c, err := tx.First("scheduler_config", "id")
  4102  	if err != nil {
  4103  		return 0, nil, fmt.Errorf("failed scheduler config lookup: %s", err)
  4104  	}
  4105  
  4106  	config, ok := c.(*structs.SchedulerConfiguration)
  4107  	if !ok {
  4108  		return 0, nil, nil
  4109  	}
  4110  
  4111  	return config.ModifyIndex, config, nil
  4112  }
  4113  
  4114  // SchedulerSetConfig is used to set the current Scheduler configuration.
  4115  func (s *StateStore) SchedulerSetConfig(idx uint64, config *structs.SchedulerConfiguration) error {
  4116  	tx := s.db.Txn(true)
  4117  	defer tx.Abort()
  4118  
  4119  	s.schedulerSetConfigTxn(idx, tx, config)
  4120  
  4121  	tx.Commit()
  4122  	return nil
  4123  }
  4124  
  4125  // WithWriteTransaction executes the passed function within a write transaction,
  4126  // and returns its result.  If the invocation returns no error, the transaction
  4127  // is committed; otherwise, it's aborted.
  4128  func (s *StateStore) WithWriteTransaction(fn func(Txn) error) error {
  4129  	tx := s.db.Txn(true)
  4130  	defer tx.Abort()
  4131  
  4132  	err := fn(tx)
  4133  	if err == nil {
  4134  		tx.Commit()
  4135  	}
  4136  	return err
  4137  }
  4138  
  4139  // SchedulerCASConfig is used to update the scheduler configuration with a
  4140  // given Raft index. If the CAS index specified is not equal to the last observed index
  4141  // for the config, then the call is a noop.
  4142  func (s *StateStore) SchedulerCASConfig(idx, cidx uint64, config *structs.SchedulerConfiguration) (bool, error) {
  4143  	tx := s.db.Txn(true)
  4144  	defer tx.Abort()
  4145  
  4146  	// Check for an existing config
  4147  	existing, err := tx.First("scheduler_config", "id")
  4148  	if err != nil {
  4149  		return false, fmt.Errorf("failed scheduler config lookup: %s", err)
  4150  	}
  4151  
  4152  	// If the existing index does not match the provided CAS
  4153  	// index arg, then we shouldn't update anything and can safely
  4154  	// return early here.
  4155  	e, ok := existing.(*structs.SchedulerConfiguration)
  4156  	if !ok || (e != nil && e.ModifyIndex != cidx) {
  4157  		return false, nil
  4158  	}
  4159  
  4160  	s.schedulerSetConfigTxn(idx, tx, config)
  4161  
  4162  	tx.Commit()
  4163  	return true, nil
  4164  }
  4165  
  4166  func (s *StateStore) schedulerSetConfigTxn(idx uint64, tx *memdb.Txn, config *structs.SchedulerConfiguration) error {
  4167  	// Check for an existing config
  4168  	existing, err := tx.First("scheduler_config", "id")
  4169  	if err != nil {
  4170  		return fmt.Errorf("failed scheduler config lookup: %s", err)
  4171  	}
  4172  
  4173  	// Set the indexes.
  4174  	if existing != nil {
  4175  		config.CreateIndex = existing.(*structs.SchedulerConfiguration).CreateIndex
  4176  	} else {
  4177  		config.CreateIndex = idx
  4178  	}
  4179  	config.ModifyIndex = idx
  4180  
  4181  	if err := tx.Insert("scheduler_config", config); err != nil {
  4182  		return fmt.Errorf("failed updating scheduler config: %s", err)
  4183  	}
  4184  	return nil
  4185  }
  4186  
// StateSnapshot is used to provide a point-in-time snapshot. It embeds
// StateStore, so all StateStore methods are available on a snapshot as well.
type StateSnapshot struct {
	StateStore
}
  4191  
  4192  // DenormalizeAllocationsMap takes in a map of nodes to allocations, and queries the
  4193  // Allocation for each of the Allocation diffs and merges the updated attributes with
  4194  // the existing Allocation, and attaches the Job provided
  4195  func (s *StateSnapshot) DenormalizeAllocationsMap(nodeAllocations map[string][]*structs.Allocation) error {
  4196  	for nodeID, allocs := range nodeAllocations {
  4197  		denormalizedAllocs, err := s.DenormalizeAllocationSlice(allocs)
  4198  		if err != nil {
  4199  			return err
  4200  		}
  4201  
  4202  		nodeAllocations[nodeID] = denormalizedAllocs
  4203  	}
  4204  	return nil
  4205  }
  4206  
  4207  // DenormalizeAllocationSlice queries the Allocation for each allocation diff
  4208  // represented as an Allocation and merges the updated attributes with the existing
  4209  // Allocation, and attaches the Job provided.
  4210  func (s *StateSnapshot) DenormalizeAllocationSlice(allocs []*structs.Allocation) ([]*structs.Allocation, error) {
  4211  	allocDiffs := make([]*structs.AllocationDiff, len(allocs))
  4212  	for i, alloc := range allocs {
  4213  		allocDiffs[i] = alloc.AllocationDiff()
  4214  	}
  4215  
  4216  	return s.DenormalizeAllocationDiffSlice(allocDiffs)
  4217  }
  4218  
  4219  // DenormalizeAllocationDiffSlice queries the Allocation for each AllocationDiff and merges
  4220  // the updated attributes with the existing Allocation, and attaches the Job provided.
  4221  //
  4222  // This should only be called on terminal alloc, particularly stopped or preempted allocs
  4223  func (s *StateSnapshot) DenormalizeAllocationDiffSlice(allocDiffs []*structs.AllocationDiff) ([]*structs.Allocation, error) {
  4224  	// Output index for denormalized Allocations
  4225  	j := 0
  4226  
  4227  	denormalizedAllocs := make([]*structs.Allocation, len(allocDiffs))
  4228  	for _, allocDiff := range allocDiffs {
  4229  		alloc, err := s.AllocByID(nil, allocDiff.ID)
  4230  		if err != nil {
  4231  			return nil, fmt.Errorf("alloc lookup failed: %v", err)
  4232  		}
  4233  		if alloc == nil {
  4234  			return nil, fmt.Errorf("alloc %v doesn't exist", allocDiff.ID)
  4235  		}
  4236  
  4237  		// Merge the updates to the Allocation.  Don't update alloc.Job for terminal allocs
  4238  		// so alloc refers to the latest Job view before destruction and to ease handler implementations
  4239  		allocCopy := alloc.Copy()
  4240  
  4241  		if allocDiff.PreemptedByAllocation != "" {
  4242  			allocCopy.PreemptedByAllocation = allocDiff.PreemptedByAllocation
  4243  			allocCopy.DesiredDescription = getPreemptedAllocDesiredDescription(allocDiff.PreemptedByAllocation)
  4244  			allocCopy.DesiredStatus = structs.AllocDesiredStatusEvict
  4245  		} else {
  4246  			// If alloc is a stopped alloc
  4247  			allocCopy.DesiredDescription = allocDiff.DesiredDescription
  4248  			allocCopy.DesiredStatus = structs.AllocDesiredStatusStop
  4249  			if allocDiff.ClientStatus != "" {
  4250  				allocCopy.ClientStatus = allocDiff.ClientStatus
  4251  			}
  4252  		}
  4253  		if allocDiff.ModifyTime != 0 {
  4254  			allocCopy.ModifyTime = allocDiff.ModifyTime
  4255  		}
  4256  
  4257  		// Update the allocDiff in the slice to equal the denormalized alloc
  4258  		denormalizedAllocs[j] = allocCopy
  4259  		j++
  4260  	}
  4261  	// Retain only the denormalized Allocations in the slice
  4262  	denormalizedAllocs = denormalizedAllocs[:j]
  4263  	return denormalizedAllocs, nil
  4264  }
  4265  
  4266  func getPreemptedAllocDesiredDescription(PreemptedByAllocID string) string {
  4267  	return fmt.Sprintf("Preempted by alloc ID %v", PreemptedByAllocID)
  4268  }
  4269  
// StateRestore is used to optimize the performance when
// restoring state by only using a single large transaction
// instead of thousands of sub transactions. All *Restore methods
// insert into this one transaction, which is finished with Commit
// or discarded with Abort.
type StateRestore struct {
	// txn is the transaction every restore method inserts into
	txn *memdb.Txn
}
  4276  
  4277  // Abort is used to abort the restore operation
  4278  func (s *StateRestore) Abort() {
  4279  	s.txn.Abort()
  4280  }
  4281  
  4282  // Commit is used to commit the restore operation
  4283  func (s *StateRestore) Commit() {
  4284  	s.txn.Commit()
  4285  }
  4286  
  4287  // NodeRestore is used to restore a node
  4288  func (r *StateRestore) NodeRestore(node *structs.Node) error {
  4289  	if err := r.txn.Insert("nodes", node); err != nil {
  4290  		return fmt.Errorf("node insert failed: %v", err)
  4291  	}
  4292  	return nil
  4293  }
  4294  
  4295  // JobRestore is used to restore a job
  4296  func (r *StateRestore) JobRestore(job *structs.Job) error {
  4297  	if err := r.txn.Insert("jobs", job); err != nil {
  4298  		return fmt.Errorf("job insert failed: %v", err)
  4299  	}
  4300  	return nil
  4301  }
  4302  
  4303  // EvalRestore is used to restore an evaluation
  4304  func (r *StateRestore) EvalRestore(eval *structs.Evaluation) error {
  4305  	if err := r.txn.Insert("evals", eval); err != nil {
  4306  		return fmt.Errorf("eval insert failed: %v", err)
  4307  	}
  4308  	return nil
  4309  }
  4310  
  4311  // AllocRestore is used to restore an allocation
  4312  func (r *StateRestore) AllocRestore(alloc *structs.Allocation) error {
  4313  	if err := r.txn.Insert("allocs", alloc); err != nil {
  4314  		return fmt.Errorf("alloc insert failed: %v", err)
  4315  	}
  4316  	return nil
  4317  }
  4318  
  4319  // IndexRestore is used to restore an index
  4320  func (r *StateRestore) IndexRestore(idx *IndexEntry) error {
  4321  	if err := r.txn.Insert("index", idx); err != nil {
  4322  		return fmt.Errorf("index insert failed: %v", err)
  4323  	}
  4324  	return nil
  4325  }
  4326  
  4327  // PeriodicLaunchRestore is used to restore a periodic launch.
  4328  func (r *StateRestore) PeriodicLaunchRestore(launch *structs.PeriodicLaunch) error {
  4329  	if err := r.txn.Insert("periodic_launch", launch); err != nil {
  4330  		return fmt.Errorf("periodic launch insert failed: %v", err)
  4331  	}
  4332  	return nil
  4333  }
  4334  
  4335  // JobSummaryRestore is used to restore a job summary
  4336  func (r *StateRestore) JobSummaryRestore(jobSummary *structs.JobSummary) error {
  4337  	if err := r.txn.Insert("job_summary", jobSummary); err != nil {
  4338  		return fmt.Errorf("job summary insert failed: %v", err)
  4339  	}
  4340  	return nil
  4341  }
  4342  
  4343  // JobVersionRestore is used to restore a job version
  4344  func (r *StateRestore) JobVersionRestore(version *structs.Job) error {
  4345  	if err := r.txn.Insert("job_version", version); err != nil {
  4346  		return fmt.Errorf("job version insert failed: %v", err)
  4347  	}
  4348  	return nil
  4349  }
  4350  
  4351  // DeploymentRestore is used to restore a deployment
  4352  func (r *StateRestore) DeploymentRestore(deployment *structs.Deployment) error {
  4353  	if err := r.txn.Insert("deployment", deployment); err != nil {
  4354  		return fmt.Errorf("deployment insert failed: %v", err)
  4355  	}
  4356  	return nil
  4357  }
  4358  
  4359  // VaultAccessorRestore is used to restore a vault accessor
  4360  func (r *StateRestore) VaultAccessorRestore(accessor *structs.VaultAccessor) error {
  4361  	if err := r.txn.Insert("vault_accessors", accessor); err != nil {
  4362  		return fmt.Errorf("vault accessor insert failed: %v", err)
  4363  	}
  4364  	return nil
  4365  }
  4366  
  4367  // ACLPolicyRestore is used to restore an ACL policy
  4368  func (r *StateRestore) ACLPolicyRestore(policy *structs.ACLPolicy) error {
  4369  	if err := r.txn.Insert("acl_policy", policy); err != nil {
  4370  		return fmt.Errorf("inserting acl policy failed: %v", err)
  4371  	}
  4372  	return nil
  4373  }
  4374  
  4375  // ACLTokenRestore is used to restore an ACL token
  4376  func (r *StateRestore) ACLTokenRestore(token *structs.ACLToken) error {
  4377  	if err := r.txn.Insert("acl_token", token); err != nil {
  4378  		return fmt.Errorf("inserting acl token failed: %v", err)
  4379  	}
  4380  	return nil
  4381  }
  4382  
  4383  func (r *StateRestore) SchedulerConfigRestore(schedConfig *structs.SchedulerConfiguration) error {
  4384  	if err := r.txn.Insert("scheduler_config", schedConfig); err != nil {
  4385  		return fmt.Errorf("inserting scheduler config failed: %s", err)
  4386  	}
  4387  	return nil
  4388  }
  4389  
  4390  // addEphemeralDiskToTaskGroups adds missing EphemeralDisk objects to TaskGroups
  4391  func (r *StateRestore) addEphemeralDiskToTaskGroups(job *structs.Job) {
  4392  	for _, tg := range job.TaskGroups {
  4393  		if tg.EphemeralDisk != nil {
  4394  			continue
  4395  		}
  4396  		var sizeMB int
  4397  		for _, task := range tg.Tasks {
  4398  			if task.Resources != nil {
  4399  				sizeMB += task.Resources.DiskMB
  4400  				task.Resources.DiskMB = 0
  4401  			}
  4402  		}
  4403  		tg.EphemeralDisk = &structs.EphemeralDisk{
  4404  			SizeMB: sizeMB,
  4405  		}
  4406  	}
  4407  }