github.com/huiliang/nomad@v0.2.1-0.20151124023127-7a8b664699ff/nomad/state/state_store.go (about)

     1  package state
     2  
     3  import (
     4  	"fmt"
     5  	"io"
     6  	"log"
     7  	"sync"
     8  
     9  	"github.com/hashicorp/go-memdb"
    10  	"github.com/hashicorp/nomad/nomad/structs"
    11  	"github.com/hashicorp/nomad/nomad/watch"
    12  )
    13  
// IndexEntry is a row of the "index" table. It records the latest
// Raft index that affected the table named by Key.
type IndexEntry struct {
	// Key is the name of the table being tracked (e.g. "nodes", "jobs").
	Key string
	// Value is the index most recently written for that table.
	Value uint64
}
    20  
// The StateStore is responsible for maintaining all the Nomad
// state. It is manipulated by the FSM which maintains consistency
// through the use of Raft. The goals of the StateStore are to provide
// high concurrency for read operations without blocking writes, and
// to provide write availability in the face of reads. EVERY object
// returned as a result of a read against the state store should be
// considered a constant and NEVER modified in place.
type StateStore struct {
	// logger writes to the io.Writer handed to NewStateStore.
	logger *log.Logger
	// db is the in-memory database that holds every table.
	db *memdb.MemDB
	// watch tracks change subscriptions; snapshots share the same instance.
	watch *stateWatch
}
    33  
    34  // NewStateStore is used to create a new state store
    35  func NewStateStore(logOutput io.Writer) (*StateStore, error) {
    36  	// Create the MemDB
    37  	db, err := memdb.NewMemDB(stateStoreSchema())
    38  	if err != nil {
    39  		return nil, fmt.Errorf("state store setup failed: %v", err)
    40  	}
    41  
    42  	// Create the state store
    43  	s := &StateStore{
    44  		logger: log.New(logOutput, "", log.LstdFlags),
    45  		db:     db,
    46  		watch:  newStateWatch(),
    47  	}
    48  	return s, nil
    49  }
    50  
    51  // Snapshot is used to create a point in time snapshot. Because
    52  // we use MemDB, we just need to snapshot the state of the underlying
    53  // database.
    54  func (s *StateStore) Snapshot() (*StateSnapshot, error) {
    55  	snap := &StateSnapshot{
    56  		StateStore: StateStore{
    57  			logger: s.logger,
    58  			db:     s.db.Snapshot(),
    59  			watch:  s.watch,
    60  		},
    61  	}
    62  	return snap, nil
    63  }
    64  
    65  // Restore is used to optimize the efficiency of rebuilding
    66  // state by minimizing the number of transactions and checking
    67  // overhead.
    68  func (s *StateStore) Restore() (*StateRestore, error) {
    69  	txn := s.db.Txn(true)
    70  	r := &StateRestore{
    71  		txn:   txn,
    72  		watch: s.watch,
    73  		items: watch.NewItems(),
    74  	}
    75  	return r, nil
    76  }
    77  
// Watch subscribes the notify channel to every item in items. It delegates
// to the shared stateWatch, which is also used by snapshots of this store.
func (s *StateStore) Watch(items watch.Items, notify chan struct{}) {
	s.watch.watch(items, notify)
}
    82  
// StopWatch unsubscribes the notify channel from every item in items. It
// is the inverse of Watch and delegates to the shared stateWatch.
func (s *StateStore) StopWatch(items watch.Items, notify chan struct{}) {
	s.watch.stopWatch(items, notify)
}
    87  
    88  // UpsertNode is used to register a node or update a node definition
    89  // This is assumed to be triggered by the client, so we retain the value
    90  // of drain which is set by the scheduler.
    91  func (s *StateStore) UpsertNode(index uint64, node *structs.Node) error {
    92  	txn := s.db.Txn(true)
    93  	defer txn.Abort()
    94  
    95  	watcher := watch.NewItems()
    96  	watcher.Add(watch.Item{Table: "nodes"})
    97  	watcher.Add(watch.Item{Node: node.ID})
    98  
    99  	// Check if the node already exists
   100  	existing, err := txn.First("nodes", "id", node.ID)
   101  	if err != nil {
   102  		return fmt.Errorf("node lookup failed: %v", err)
   103  	}
   104  
   105  	// Setup the indexes correctly
   106  	if existing != nil {
   107  		exist := existing.(*structs.Node)
   108  		node.CreateIndex = exist.CreateIndex
   109  		node.ModifyIndex = index
   110  		node.Drain = exist.Drain // Retain the drain mode
   111  	} else {
   112  		node.CreateIndex = index
   113  		node.ModifyIndex = index
   114  	}
   115  
   116  	// Insert the node
   117  	if err := txn.Insert("nodes", node); err != nil {
   118  		return fmt.Errorf("node insert failed: %v", err)
   119  	}
   120  	if err := txn.Insert("index", &IndexEntry{"nodes", index}); err != nil {
   121  		return fmt.Errorf("index update failed: %v", err)
   122  	}
   123  
   124  	txn.Defer(func() { s.watch.notify(watcher) })
   125  	txn.Commit()
   126  	return nil
   127  }
   128  
   129  // DeleteNode is used to deregister a node
   130  func (s *StateStore) DeleteNode(index uint64, nodeID string) error {
   131  	txn := s.db.Txn(true)
   132  	defer txn.Abort()
   133  
   134  	watcher := watch.NewItems()
   135  	watcher.Add(watch.Item{Table: "nodes"})
   136  	watcher.Add(watch.Item{Node: nodeID})
   137  
   138  	// Lookup the node
   139  	existing, err := txn.First("nodes", "id", nodeID)
   140  	if err != nil {
   141  		return fmt.Errorf("node lookup failed: %v", err)
   142  	}
   143  	if existing == nil {
   144  		return fmt.Errorf("node not found")
   145  	}
   146  
   147  	// Delete the node
   148  	if err := txn.Delete("nodes", existing); err != nil {
   149  		return fmt.Errorf("node delete failed: %v", err)
   150  	}
   151  	if err := txn.Insert("index", &IndexEntry{"nodes", index}); err != nil {
   152  		return fmt.Errorf("index update failed: %v", err)
   153  	}
   154  
   155  	txn.Defer(func() { s.watch.notify(watcher) })
   156  	txn.Commit()
   157  	return nil
   158  }
   159  
   160  // UpdateNodeStatus is used to update the status of a node
   161  func (s *StateStore) UpdateNodeStatus(index uint64, nodeID, status string) error {
   162  	txn := s.db.Txn(true)
   163  	defer txn.Abort()
   164  
   165  	watcher := watch.NewItems()
   166  	watcher.Add(watch.Item{Table: "nodes"})
   167  	watcher.Add(watch.Item{Node: nodeID})
   168  
   169  	// Lookup the node
   170  	existing, err := txn.First("nodes", "id", nodeID)
   171  	if err != nil {
   172  		return fmt.Errorf("node lookup failed: %v", err)
   173  	}
   174  	if existing == nil {
   175  		return fmt.Errorf("node not found")
   176  	}
   177  
   178  	// Copy the existing node
   179  	existingNode := existing.(*structs.Node)
   180  	copyNode := new(structs.Node)
   181  	*copyNode = *existingNode
   182  
   183  	// Update the status in the copy
   184  	copyNode.Status = status
   185  	copyNode.ModifyIndex = index
   186  
   187  	// Insert the node
   188  	if err := txn.Insert("nodes", copyNode); err != nil {
   189  		return fmt.Errorf("node update failed: %v", err)
   190  	}
   191  	if err := txn.Insert("index", &IndexEntry{"nodes", index}); err != nil {
   192  		return fmt.Errorf("index update failed: %v", err)
   193  	}
   194  
   195  	txn.Defer(func() { s.watch.notify(watcher) })
   196  	txn.Commit()
   197  	return nil
   198  }
   199  
   200  // UpdateNodeDrain is used to update the drain of a node
   201  func (s *StateStore) UpdateNodeDrain(index uint64, nodeID string, drain bool) error {
   202  	txn := s.db.Txn(true)
   203  	defer txn.Abort()
   204  
   205  	watcher := watch.NewItems()
   206  	watcher.Add(watch.Item{Table: "nodes"})
   207  	watcher.Add(watch.Item{Node: nodeID})
   208  
   209  	// Lookup the node
   210  	existing, err := txn.First("nodes", "id", nodeID)
   211  	if err != nil {
   212  		return fmt.Errorf("node lookup failed: %v", err)
   213  	}
   214  	if existing == nil {
   215  		return fmt.Errorf("node not found")
   216  	}
   217  
   218  	// Copy the existing node
   219  	existingNode := existing.(*structs.Node)
   220  	copyNode := new(structs.Node)
   221  	*copyNode = *existingNode
   222  
   223  	// Update the drain in the copy
   224  	copyNode.Drain = drain
   225  	copyNode.ModifyIndex = index
   226  
   227  	// Insert the node
   228  	if err := txn.Insert("nodes", copyNode); err != nil {
   229  		return fmt.Errorf("node update failed: %v", err)
   230  	}
   231  	if err := txn.Insert("index", &IndexEntry{"nodes", index}); err != nil {
   232  		return fmt.Errorf("index update failed: %v", err)
   233  	}
   234  
   235  	txn.Defer(func() { s.watch.notify(watcher) })
   236  	txn.Commit()
   237  	return nil
   238  }
   239  
   240  // NodeByID is used to lookup a node by ID
   241  func (s *StateStore) NodeByID(nodeID string) (*structs.Node, error) {
   242  	txn := s.db.Txn(false)
   243  
   244  	existing, err := txn.First("nodes", "id", nodeID)
   245  	if err != nil {
   246  		return nil, fmt.Errorf("node lookup failed: %v", err)
   247  	}
   248  
   249  	if existing != nil {
   250  		return existing.(*structs.Node), nil
   251  	}
   252  	return nil, nil
   253  }
   254  
   255  // Nodes returns an iterator over all the nodes
   256  func (s *StateStore) Nodes() (memdb.ResultIterator, error) {
   257  	txn := s.db.Txn(false)
   258  
   259  	// Walk the entire nodes table
   260  	iter, err := txn.Get("nodes", "id")
   261  	if err != nil {
   262  		return nil, err
   263  	}
   264  	return iter, nil
   265  }
   266  
   267  // UpsertJob is used to register a job or update a job definition
   268  func (s *StateStore) UpsertJob(index uint64, job *structs.Job) error {
   269  	txn := s.db.Txn(true)
   270  	defer txn.Abort()
   271  
   272  	watcher := watch.NewItems()
   273  	watcher.Add(watch.Item{Table: "jobs"})
   274  	watcher.Add(watch.Item{Job: job.ID})
   275  
   276  	// Check if the job already exists
   277  	existing, err := txn.First("jobs", "id", job.ID)
   278  	if err != nil {
   279  		return fmt.Errorf("job lookup failed: %v", err)
   280  	}
   281  
   282  	// Setup the indexes correctly
   283  	if existing != nil {
   284  		job.CreateIndex = existing.(*structs.Job).CreateIndex
   285  		job.ModifyIndex = index
   286  	} else {
   287  		job.CreateIndex = index
   288  		job.ModifyIndex = index
   289  	}
   290  
   291  	// Insert the job
   292  	if err := txn.Insert("jobs", job); err != nil {
   293  		return fmt.Errorf("job insert failed: %v", err)
   294  	}
   295  	if err := txn.Insert("index", &IndexEntry{"jobs", index}); err != nil {
   296  		return fmt.Errorf("index update failed: %v", err)
   297  	}
   298  
   299  	txn.Defer(func() { s.watch.notify(watcher) })
   300  	txn.Commit()
   301  	return nil
   302  }
   303  
   304  // DeleteJob is used to deregister a job
   305  func (s *StateStore) DeleteJob(index uint64, jobID string) error {
   306  	txn := s.db.Txn(true)
   307  	defer txn.Abort()
   308  
   309  	watcher := watch.NewItems()
   310  	watcher.Add(watch.Item{Table: "jobs"})
   311  	watcher.Add(watch.Item{Job: jobID})
   312  
   313  	// Lookup the node
   314  	existing, err := txn.First("jobs", "id", jobID)
   315  	if err != nil {
   316  		return fmt.Errorf("job lookup failed: %v", err)
   317  	}
   318  	if existing == nil {
   319  		return fmt.Errorf("job not found")
   320  	}
   321  
   322  	// Delete the node
   323  	if err := txn.Delete("jobs", existing); err != nil {
   324  		return fmt.Errorf("job delete failed: %v", err)
   325  	}
   326  	if err := txn.Insert("index", &IndexEntry{"jobs", index}); err != nil {
   327  		return fmt.Errorf("index update failed: %v", err)
   328  	}
   329  
   330  	txn.Defer(func() { s.watch.notify(watcher) })
   331  	txn.Commit()
   332  	return nil
   333  }
   334  
   335  // JobByID is used to lookup a job by its ID
   336  func (s *StateStore) JobByID(id string) (*structs.Job, error) {
   337  	txn := s.db.Txn(false)
   338  
   339  	existing, err := txn.First("jobs", "id", id)
   340  	if err != nil {
   341  		return nil, fmt.Errorf("job lookup failed: %v", err)
   342  	}
   343  
   344  	if existing != nil {
   345  		return existing.(*structs.Job), nil
   346  	}
   347  	return nil, nil
   348  }
   349  
   350  // Jobs returns an iterator over all the jobs
   351  func (s *StateStore) Jobs() (memdb.ResultIterator, error) {
   352  	txn := s.db.Txn(false)
   353  
   354  	// Walk the entire jobs table
   355  	iter, err := txn.Get("jobs", "id")
   356  	if err != nil {
   357  		return nil, err
   358  	}
   359  	return iter, nil
   360  }
   361  
   362  // JobsByScheduler returns an iterator over all the jobs with the specific
   363  // scheduler type.
   364  func (s *StateStore) JobsByScheduler(schedulerType string) (memdb.ResultIterator, error) {
   365  	txn := s.db.Txn(false)
   366  
   367  	// Return an iterator for jobs with the specific type.
   368  	iter, err := txn.Get("jobs", "type", schedulerType)
   369  	if err != nil {
   370  		return nil, err
   371  	}
   372  	return iter, nil
   373  }
   374  
   375  // UpsertEvaluation is used to upsert an evaluation
   376  func (s *StateStore) UpsertEvals(index uint64, evals []*structs.Evaluation) error {
   377  	txn := s.db.Txn(true)
   378  	defer txn.Abort()
   379  
   380  	watcher := watch.NewItems()
   381  	watcher.Add(watch.Item{Table: "evals"})
   382  
   383  	// Do a nested upsert
   384  	for _, eval := range evals {
   385  		watcher.Add(watch.Item{Eval: eval.ID})
   386  		if err := s.nestedUpsertEval(txn, index, eval); err != nil {
   387  			return err
   388  		}
   389  	}
   390  
   391  	txn.Defer(func() { s.watch.notify(watcher) })
   392  	txn.Commit()
   393  	return nil
   394  }
   395  
   396  // nestedUpsertEvaluation is used to nest an evaluation upsert within a transaction
   397  func (s *StateStore) nestedUpsertEval(txn *memdb.Txn, index uint64, eval *structs.Evaluation) error {
   398  	// Lookup the evaluation
   399  	existing, err := txn.First("evals", "id", eval.ID)
   400  	if err != nil {
   401  		return fmt.Errorf("eval lookup failed: %v", err)
   402  	}
   403  
   404  	// Update the indexes
   405  	if existing != nil {
   406  		eval.CreateIndex = existing.(*structs.Evaluation).CreateIndex
   407  		eval.ModifyIndex = index
   408  	} else {
   409  		eval.CreateIndex = index
   410  		eval.ModifyIndex = index
   411  	}
   412  
   413  	// Insert the eval
   414  	if err := txn.Insert("evals", eval); err != nil {
   415  		return fmt.Errorf("eval insert failed: %v", err)
   416  	}
   417  	if err := txn.Insert("index", &IndexEntry{"evals", index}); err != nil {
   418  		return fmt.Errorf("index update failed: %v", err)
   419  	}
   420  	return nil
   421  }
   422  
   423  // DeleteEval is used to delete an evaluation
   424  func (s *StateStore) DeleteEval(index uint64, evals []string, allocs []string) error {
   425  	txn := s.db.Txn(true)
   426  	defer txn.Abort()
   427  	watcher := watch.NewItems()
   428  	watcher.Add(watch.Item{Table: "evals"})
   429  	watcher.Add(watch.Item{Table: "allocs"})
   430  
   431  	for _, eval := range evals {
   432  		existing, err := txn.First("evals", "id", eval)
   433  		if err != nil {
   434  			return fmt.Errorf("eval lookup failed: %v", err)
   435  		}
   436  		if existing == nil {
   437  			continue
   438  		}
   439  		if err := txn.Delete("evals", existing); err != nil {
   440  			return fmt.Errorf("eval delete failed: %v", err)
   441  		}
   442  		watcher.Add(watch.Item{Eval: eval})
   443  	}
   444  
   445  	for _, alloc := range allocs {
   446  		existing, err := txn.First("allocs", "id", alloc)
   447  		if err != nil {
   448  			return fmt.Errorf("alloc lookup failed: %v", err)
   449  		}
   450  		if existing == nil {
   451  			continue
   452  		}
   453  		if err := txn.Delete("allocs", existing); err != nil {
   454  			return fmt.Errorf("alloc delete failed: %v", err)
   455  		}
   456  		realAlloc := existing.(*structs.Allocation)
   457  		watcher.Add(watch.Item{Alloc: realAlloc.ID})
   458  		watcher.Add(watch.Item{AllocEval: realAlloc.EvalID})
   459  		watcher.Add(watch.Item{AllocJob: realAlloc.JobID})
   460  		watcher.Add(watch.Item{AllocNode: realAlloc.NodeID})
   461  	}
   462  
   463  	// Update the indexes
   464  	if err := txn.Insert("index", &IndexEntry{"evals", index}); err != nil {
   465  		return fmt.Errorf("index update failed: %v", err)
   466  	}
   467  	if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil {
   468  		return fmt.Errorf("index update failed: %v", err)
   469  	}
   470  
   471  	txn.Defer(func() { s.watch.notify(watcher) })
   472  	txn.Commit()
   473  	return nil
   474  }
   475  
   476  // EvalByID is used to lookup an eval by its ID
   477  func (s *StateStore) EvalByID(id string) (*structs.Evaluation, error) {
   478  	txn := s.db.Txn(false)
   479  
   480  	existing, err := txn.First("evals", "id", id)
   481  	if err != nil {
   482  		return nil, fmt.Errorf("eval lookup failed: %v", err)
   483  	}
   484  
   485  	if existing != nil {
   486  		return existing.(*structs.Evaluation), nil
   487  	}
   488  	return nil, nil
   489  }
   490  
   491  // EvalsByJob returns all the evaluations by job id
   492  func (s *StateStore) EvalsByJob(jobID string) ([]*structs.Evaluation, error) {
   493  	txn := s.db.Txn(false)
   494  
   495  	// Get an iterator over the node allocations
   496  	iter, err := txn.Get("evals", "job", jobID)
   497  	if err != nil {
   498  		return nil, err
   499  	}
   500  
   501  	var out []*structs.Evaluation
   502  	for {
   503  		raw := iter.Next()
   504  		if raw == nil {
   505  			break
   506  		}
   507  		out = append(out, raw.(*structs.Evaluation))
   508  	}
   509  	return out, nil
   510  }
   511  
   512  // Evals returns an iterator over all the evaluations
   513  func (s *StateStore) Evals() (memdb.ResultIterator, error) {
   514  	txn := s.db.Txn(false)
   515  
   516  	// Walk the entire table
   517  	iter, err := txn.Get("evals", "id")
   518  	if err != nil {
   519  		return nil, err
   520  	}
   521  	return iter, nil
   522  }
   523  
   524  // UpdateAllocFromClient is used to update an allocation based on input
   525  // from a client. While the schedulers are the authority on the allocation for
   526  // most things, some updates are authoritative from the client. Specifically,
   527  // the desired state comes from the schedulers, while the actual state comes
   528  // from clients.
   529  func (s *StateStore) UpdateAllocFromClient(index uint64, alloc *structs.Allocation) error {
   530  	txn := s.db.Txn(true)
   531  	defer txn.Abort()
   532  
   533  	watcher := watch.NewItems()
   534  	watcher.Add(watch.Item{Table: "allocs"})
   535  	watcher.Add(watch.Item{Alloc: alloc.ID})
   536  	watcher.Add(watch.Item{AllocEval: alloc.EvalID})
   537  	watcher.Add(watch.Item{AllocJob: alloc.JobID})
   538  	watcher.Add(watch.Item{AllocNode: alloc.NodeID})
   539  
   540  	// Look for existing alloc
   541  	existing, err := txn.First("allocs", "id", alloc.ID)
   542  	if err != nil {
   543  		return fmt.Errorf("alloc lookup failed: %v", err)
   544  	}
   545  
   546  	// Nothing to do if this does not exist
   547  	if existing == nil {
   548  		return nil
   549  	}
   550  	exist := existing.(*structs.Allocation)
   551  
   552  	// Copy everything from the existing allocation
   553  	copyAlloc := new(structs.Allocation)
   554  	*copyAlloc = *exist
   555  
   556  	// Pull in anything the client is the authority on
   557  	copyAlloc.ClientStatus = alloc.ClientStatus
   558  	copyAlloc.ClientDescription = alloc.ClientDescription
   559  
   560  	// Update the modify index
   561  	copyAlloc.ModifyIndex = index
   562  
   563  	// Update the allocation
   564  	if err := txn.Insert("allocs", copyAlloc); err != nil {
   565  		return fmt.Errorf("alloc insert failed: %v", err)
   566  	}
   567  
   568  	// Update the indexes
   569  	if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil {
   570  		return fmt.Errorf("index update failed: %v", err)
   571  	}
   572  
   573  	txn.Defer(func() { s.watch.notify(watcher) })
   574  	txn.Commit()
   575  	return nil
   576  }
   577  
   578  // UpsertAllocs is used to evict a set of allocations
   579  // and allocate new ones at the same time.
   580  func (s *StateStore) UpsertAllocs(index uint64, allocs []*structs.Allocation) error {
   581  	txn := s.db.Txn(true)
   582  	defer txn.Abort()
   583  
   584  	watcher := watch.NewItems()
   585  	watcher.Add(watch.Item{Table: "allocs"})
   586  
   587  	// Handle the allocations
   588  	for _, alloc := range allocs {
   589  		existing, err := txn.First("allocs", "id", alloc.ID)
   590  		if err != nil {
   591  			return fmt.Errorf("alloc lookup failed: %v", err)
   592  		}
   593  
   594  		if existing == nil {
   595  			alloc.CreateIndex = index
   596  			alloc.ModifyIndex = index
   597  		} else {
   598  			exist := existing.(*structs.Allocation)
   599  			alloc.CreateIndex = exist.CreateIndex
   600  			alloc.ModifyIndex = index
   601  			alloc.ClientStatus = exist.ClientStatus
   602  			alloc.ClientDescription = exist.ClientDescription
   603  		}
   604  		if err := txn.Insert("allocs", alloc); err != nil {
   605  			return fmt.Errorf("alloc insert failed: %v", err)
   606  		}
   607  
   608  		watcher.Add(watch.Item{Alloc: alloc.ID})
   609  		watcher.Add(watch.Item{AllocEval: alloc.EvalID})
   610  		watcher.Add(watch.Item{AllocJob: alloc.JobID})
   611  		watcher.Add(watch.Item{AllocNode: alloc.NodeID})
   612  	}
   613  
   614  	// Update the indexes
   615  	if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil {
   616  		return fmt.Errorf("index update failed: %v", err)
   617  	}
   618  
   619  	txn.Defer(func() { s.watch.notify(watcher) })
   620  	txn.Commit()
   621  	return nil
   622  }
   623  
   624  // AllocByID is used to lookup an allocation by its ID
   625  func (s *StateStore) AllocByID(id string) (*structs.Allocation, error) {
   626  	txn := s.db.Txn(false)
   627  
   628  	existing, err := txn.First("allocs", "id", id)
   629  	if err != nil {
   630  		return nil, fmt.Errorf("alloc lookup failed: %v", err)
   631  	}
   632  
   633  	if existing != nil {
   634  		return existing.(*structs.Allocation), nil
   635  	}
   636  	return nil, nil
   637  }
   638  
   639  // AllocsByNode returns all the allocations by node
   640  func (s *StateStore) AllocsByNode(node string) ([]*structs.Allocation, error) {
   641  	txn := s.db.Txn(false)
   642  
   643  	// Get an iterator over the node allocations
   644  	iter, err := txn.Get("allocs", "node", node)
   645  	if err != nil {
   646  		return nil, err
   647  	}
   648  
   649  	var out []*structs.Allocation
   650  	for {
   651  		raw := iter.Next()
   652  		if raw == nil {
   653  			break
   654  		}
   655  		out = append(out, raw.(*structs.Allocation))
   656  	}
   657  	return out, nil
   658  }
   659  
   660  // AllocsByJob returns all the allocations by job id
   661  func (s *StateStore) AllocsByJob(jobID string) ([]*structs.Allocation, error) {
   662  	txn := s.db.Txn(false)
   663  
   664  	// Get an iterator over the node allocations
   665  	iter, err := txn.Get("allocs", "job", jobID)
   666  	if err != nil {
   667  		return nil, err
   668  	}
   669  
   670  	var out []*structs.Allocation
   671  	for {
   672  		raw := iter.Next()
   673  		if raw == nil {
   674  			break
   675  		}
   676  		out = append(out, raw.(*structs.Allocation))
   677  	}
   678  	return out, nil
   679  }
   680  
   681  // AllocsByEval returns all the allocations by eval id
   682  func (s *StateStore) AllocsByEval(evalID string) ([]*structs.Allocation, error) {
   683  	txn := s.db.Txn(false)
   684  
   685  	// Get an iterator over the eval allocations
   686  	iter, err := txn.Get("allocs", "eval", evalID)
   687  	if err != nil {
   688  		return nil, err
   689  	}
   690  
   691  	var out []*structs.Allocation
   692  	for {
   693  		raw := iter.Next()
   694  		if raw == nil {
   695  			break
   696  		}
   697  		out = append(out, raw.(*structs.Allocation))
   698  	}
   699  	return out, nil
   700  }
   701  
   702  // Allocs returns an iterator over all the evaluations
   703  func (s *StateStore) Allocs() (memdb.ResultIterator, error) {
   704  	txn := s.db.Txn(false)
   705  
   706  	// Walk the entire table
   707  	iter, err := txn.Get("allocs", "id")
   708  	if err != nil {
   709  		return nil, err
   710  	}
   711  	return iter, nil
   712  }
   713  
   714  // Index finds the matching index value
   715  func (s *StateStore) Index(name string) (uint64, error) {
   716  	txn := s.db.Txn(false)
   717  
   718  	// Lookup the first matching index
   719  	out, err := txn.First("index", "id", name)
   720  	if err != nil {
   721  		return 0, err
   722  	}
   723  	if out == nil {
   724  		return 0, nil
   725  	}
   726  	return out.(*IndexEntry).Value, nil
   727  }
   728  
   729  // Indexes returns an iterator over all the indexes
   730  func (s *StateStore) Indexes() (memdb.ResultIterator, error) {
   731  	txn := s.db.Txn(false)
   732  
   733  	// Walk the entire nodes table
   734  	iter, err := txn.Get("index", "id")
   735  	if err != nil {
   736  		return nil, err
   737  	}
   738  	return iter, nil
   739  }
   740  
// StateSnapshot is used to provide a point-in-time snapshot. It embeds a
// StateStore, so every StateStore method is available on it.
type StateSnapshot struct {
	StateStore
}
   745  
// StateRestore is used to optimize the performance when
// restoring state by only using a single large transaction
// instead of thousands of sub transactions
type StateRestore struct {
	// txn is the transaction every *Restore method inserts into.
	txn *memdb.Txn
	// watch is the watch registry that Commit notifies.
	watch *stateWatch
	// items collects the watch items touched by the *Restore methods.
	items watch.Items
}
   754  
   755  // Abort is used to abort the restore operation
   756  func (s *StateRestore) Abort() {
   757  	s.txn.Abort()
   758  }
   759  
   760  // Commit is used to commit the restore operation
   761  func (s *StateRestore) Commit() {
   762  	s.txn.Defer(func() { s.watch.notify(s.items) })
   763  	s.txn.Commit()
   764  }
   765  
   766  // NodeRestore is used to restore a node
   767  func (r *StateRestore) NodeRestore(node *structs.Node) error {
   768  	r.items.Add(watch.Item{Table: "nodes"})
   769  	r.items.Add(watch.Item{Node: node.ID})
   770  	if err := r.txn.Insert("nodes", node); err != nil {
   771  		return fmt.Errorf("node insert failed: %v", err)
   772  	}
   773  	return nil
   774  }
   775  
   776  // JobRestore is used to restore a job
   777  func (r *StateRestore) JobRestore(job *structs.Job) error {
   778  	r.items.Add(watch.Item{Table: "jobs"})
   779  	r.items.Add(watch.Item{Job: job.ID})
   780  	if err := r.txn.Insert("jobs", job); err != nil {
   781  		return fmt.Errorf("job insert failed: %v", err)
   782  	}
   783  	return nil
   784  }
   785  
   786  // EvalRestore is used to restore an evaluation
   787  func (r *StateRestore) EvalRestore(eval *structs.Evaluation) error {
   788  	r.items.Add(watch.Item{Table: "evals"})
   789  	r.items.Add(watch.Item{Eval: eval.ID})
   790  	if err := r.txn.Insert("evals", eval); err != nil {
   791  		return fmt.Errorf("eval insert failed: %v", err)
   792  	}
   793  	return nil
   794  }
   795  
   796  // AllocRestore is used to restore an allocation
   797  func (r *StateRestore) AllocRestore(alloc *structs.Allocation) error {
   798  	r.items.Add(watch.Item{Table: "allocs"})
   799  	r.items.Add(watch.Item{Alloc: alloc.ID})
   800  	r.items.Add(watch.Item{AllocEval: alloc.EvalID})
   801  	r.items.Add(watch.Item{AllocJob: alloc.JobID})
   802  	r.items.Add(watch.Item{AllocNode: alloc.NodeID})
   803  	if err := r.txn.Insert("allocs", alloc); err != nil {
   804  		return fmt.Errorf("alloc insert failed: %v", err)
   805  	}
   806  	return nil
   807  }
   808  
   809  // IndexRestore is used to restore an index
   810  func (r *StateRestore) IndexRestore(idx *IndexEntry) error {
   811  	if err := r.txn.Insert("index", idx); err != nil {
   812  		return fmt.Errorf("index insert failed: %v", err)
   813  	}
   814  	return nil
   815  }
   816  
// stateWatch holds shared state for watching updates. This is
// outside of StateStore so it can be shared with snapshots.
type stateWatch struct {
	// items maps each watched item to the group of channels subscribed to it.
	items map[watch.Item]*NotifyGroup
	// l guards items; watch, stopWatch and notify all hold it.
	l sync.Mutex
}
   823  
   824  // newStateWatch creates a new stateWatch for change notification.
   825  func newStateWatch() *stateWatch {
   826  	return &stateWatch{
   827  		items: make(map[watch.Item]*NotifyGroup),
   828  	}
   829  }
   830  
   831  // watch subscribes a channel to the given watch items.
   832  func (w *stateWatch) watch(items watch.Items, ch chan struct{}) {
   833  	w.l.Lock()
   834  	defer w.l.Unlock()
   835  
   836  	for item, _ := range items {
   837  		grp, ok := w.items[item]
   838  		if !ok {
   839  			grp = new(NotifyGroup)
   840  			w.items[item] = grp
   841  		}
   842  		grp.Wait(ch)
   843  	}
   844  }
   845  
   846  // stopWatch unsubscribes a channel from the given watch items.
   847  func (w *stateWatch) stopWatch(items watch.Items, ch chan struct{}) {
   848  	w.l.Lock()
   849  	defer w.l.Unlock()
   850  
   851  	for item, _ := range items {
   852  		if grp, ok := w.items[item]; ok {
   853  			grp.Clear(ch)
   854  			if grp.Empty() {
   855  				delete(w.items, item)
   856  			}
   857  		}
   858  	}
   859  }
   860  
   861  // notify is used to fire notifications on the given watch items.
   862  func (w *stateWatch) notify(items watch.Items) {
   863  	w.l.Lock()
   864  	defer w.l.Unlock()
   865  
   866  	for wi, _ := range items {
   867  		if grp, ok := w.items[wi]; ok {
   868  			grp.Notify()
   869  		}
   870  	}
   871  }