github.com/bigcommerce/nomad@v0.9.3-bc/client/state/state_database.go (about)

     1  package state
     2  
     3  import (
     4  	"fmt"
     5  	"os"
     6  	"path/filepath"
     7  	"time"
     8  
     9  	hclog "github.com/hashicorp/go-hclog"
    10  	trstate "github.com/hashicorp/nomad/client/allocrunner/taskrunner/state"
    11  	dmstate "github.com/hashicorp/nomad/client/devicemanager/state"
    12  	driverstate "github.com/hashicorp/nomad/client/pluginmanager/drivermanager/state"
    13  	"github.com/hashicorp/nomad/helper/boltdd"
    14  	"github.com/hashicorp/nomad/nomad/structs"
    15  )
    16  
    17  /*
    18  The client has a boltDB backed state store. The schema as of 0.9 looks as follows:
    19  
    20  meta/
    21  |--> version -> '2' (not msgpack encoded)
    22  |--> upgraded -> time.Now().Format(time.RFC3339)
    23  allocations/
    24  |--> <alloc-id>/
    25     |--> alloc         -> allocEntry{*structs.Allocation}
    26     |--> deploy_status -> deployStatusEntry{*structs.AllocDeploymentStatus}
    27     |--> task-<name>/
    28        |--> local_state -> *trstate.LocalState # Local-only state
    29        |--> task_state  -> *structs.TaskState  # Sync'd to servers
    30  
    31  devicemanager/
    32  |--> plugin_state -> *dmstate.PluginState
    33  
    34  drivermanager/
    35  |--> plugin_state -> *driverstate.PluginState
    36  */
    37  
    38  var (
    39  	// metaBucketName is the name of the metadata bucket
    40  	metaBucketName = []byte("meta")
    41  
    42  	// metaVersionKey is the key the state schema version is stored under.
    43  	metaVersionKey = []byte("version")
    44  
    45  	// metaVersion is the value of the state schema version to detect when
    46  	// an upgrade is needed. It skips the usual boltdd/msgpack backend to
    47  	// be as portable and futureproof as possible.
    48  	metaVersion = []byte{'2'}
    49  
    50  	// metaUpgradedKey is the key that stores the timestamp of the last
    51  	// time the schema was upgraded.
    52  	metaUpgradedKey = []byte("upgraded")
    53  
    54  	// allocationsBucketName is the bucket name containing all allocation related
    55  	// data
    56  	allocationsBucketName = []byte("allocations")
    57  
    58  	// allocKey is the key Allocations are stored under encapsulated in
    59  	// allocEntry structs.
    60  	allocKey = []byte("alloc")
    61  
    62  	// allocDeployStatusKey is the key *structs.AllocDeploymentStatus is
    63  	// stored under.
    64  	allocDeployStatusKey = []byte("deploy_status")
    65  
    66  	// allocations -> $allocid -> task-$taskname -> the keys below
    67  	taskLocalStateKey = []byte("local_state")
    68  	taskStateKey      = []byte("task_state")
    69  
    70  	// devManagerBucket is the bucket name containing all device manager related
    71  	// data
    72  	devManagerBucket = []byte("devicemanager")
    73  
    74  	// driverManagerBucket is the bucket name container all driver manager
    75  	// related data
    76  	driverManagerBucket = []byte("drivermanager")
    77  
    78  	// managerPluginStateKey is the key by which plugin manager plugin state is
    79  	// stored at
    80  	managerPluginStateKey = []byte("plugin_state")
    81  )
    82  
    83  // taskBucketName returns the bucket name for the given task name.
    84  func taskBucketName(taskName string) []byte {
    85  	return []byte("task-" + taskName)
    86  }
    87  
// NewStateDBFunc creates a StateDB given a state directory. It receives the
// logger the database should use and the path of the state directory, and
// returns the StateDB or an error.
type NewStateDBFunc func(logger hclog.Logger, stateDir string) (StateDB, error)
    90  
    91  // GetStateDBFactory returns a func for creating a StateDB
    92  func GetStateDBFactory(devMode bool) NewStateDBFunc {
    93  	// Return a noop state db implementation when in debug mode
    94  	if devMode {
    95  		return func(hclog.Logger, string) (StateDB, error) {
    96  			return NoopDB{}, nil
    97  		}
    98  	}
    99  
   100  	return NewBoltStateDB
   101  }
   102  
// BoltStateDB persists and restores Nomad client state in a boltdb. All
// methods are safe for concurrent access.
type BoltStateDB struct {
	// stateDir is the directory containing the state.db file; upgrade
	// backups are written here as well.
	stateDir string
	// db is the handle to the opened bolt database.
	db       *boltdd.DB
	// logger is used to report progress, e.g. during schema upgrades.
	logger   hclog.Logger
}
   110  
   111  // NewBoltStateDB creates or opens an existing boltdb state file or returns an
   112  // error.
   113  func NewBoltStateDB(logger hclog.Logger, stateDir string) (StateDB, error) {
   114  	fn := filepath.Join(stateDir, "state.db")
   115  
   116  	// Check to see if the DB already exists
   117  	fi, err := os.Stat(fn)
   118  	if err != nil && !os.IsNotExist(err) {
   119  		return nil, err
   120  	}
   121  	firstRun := fi == nil
   122  
   123  	// Create or open the boltdb state database
   124  	db, err := boltdd.Open(fn, 0600, nil)
   125  	if err != nil {
   126  		return nil, fmt.Errorf("failed to create state database: %v", err)
   127  	}
   128  
   129  	sdb := &BoltStateDB{
   130  		stateDir: stateDir,
   131  		db:       db,
   132  		logger:   logger,
   133  	}
   134  
   135  	// If db did not already exist, initialize metadata fields
   136  	if firstRun {
   137  		if err := sdb.init(); err != nil {
   138  			return nil, err
   139  		}
   140  	}
   141  
   142  	return sdb, nil
   143  }
   144  
// Name returns the name of this StateDB implementation, "boltdb".
func (s *BoltStateDB) Name() string {
	return "boltdb"
}
   148  
   149  // GetAllAllocations gets all allocations persisted by this client and returns
   150  // a map of alloc ids to errors for any allocations that could not be restored.
   151  //
   152  // If a fatal error was encountered it will be returned and the other two
   153  // values will be nil.
   154  func (s *BoltStateDB) GetAllAllocations() ([]*structs.Allocation, map[string]error, error) {
   155  	var allocs []*structs.Allocation
   156  	var errs map[string]error
   157  	err := s.db.View(func(tx *boltdd.Tx) error {
   158  		allocs, errs = s.getAllAllocations(tx)
   159  		return nil
   160  	})
   161  
   162  	// db.View itself may return an error, so still check
   163  	if err != nil {
   164  		return nil, nil, err
   165  	}
   166  
   167  	return allocs, errs, nil
   168  }
   169  
// allocEntry wraps values in the Allocations buckets. It is the value stored
// under allocKey inside each allocation's bucket.
type allocEntry struct {
	// Alloc is the persisted allocation.
	Alloc *structs.Allocation
}
   174  
   175  func (s *BoltStateDB) getAllAllocations(tx *boltdd.Tx) ([]*structs.Allocation, map[string]error) {
   176  	allocs := []*structs.Allocation{}
   177  	errs := map[string]error{}
   178  
   179  	allocationsBkt := tx.Bucket(allocationsBucketName)
   180  	if allocationsBkt == nil {
   181  		// No allocs
   182  		return allocs, errs
   183  	}
   184  
   185  	// Create a cursor for iteration.
   186  	c := allocationsBkt.BoltBucket().Cursor()
   187  
   188  	// Iterate over all the allocation buckets
   189  	for k, _ := c.First(); k != nil; k, _ = c.Next() {
   190  		allocID := string(k)
   191  		allocBkt := allocationsBkt.Bucket(k)
   192  		if allocBkt == nil {
   193  			errs[allocID] = fmt.Errorf("missing alloc bucket")
   194  			continue
   195  		}
   196  
   197  		var ae allocEntry
   198  		if err := allocBkt.Get(allocKey, &ae); err != nil {
   199  			errs[allocID] = fmt.Errorf("failed to decode alloc: %v", err)
   200  			continue
   201  		}
   202  
   203  		allocs = append(allocs, ae.Alloc)
   204  	}
   205  
   206  	return allocs, errs
   207  }
   208  
   209  // PutAllocation stores an allocation or returns an error.
   210  func (s *BoltStateDB) PutAllocation(alloc *structs.Allocation) error {
   211  	return s.db.Update(func(tx *boltdd.Tx) error {
   212  		// Retrieve the root allocations bucket
   213  		allocsBkt, err := tx.CreateBucketIfNotExists(allocationsBucketName)
   214  		if err != nil {
   215  			return err
   216  		}
   217  
   218  		// Retrieve the specific allocations bucket
   219  		key := []byte(alloc.ID)
   220  		allocBkt, err := allocsBkt.CreateBucketIfNotExists(key)
   221  		if err != nil {
   222  			return err
   223  		}
   224  
   225  		allocState := allocEntry{
   226  			Alloc: alloc,
   227  		}
   228  		return allocBkt.Put(allocKey, &allocState)
   229  	})
   230  }
   231  
// deployStatusEntry wraps values for DeploymentStatus keys. It is the value
// stored under allocDeployStatusKey inside an allocation's bucket.
type deployStatusEntry struct {
	// DeploymentStatus is the persisted deployment status; it may be nil.
	DeploymentStatus *structs.AllocDeploymentStatus
}
   236  
   237  // PutDeploymentStatus stores an allocation's DeploymentStatus or returns an
   238  // error.
   239  func (s *BoltStateDB) PutDeploymentStatus(allocID string, ds *structs.AllocDeploymentStatus) error {
   240  	return s.db.Update(func(tx *boltdd.Tx) error {
   241  		return putDeploymentStatusImpl(tx, allocID, ds)
   242  	})
   243  }
   244  
   245  func putDeploymentStatusImpl(tx *boltdd.Tx, allocID string, ds *structs.AllocDeploymentStatus) error {
   246  	allocBkt, err := getAllocationBucket(tx, allocID)
   247  	if err != nil {
   248  		return err
   249  	}
   250  
   251  	entry := deployStatusEntry{
   252  		DeploymentStatus: ds,
   253  	}
   254  	return allocBkt.Put(allocDeployStatusKey, &entry)
   255  }
   256  
   257  // GetDeploymentStatus retrieves an allocation's DeploymentStatus or returns an
   258  // error.
   259  func (s *BoltStateDB) GetDeploymentStatus(allocID string) (*structs.AllocDeploymentStatus, error) {
   260  	var entry deployStatusEntry
   261  
   262  	err := s.db.View(func(tx *boltdd.Tx) error {
   263  		allAllocsBkt := tx.Bucket(allocationsBucketName)
   264  		if allAllocsBkt == nil {
   265  			// No state, return
   266  			return nil
   267  		}
   268  
   269  		allocBkt := allAllocsBkt.Bucket([]byte(allocID))
   270  		if allocBkt == nil {
   271  			// No state for alloc, return
   272  			return nil
   273  		}
   274  
   275  		return allocBkt.Get(allocDeployStatusKey, &entry)
   276  	})
   277  
   278  	// It's valid for this field to be nil/missing
   279  	if boltdd.IsErrNotFound(err) {
   280  		return nil, nil
   281  	}
   282  
   283  	if err != nil {
   284  		return nil, err
   285  	}
   286  
   287  	return entry.DeploymentStatus, nil
   288  }
   289  
   290  // GetTaskRunnerState returns the LocalState and TaskState for a
   291  // TaskRunner. LocalState or TaskState will be nil if they do not exist.
   292  //
   293  // If an error is encountered both LocalState and TaskState will be nil.
   294  func (s *BoltStateDB) GetTaskRunnerState(allocID, taskName string) (*trstate.LocalState, *structs.TaskState, error) {
   295  	var ls *trstate.LocalState
   296  	var ts *structs.TaskState
   297  
   298  	err := s.db.View(func(tx *boltdd.Tx) error {
   299  		allAllocsBkt := tx.Bucket(allocationsBucketName)
   300  		if allAllocsBkt == nil {
   301  			// No state, return
   302  			return nil
   303  		}
   304  
   305  		allocBkt := allAllocsBkt.Bucket([]byte(allocID))
   306  		if allocBkt == nil {
   307  			// No state for alloc, return
   308  			return nil
   309  		}
   310  
   311  		taskBkt := allocBkt.Bucket(taskBucketName(taskName))
   312  		if taskBkt == nil {
   313  			// No state for task, return
   314  			return nil
   315  		}
   316  
   317  		// Restore Local State if it exists
   318  		ls = &trstate.LocalState{}
   319  		if err := taskBkt.Get(taskLocalStateKey, ls); err != nil {
   320  			if !boltdd.IsErrNotFound(err) {
   321  				return fmt.Errorf("failed to read local task runner state: %v", err)
   322  			}
   323  
   324  			// Key not found, reset ls to nil
   325  			ls = nil
   326  		}
   327  
   328  		// Restore Task State if it exists
   329  		ts = &structs.TaskState{}
   330  		if err := taskBkt.Get(taskStateKey, ts); err != nil {
   331  			if !boltdd.IsErrNotFound(err) {
   332  				return fmt.Errorf("failed to read task state: %v", err)
   333  			}
   334  
   335  			// Key not found, reset ts to nil
   336  			ts = nil
   337  		}
   338  
   339  		return nil
   340  	})
   341  
   342  	if err != nil {
   343  		return nil, nil, err
   344  	}
   345  
   346  	return ls, ts, nil
   347  }
   348  
   349  // PutTaskRunnerLocalState stores TaskRunner's LocalState or returns an error.
   350  func (s *BoltStateDB) PutTaskRunnerLocalState(allocID, taskName string, val *trstate.LocalState) error {
   351  	return s.db.Update(func(tx *boltdd.Tx) error {
   352  		return putTaskRunnerLocalStateImpl(tx, allocID, taskName, val)
   353  	})
   354  }
   355  
   356  // putTaskRunnerLocalStateImpl stores TaskRunner's LocalState in an ongoing
   357  // transaction or returns an error.
   358  func putTaskRunnerLocalStateImpl(tx *boltdd.Tx, allocID, taskName string, val *trstate.LocalState) error {
   359  	taskBkt, err := getTaskBucket(tx, allocID, taskName)
   360  	if err != nil {
   361  		return fmt.Errorf("failed to retrieve allocation bucket: %v", err)
   362  	}
   363  
   364  	if err := taskBkt.Put(taskLocalStateKey, val); err != nil {
   365  		return fmt.Errorf("failed to write task_runner state: %v", err)
   366  	}
   367  
   368  	return nil
   369  }
   370  
   371  // PutTaskState stores a task's state or returns an error.
   372  func (s *BoltStateDB) PutTaskState(allocID, taskName string, state *structs.TaskState) error {
   373  	return s.db.Update(func(tx *boltdd.Tx) error {
   374  		return putTaskStateImpl(tx, allocID, taskName, state)
   375  	})
   376  }
   377  
   378  // putTaskStateImpl stores a task's state in an ongoing transaction or returns
   379  // an error.
   380  func putTaskStateImpl(tx *boltdd.Tx, allocID, taskName string, state *structs.TaskState) error {
   381  	taskBkt, err := getTaskBucket(tx, allocID, taskName)
   382  	if err != nil {
   383  		return fmt.Errorf("failed to retrieve allocation bucket: %v", err)
   384  	}
   385  
   386  	return taskBkt.Put(taskStateKey, state)
   387  }
   388  
   389  // DeleteTaskBucket is used to delete a task bucket if it exists.
   390  func (s *BoltStateDB) DeleteTaskBucket(allocID, taskName string) error {
   391  	return s.db.Update(func(tx *boltdd.Tx) error {
   392  		// Retrieve the root allocations bucket
   393  		allocations := tx.Bucket(allocationsBucketName)
   394  		if allocations == nil {
   395  			return nil
   396  		}
   397  
   398  		// Retrieve the specific allocations bucket
   399  		alloc := allocations.Bucket([]byte(allocID))
   400  		if alloc == nil {
   401  			return nil
   402  		}
   403  
   404  		// Check if the bucket exists
   405  		key := taskBucketName(taskName)
   406  		return alloc.DeleteBucket(key)
   407  	})
   408  }
   409  
   410  // DeleteAllocationBucket is used to delete an allocation bucket if it exists.
   411  func (s *BoltStateDB) DeleteAllocationBucket(allocID string) error {
   412  	return s.db.Update(func(tx *boltdd.Tx) error {
   413  		// Retrieve the root allocations bucket
   414  		allocations := tx.Bucket(allocationsBucketName)
   415  		if allocations == nil {
   416  			return nil
   417  		}
   418  
   419  		key := []byte(allocID)
   420  		return allocations.DeleteBucket(key)
   421  	})
   422  }
   423  
// Close releases all database resources and unlocks the database file on disk.
// All transactions must be closed before closing the database.
//
// It returns whatever error closing the underlying database reports.
func (s *BoltStateDB) Close() error {
	return s.db.Close()
}
   429  
   430  // getAllocationBucket returns the bucket used to persist state about a
   431  // particular allocation. If the root allocation bucket or the specific
   432  // allocation bucket doesn't exist, it will be created as long as the
   433  // transaction is writable.
   434  func getAllocationBucket(tx *boltdd.Tx, allocID string) (*boltdd.Bucket, error) {
   435  	var err error
   436  	w := tx.Writable()
   437  
   438  	// Retrieve the root allocations bucket
   439  	allocations := tx.Bucket(allocationsBucketName)
   440  	if allocations == nil {
   441  		if !w {
   442  			return nil, fmt.Errorf("Allocations bucket doesn't exist and transaction is not writable")
   443  		}
   444  
   445  		allocations, err = tx.CreateBucketIfNotExists(allocationsBucketName)
   446  		if err != nil {
   447  			return nil, err
   448  		}
   449  	}
   450  
   451  	// Retrieve the specific allocations bucket
   452  	key := []byte(allocID)
   453  	alloc := allocations.Bucket(key)
   454  	if alloc == nil {
   455  		if !w {
   456  			return nil, fmt.Errorf("Allocation bucket doesn't exist and transaction is not writable")
   457  		}
   458  
   459  		alloc, err = allocations.CreateBucket(key)
   460  		if err != nil {
   461  			return nil, err
   462  		}
   463  	}
   464  
   465  	return alloc, nil
   466  }
   467  
   468  // getTaskBucket returns the bucket used to persist state about a
   469  // particular task. If the root allocation bucket, the specific
   470  // allocation or task bucket doesn't exist, they will be created as long as the
   471  // transaction is writable.
   472  func getTaskBucket(tx *boltdd.Tx, allocID, taskName string) (*boltdd.Bucket, error) {
   473  	alloc, err := getAllocationBucket(tx, allocID)
   474  	if err != nil {
   475  		return nil, err
   476  	}
   477  
   478  	// Retrieve the specific task bucket
   479  	w := tx.Writable()
   480  	key := taskBucketName(taskName)
   481  	task := alloc.Bucket(key)
   482  	if task == nil {
   483  		if !w {
   484  			return nil, fmt.Errorf("Task bucket doesn't exist and transaction is not writable")
   485  		}
   486  
   487  		task, err = alloc.CreateBucket(key)
   488  		if err != nil {
   489  			return nil, err
   490  		}
   491  	}
   492  
   493  	return task, nil
   494  }
   495  
   496  // PutDevicePluginState stores the device manager's plugin state or returns an
   497  // error.
   498  func (s *BoltStateDB) PutDevicePluginState(ps *dmstate.PluginState) error {
   499  	return s.db.Update(func(tx *boltdd.Tx) error {
   500  		// Retrieve the root device manager bucket
   501  		devBkt, err := tx.CreateBucketIfNotExists(devManagerBucket)
   502  		if err != nil {
   503  			return err
   504  		}
   505  
   506  		return devBkt.Put(managerPluginStateKey, ps)
   507  	})
   508  }
   509  
   510  // GetDevicePluginState stores the device manager's plugin state or returns an
   511  // error.
   512  func (s *BoltStateDB) GetDevicePluginState() (*dmstate.PluginState, error) {
   513  	var ps *dmstate.PluginState
   514  
   515  	err := s.db.View(func(tx *boltdd.Tx) error {
   516  		devBkt := tx.Bucket(devManagerBucket)
   517  		if devBkt == nil {
   518  			// No state, return
   519  			return nil
   520  		}
   521  
   522  		// Restore Plugin State if it exists
   523  		ps = &dmstate.PluginState{}
   524  		if err := devBkt.Get(managerPluginStateKey, ps); err != nil {
   525  			if !boltdd.IsErrNotFound(err) {
   526  				return fmt.Errorf("failed to read device manager plugin state: %v", err)
   527  			}
   528  
   529  			// Key not found, reset ps to nil
   530  			ps = nil
   531  		}
   532  
   533  		return nil
   534  	})
   535  
   536  	if err != nil {
   537  		return nil, err
   538  	}
   539  
   540  	return ps, nil
   541  }
   542  
   543  // PutDriverPluginState stores the driver manager's plugin state or returns an
   544  // error.
   545  func (s *BoltStateDB) PutDriverPluginState(ps *driverstate.PluginState) error {
   546  	return s.db.Update(func(tx *boltdd.Tx) error {
   547  		// Retrieve the root driver manager bucket
   548  		driverBkt, err := tx.CreateBucketIfNotExists(driverManagerBucket)
   549  		if err != nil {
   550  			return err
   551  		}
   552  
   553  		return driverBkt.Put(managerPluginStateKey, ps)
   554  	})
   555  }
   556  
   557  // GetDriverPluginState stores the driver manager's plugin state or returns an
   558  // error.
   559  func (s *BoltStateDB) GetDriverPluginState() (*driverstate.PluginState, error) {
   560  	var ps *driverstate.PluginState
   561  
   562  	err := s.db.View(func(tx *boltdd.Tx) error {
   563  		driverBkt := tx.Bucket(driverManagerBucket)
   564  		if driverBkt == nil {
   565  			// No state, return
   566  			return nil
   567  		}
   568  
   569  		// Restore Plugin State if it exists
   570  		ps = &driverstate.PluginState{}
   571  		if err := driverBkt.Get(managerPluginStateKey, ps); err != nil {
   572  			if !boltdd.IsErrNotFound(err) {
   573  				return fmt.Errorf("failed to read driver manager plugin state: %v", err)
   574  			}
   575  
   576  			// Key not found, reset ps to nil
   577  			ps = nil
   578  		}
   579  
   580  		return nil
   581  	})
   582  
   583  	if err != nil {
   584  		return nil, err
   585  	}
   586  
   587  	return ps, nil
   588  }
   589  
   590  // init initializes metadata entries in a newly created state database.
   591  func (s *BoltStateDB) init() error {
   592  	return s.db.Update(func(tx *boltdd.Tx) error {
   593  		return addMeta(tx.BoltTx())
   594  	})
   595  }
   596  
   597  // Upgrade bolt state db from 0.8 schema to 0.9 schema. Noop if already using
   598  // 0.9 schema. Creates a backup before upgrading.
   599  func (s *BoltStateDB) Upgrade() error {
   600  	// Check to see if the underlying DB needs upgrading.
   601  	upgrade, err := NeedsUpgrade(s.db.BoltDB())
   602  	if err != nil {
   603  		return err
   604  	}
   605  	if !upgrade {
   606  		// No upgrade needed!
   607  		return nil
   608  	}
   609  
   610  	// Upgraded needed. Backup the boltdb first.
   611  	backupFileName := filepath.Join(s.stateDir, "state.db.backup")
   612  	if err := backupDB(s.db.BoltDB(), backupFileName); err != nil {
   613  		return fmt.Errorf("error backing up state db: %v", err)
   614  	}
   615  
   616  	// Perform the upgrade
   617  	if err := s.db.Update(func(tx *boltdd.Tx) error {
   618  		if err := UpgradeAllocs(s.logger, tx); err != nil {
   619  			return err
   620  		}
   621  
   622  		// Add standard metadata
   623  		if err := addMeta(tx.BoltTx()); err != nil {
   624  			return err
   625  		}
   626  
   627  		// Write the time the upgrade was done
   628  		bkt, err := tx.CreateBucketIfNotExists(metaBucketName)
   629  		if err != nil {
   630  			return err
   631  		}
   632  		return bkt.Put(metaUpgradedKey, time.Now().Format(time.RFC3339))
   633  	}); err != nil {
   634  		return err
   635  	}
   636  
   637  	s.logger.Info("successfully upgraded state")
   638  	return nil
   639  }
   640  
// DB allows access to the underlying BoltDB for testing purposes. It returns
// the same handle the BoltStateDB itself uses.
func (s *BoltStateDB) DB() *boltdd.DB {
	return s.db
}