github.com/manicqin/nomad@v0.9.5/client/state/state_database.go (about)

     1  package state
     2  
     3  import (
     4  	"fmt"
     5  	"os"
     6  	"path/filepath"
     7  	"time"
     8  
     9  	"github.com/boltdb/bolt"
    10  
    11  	hclog "github.com/hashicorp/go-hclog"
    12  	trstate "github.com/hashicorp/nomad/client/allocrunner/taskrunner/state"
    13  	dmstate "github.com/hashicorp/nomad/client/devicemanager/state"
    14  	driverstate "github.com/hashicorp/nomad/client/pluginmanager/drivermanager/state"
    15  	"github.com/hashicorp/nomad/helper/boltdd"
    16  	"github.com/hashicorp/nomad/nomad/structs"
    17  )
    18  
    19  /*
    20  The client has a boltDB backed state store. The schema as of 0.9 looks as follows:
    21  
    22  meta/
    23  |--> version -> '2' (not msgpack encoded)
    24  |--> upgraded -> time.Now().Format(time.RFC3339)
    25  allocations/
    26  |--> <alloc-id>/
    27     |--> alloc         -> allocEntry{*structs.Allocation}
    28     |--> deploy_status -> deployStatusEntry{*structs.AllocDeploymentStatus}
    29     |--> task-<name>/
    30        |--> local_state -> *trstate.LocalState # Local-only state
    31        |--> task_state  -> *structs.TaskState  # Sync'd to servers
    32  
    33  devicemanager/
    34  |--> plugin_state -> *dmstate.PluginState
    35  
    36  drivermanager/
    37  |--> plugin_state -> *driverstate.PluginState
    38  */
    39  
// Bucket names and keys that make up the on-disk schema of the client state
// database.
var (
	// metaBucketName is the name of the metadata bucket
	metaBucketName = []byte("meta")

	// metaVersionKey is the key the state schema version is stored under.
	metaVersionKey = []byte("version")

	// metaVersion is the value of the state schema version to detect when
	// an upgrade is needed. It skips the usual boltdd/msgpack backend to
	// be as portable and futureproof as possible.
	metaVersion = []byte{'2'}

	// metaUpgradedKey is the key that stores the timestamp of the last
	// time the schema was upgraded.
	metaUpgradedKey = []byte("upgraded")

	// allocationsBucketName is the name of the bucket containing all
	// allocation related data
	allocationsBucketName = []byte("allocations")

	// allocKey is the key Allocations are stored under, encapsulated in
	// allocEntry structs.
	allocKey = []byte("alloc")

	// allocDeployStatusKey is the key *structs.AllocDeploymentStatus is
	// stored under, encapsulated in deployStatusEntry structs.
	allocDeployStatusKey = []byte("deploy_status")

	// The two keys below live at
	// allocations -> $allocid -> task-$taskname.
	//
	// taskLocalStateKey is the key *trstate.LocalState is stored under and
	// taskStateKey is the key *structs.TaskState is stored under.
	taskLocalStateKey = []byte("local_state")
	taskStateKey      = []byte("task_state")

	// devManagerBucket is the name of the bucket containing all device
	// manager related data
	devManagerBucket = []byte("devicemanager")

	// driverManagerBucket is the name of the bucket containing all driver
	// manager related data
	driverManagerBucket = []byte("drivermanager")

	// managerPluginStateKey is the key plugin manager plugin state is stored
	// under; it is used inside both the device manager and the driver
	// manager buckets.
	managerPluginStateKey = []byte("plugin_state")
)
    84  
    85  // taskBucketName returns the bucket name for the given task name.
    86  func taskBucketName(taskName string) []byte {
    87  	return []byte("task-" + taskName)
    88  }
    89  
// NewStateDBFunc creates a StateDB given a logger and a state directory, or
// returns an error. GetStateDBFactory hands out values of this type.
type NewStateDBFunc func(logger hclog.Logger, stateDir string) (StateDB, error)
    92  
    93  // GetStateDBFactory returns a func for creating a StateDB
    94  func GetStateDBFactory(devMode bool) NewStateDBFunc {
    95  	// Return a noop state db implementation when in debug mode
    96  	if devMode {
    97  		return func(hclog.Logger, string) (StateDB, error) {
    98  			return NoopDB{}, nil
    99  		}
   100  	}
   101  
   102  	return NewBoltStateDB
   103  }
   104  
// BoltStateDB persists and restores Nomad client state in a boltdb. All
// methods are safe for concurrent access.
//
// stateDir is the directory passed to NewBoltStateDB (Upgrade writes its
// backup file there), db is the opened boltdd handle every method goes
// through, and logger is used to report a successful upgrade and is passed
// on to UpgradeAllocs.
type BoltStateDB struct {
	stateDir string
	db       *boltdd.DB
	logger   hclog.Logger
}
   112  
   113  // NewBoltStateDB creates or opens an existing boltdb state file or returns an
   114  // error.
   115  func NewBoltStateDB(logger hclog.Logger, stateDir string) (StateDB, error) {
   116  	fn := filepath.Join(stateDir, "state.db")
   117  
   118  	// Check to see if the DB already exists
   119  	fi, err := os.Stat(fn)
   120  	if err != nil && !os.IsNotExist(err) {
   121  		return nil, err
   122  	}
   123  	firstRun := fi == nil
   124  
   125  	// Timeout to force failure when accessing a data dir that is already in use
   126  	timeout := &bolt.Options{Timeout: 5 * time.Second}
   127  
   128  	// Create or open the boltdb state database
   129  	db, err := boltdd.Open(fn, 0600, timeout)
   130  	if err == bolt.ErrTimeout {
   131  		return nil, fmt.Errorf("timed out while opening database, is another Nomad process accessing data_dir %s?", stateDir)
   132  	} else if err != nil {
   133  		return nil, fmt.Errorf("failed to create state database: %v", err)
   134  	}
   135  
   136  	sdb := &BoltStateDB{
   137  		stateDir: stateDir,
   138  		db:       db,
   139  		logger:   logger,
   140  	}
   141  
   142  	// If db did not already exist, initialize metadata fields
   143  	if firstRun {
   144  		if err := sdb.init(); err != nil {
   145  			return nil, err
   146  		}
   147  	}
   148  
   149  	return sdb, nil
   150  }
   151  
   152  func (s *BoltStateDB) Name() string {
   153  	return "boltdb"
   154  }
   155  
   156  // GetAllAllocations gets all allocations persisted by this client and returns
   157  // a map of alloc ids to errors for any allocations that could not be restored.
   158  //
   159  // If a fatal error was encountered it will be returned and the other two
   160  // values will be nil.
   161  func (s *BoltStateDB) GetAllAllocations() ([]*structs.Allocation, map[string]error, error) {
   162  	var allocs []*structs.Allocation
   163  	var errs map[string]error
   164  	err := s.db.View(func(tx *boltdd.Tx) error {
   165  		allocs, errs = s.getAllAllocations(tx)
   166  		return nil
   167  	})
   168  
   169  	// db.View itself may return an error, so still check
   170  	if err != nil {
   171  		return nil, nil, err
   172  	}
   173  
   174  	return allocs, errs, nil
   175  }
   176  
// allocEntry wraps the value stored under allocKey in each allocation's
// bucket: PutAllocation writes it and getAllAllocations reads it back.
type allocEntry struct {
	Alloc *structs.Allocation
}
   181  
   182  func (s *BoltStateDB) getAllAllocations(tx *boltdd.Tx) ([]*structs.Allocation, map[string]error) {
   183  	allocs := []*structs.Allocation{}
   184  	errs := map[string]error{}
   185  
   186  	allocationsBkt := tx.Bucket(allocationsBucketName)
   187  	if allocationsBkt == nil {
   188  		// No allocs
   189  		return allocs, errs
   190  	}
   191  
   192  	// Create a cursor for iteration.
   193  	c := allocationsBkt.BoltBucket().Cursor()
   194  
   195  	// Iterate over all the allocation buckets
   196  	for k, _ := c.First(); k != nil; k, _ = c.Next() {
   197  		allocID := string(k)
   198  		allocBkt := allocationsBkt.Bucket(k)
   199  		if allocBkt == nil {
   200  			errs[allocID] = fmt.Errorf("missing alloc bucket")
   201  			continue
   202  		}
   203  
   204  		var ae allocEntry
   205  		if err := allocBkt.Get(allocKey, &ae); err != nil {
   206  			errs[allocID] = fmt.Errorf("failed to decode alloc: %v", err)
   207  			continue
   208  		}
   209  
   210  		allocs = append(allocs, ae.Alloc)
   211  	}
   212  
   213  	return allocs, errs
   214  }
   215  
   216  // PutAllocation stores an allocation or returns an error.
   217  func (s *BoltStateDB) PutAllocation(alloc *structs.Allocation) error {
   218  	return s.db.Update(func(tx *boltdd.Tx) error {
   219  		// Retrieve the root allocations bucket
   220  		allocsBkt, err := tx.CreateBucketIfNotExists(allocationsBucketName)
   221  		if err != nil {
   222  			return err
   223  		}
   224  
   225  		// Retrieve the specific allocations bucket
   226  		key := []byte(alloc.ID)
   227  		allocBkt, err := allocsBkt.CreateBucketIfNotExists(key)
   228  		if err != nil {
   229  			return err
   230  		}
   231  
   232  		allocState := allocEntry{
   233  			Alloc: alloc,
   234  		}
   235  		return allocBkt.Put(allocKey, &allocState)
   236  	})
   237  }
   238  
// deployStatusEntry wraps the value stored under allocDeployStatusKey in an
// allocation's bucket. DeploymentStatus may be nil; putDeploymentStatusImpl
// stores whatever pointer it is given.
type deployStatusEntry struct {
	DeploymentStatus *structs.AllocDeploymentStatus
}
   243  
   244  // PutDeploymentStatus stores an allocation's DeploymentStatus or returns an
   245  // error.
   246  func (s *BoltStateDB) PutDeploymentStatus(allocID string, ds *structs.AllocDeploymentStatus) error {
   247  	return s.db.Update(func(tx *boltdd.Tx) error {
   248  		return putDeploymentStatusImpl(tx, allocID, ds)
   249  	})
   250  }
   251  
   252  func putDeploymentStatusImpl(tx *boltdd.Tx, allocID string, ds *structs.AllocDeploymentStatus) error {
   253  	allocBkt, err := getAllocationBucket(tx, allocID)
   254  	if err != nil {
   255  		return err
   256  	}
   257  
   258  	entry := deployStatusEntry{
   259  		DeploymentStatus: ds,
   260  	}
   261  	return allocBkt.Put(allocDeployStatusKey, &entry)
   262  }
   263  
   264  // GetDeploymentStatus retrieves an allocation's DeploymentStatus or returns an
   265  // error.
   266  func (s *BoltStateDB) GetDeploymentStatus(allocID string) (*structs.AllocDeploymentStatus, error) {
   267  	var entry deployStatusEntry
   268  
   269  	err := s.db.View(func(tx *boltdd.Tx) error {
   270  		allAllocsBkt := tx.Bucket(allocationsBucketName)
   271  		if allAllocsBkt == nil {
   272  			// No state, return
   273  			return nil
   274  		}
   275  
   276  		allocBkt := allAllocsBkt.Bucket([]byte(allocID))
   277  		if allocBkt == nil {
   278  			// No state for alloc, return
   279  			return nil
   280  		}
   281  
   282  		return allocBkt.Get(allocDeployStatusKey, &entry)
   283  	})
   284  
   285  	// It's valid for this field to be nil/missing
   286  	if boltdd.IsErrNotFound(err) {
   287  		return nil, nil
   288  	}
   289  
   290  	if err != nil {
   291  		return nil, err
   292  	}
   293  
   294  	return entry.DeploymentStatus, nil
   295  }
   296  
   297  // GetTaskRunnerState returns the LocalState and TaskState for a
   298  // TaskRunner. LocalState or TaskState will be nil if they do not exist.
   299  //
   300  // If an error is encountered both LocalState and TaskState will be nil.
   301  func (s *BoltStateDB) GetTaskRunnerState(allocID, taskName string) (*trstate.LocalState, *structs.TaskState, error) {
   302  	var ls *trstate.LocalState
   303  	var ts *structs.TaskState
   304  
   305  	err := s.db.View(func(tx *boltdd.Tx) error {
   306  		allAllocsBkt := tx.Bucket(allocationsBucketName)
   307  		if allAllocsBkt == nil {
   308  			// No state, return
   309  			return nil
   310  		}
   311  
   312  		allocBkt := allAllocsBkt.Bucket([]byte(allocID))
   313  		if allocBkt == nil {
   314  			// No state for alloc, return
   315  			return nil
   316  		}
   317  
   318  		taskBkt := allocBkt.Bucket(taskBucketName(taskName))
   319  		if taskBkt == nil {
   320  			// No state for task, return
   321  			return nil
   322  		}
   323  
   324  		// Restore Local State if it exists
   325  		ls = &trstate.LocalState{}
   326  		if err := taskBkt.Get(taskLocalStateKey, ls); err != nil {
   327  			if !boltdd.IsErrNotFound(err) {
   328  				return fmt.Errorf("failed to read local task runner state: %v", err)
   329  			}
   330  
   331  			// Key not found, reset ls to nil
   332  			ls = nil
   333  		}
   334  
   335  		// Restore Task State if it exists
   336  		ts = &structs.TaskState{}
   337  		if err := taskBkt.Get(taskStateKey, ts); err != nil {
   338  			if !boltdd.IsErrNotFound(err) {
   339  				return fmt.Errorf("failed to read task state: %v", err)
   340  			}
   341  
   342  			// Key not found, reset ts to nil
   343  			ts = nil
   344  		}
   345  
   346  		return nil
   347  	})
   348  
   349  	if err != nil {
   350  		return nil, nil, err
   351  	}
   352  
   353  	return ls, ts, nil
   354  }
   355  
   356  // PutTaskRunnerLocalState stores TaskRunner's LocalState or returns an error.
   357  func (s *BoltStateDB) PutTaskRunnerLocalState(allocID, taskName string, val *trstate.LocalState) error {
   358  	return s.db.Update(func(tx *boltdd.Tx) error {
   359  		return putTaskRunnerLocalStateImpl(tx, allocID, taskName, val)
   360  	})
   361  }
   362  
   363  // putTaskRunnerLocalStateImpl stores TaskRunner's LocalState in an ongoing
   364  // transaction or returns an error.
   365  func putTaskRunnerLocalStateImpl(tx *boltdd.Tx, allocID, taskName string, val *trstate.LocalState) error {
   366  	taskBkt, err := getTaskBucket(tx, allocID, taskName)
   367  	if err != nil {
   368  		return fmt.Errorf("failed to retrieve allocation bucket: %v", err)
   369  	}
   370  
   371  	if err := taskBkt.Put(taskLocalStateKey, val); err != nil {
   372  		return fmt.Errorf("failed to write task_runner state: %v", err)
   373  	}
   374  
   375  	return nil
   376  }
   377  
   378  // PutTaskState stores a task's state or returns an error.
   379  func (s *BoltStateDB) PutTaskState(allocID, taskName string, state *structs.TaskState) error {
   380  	return s.db.Update(func(tx *boltdd.Tx) error {
   381  		return putTaskStateImpl(tx, allocID, taskName, state)
   382  	})
   383  }
   384  
   385  // putTaskStateImpl stores a task's state in an ongoing transaction or returns
   386  // an error.
   387  func putTaskStateImpl(tx *boltdd.Tx, allocID, taskName string, state *structs.TaskState) error {
   388  	taskBkt, err := getTaskBucket(tx, allocID, taskName)
   389  	if err != nil {
   390  		return fmt.Errorf("failed to retrieve allocation bucket: %v", err)
   391  	}
   392  
   393  	return taskBkt.Put(taskStateKey, state)
   394  }
   395  
   396  // DeleteTaskBucket is used to delete a task bucket if it exists.
   397  func (s *BoltStateDB) DeleteTaskBucket(allocID, taskName string) error {
   398  	return s.db.Update(func(tx *boltdd.Tx) error {
   399  		// Retrieve the root allocations bucket
   400  		allocations := tx.Bucket(allocationsBucketName)
   401  		if allocations == nil {
   402  			return nil
   403  		}
   404  
   405  		// Retrieve the specific allocations bucket
   406  		alloc := allocations.Bucket([]byte(allocID))
   407  		if alloc == nil {
   408  			return nil
   409  		}
   410  
   411  		// Check if the bucket exists
   412  		key := taskBucketName(taskName)
   413  		return alloc.DeleteBucket(key)
   414  	})
   415  }
   416  
   417  // DeleteAllocationBucket is used to delete an allocation bucket if it exists.
   418  func (s *BoltStateDB) DeleteAllocationBucket(allocID string) error {
   419  	return s.db.Update(func(tx *boltdd.Tx) error {
   420  		// Retrieve the root allocations bucket
   421  		allocations := tx.Bucket(allocationsBucketName)
   422  		if allocations == nil {
   423  			return nil
   424  		}
   425  
   426  		key := []byte(allocID)
   427  		return allocations.DeleteBucket(key)
   428  	})
   429  }
   430  
// Close releases all database resources and unlocks the database file on disk.
// All transactions must be closed before closing the database.
//
// It delegates directly to the underlying boltdd database and returns its
// error unchanged.
func (s *BoltStateDB) Close() error {
	return s.db.Close()
}
   436  
   437  // getAllocationBucket returns the bucket used to persist state about a
   438  // particular allocation. If the root allocation bucket or the specific
   439  // allocation bucket doesn't exist, it will be created as long as the
   440  // transaction is writable.
   441  func getAllocationBucket(tx *boltdd.Tx, allocID string) (*boltdd.Bucket, error) {
   442  	var err error
   443  	w := tx.Writable()
   444  
   445  	// Retrieve the root allocations bucket
   446  	allocations := tx.Bucket(allocationsBucketName)
   447  	if allocations == nil {
   448  		if !w {
   449  			return nil, fmt.Errorf("Allocations bucket doesn't exist and transaction is not writable")
   450  		}
   451  
   452  		allocations, err = tx.CreateBucketIfNotExists(allocationsBucketName)
   453  		if err != nil {
   454  			return nil, err
   455  		}
   456  	}
   457  
   458  	// Retrieve the specific allocations bucket
   459  	key := []byte(allocID)
   460  	alloc := allocations.Bucket(key)
   461  	if alloc == nil {
   462  		if !w {
   463  			return nil, fmt.Errorf("Allocation bucket doesn't exist and transaction is not writable")
   464  		}
   465  
   466  		alloc, err = allocations.CreateBucket(key)
   467  		if err != nil {
   468  			return nil, err
   469  		}
   470  	}
   471  
   472  	return alloc, nil
   473  }
   474  
   475  // getTaskBucket returns the bucket used to persist state about a
   476  // particular task. If the root allocation bucket, the specific
   477  // allocation or task bucket doesn't exist, they will be created as long as the
   478  // transaction is writable.
   479  func getTaskBucket(tx *boltdd.Tx, allocID, taskName string) (*boltdd.Bucket, error) {
   480  	alloc, err := getAllocationBucket(tx, allocID)
   481  	if err != nil {
   482  		return nil, err
   483  	}
   484  
   485  	// Retrieve the specific task bucket
   486  	w := tx.Writable()
   487  	key := taskBucketName(taskName)
   488  	task := alloc.Bucket(key)
   489  	if task == nil {
   490  		if !w {
   491  			return nil, fmt.Errorf("Task bucket doesn't exist and transaction is not writable")
   492  		}
   493  
   494  		task, err = alloc.CreateBucket(key)
   495  		if err != nil {
   496  			return nil, err
   497  		}
   498  	}
   499  
   500  	return task, nil
   501  }
   502  
   503  // PutDevicePluginState stores the device manager's plugin state or returns an
   504  // error.
   505  func (s *BoltStateDB) PutDevicePluginState(ps *dmstate.PluginState) error {
   506  	return s.db.Update(func(tx *boltdd.Tx) error {
   507  		// Retrieve the root device manager bucket
   508  		devBkt, err := tx.CreateBucketIfNotExists(devManagerBucket)
   509  		if err != nil {
   510  			return err
   511  		}
   512  
   513  		return devBkt.Put(managerPluginStateKey, ps)
   514  	})
   515  }
   516  
   517  // GetDevicePluginState stores the device manager's plugin state or returns an
   518  // error.
   519  func (s *BoltStateDB) GetDevicePluginState() (*dmstate.PluginState, error) {
   520  	var ps *dmstate.PluginState
   521  
   522  	err := s.db.View(func(tx *boltdd.Tx) error {
   523  		devBkt := tx.Bucket(devManagerBucket)
   524  		if devBkt == nil {
   525  			// No state, return
   526  			return nil
   527  		}
   528  
   529  		// Restore Plugin State if it exists
   530  		ps = &dmstate.PluginState{}
   531  		if err := devBkt.Get(managerPluginStateKey, ps); err != nil {
   532  			if !boltdd.IsErrNotFound(err) {
   533  				return fmt.Errorf("failed to read device manager plugin state: %v", err)
   534  			}
   535  
   536  			// Key not found, reset ps to nil
   537  			ps = nil
   538  		}
   539  
   540  		return nil
   541  	})
   542  
   543  	if err != nil {
   544  		return nil, err
   545  	}
   546  
   547  	return ps, nil
   548  }
   549  
   550  // PutDriverPluginState stores the driver manager's plugin state or returns an
   551  // error.
   552  func (s *BoltStateDB) PutDriverPluginState(ps *driverstate.PluginState) error {
   553  	return s.db.Update(func(tx *boltdd.Tx) error {
   554  		// Retrieve the root driver manager bucket
   555  		driverBkt, err := tx.CreateBucketIfNotExists(driverManagerBucket)
   556  		if err != nil {
   557  			return err
   558  		}
   559  
   560  		return driverBkt.Put(managerPluginStateKey, ps)
   561  	})
   562  }
   563  
   564  // GetDriverPluginState stores the driver manager's plugin state or returns an
   565  // error.
   566  func (s *BoltStateDB) GetDriverPluginState() (*driverstate.PluginState, error) {
   567  	var ps *driverstate.PluginState
   568  
   569  	err := s.db.View(func(tx *boltdd.Tx) error {
   570  		driverBkt := tx.Bucket(driverManagerBucket)
   571  		if driverBkt == nil {
   572  			// No state, return
   573  			return nil
   574  		}
   575  
   576  		// Restore Plugin State if it exists
   577  		ps = &driverstate.PluginState{}
   578  		if err := driverBkt.Get(managerPluginStateKey, ps); err != nil {
   579  			if !boltdd.IsErrNotFound(err) {
   580  				return fmt.Errorf("failed to read driver manager plugin state: %v", err)
   581  			}
   582  
   583  			// Key not found, reset ps to nil
   584  			ps = nil
   585  		}
   586  
   587  		return nil
   588  	})
   589  
   590  	if err != nil {
   591  		return nil, err
   592  	}
   593  
   594  	return ps, nil
   595  }
   596  
   597  // init initializes metadata entries in a newly created state database.
   598  func (s *BoltStateDB) init() error {
   599  	return s.db.Update(func(tx *boltdd.Tx) error {
   600  		return addMeta(tx.BoltTx())
   601  	})
   602  }
   603  
   604  // Upgrade bolt state db from 0.8 schema to 0.9 schema. Noop if already using
   605  // 0.9 schema. Creates a backup before upgrading.
   606  func (s *BoltStateDB) Upgrade() error {
   607  	// Check to see if the underlying DB needs upgrading.
   608  	upgrade, err := NeedsUpgrade(s.db.BoltDB())
   609  	if err != nil {
   610  		return err
   611  	}
   612  	if !upgrade {
   613  		// No upgrade needed!
   614  		return nil
   615  	}
   616  
   617  	// Upgraded needed. Backup the boltdb first.
   618  	backupFileName := filepath.Join(s.stateDir, "state.db.backup")
   619  	if err := backupDB(s.db.BoltDB(), backupFileName); err != nil {
   620  		return fmt.Errorf("error backing up state db: %v", err)
   621  	}
   622  
   623  	// Perform the upgrade
   624  	if err := s.db.Update(func(tx *boltdd.Tx) error {
   625  		if err := UpgradeAllocs(s.logger, tx); err != nil {
   626  			return err
   627  		}
   628  
   629  		// Add standard metadata
   630  		if err := addMeta(tx.BoltTx()); err != nil {
   631  			return err
   632  		}
   633  
   634  		// Write the time the upgrade was done
   635  		bkt, err := tx.CreateBucketIfNotExists(metaBucketName)
   636  		if err != nil {
   637  			return err
   638  		}
   639  		return bkt.Put(metaUpgradedKey, time.Now().Format(time.RFC3339))
   640  	}); err != nil {
   641  		return err
   642  	}
   643  
   644  	s.logger.Info("successfully upgraded state")
   645  	return nil
   646  }
   647  
// DB allows access to the underlying BoltDB for testing purposes. It returns
// the very handle the BoltStateDB itself uses, not a copy.
func (s *BoltStateDB) DB() *boltdd.DB {
	return s.db
}