github.com/anth0d/nomad@v0.0.0-20221214183521-ae3a0a2cad06/client/state/db_test.go

github.com/anth0d/nomad@v0.0.0-20221214183521-ae3a0a2cad06/client/state/db_test.go (about)

     1  package state
     2  
     3  import (
     4  	"os"
     5  	"reflect"
     6  	"sync"
     7  	"testing"
     8  	"time"
     9  
    10  	"github.com/hashicorp/nomad/ci"
    11  	trstate "github.com/hashicorp/nomad/client/allocrunner/taskrunner/state"
    12  	dmstate "github.com/hashicorp/nomad/client/devicemanager/state"
    13  	"github.com/hashicorp/nomad/client/dynamicplugins"
    14  	driverstate "github.com/hashicorp/nomad/client/pluginmanager/drivermanager/state"
    15  	"github.com/hashicorp/nomad/helper/testlog"
    16  	"github.com/hashicorp/nomad/nomad/mock"
    17  	"github.com/hashicorp/nomad/nomad/structs"
    18  	"github.com/kr/pretty"
    19  	"github.com/shoenig/test/must"
    20  	"github.com/stretchr/testify/require"
    21  )
    22  
    23  // assert each implementation satisfies StateDB interface
    24  var (
    25  	_ StateDB = (*BoltStateDB)(nil)
    26  	_ StateDB = (*MemDB)(nil)
    27  	_ StateDB = (*NoopDB)(nil)
    28  	_ StateDB = (*ErrDB)(nil)
    29  )
    30  
    31  func setupBoltStateDB(t *testing.T) *BoltStateDB {
    32  	dir := t.TempDir()
    33  
    34  	db, err := NewBoltStateDB(testlog.HCLogger(t), dir)
    35  	if err != nil {
    36  		if rmErr := os.RemoveAll(dir); rmErr != nil {
    37  			t.Logf("error removing boltdb dir: %v", rmErr)
    38  		}
    39  		t.Fatalf("error creating boltdb: %v", err)
    40  	}
    41  
    42  	t.Cleanup(func() {
    43  		if closeErr := db.Close(); closeErr != nil {
    44  			t.Errorf("error closing boltdb: %v", closeErr)
    45  		}
    46  	})
    47  
    48  	return db.(*BoltStateDB)
    49  }
    50  
    51  func testDB(t *testing.T, f func(*testing.T, StateDB)) {
    52  	dbs := []StateDB{
    53  		setupBoltStateDB(t),
    54  		NewMemDB(testlog.HCLogger(t)),
    55  	}
    56  
    57  	for _, db := range dbs {
    58  		t.Run(db.Name(), func(t *testing.T) {
    59  			f(t, db)
    60  		})
    61  	}
    62  }
    63  
    64  // TestStateDB_Allocations asserts the behavior of GetAllAllocations, PutAllocation, and
    65  // DeleteAllocationBucket for all operational StateDB implementations.
    66  func TestStateDB_Allocations(t *testing.T) {
    67  	ci.Parallel(t)
    68  
    69  	testDB(t, func(t *testing.T, db StateDB) {
    70  		require := require.New(t)
    71  
    72  		// Empty database should return empty non-nil results
    73  		allocs, errs, err := db.GetAllAllocations()
    74  		require.NoError(err)
    75  		require.NotNil(allocs)
    76  		require.Empty(allocs)
    77  		require.NotNil(errs)
    78  		require.Empty(errs)
    79  
    80  		// Put allocations
    81  		alloc1 := mock.Alloc()
    82  		alloc2 := mock.BatchAlloc()
    83  
    84  		require.NoError(db.PutAllocation(alloc1))
    85  		require.NoError(db.PutAllocation(alloc2))
    86  
    87  		// Retrieve them
    88  		allocs, errs, err = db.GetAllAllocations()
    89  		require.NoError(err)
    90  		require.NotNil(allocs)
    91  		require.Len(allocs, 2)
    92  		for _, a := range allocs {
    93  			switch a.ID {
    94  			case alloc1.ID:
    95  				if !reflect.DeepEqual(a, alloc1) {
    96  					pretty.Ldiff(t, a, alloc1)
    97  					t.Fatalf("alloc %q unequal", a.ID)
    98  				}
    99  			case alloc2.ID:
   100  				if !reflect.DeepEqual(a, alloc2) {
   101  					pretty.Ldiff(t, a, alloc2)
   102  					t.Fatalf("alloc %q unequal", a.ID)
   103  				}
   104  			default:
   105  				t.Fatalf("unexpected alloc id %q", a.ID)
   106  			}
   107  		}
   108  		require.NotNil(errs)
   109  		require.Empty(errs)
   110  
   111  		// Add another
   112  		alloc3 := mock.SystemAlloc()
   113  		require.NoError(db.PutAllocation(alloc3))
   114  		allocs, errs, err = db.GetAllAllocations()
   115  		require.NoError(err)
   116  		require.NotNil(allocs)
   117  		require.Len(allocs, 3)
   118  		require.Contains(allocs, alloc1)
   119  		require.Contains(allocs, alloc2)
   120  		require.Contains(allocs, alloc3)
   121  		require.NotNil(errs)
   122  		require.Empty(errs)
   123  
   124  		// Deleting a nonexistent alloc is a noop
   125  		require.NoError(db.DeleteAllocationBucket("asdf"))
   126  		allocs, _, err = db.GetAllAllocations()
   127  		require.NoError(err)
   128  		require.NotNil(allocs)
   129  		require.Len(allocs, 3)
   130  
   131  		// Delete alloc1
   132  		require.NoError(db.DeleteAllocationBucket(alloc1.ID))
   133  		allocs, errs, err = db.GetAllAllocations()
   134  		require.NoError(err)
   135  		require.NotNil(allocs)
   136  		require.Len(allocs, 2)
   137  		require.Contains(allocs, alloc2)
   138  		require.Contains(allocs, alloc3)
   139  		require.NotNil(errs)
   140  		require.Empty(errs)
   141  	})
   142  }
   143  
   144  // Integer division, rounded up.
   145  func ceilDiv(a, b int) int {
   146  	return (a + b - 1) / b
   147  }
   148  
   149  // TestStateDB_Batch asserts the behavior of PutAllocation, PutNetworkStatus and
   150  // DeleteAllocationBucket in batch mode, for all operational StateDB implementations.
   151  func TestStateDB_Batch(t *testing.T) {
   152  	ci.Parallel(t)
   153  
   154  	testDB(t, func(t *testing.T, db StateDB) {
   155  		require := require.New(t)
   156  
   157  		// For BoltDB, get initial tx_id
   158  		var getTxID func() int
   159  		var prevTxID int
   160  		var batchDelay time.Duration
   161  		var batchSize int
   162  		if boltStateDB, ok := db.(*BoltStateDB); ok {
   163  			boltdb := boltStateDB.DB().BoltDB()
   164  			getTxID = func() int {
   165  				tx, err := boltdb.Begin(true)
   166  				require.NoError(err)
   167  				defer tx.Rollback()
   168  				return tx.ID()
   169  			}
   170  			prevTxID = getTxID()
   171  			batchDelay = boltdb.MaxBatchDelay
   172  			batchSize = boltdb.MaxBatchSize
   173  		}
   174  
   175  		// Write 1000 allocations and network statuses in batch mode
   176  		startTime := time.Now()
   177  		const numAllocs = 1000
   178  		var allocs []*structs.Allocation
   179  		for i := 0; i < numAllocs; i++ {
   180  			allocs = append(allocs, mock.Alloc())
   181  		}
   182  		var wg sync.WaitGroup
   183  		for _, alloc := range allocs {
   184  			wg.Add(1)
   185  			go func(alloc *structs.Allocation) {
   186  				require.NoError(db.PutNetworkStatus(alloc.ID, mock.AllocNetworkStatus(), WithBatchMode()))
   187  				require.NoError(db.PutAllocation(alloc, WithBatchMode()))
   188  				wg.Done()
   189  			}(alloc)
   190  		}
   191  		wg.Wait()
   192  
   193  		// Check BoltDB actually combined PutAllocation calls into much fewer transactions.
   194  		// The actual number of transactions depends on how fast the goroutines are spawned,
   195  		// with every batchDelay (10ms by default) period saved in a separate transaction,
   196  		// plus each transaction is limited to batchSize writes (1000 by default).
   197  		// See boltdb MaxBatchDelay and MaxBatchSize parameters for more details.
   198  		if getTxID != nil {
   199  			numTransactions := getTxID() - prevTxID
   200  			writeTime := time.Now().Sub(startTime)
   201  			expectedNumTransactions := ceilDiv(2*numAllocs, batchSize) + ceilDiv(int(writeTime), int(batchDelay))
   202  			require.LessOrEqual(numTransactions, expectedNumTransactions)
   203  			prevTxID = getTxID()
   204  		}
   205  
   206  		// Retrieve allocs and make sure they are the same (order can differ)
   207  		readAllocs, errs, err := db.GetAllAllocations()
   208  		require.NoError(err)
   209  		require.NotNil(readAllocs)
   210  		require.Len(readAllocs, len(allocs))
   211  		require.NotNil(errs)
   212  		require.Empty(errs)
   213  
   214  		readAllocsById := make(map[string]*structs.Allocation)
   215  		for _, readAlloc := range readAllocs {
   216  			readAllocsById[readAlloc.ID] = readAlloc
   217  		}
   218  		for _, alloc := range allocs {
   219  			readAlloc, ok := readAllocsById[alloc.ID]
   220  			if !ok {
   221  				t.Fatalf("no alloc with ID=%q", alloc.ID)
   222  			}
   223  			if !reflect.DeepEqual(readAlloc, alloc) {
   224  				pretty.Ldiff(t, readAlloc, alloc)
   225  				t.Fatalf("alloc %q unequal", alloc.ID)
   226  			}
   227  		}
   228  
   229  		// Delete all allocs in batch mode
   230  		startTime = time.Now()
   231  		for _, alloc := range allocs {
   232  			wg.Add(1)
   233  			go func(alloc *structs.Allocation) {
   234  				require.NoError(db.DeleteAllocationBucket(alloc.ID, WithBatchMode()))
   235  				wg.Done()
   236  			}(alloc)
   237  		}
   238  		wg.Wait()
   239  
   240  		// Check BoltDB combined DeleteAllocationBucket calls into much fewer transactions.
   241  		if getTxID != nil {
   242  			numTransactions := getTxID() - prevTxID
   243  			writeTime := time.Now().Sub(startTime)
   244  			expectedNumTransactions := ceilDiv(numAllocs, batchSize) + ceilDiv(int(writeTime), int(batchDelay))
   245  			require.LessOrEqual(numTransactions, expectedNumTransactions)
   246  			prevTxID = getTxID()
   247  		}
   248  
   249  		// Check all allocs were deleted.
   250  		readAllocs, errs, err = db.GetAllAllocations()
   251  		require.NoError(err)
   252  		require.Empty(readAllocs)
   253  		require.Empty(errs)
   254  	})
   255  }
   256  
   257  // TestStateDB_TaskState asserts the behavior of task state related StateDB
   258  // methods.
   259  func TestStateDB_TaskState(t *testing.T) {
   260  	ci.Parallel(t)
   261  
   262  	testDB(t, func(t *testing.T, db StateDB) {
   263  		require := require.New(t)
   264  
   265  		// Getting nonexistent state should return nils
   266  		ls, ts, err := db.GetTaskRunnerState("allocid", "taskname")
   267  		require.NoError(err)
   268  		require.Nil(ls)
   269  		require.Nil(ts)
   270  
   271  		// Putting TaskState without first putting the allocation should work
   272  		state := structs.NewTaskState()
   273  		state.Failed = true // set a non-default value
   274  		require.NoError(db.PutTaskState("allocid", "taskname", state))
   275  
   276  		// Getting should return the available state
   277  		ls, ts, err = db.GetTaskRunnerState("allocid", "taskname")
   278  		require.NoError(err)
   279  		require.Nil(ls)
   280  		require.Equal(state, ts)
   281  
   282  		// Deleting a nonexistent task should not error
   283  		require.NoError(db.DeleteTaskBucket("adsf", "asdf"))
   284  		require.NoError(db.DeleteTaskBucket("asllocid", "asdf"))
   285  
   286  		// Data should be untouched
   287  		ls, ts, err = db.GetTaskRunnerState("allocid", "taskname")
   288  		require.NoError(err)
   289  		require.Nil(ls)
   290  		require.Equal(state, ts)
   291  
   292  		// Deleting the task should remove the state
   293  		require.NoError(db.DeleteTaskBucket("allocid", "taskname"))
   294  		ls, ts, err = db.GetTaskRunnerState("allocid", "taskname")
   295  		require.NoError(err)
   296  		require.Nil(ls)
   297  		require.Nil(ts)
   298  
   299  		// Putting LocalState should work just like TaskState
   300  		origLocalState := trstate.NewLocalState()
   301  		require.NoError(db.PutTaskRunnerLocalState("allocid", "taskname", origLocalState))
   302  		ls, ts, err = db.GetTaskRunnerState("allocid", "taskname")
   303  		require.NoError(err)
   304  		require.Equal(origLocalState, ls)
   305  		require.Nil(ts)
   306  	})
   307  }
   308  
   309  // TestStateDB_DeviceManager asserts the behavior of device manager state related StateDB
   310  // methods.
   311  func TestStateDB_DeviceManager(t *testing.T) {
   312  	ci.Parallel(t)
   313  
   314  	testDB(t, func(t *testing.T, db StateDB) {
   315  		require := require.New(t)
   316  
   317  		// Getting nonexistent state should return nils
   318  		ps, err := db.GetDevicePluginState()
   319  		require.NoError(err)
   320  		require.Nil(ps)
   321  
   322  		// Putting PluginState should work
   323  		state := &dmstate.PluginState{}
   324  		require.NoError(db.PutDevicePluginState(state))
   325  
   326  		// Getting should return the available state
   327  		ps, err = db.GetDevicePluginState()
   328  		require.NoError(err)
   329  		require.NotNil(ps)
   330  		require.Equal(state, ps)
   331  	})
   332  }
   333  
   334  // TestStateDB_DriverManager asserts the behavior of device manager state related StateDB
   335  // methods.
   336  func TestStateDB_DriverManager(t *testing.T) {
   337  	ci.Parallel(t)
   338  
   339  	testDB(t, func(t *testing.T, db StateDB) {
   340  		require := require.New(t)
   341  
   342  		// Getting nonexistent state should return nils
   343  		ps, err := db.GetDriverPluginState()
   344  		require.NoError(err)
   345  		require.Nil(ps)
   346  
   347  		// Putting PluginState should work
   348  		state := &driverstate.PluginState{}
   349  		require.NoError(db.PutDriverPluginState(state))
   350  
   351  		// Getting should return the available state
   352  		ps, err = db.GetDriverPluginState()
   353  		require.NoError(err)
   354  		require.NotNil(ps)
   355  		require.Equal(state, ps)
   356  	})
   357  }
   358  
   359  // TestStateDB_DynamicRegistry asserts the behavior of dynamic registry state related StateDB
   360  // methods.
   361  func TestStateDB_DynamicRegistry(t *testing.T) {
   362  	ci.Parallel(t)
   363  
   364  	testDB(t, func(t *testing.T, db StateDB) {
   365  		require := require.New(t)
   366  
   367  		// Getting nonexistent state should return nils
   368  		ps, err := db.GetDynamicPluginRegistryState()
   369  		require.NoError(err)
   370  		require.Nil(ps)
   371  
   372  		// Putting PluginState should work
   373  		state := &dynamicplugins.RegistryState{}
   374  		require.NoError(db.PutDynamicPluginRegistryState(state))
   375  
   376  		// Getting should return the available state
   377  		ps, err = db.GetDynamicPluginRegistryState()
   378  		require.NoError(err)
   379  		require.NotNil(ps)
   380  		require.Equal(state, ps)
   381  	})
   382  }
   383  
   384  func TestStateDB_CheckResult_keyForCheck(t *testing.T) {
   385  	ci.Parallel(t)
   386  
   387  	allocID := "alloc1"
   388  	checkID := structs.CheckID("id1")
   389  	result := keyForCheck(allocID, checkID)
   390  	exp := allocID + "_" + string(checkID)
   391  	must.Eq(t, exp, string(result))
   392  }
   393  
   394  func TestStateDB_CheckResult(t *testing.T) {
   395  	ci.Parallel(t)
   396  
   397  	qr := func(id string) *structs.CheckQueryResult {
   398  		return &structs.CheckQueryResult{
   399  			ID:        structs.CheckID(id),
   400  			Mode:      "healthiness",
   401  			Status:    "passing",
   402  			Output:    "nomad: tcp ok",
   403  			Timestamp: 1,
   404  			Group:     "group",
   405  			Task:      "task",
   406  			Service:   "service",
   407  			Check:     "check",
   408  		}
   409  	}
   410  
   411  	testDB(t, func(t *testing.T, db StateDB) {
   412  		t.Run("put and get", func(t *testing.T) {
   413  			err := db.PutCheckResult("alloc1", qr("abc123"))
   414  			must.NoError(t, err)
   415  			results, err := db.GetCheckResults()
   416  			must.NoError(t, err)
   417  			must.MapContainsKeys(t, results, []string{"alloc1"})
   418  			must.MapContainsKeys(t, results["alloc1"], []structs.CheckID{"abc123"})
   419  		})
   420  	})
   421  
   422  	testDB(t, func(t *testing.T, db StateDB) {
   423  		t.Run("delete", func(t *testing.T) {
   424  			must.NoError(t, db.PutCheckResult("alloc1", qr("id1")))
   425  			must.NoError(t, db.PutCheckResult("alloc1", qr("id2")))
   426  			must.NoError(t, db.PutCheckResult("alloc1", qr("id3")))
   427  			must.NoError(t, db.PutCheckResult("alloc1", qr("id4")))
   428  			must.NoError(t, db.PutCheckResult("alloc2", qr("id5")))
   429  			err := db.DeleteCheckResults("alloc1", []structs.CheckID{"id2", "id3"})
   430  			must.NoError(t, err)
   431  			results, err := db.GetCheckResults()
   432  			must.NoError(t, err)
   433  			must.MapContainsKeys(t, results, []string{"alloc1", "alloc2"})
   434  			must.MapContainsKeys(t, results["alloc1"], []structs.CheckID{"id1", "id4"})
   435  			must.MapContainsKeys(t, results["alloc2"], []structs.CheckID{"id5"})
   436  		})
   437  	})
   438  
   439  	testDB(t, func(t *testing.T, db StateDB) {
   440  		t.Run("purge", func(t *testing.T) {
   441  			must.NoError(t, db.PutCheckResult("alloc1", qr("id1")))
   442  			must.NoError(t, db.PutCheckResult("alloc1", qr("id2")))
   443  			must.NoError(t, db.PutCheckResult("alloc1", qr("id3")))
   444  			must.NoError(t, db.PutCheckResult("alloc1", qr("id4")))
   445  			must.NoError(t, db.PutCheckResult("alloc2", qr("id5")))
   446  			err := db.PurgeCheckResults("alloc1")
   447  			must.NoError(t, err)
   448  			results, err := db.GetCheckResults()
   449  			must.NoError(t, err)
   450  			must.MapContainsKeys(t, results, []string{"alloc2"})
   451  			must.MapContainsKeys(t, results["alloc2"], []structs.CheckID{"id5"})
   452  		})
   453  	})
   454  
   455  }
   456  
   457  // TestStateDB_Upgrade asserts calling Upgrade on new databases always
   458  // succeeds.
   459  func TestStateDB_Upgrade(t *testing.T) {
   460  	ci.Parallel(t)
   461  
   462  	testDB(t, func(t *testing.T, db StateDB) {
   463  		require.NoError(t, db.Upgrade())
   464  	})
   465  }