github.com/hashicorp/nomad/api@v0.0.0-20240306165712-3193ac204f65/nodes_test.go (about)

     1  // Copyright (c) HashiCorp, Inc.
     2  // SPDX-License-Identifier: MPL-2.0
     3  
     4  package api
     5  
     6  import (
     7  	"context"
     8  	"fmt"
     9  	"sort"
    10  	"testing"
    11  	"time"
    12  
    13  	"github.com/hashicorp/nomad/api/internal/testutil"
    14  	"github.com/shoenig/test/must"
    15  	"github.com/shoenig/test/wait"
    16  )
    17  
    18  func queryNodeList(t *testing.T, nodes *Nodes) ([]*NodeListStub, *QueryMeta) {
    19  	t.Helper()
    20  	var (
    21  		nodeListStub []*NodeListStub
    22  		queryMeta    *QueryMeta
    23  		err          error
    24  	)
    25  
    26  	f := func() error {
    27  		nodeListStub, queryMeta, err = nodes.List(nil)
    28  		if err != nil {
    29  			return fmt.Errorf("failed to list nodes: %w", err)
    30  		}
    31  		if len(nodeListStub) == 0 {
    32  			return fmt.Errorf("no nodes yet")
    33  		}
    34  		return nil
    35  	}
    36  
    37  	must.Wait(t, wait.InitialSuccess(
    38  		wait.ErrorFunc(f),
    39  		wait.Timeout(10*time.Second),
    40  		wait.Gap(1*time.Second),
    41  	))
    42  
    43  	return nodeListStub, queryMeta
    44  }
    45  
    46  func oneNodeFromNodeList(t *testing.T, nodes *Nodes) *NodeListStub {
    47  	nodeListStub, _ := queryNodeList(t, nodes)
    48  	must.Len(t, 1, nodeListStub, must.Sprint("expected 1 node"))
    49  	return nodeListStub[0]
    50  }
    51  
    52  func TestNodes_List(t *testing.T) {
    53  	testutil.Parallel(t)
    54  
    55  	c, s := makeClient(t, nil, func(c *testutil.TestServerConfig) {
    56  		c.DevMode = true
    57  	})
    58  	defer s.Stop()
    59  	nodes := c.Nodes()
    60  
    61  	nodeListStub, queryMeta := queryNodeList(t, nodes)
    62  	must.Len(t, 1, nodeListStub)
    63  	must.Eq(t, NodePoolDefault, nodeListStub[0].NodePool)
    64  
    65  	// Check that we got valid QueryMeta.
    66  	assertQueryMeta(t, queryMeta)
    67  }
    68  
    69  func TestNodes_PrefixList(t *testing.T) {
    70  	testutil.Parallel(t)
    71  
    72  	c, s := makeClient(t, nil, func(c *testutil.TestServerConfig) {
    73  		c.DevMode = true
    74  	})
    75  	defer s.Stop()
    76  	nodes := c.Nodes()
    77  
    78  	// Get the node ID
    79  	nodeID := oneNodeFromNodeList(t, nodes).ID
    80  
    81  	// Find node based on four character prefix
    82  	out, qm, err := nodes.PrefixList(nodeID[:4])
    83  	must.NoError(t, err)
    84  	must.Len(t, 1, out, must.Sprint("expected only 1 node"))
    85  
    86  	// Check that we got valid QueryMeta.
    87  	assertQueryMeta(t, qm)
    88  }
    89  
    90  // TestNodes_List_Resources asserts that ?resources=true includes allocated and
    91  // reserved resources in the response.
    92  func TestNodes_List_Resources(t *testing.T) {
    93  	testutil.Parallel(t)
    94  
    95  	c, s := makeClient(t, nil, func(c *testutil.TestServerConfig) {
    96  		c.DevMode = true
    97  	})
    98  	defer s.Stop()
    99  	nodes := c.Nodes()
   100  
   101  	node := oneNodeFromNodeList(t, nodes)
   102  
   103  	// By default resources should *not* be included
   104  	must.Nil(t, node.NodeResources)
   105  	must.Nil(t, node.ReservedResources)
   106  
   107  	qo := &QueryOptions{
   108  		Params: map[string]string{"resources": "true"},
   109  	}
   110  
   111  	out, _, err := nodes.List(qo)
   112  	must.NoError(t, err)
   113  	must.NotNil(t, out[0].NodeResources)
   114  	must.NotNil(t, out[0].ReservedResources)
   115  }
   116  
   117  func TestNodes_Info(t *testing.T) {
   118  	testutil.Parallel(t)
   119  
   120  	startTime := time.Now().Unix()
   121  	c, s := makeClient(t, nil, func(c *testutil.TestServerConfig) {
   122  		c.DevMode = true
   123  	})
   124  	defer s.Stop()
   125  	nodes := c.Nodes()
   126  
   127  	// Retrieving a nonexistent node returns error
   128  	_, _, infoErr := nodes.Info("12345678-abcd-efab-cdef-123456789abc", nil)
   129  	must.ErrorContains(t, infoErr, "not found")
   130  
   131  	// Get the node ID and DC
   132  	node := oneNodeFromNodeList(t, nodes)
   133  	nodeID, dc := node.ID, node.Datacenter
   134  
   135  	// Querying for existing nodes returns properly
   136  	result, qm, err := nodes.Info(nodeID, nil)
   137  	must.NoError(t, err)
   138  
   139  	assertQueryMeta(t, qm)
   140  
   141  	// Check that the result is what we expect
   142  	must.Eq(t, nodeID, result.ID)
   143  	must.Eq(t, dc, result.Datacenter)
   144  	must.Eq(t, NodePoolDefault, result.NodePool)
   145  
   146  	must.Eq(t, 20000, result.NodeResources.MinDynamicPort)
   147  	must.Eq(t, 32000, result.NodeResources.MaxDynamicPort)
   148  
   149  	// Check that the StatusUpdatedAt field is being populated correctly
   150  	must.Less(t, result.StatusUpdatedAt, startTime)
   151  
   152  	// check we have at least one event
   153  	must.GreaterEq(t, 1, len(result.Events))
   154  }
   155  
   156  func TestNodes_NoSecretID(t *testing.T) {
   157  	testutil.Parallel(t)
   158  
   159  	c, s := makeClient(t, nil, func(c *testutil.TestServerConfig) {
   160  		c.DevMode = true
   161  	})
   162  	defer s.Stop()
   163  	nodes := c.Nodes()
   164  
   165  	// Get the node ID
   166  	nodeID := oneNodeFromNodeList(t, nodes).ID
   167  
   168  	// perform a raw http call and make sure that:
   169  	// - "ID" to make sure that raw decoding is working correctly
   170  	// - "SecretID" to make sure it's not present
   171  	resp := make(map[string]interface{})
   172  	_, err := c.query("/v1/node/"+nodeID, &resp, nil)
   173  	must.NoError(t, err)
   174  	must.Eq(t, nodeID, resp["ID"].(string))
   175  	must.Eq(t, "", resp["SecretID"])
   176  }
   177  
   178  func TestNodes_ToggleDrain(t *testing.T) {
   179  	testutil.Parallel(t)
   180  
   181  	c, s := makeClient(t, nil, func(c *testutil.TestServerConfig) {
   182  		c.DevMode = true
   183  	})
   184  	defer s.Stop()
   185  	nodes := c.Nodes()
   186  
   187  	// Wait for node registration and get the ID
   188  	nodeID := oneNodeFromNodeList(t, nodes).ID
   189  
   190  	// Check for drain mode
   191  	out, _, err := nodes.Info(nodeID, nil)
   192  	must.NoError(t, err)
   193  	must.False(t, out.Drain)
   194  	must.Nil(t, out.LastDrain)
   195  
   196  	// Toggle it on
   197  	timeBeforeDrain := time.Now().Add(-1 * time.Second)
   198  	spec := &DrainSpec{
   199  		Deadline: 10 * time.Second,
   200  	}
   201  	drainMeta := map[string]string{
   202  		"reason": "this node needs to go",
   203  	}
   204  	drainOut, err := nodes.UpdateDrainOpts(nodeID, &DrainOptions{
   205  		DrainSpec:    spec,
   206  		MarkEligible: false,
   207  		Meta:         drainMeta,
   208  	}, nil)
   209  	must.NoError(t, err)
   210  	assertWriteMeta(t, &drainOut.WriteMeta)
   211  
   212  	// Drain may have completed before we can check, use event stream
   213  	ctx, cancel := context.WithCancel(context.Background())
   214  	defer cancel()
   215  
   216  	streamCh, err := c.EventStream().Stream(ctx, map[Topic][]string{
   217  		TopicNode: {nodeID},
   218  	}, 0, nil)
   219  	must.NoError(t, err)
   220  
   221  	// we expect to see the node change to Drain:true and then back to Drain:false+ineligible
   222  	var sawDraining, sawDrainComplete uint64
   223  	for sawDrainComplete == 0 {
   224  		select {
   225  		case events := <-streamCh:
   226  			must.NoError(t, events.Err)
   227  			for _, e := range events.Events {
   228  				node, err := e.Node()
   229  				must.NoError(t, err)
   230  				must.Eq(t, node.DrainStrategy != nil, node.Drain)
   231  				must.True(t, !node.Drain || node.SchedulingEligibility == NodeSchedulingIneligible) // node.Drain => "ineligible"
   232  				if node.Drain && node.SchedulingEligibility == NodeSchedulingIneligible {
   233  					must.NotNil(t, node.LastDrain)
   234  					must.Eq(t, DrainStatusDraining, node.LastDrain.Status)
   235  					now := time.Now()
   236  					must.False(t, node.LastDrain.StartedAt.Before(timeBeforeDrain))
   237  					must.False(t, node.LastDrain.StartedAt.After(now))
   238  					must.Eq(t, drainMeta, node.LastDrain.Meta)
   239  					sawDraining = node.ModifyIndex
   240  				} else if sawDraining != 0 && !node.Drain && node.SchedulingEligibility == NodeSchedulingIneligible {
   241  					must.NotNil(t, node.LastDrain)
   242  					must.Eq(t, DrainStatusComplete, node.LastDrain.Status)
   243  					must.True(t, !node.LastDrain.UpdatedAt.Before(node.LastDrain.StartedAt))
   244  					must.Eq(t, drainMeta, node.LastDrain.Meta)
   245  					sawDrainComplete = node.ModifyIndex
   246  				}
   247  			}
   248  		case <-time.After(5 * time.Second):
   249  			must.Unreachable(t, must.Sprint("waiting on stream event that never happened"))
   250  		}
   251  	}
   252  
   253  	// Toggle off again
   254  	drainOut, err = nodes.UpdateDrain(nodeID, nil, true, nil)
   255  	must.NoError(t, err)
   256  	assertWriteMeta(t, &drainOut.WriteMeta)
   257  
   258  	// Check again
   259  	out, _, err = nodes.Info(nodeID, nil)
   260  	must.NoError(t, err)
   261  	must.False(t, out.Drain)
   262  	must.Nil(t, out.DrainStrategy)
   263  	must.Eq(t, NodeSchedulingEligible, out.SchedulingEligibility)
   264  }
   265  
   266  func TestNodes_ToggleEligibility(t *testing.T) {
   267  	testutil.Parallel(t)
   268  
   269  	c, s := makeClient(t, nil, func(c *testutil.TestServerConfig) {
   270  		c.DevMode = true
   271  	})
   272  	defer s.Stop()
   273  	nodes := c.Nodes()
   274  
   275  	// Get node ID
   276  	nodeID := oneNodeFromNodeList(t, nodes).ID
   277  
   278  	// Check for eligibility
   279  	out, _, err := nodes.Info(nodeID, nil)
   280  	must.NoError(t, err)
   281  	must.Eq(t, NodeSchedulingEligible, out.SchedulingEligibility)
   282  
   283  	// Toggle it off
   284  	eligOut, err := nodes.ToggleEligibility(nodeID, false, nil)
   285  	must.NoError(t, err)
   286  	assertWriteMeta(t, &eligOut.WriteMeta)
   287  
   288  	// Check again
   289  	out, _, err = nodes.Info(nodeID, nil)
   290  	must.NoError(t, err)
   291  	must.Eq(t, NodeSchedulingIneligible, out.SchedulingEligibility)
   292  
   293  	// Toggle on
   294  	eligOut, err = nodes.ToggleEligibility(nodeID, true, nil)
   295  	must.NoError(t, err)
   296  	assertWriteMeta(t, &eligOut.WriteMeta)
   297  
   298  	// Check again
   299  	out, _, err = nodes.Info(nodeID, nil)
   300  	must.NoError(t, err)
   301  	must.Eq(t, NodeSchedulingEligible, out.SchedulingEligibility)
   302  	must.Nil(t, out.DrainStrategy)
   303  }
   304  
   305  func TestNodes_Allocations(t *testing.T) {
   306  	testutil.Parallel(t)
   307  
   308  	c, s := makeClient(t, nil, func(c *testutil.TestServerConfig) {
   309  		c.DevMode = true
   310  	})
   311  	defer s.Stop()
   312  	nodes := c.Nodes()
   313  
   314  	// Looking up by a nonexistent node returns nothing. We
   315  	// don't check the index here because it's possible the node
   316  	// has already registered, in which case we will get a non-
   317  	// zero result anyways.
   318  	allocations, _, err := nodes.Allocations("nope", nil)
   319  	must.NoError(t, err)
   320  	must.Len(t, 0, allocations)
   321  }
   322  
   323  func TestNodes_ForceEvaluate(t *testing.T) {
   324  	testutil.Parallel(t)
   325  
   326  	c, s := makeClient(t, nil, func(c *testutil.TestServerConfig) {
   327  		c.DevMode = true
   328  	})
   329  	defer s.Stop()
   330  	nodes := c.Nodes()
   331  
   332  	// Force-eval on a nonexistent node fails
   333  	_, _, err := nodes.ForceEvaluate("12345678-abcd-efab-cdef-123456789abc", nil)
   334  	must.ErrorContains(t, err, "not found")
   335  
   336  	// Wait for node registration and get the ID
   337  	nodeID := oneNodeFromNodeList(t, nodes).ID
   338  
   339  	// Try force-eval again. We don't check the WriteMeta because
   340  	// there are no allocations to process, so we would get an index
   341  	// of zero. Same goes for the eval ID.
   342  	_, _, err = nodes.ForceEvaluate(nodeID, nil)
   343  	must.NoError(t, err)
   344  }
   345  
   346  func TestNodes_Sort(t *testing.T) {
   347  	testutil.Parallel(t)
   348  
   349  	nodes := []*NodeListStub{
   350  		{CreateIndex: 2},
   351  		{CreateIndex: 1},
   352  		{CreateIndex: 5},
   353  	}
   354  	sort.Sort(NodeIndexSort(nodes))
   355  
   356  	expect := []*NodeListStub{
   357  		{CreateIndex: 5},
   358  		{CreateIndex: 2},
   359  		{CreateIndex: 1},
   360  	}
   361  	must.Eq(t, expect, nodes)
   362  }
   363  
   364  // Unittest monitorDrainMultiplex when an error occurs
   365  func TestNodes_MonitorDrain_Multiplex_Bad(t *testing.T) {
   366  	testutil.Parallel(t)
   367  
   368  	ctx := context.Background()
   369  	multiplexCtx, cancel := context.WithCancel(ctx)
   370  
   371  	// monitorDrainMultiplex doesn't require anything on *Nodes, so we
   372  	// don't need to use a full Client
   373  	var nodeClient *Nodes
   374  
   375  	outCh := make(chan *MonitorMessage, 8)
   376  	nodeCh := make(chan *MonitorMessage, 1)
   377  	allocCh := make(chan *MonitorMessage, 8)
   378  	exitedCh := make(chan struct{})
   379  	go func() {
   380  		defer close(exitedCh)
   381  		nodeClient.monitorDrainMultiplex(ctx, cancel, outCh, nodeCh, allocCh)
   382  	}()
   383  
   384  	// Fake an alloc update
   385  	msg := Messagef(0, "alloc update")
   386  	allocCh <- msg
   387  	must.Eq(t, msg, <-outCh)
   388  
   389  	// Fake a node update
   390  	msg = Messagef(0, "node update")
   391  	nodeCh <- msg
   392  	must.Eq(t, msg, <-outCh)
   393  
   394  	// Fake an error that should shut everything down
   395  	msg = Messagef(MonitorMsgLevelError, "fake error")
   396  	nodeCh <- msg
   397  	must.Eq(t, msg, <-outCh)
   398  
   399  	_, ok := <-exitedCh
   400  	must.False(t, ok)
   401  
   402  	_, ok = <-outCh
   403  	must.False(t, ok)
   404  
   405  	// Exiting should also cancel the context that would be passed to the
   406  	// node & alloc watchers
   407  	select {
   408  	case <-multiplexCtx.Done():
   409  	case <-time.After(100 * time.Millisecond):
   410  		must.Unreachable(t, must.Sprint("multiplex context was not cancelled"))
   411  	}
   412  }
   413  
   414  // Unittest monitorDrainMultiplex when drain finishes
   415  func TestNodes_MonitorDrain_Multiplex_Good(t *testing.T) {
   416  	testutil.Parallel(t)
   417  
   418  	ctx := context.Background()
   419  	multiplexCtx, cancel := context.WithCancel(ctx)
   420  
   421  	// monitorDrainMultiplex doesn't require anything on *Nodes, so we
   422  	// don't need to use a full Client
   423  	var nodeClient *Nodes
   424  
   425  	outCh := make(chan *MonitorMessage, 8)
   426  	nodeCh := make(chan *MonitorMessage, 1)
   427  	allocCh := make(chan *MonitorMessage, 8)
   428  	exitedCh := make(chan struct{})
   429  	go func() {
   430  		defer close(exitedCh)
   431  		nodeClient.monitorDrainMultiplex(ctx, cancel, outCh, nodeCh, allocCh)
   432  	}()
   433  
   434  	// Fake a node updating and finishing
   435  	msg := Messagef(MonitorMsgLevelInfo, "node update")
   436  	nodeCh <- msg
   437  	close(nodeCh)
   438  	must.Eq(t, msg, <-outCh)
   439  
   440  	// Nothing else should have exited yet
   441  	select {
   442  	case badMsg, ok := <-outCh:
   443  		must.False(t, ok, must.Sprintf("unexpected output %v", badMsg))
   444  		must.Unreachable(t, must.Sprint("out channel closed unexpectedly"))
   445  	case <-exitedCh:
   446  		must.Unreachable(t, must.Sprint("multiplexer exited unexpectedly"))
   447  	case <-multiplexCtx.Done():
   448  		must.Unreachable(t, must.Sprint("multiplexer context canceled unexpectedly"))
   449  	case <-time.After(10 * time.Millisecond):
   450  		t.Logf("multiplexer still running as expected")
   451  	}
   452  
   453  	// Fake an alloc update coming in after the node monitor has finished
   454  	msg = Messagef(0, "alloc update")
   455  	allocCh <- msg
   456  	must.Eq(t, msg, <-outCh)
   457  
   458  	// Closing the allocCh should cause everything to exit
   459  	close(allocCh)
   460  
   461  	_, ok := <-exitedCh
   462  	must.False(t, ok)
   463  
   464  	_, ok = <-outCh
   465  	must.False(t, ok)
   466  
   467  	// Exiting should also cancel the context that would be passed to the
   468  	// node & alloc watchers
   469  	select {
   470  	case <-multiplexCtx.Done():
   471  	case <-time.After(100 * time.Millisecond):
   472  		must.Unreachable(t, must.Sprint("context was not cancelled"))
   473  	}
   474  }
   475  
   476  func TestNodes_DrainStrategy_Equal(t *testing.T) {
   477  	testutil.Parallel(t)
   478  
   479  	// nil
   480  	var d *DrainStrategy
   481  	must.Equal(t, nil, d)
   482  
   483  	o := &DrainStrategy{}
   484  	must.NotEqual(t, d, o)
   485  	must.NotEqual(t, o, d)
   486  
   487  	d = &DrainStrategy{}
   488  	must.Equal(t, d, o)
   489  	must.Equal(t, o, d)
   490  
   491  	// ForceDeadline
   492  	d.ForceDeadline = time.Now()
   493  	must.NotEqual(t, d, o)
   494  
   495  	o.ForceDeadline = d.ForceDeadline
   496  	must.Equal(t, d, o)
   497  
   498  	// Deadline
   499  	d.Deadline = 1
   500  	must.NotEqual(t, d, o)
   501  
   502  	o.Deadline = 1
   503  	must.Equal(t, d, o)
   504  
   505  	// IgnoreSystemJobs
   506  	d.IgnoreSystemJobs = true
   507  	must.NotEqual(t, d, o)
   508  
   509  	o.IgnoreSystemJobs = true
   510  	must.Equal(t, d, o)
   511  }
   512  
   513  func TestNodes_Purge(t *testing.T) {
   514  	testutil.Parallel(t)
   515  
   516  	c, s := makeClient(t, nil, func(c *testutil.TestServerConfig) {
   517  		c.DevMode = true
   518  	})
   519  	defer s.Stop()
   520  	nodes := c.Nodes()
   521  
   522  	// Purge on a nonexistent node fails.
   523  	_, _, err := c.Nodes().Purge("12345678-abcd-efab-cdef-123456789abc", nil)
   524  	must.ErrorContains(t, err, "not found")
   525  
   526  	// Wait for nodeID
   527  	nodeID := oneNodeFromNodeList(t, nodes).ID
   528  
   529  	// Perform the node purge and check the response objects.
   530  	out, meta, err := c.Nodes().Purge(nodeID, nil)
   531  	must.NoError(t, err)
   532  	must.NotNil(t, out)
   533  
   534  	// We can't use assertQueryMeta here, as the RPC response does not populate
   535  	// the known leader field.
   536  	must.Positive(t, meta.LastIndex)
   537  }
   538  
   539  func TestNodeStatValueFormatting(t *testing.T) {
   540  	testutil.Parallel(t)
   541  
   542  	cases := []struct {
   543  		expected string
   544  		value    StatValue
   545  	}{
   546  		{
   547  			"true",
   548  			StatValue{BoolVal: pointerOf(true)},
   549  		},
   550  		{
   551  			"false",
   552  			StatValue{BoolVal: pointerOf(false)},
   553  		},
   554  		{
   555  			"myvalue",
   556  			StatValue{StringVal: pointerOf("myvalue")},
   557  		},
   558  		{
   559  			"2.718",
   560  			StatValue{
   561  				FloatNumeratorVal: float64ToPtr(2.718),
   562  			},
   563  		},
   564  		{
   565  			"2.718 / 3.14",
   566  			StatValue{
   567  				FloatNumeratorVal:   float64ToPtr(2.718),
   568  				FloatDenominatorVal: float64ToPtr(3.14),
   569  			},
   570  		},
   571  		{
   572  			"2.718 MHz",
   573  			StatValue{
   574  				FloatNumeratorVal: float64ToPtr(2.718),
   575  				Unit:              "MHz",
   576  			},
   577  		},
   578  		{
   579  			"2.718 / 3.14 MHz",
   580  			StatValue{
   581  				FloatNumeratorVal:   float64ToPtr(2.718),
   582  				FloatDenominatorVal: float64ToPtr(3.14),
   583  				Unit:                "MHz",
   584  			},
   585  		},
   586  		{
   587  			"2",
   588  			StatValue{
   589  				IntNumeratorVal: pointerOf(int64(2)),
   590  			},
   591  		},
   592  		{
   593  			"2 / 3",
   594  			StatValue{
   595  				IntNumeratorVal:   pointerOf(int64(2)),
   596  				IntDenominatorVal: pointerOf(int64(3)),
   597  			},
   598  		},
   599  		{
   600  			"2 MHz",
   601  			StatValue{
   602  				IntNumeratorVal: pointerOf(int64(2)),
   603  				Unit:            "MHz",
   604  			},
   605  		},
   606  		{
   607  			"2 / 3 MHz",
   608  			StatValue{
   609  				IntNumeratorVal:   pointerOf(int64(2)),
   610  				IntDenominatorVal: pointerOf(int64(3)),
   611  				Unit:              "MHz",
   612  			},
   613  		},
   614  	}
   615  
   616  	for i, c := range cases {
   617  		t.Run(fmt.Sprintf("case %d %v", i, c.expected), func(t *testing.T) {
   618  			formatted := c.value.String()
   619  			must.Eq(t, c.expected, formatted)
   620  		})
   621  	}
   622  }