github.com/hashicorp/nomad/api@v0.0.0-20240306165712-3193ac204f65/agent_test.go (about)

     1  // Copyright (c) HashiCorp, Inc.
     2  // SPDX-License-Identifier: MPL-2.0
     3  
     4  package api
     5  
     6  import (
     7  	"fmt"
     8  	"sort"
     9  	"strings"
    10  	"testing"
    11  	"time"
    12  
    13  	"github.com/hashicorp/nomad/api/internal/testutil"
    14  	"github.com/shoenig/test/must"
    15  	"github.com/shoenig/test/wait"
    16  )
    17  
    18  func TestAgent_Self(t *testing.T) {
    19  	testutil.Parallel(t)
    20  
    21  	c, s := makeClient(t, nil, nil)
    22  	defer s.Stop()
    23  
    24  	// Get a handle on the Agent endpoints
    25  	a := c.Agent()
    26  
    27  	// Query the endpoint
    28  	res, err := a.Self()
    29  	must.NoError(t, err)
    30  
    31  	// Check that we got a valid response
    32  	must.NotEq(t, "", res.Member.Name, must.Sprint("missing member name"))
    33  
    34  	// Local cache was populated
    35  	must.NotEq(t, "", a.nodeName, must.Sprint("cache should be populated"))
    36  	must.NotEq(t, "", a.datacenter, must.Sprint("cache should be populated"))
    37  	must.NotEq(t, "", a.region, must.Sprint("cache should be populated"))
    38  }
    39  
    40  func TestAgent_NodeName(t *testing.T) {
    41  	testutil.Parallel(t)
    42  
    43  	c, s := makeClient(t, nil, nil)
    44  	defer s.Stop()
    45  	a := c.Agent()
    46  
    47  	// Query the agent for the node name
    48  	nodeName, err := a.NodeName()
    49  	must.NoError(t, err)
    50  	must.NotEq(t, "", nodeName)
    51  }
    52  
    53  func TestAgent_Datacenter(t *testing.T) {
    54  	testutil.Parallel(t)
    55  
    56  	c, s := makeClient(t, nil, nil)
    57  	defer s.Stop()
    58  	a := c.Agent()
    59  
    60  	// Query the agent for the datacenter
    61  	dc, err := a.Datacenter()
    62  	must.NoError(t, err)
    63  	must.Eq(t, "dc1", dc)
    64  }
    65  
    66  func TestAgent_Join(t *testing.T) {
    67  	testutil.Parallel(t)
    68  
    69  	c1, s1 := makeClient(t, nil, nil)
    70  	defer s1.Stop()
    71  	a1 := c1.Agent()
    72  
    73  	_, s2 := makeClient(t, nil, func(c *testutil.TestServerConfig) {
    74  		c.Server.BootstrapExpect = 0
    75  	})
    76  	defer s2.Stop()
    77  
    78  	// Attempting to join a nonexistent host returns error
    79  	n, err := a1.Join("nope")
    80  	must.Error(t, err)
    81  	must.Zero(t, 0, must.Sprint("should be zero errors"))
    82  
    83  	// Returns correctly if join succeeds
    84  	n, err = a1.Join(s2.SerfAddr)
    85  	must.NoError(t, err)
    86  	must.One(t, n)
    87  }
    88  
    89  func TestAgent_Members(t *testing.T) {
    90  	testutil.Parallel(t)
    91  
    92  	c, s := makeClient(t, nil, nil)
    93  	defer s.Stop()
    94  	a := c.Agent()
    95  
    96  	// Query nomad for all the known members
    97  	mem, err := a.Members()
    98  	must.NoError(t, err)
    99  
   100  	// Check that we got the expected result
   101  	must.Len(t, 1, mem.Members)
   102  	must.NotEq(t, "", mem.Members[0].Name)
   103  	must.NotEq(t, "", mem.Members[0].Addr)
   104  	must.NotEq(t, 0, mem.Members[0].Port)
   105  }
   106  
   107  func TestAgent_ForceLeave(t *testing.T) {
   108  	testutil.Parallel(t)
   109  
   110  	c, s := makeClient(t, nil, nil)
   111  	defer s.Stop()
   112  	a := c.Agent()
   113  
   114  	// Force-leave on a nonexistent node does not error
   115  	err := a.ForceLeave("nope")
   116  	must.NoError(t, err)
   117  
   118  	// Force-leave on an existing node
   119  	_, s2 := makeClient(t, nil, func(c *testutil.TestServerConfig) {
   120  		c.Server.BootstrapExpect = 0
   121  	})
   122  	defer s2.Stop()
   123  	// Create a new node to join
   124  	n, err := a.Join(s2.SerfAddr)
   125  	must.NoError(t, err)
   126  	must.One(t, n)
   127  
   128  	membersBefore, err := a.MembersOpts(&QueryOptions{})
   129  	must.NoError(t, err)
   130  	must.Eq(t, membersBefore.Members[1].Status, "alive")
   131  
   132  	err = a.ForceLeave(membersBefore.Members[1].Name)
   133  	must.NoError(t, err)
   134  
   135  	time.Sleep(3 * time.Second)
   136  
   137  	f := func() error {
   138  		membersAfter, err := a.MembersOpts(&QueryOptions{})
   139  		if err != nil {
   140  			return err
   141  		}
   142  		for _, node := range membersAfter.Members {
   143  			if node.Name == membersBefore.Members[1].Name {
   144  				if node.Status != "leaving" {
   145  					return fmt.Errorf("node did not leave")
   146  				}
   147  			}
   148  		}
   149  		return nil
   150  	}
   151  	must.Wait(t, wait.InitialSuccess(
   152  		wait.ErrorFunc(f),
   153  		wait.Timeout(3*time.Second),
   154  		wait.Gap(100*time.Millisecond),
   155  	))
   156  
   157  }
   158  
   159  func TestAgent_ForceLeavePrune(t *testing.T) {
   160  	testutil.Parallel(t)
   161  
   162  	c, s := makeClient(t, nil, nil)
   163  	defer s.Stop()
   164  	a := c.Agent()
   165  
   166  	nodeName := "foo"
   167  	_, s2 := makeClient(t, nil, func(c *testutil.TestServerConfig) {
   168  		c.NodeName = nodeName
   169  		c.Server.BootstrapExpect = 0
   170  	})
   171  
   172  	n, err := a.Join(s2.SerfAddr)
   173  	must.NoError(t, err)
   174  	must.One(t, n)
   175  	membersBefore, err := a.MembersOpts(&QueryOptions{})
   176  	must.NoError(t, err)
   177  
   178  	s2.Stop()
   179  
   180  	forceLeaveOpts := ForceLeaveOpts{
   181  		Prune: true,
   182  	}
   183  	nodeName = nodeName + ".global"
   184  	err = a.ForceLeaveWithOptions(nodeName, forceLeaveOpts)
   185  	must.NoError(t, err)
   186  
   187  	f := func() error {
   188  		membersAfter, err := a.MembersOpts(&QueryOptions{})
   189  		if err != nil {
   190  			return err
   191  		}
   192  		if len(membersAfter.Members) == len(membersBefore.Members) {
   193  			return fmt.Errorf("node did not get pruned")
   194  		}
   195  		return nil
   196  	}
   197  	must.Wait(t, wait.InitialSuccess(
   198  		wait.ErrorFunc(f),
   199  		wait.Timeout(5*time.Second),
   200  		wait.Gap(100*time.Millisecond),
   201  	))
   202  
   203  }
   204  
   205  func (a *AgentMember) String() string {
   206  	return "{Name: " + a.Name + " Region: " + a.Tags["region"] + " DC: " + a.Tags["dc"] + "}"
   207  }
   208  
   209  func TestAgents_Sort(t *testing.T) {
   210  	testutil.Parallel(t)
   211  
   212  	var sortTests = []struct {
   213  		in  []*AgentMember
   214  		out []*AgentMember
   215  	}{
   216  		{
   217  			[]*AgentMember{
   218  				{Name: "nomad-2.vac.us-east",
   219  					Tags: map[string]string{"region": "us-east", "dc": "us-east-1c"}},
   220  				{Name: "nomad-1.global",
   221  					Tags: map[string]string{"region": "global", "dc": "dc1"}},
   222  				{Name: "nomad-1.vac.us-east",
   223  					Tags: map[string]string{"region": "us-east", "dc": "us-east-1c"}},
   224  			},
   225  			[]*AgentMember{
   226  				{Name: "nomad-1.global",
   227  					Tags: map[string]string{"region": "global", "dc": "dc1"}},
   228  				{Name: "nomad-1.vac.us-east",
   229  					Tags: map[string]string{"region": "us-east", "dc": "us-east-1c"}},
   230  				{Name: "nomad-2.vac.us-east",
   231  					Tags: map[string]string{"region": "us-east", "dc": "us-east-1c"}},
   232  			},
   233  		},
   234  		{
   235  			[]*AgentMember{
   236  				{Name: "nomad-02.tam.us-east",
   237  					Tags: map[string]string{"region": "us-east", "dc": "tampa"}},
   238  				{Name: "nomad-02.pal.us-west",
   239  					Tags: map[string]string{"region": "us-west", "dc": "palo_alto"}},
   240  				{Name: "nomad-01.pal.us-west",
   241  					Tags: map[string]string{"region": "us-west", "dc": "palo_alto"}},
   242  				{Name: "nomad-01.tam.us-east",
   243  					Tags: map[string]string{"region": "us-east", "dc": "tampa"}},
   244  			},
   245  			[]*AgentMember{
   246  				{Name: "nomad-01.tam.us-east",
   247  					Tags: map[string]string{"region": "us-east", "dc": "tampa"}},
   248  				{Name: "nomad-02.tam.us-east",
   249  					Tags: map[string]string{"region": "us-east", "dc": "tampa"}},
   250  				{Name: "nomad-01.pal.us-west",
   251  					Tags: map[string]string{"region": "us-west", "dc": "palo_alto"}},
   252  				{Name: "nomad-02.pal.us-west",
   253  					Tags: map[string]string{"region": "us-west", "dc": "palo_alto"}},
   254  			},
   255  		},
   256  		{
   257  			[]*AgentMember{
   258  				{Name: "nomad-02.tam.us-east",
   259  					Tags: map[string]string{"region": "us-east", "dc": "tampa"}},
   260  				{Name: "nomad-02.ams.europe",
   261  					Tags: map[string]string{"region": "europe", "dc": "amsterdam"}},
   262  				{Name: "nomad-01.tam.us-east",
   263  					Tags: map[string]string{"region": "us-east", "dc": "tampa"}},
   264  				{Name: "nomad-01.ams.europe",
   265  					Tags: map[string]string{"region": "europe", "dc": "amsterdam"}},
   266  			},
   267  			[]*AgentMember{
   268  				{Name: "nomad-01.ams.europe",
   269  					Tags: map[string]string{"region": "europe", "dc": "amsterdam"}},
   270  				{Name: "nomad-02.ams.europe",
   271  					Tags: map[string]string{"region": "europe", "dc": "amsterdam"}},
   272  				{Name: "nomad-01.tam.us-east",
   273  					Tags: map[string]string{"region": "us-east", "dc": "tampa"}},
   274  				{Name: "nomad-02.tam.us-east",
   275  					Tags: map[string]string{"region": "us-east", "dc": "tampa"}},
   276  			},
   277  		},
   278  		{
   279  			[]*AgentMember{
   280  				{Name: "nomad-02.ber.europe",
   281  					Tags: map[string]string{"region": "europe", "dc": "berlin"}},
   282  				{Name: "nomad-02.ams.europe",
   283  					Tags: map[string]string{"region": "europe", "dc": "amsterdam"}},
   284  				{Name: "nomad-01.ams.europe",
   285  					Tags: map[string]string{"region": "europe", "dc": "amsterdam"}},
   286  				{Name: "nomad-01.ber.europe",
   287  					Tags: map[string]string{"region": "europe", "dc": "berlin"}},
   288  			},
   289  			[]*AgentMember{
   290  				{Name: "nomad-01.ams.europe",
   291  					Tags: map[string]string{"region": "europe", "dc": "amsterdam"}},
   292  				{Name: "nomad-02.ams.europe",
   293  					Tags: map[string]string{"region": "europe", "dc": "amsterdam"}},
   294  				{Name: "nomad-01.ber.europe",
   295  					Tags: map[string]string{"region": "europe", "dc": "berlin"}},
   296  				{Name: "nomad-02.ber.europe",
   297  					Tags: map[string]string{"region": "europe", "dc": "berlin"}},
   298  			},
   299  		},
   300  		{
   301  			[]*AgentMember{
   302  				{Name: "nomad-1.global"},
   303  				{Name: "nomad-3.global"},
   304  				{Name: "nomad-2.global"},
   305  			},
   306  			[]*AgentMember{
   307  				{Name: "nomad-1.global"},
   308  				{Name: "nomad-2.global"},
   309  				{Name: "nomad-3.global"},
   310  			},
   311  		},
   312  	}
   313  	for _, tt := range sortTests {
   314  		sort.Sort(AgentMembersNameSort(tt.in))
   315  		must.Eq(t, tt.in, tt.out)
   316  	}
   317  }
   318  
   319  func TestAgent_Health(t *testing.T) {
   320  	testutil.Parallel(t)
   321  
   322  	c, s := makeClient(t, nil, nil)
   323  	defer s.Stop()
   324  	a := c.Agent()
   325  
   326  	health, err := a.Health()
   327  	must.NoError(t, err)
   328  	must.True(t, health.Server.Ok)
   329  }
   330  
   331  // TestAgent_MonitorWithNode tests the Monitor endpoint
   332  // passing in a log level and node ie, which tests monitor
   333  // functionality for a specific client node
   334  func TestAgent_MonitorWithNode(t *testing.T) {
   335  	testutil.Parallel(t)
   336  
   337  	c, s := makeClient(t, nil, func(c *testutil.TestServerConfig) {
   338  		c.DevMode = true
   339  	})
   340  	defer s.Stop()
   341  
   342  	agent := c.Agent()
   343  	node := oneNodeFromNodeList(t, c.Nodes())
   344  
   345  	doneCh := make(chan struct{})
   346  	q := &QueryOptions{
   347  		Params: map[string]string{
   348  			"log_level": "debug",
   349  			"node_id":   node.ID,
   350  		},
   351  	}
   352  
   353  	frames, errCh := agent.Monitor(doneCh, q)
   354  	defer close(doneCh)
   355  
   356  	// make a request to generate some logs
   357  	_, err := agent.NodeName()
   358  	must.NoError(t, err)
   359  
   360  	// Wait for a log message
   361  OUTER:
   362  	for {
   363  		select {
   364  		case f := <-frames:
   365  			if strings.Contains(string(f.Data), "[DEBUG]") {
   366  				break OUTER
   367  			}
   368  		case err := <-errCh:
   369  			t.Errorf("Error: %v", err)
   370  		case <-time.After(2 * time.Second):
   371  			t.Fatal("failed to get a DEBUG log message")
   372  		}
   373  	}
   374  }
   375  
   376  // TestAgent_Monitor tests the Monitor endpoint
   377  // passing in only a log level, which tests the servers
   378  // monitor functionality
   379  func TestAgent_Monitor(t *testing.T) {
   380  	testutil.Parallel(t)
   381  
   382  	c, s := makeClient(t, nil, nil)
   383  	defer s.Stop()
   384  
   385  	agent := c.Agent()
   386  
   387  	q := &QueryOptions{
   388  		Params: map[string]string{
   389  			"log_level": "debug",
   390  		},
   391  	}
   392  
   393  	doneCh := make(chan struct{})
   394  	frames, errCh := agent.Monitor(doneCh, q)
   395  	defer close(doneCh)
   396  
   397  	// make a request to generate some logs
   398  	_, err := agent.Region()
   399  	must.NoError(t, err)
   400  
   401  	// Wait for a log message
   402  OUTER:
   403  	for {
   404  		select {
   405  		case log := <-frames:
   406  			if log == nil {
   407  				continue
   408  			}
   409  			if strings.Contains(string(log.Data), "[DEBUG]") {
   410  				break OUTER
   411  			}
   412  		case err := <-errCh:
   413  			t.Fatalf("error: %v", err)
   414  		case <-time.After(2 * time.Second):
   415  			must.Unreachable(t, must.Sprint("failed to get DEBUG log message"))
   416  		}
   417  	}
   418  }
   419  
   420  func TestAgentCPUProfile(t *testing.T) {
   421  	testutil.Parallel(t)
   422  
   423  	c, s, token := makeACLClient(t, nil, nil)
   424  	defer s.Stop()
   425  
   426  	agent := c.Agent()
   427  
   428  	q := &QueryOptions{
   429  		AuthToken: token.SecretID,
   430  	}
   431  
   432  	// Valid local request
   433  	{
   434  		opts := PprofOptions{
   435  			Seconds: 1,
   436  		}
   437  		resp, err := agent.CPUProfile(opts, q)
   438  		must.NoError(t, err)
   439  		must.NotNil(t, resp)
   440  	}
   441  
   442  	// Invalid server request
   443  	{
   444  		opts := PprofOptions{
   445  			Seconds:  1,
   446  			ServerID: "unknown.global",
   447  		}
   448  		resp, err := agent.CPUProfile(opts, q)
   449  		must.Error(t, err)
   450  		must.ErrorContains(t, err, "500 (unknown Nomad server unknown.global)")
   451  		must.Nil(t, resp)
   452  	}
   453  
   454  }
   455  
   456  func TestAgentTrace(t *testing.T) {
   457  	testutil.Parallel(t)
   458  
   459  	c, s, token := makeACLClient(t, nil, nil)
   460  	defer s.Stop()
   461  
   462  	agent := c.Agent()
   463  
   464  	q := &QueryOptions{
   465  		AuthToken: token.SecretID,
   466  	}
   467  
   468  	resp, err := agent.Trace(PprofOptions{}, q)
   469  	must.NoError(t, err)
   470  	must.NotNil(t, resp)
   471  }
   472  
   473  func TestAgentProfile(t *testing.T) {
   474  	testutil.Parallel(t)
   475  
   476  	c, s, token := makeACLClient(t, nil, nil)
   477  	defer s.Stop()
   478  
   479  	agent := c.Agent()
   480  
   481  	q := &QueryOptions{
   482  		AuthToken: token.SecretID,
   483  	}
   484  
   485  	{
   486  		resp, err := agent.Lookup("heap", PprofOptions{}, q)
   487  		must.NoError(t, err)
   488  		must.NotNil(t, resp)
   489  	}
   490  
   491  	// unknown profile
   492  	{
   493  		resp, err := agent.Lookup("invalid", PprofOptions{}, q)
   494  		must.Error(t, err)
   495  		must.ErrorContains(t, err, "Unexpected response code: 404")
   496  		must.Nil(t, resp)
   497  	}
   498  }
   499  
   500  func TestAgent_SchedulerWorkerConfig(t *testing.T) {
   501  	testutil.Parallel(t)
   502  
   503  	c, s := makeClient(t, nil, nil)
   504  	defer s.Stop()
   505  	a := c.Agent()
   506  
   507  	config, err := a.GetSchedulerWorkerConfig(nil)
   508  	must.NoError(t, err)
   509  	must.NotNil(t, config)
   510  	newConfig := SchedulerWorkerPoolArgs{NumSchedulers: 0, EnabledSchedulers: []string{"_core", "system"}}
   511  	resp, err := a.SetSchedulerWorkerConfig(newConfig, nil)
   512  	must.NoError(t, err)
   513  	must.NotEq(t, config, resp)
   514  }
   515  
   516  func TestAgent_SchedulerWorkerConfig_BadRequest(t *testing.T) {
   517  	testutil.Parallel(t)
   518  
   519  	c, s := makeClient(t, nil, nil)
   520  	defer s.Stop()
   521  	a := c.Agent()
   522  
   523  	config, err := a.GetSchedulerWorkerConfig(nil)
   524  	must.NoError(t, err)
   525  	must.NotNil(t, config)
   526  	newConfig := SchedulerWorkerPoolArgs{NumSchedulers: -1, EnabledSchedulers: []string{"_core", "system"}}
   527  	_, err = a.SetSchedulerWorkerConfig(newConfig, nil)
   528  	must.Error(t, err)
   529  	must.ErrorContains(t, err, "400 (Invalid request)")
   530  }
   531  
   532  func TestAgent_SchedulerWorkersInfo(t *testing.T) {
   533  	testutil.Parallel(t)
   534  
   535  	c, s := makeClient(t, nil, nil)
   536  	defer s.Stop()
   537  	a := c.Agent()
   538  
   539  	info, err := a.GetSchedulerWorkersInfo(nil)
   540  	must.NoError(t, err)
   541  	must.NotNil(t, info)
   542  	defaultSchedulers := []string{"batch", "system", "sysbatch", "service", "_core"}
   543  	for _, worker := range info.Schedulers {
   544  		must.SliceContainsAll(t, defaultSchedulers, worker.EnabledSchedulers)
   545  	}
   546  }