github.com/Ilhicas/nomad@v1.0.4-0.20210304152020-e86851182bc3/command/operator_debug_test.go (about)

     1  package command
     2  
     3  import (
     4  	"fmt"
     5  	"os"
     6  	"path/filepath"
     7  	"testing"
     8  	"time"
     9  
    10  	"github.com/hashicorp/nomad/command/agent"
    11  	"github.com/hashicorp/nomad/helper"
    12  	"github.com/hashicorp/nomad/nomad/state"
    13  	"github.com/hashicorp/nomad/testutil"
    14  	"github.com/mitchellh/cli"
    15  	"github.com/stretchr/testify/assert"
    16  	"github.com/stretchr/testify/require"
    17  )
    18  
    19  // NOTE: most of these tests cannot be run in parallel
    20  
// testCase describes one invocation of the operator debug command and the
// results runTestCases verifies for it.
type testCase struct {
	// name is the subtest name passed to t.Run.
	name string
	// args are the command-line arguments handed to the command's Run method.
	args []string
	// expectedCode is the exit code Run must return.
	expectedCode int
	// expectedOutputs lists substrings that must each appear in the UI's
	// standard output.
	expectedOutputs []string
	// expectedError is a substring that must appear in the UI's error output.
	// The empty string matches any error output.
	expectedError string
}

// testCases is a list of cases executed in order by runTestCases.
type testCases []testCase
    30  
    31  func runTestCases(t *testing.T, cases testCases) {
    32  	t.Helper()
    33  	for _, c := range cases {
    34  		t.Run(c.name, func(t *testing.T) {
    35  			// Setup mock UI
    36  			ui := cli.NewMockUi()
    37  			cmd := &OperatorDebugCommand{Meta: Meta{Ui: ui}}
    38  
    39  			// Run test case
    40  			code := cmd.Run(c.args)
    41  			out := ui.OutputWriter.String()
    42  			outerr := ui.ErrorWriter.String()
    43  
    44  			// Verify case expectations
    45  			require.Equalf(t, code, c.expectedCode, "expected exit code %d, got: %d: %s", c.expectedCode, code, outerr)
    46  			for _, expectedOutput := range c.expectedOutputs {
    47  				require.Contains(t, out, expectedOutput, "expected output %q, got %q", expectedOutput, out)
    48  			}
    49  			require.Containsf(t, outerr, c.expectedError, "expected error %q, got %q", c.expectedError, outerr)
    50  		})
    51  	}
    52  }
    53  
    54  func TestDebug_NodeClass(t *testing.T) {
    55  	// Start test server and API client
    56  	srv, _, url := testServer(t, false, nil)
    57  	defer srv.Shutdown()
    58  
    59  	// Wait for leadership to establish
    60  	testutil.WaitForLeader(t, srv.Agent.RPC)
    61  
    62  	// Retrieve server RPC address to join clients
    63  	srvRPCAddr := srv.GetConfig().AdvertiseAddrs.RPC
    64  	t.Logf("[TEST] Leader started, srv.GetConfig().AdvertiseAddrs.RPC: %s", srvRPCAddr)
    65  
    66  	// Setup client 1 (nodeclass = clienta)
    67  	agentConfFunc1 := func(c *agent.Config) {
    68  		c.Region = "global"
    69  		c.Server.Enabled = false
    70  		c.Client.NodeClass = "clienta"
    71  		c.Client.Enabled = true
    72  		c.Client.Servers = []string{srvRPCAddr}
    73  	}
    74  
    75  	// Start client 1
    76  	client1 := agent.NewTestAgent(t, "client1", agentConfFunc1)
    77  	defer client1.Shutdown()
    78  
    79  	// Wait for client1 to connect
    80  	client1NodeID := client1.Agent.Client().NodeID()
    81  	testutil.WaitForClient(t, srv.Agent.RPC, client1NodeID)
    82  	t.Logf("[TEST] Client1 ready, id: %s", client1NodeID)
    83  
    84  	// Setup client 2 (nodeclass = clientb)
    85  	agentConfFunc2 := func(c *agent.Config) {
    86  		c.Region = "global"
    87  		c.Server.Enabled = false
    88  		c.Client.NodeClass = "clientb"
    89  		c.Client.Enabled = true
    90  		c.Client.Servers = []string{srvRPCAddr}
    91  	}
    92  
    93  	// Start client 2
    94  	client2 := agent.NewTestAgent(t, "client2", agentConfFunc2)
    95  	defer client2.Shutdown()
    96  
    97  	// Wait for client2 to connect
    98  	client2NodeID := client2.Agent.Client().NodeID()
    99  	testutil.WaitForClient(t, srv.Agent.RPC, client2NodeID)
   100  	t.Logf("[TEST] Client2 ready, id: %s", client2NodeID)
   101  
   102  	// Setup client 3 (nodeclass = clienta)
   103  	agentConfFunc3 := func(c *agent.Config) {
   104  		c.Server.Enabled = false
   105  		c.Client.NodeClass = "clienta"
   106  		c.Client.Servers = []string{srvRPCAddr}
   107  	}
   108  
   109  	// Start client 3
   110  	client3 := agent.NewTestAgent(t, "client3", agentConfFunc3)
   111  	defer client3.Shutdown()
   112  
   113  	// Wait for client3 to connect
   114  	client3NodeID := client3.Agent.Client().NodeID()
   115  	testutil.WaitForClient(t, srv.Agent.RPC, client3NodeID)
   116  	t.Logf("[TEST] Client3 ready, id: %s", client3NodeID)
   117  
   118  	// Setup test cases
   119  	cases := testCases{
   120  		{
   121  			name:         "address=api, node-class=clienta, max-nodes=2",
   122  			args:         []string{"-address", url, "-duration", "250ms", "-server-id", "all", "-node-id", "all", "-node-class", "clienta", "-max-nodes", "2"},
   123  			expectedCode: 0,
   124  			expectedOutputs: []string{
   125  				"Servers: (1/1)",
   126  				"Clients: (2/3)",
   127  				"Max node count reached (2)",
   128  				"Node Class: clienta",
   129  				"Created debug archive",
   130  			},
   131  			expectedError: "",
   132  		},
   133  		{
   134  			name:         "address=api, node-class=clientb, max-nodes=2",
   135  			args:         []string{"-address", url, "-duration", "250ms", "-server-id", "all", "-node-id", "all", "-node-class", "clientb", "-max-nodes", "2"},
   136  			expectedCode: 0,
   137  			expectedOutputs: []string{
   138  				"Servers: (1/1)",
   139  				"Clients: (1/3)",
   140  				"Node Class: clientb",
   141  				"Created debug archive",
   142  			},
   143  			expectedError: "",
   144  		},
   145  	}
   146  
   147  	runTestCases(t, cases)
   148  }
   149  
   150  func TestDebug_ClientToServer(t *testing.T) {
   151  	// Start test server and API client
   152  	srv, _, url := testServer(t, false, nil)
   153  	defer srv.Shutdown()
   154  
   155  	// Wait for leadership to establish
   156  	testutil.WaitForLeader(t, srv.Agent.RPC)
   157  
   158  	// Retrieve server RPC address to join client
   159  	srvRPCAddr := srv.GetConfig().AdvertiseAddrs.RPC
   160  	t.Logf("[TEST] Leader started, srv.GetConfig().AdvertiseAddrs.RPC: %s", srvRPCAddr)
   161  
   162  	// Setup client 1 (nodeclass = clienta)
   163  	agentConfFunc1 := func(c *agent.Config) {
   164  		c.Region = "global"
   165  		c.Server.Enabled = false
   166  		c.Client.NodeClass = "clienta"
   167  		c.Client.Enabled = true
   168  		c.Client.Servers = []string{srvRPCAddr}
   169  	}
   170  
   171  	// Start client 1
   172  	client1 := agent.NewTestAgent(t, "client1", agentConfFunc1)
   173  	defer client1.Shutdown()
   174  
   175  	// Wait for client 1 to connect
   176  	client1NodeID := client1.Agent.Client().NodeID()
   177  	testutil.WaitForClient(t, srv.Agent.RPC, client1NodeID)
   178  	t.Logf("[TEST] Client1 ready, id: %s", client1NodeID)
   179  
   180  	// Get API addresses
   181  	addrServer := srv.HTTPAddr()
   182  	addrClient1 := client1.HTTPAddr()
   183  
   184  	t.Logf("[TEST] testAgent api address: %s", url)
   185  	t.Logf("[TEST] Server    api address: %s", addrServer)
   186  	t.Logf("[TEST] Client1   api address: %s", addrClient1)
   187  
   188  	// Setup test cases
   189  	var cases = testCases{
   190  		{
   191  			name:            "testAgent api server",
   192  			args:            []string{"-address", url, "-duration", "250ms", "-server-id", "all", "-node-id", "all"},
   193  			expectedCode:    0,
   194  			expectedOutputs: []string{"Created debug archive"},
   195  		},
   196  		{
   197  			name:            "server address",
   198  			args:            []string{"-address", addrServer, "-duration", "250ms", "-server-id", "all", "-node-id", "all"},
   199  			expectedCode:    0,
   200  			expectedOutputs: []string{"Created debug archive"},
   201  		},
   202  		{
   203  			name:            "client1 address - verify no SIGSEGV panic",
   204  			args:            []string{"-address", addrClient1, "-duration", "250ms", "-server-id", "all", "-node-id", "all"},
   205  			expectedCode:    0,
   206  			expectedOutputs: []string{"Created debug archive"},
   207  		},
   208  	}
   209  
   210  	runTestCases(t, cases)
   211  }
   212  
   213  func TestDebug_SingleServer(t *testing.T) {
   214  	srv, _, url := testServer(t, false, nil)
   215  	defer srv.Shutdown()
   216  	testutil.WaitForLeader(t, srv.Agent.RPC)
   217  
   218  	var cases = testCases{
   219  		{
   220  			name:         "address=api, server-id=leader",
   221  			args:         []string{"-address", url, "-duration", "250ms", "-server-id", "leader"},
   222  			expectedCode: 0,
   223  			expectedOutputs: []string{
   224  				"Servers: (1/1)",
   225  				"Clients: (0/0)",
   226  				"Created debug archive",
   227  			},
   228  			expectedError: "",
   229  		},
   230  		{
   231  			name:         "address=api, server-id=all",
   232  			args:         []string{"-address", url, "-duration", "250ms", "-server-id", "all"},
   233  			expectedCode: 0,
   234  			expectedOutputs: []string{
   235  				"Servers: (1/1)",
   236  				"Clients: (0/0)",
   237  				"Created debug archive",
   238  			},
   239  			expectedError: "",
   240  		},
   241  	}
   242  
   243  	runTestCases(t, cases)
   244  }
   245  
   246  func TestDebug_Failures(t *testing.T) {
   247  	srv, _, url := testServer(t, false, nil)
   248  	defer srv.Shutdown()
   249  	testutil.WaitForLeader(t, srv.Agent.RPC)
   250  
   251  	var cases = testCases{
   252  		{
   253  			name:         "fails incorrect args",
   254  			args:         []string{"some", "bad", "args"},
   255  			expectedCode: 1,
   256  		},
   257  		{
   258  			name:         "Fails illegal node ids",
   259  			args:         []string{"-node-id", "foo:bar"},
   260  			expectedCode: 1,
   261  		},
   262  		{
   263  			name:         "Fails missing node ids",
   264  			args:         []string{"-node-id", "abc,def", "-duration", "250ms"},
   265  			expectedCode: 1,
   266  		},
   267  		{
   268  			name:         "Fails bad durations",
   269  			args:         []string{"-duration", "foo"},
   270  			expectedCode: 1,
   271  		},
   272  		{
   273  			name:         "Fails bad intervals",
   274  			args:         []string{"-interval", "bar"},
   275  			expectedCode: 1,
   276  		},
   277  		{
   278  			name:          "Fails bad address",
   279  			args:          []string{"-address", url + "bogus"},
   280  			expectedCode:  1,
   281  			expectedError: "invalid address",
   282  		},
   283  	}
   284  
   285  	runTestCases(t, cases)
   286  }
   287  
   288  func TestDebug_Bad_CSIPlugin_Names(t *testing.T) {
   289  	// Start test server and API client
   290  	srv, _, url := testServer(t, false, nil)
   291  	defer srv.Shutdown()
   292  
   293  	// Wait for leadership to establish
   294  	testutil.WaitForLeader(t, srv.Agent.RPC)
   295  
   296  	cases := []string{
   297  		"aws/ebs",
   298  		"gcp-*-1",
   299  	}
   300  	for _, pluginName := range cases {
   301  		cleanup := state.CreateTestCSIPlugin(srv.Agent.Server().State(), pluginName)
   302  		defer cleanup()
   303  	}
   304  
   305  	// Setup mock UI
   306  	ui := cli.NewMockUi()
   307  	cmd := &OperatorDebugCommand{Meta: Meta{Ui: ui}}
   308  
   309  	// Debug on the leader and all client nodes
   310  	code := cmd.Run([]string{"-address", url, "-duration", "250ms", "-server-id", "leader", "-node-id", "all", "-output", os.TempDir()})
   311  	assert.Equal(t, 0, code)
   312  
   313  	// Bad plugin name should be escaped before it reaches the sandbox test
   314  	require.NotContains(t, ui.ErrorWriter.String(), "file path escapes capture directory")
   315  	require.Contains(t, ui.OutputWriter.String(), "Starting debugger")
   316  
   317  	path := cmd.collectDir
   318  	defer os.Remove(path)
   319  
   320  	var pluginFiles []string
   321  	for _, pluginName := range cases {
   322  		pluginFile := fmt.Sprintf("csi-plugin-id-%s.json", helper.CleanFilename(pluginName, "_"))
   323  		pluginFile = filepath.Join(path, "nomad", "0000", pluginFile)
   324  		pluginFiles = append(pluginFiles, pluginFile)
   325  	}
   326  
   327  	testutil.WaitForFiles(t, pluginFiles)
   328  }
   329  
   330  func TestDebug_CapturedFiles(t *testing.T) {
   331  	srv, _, url := testServer(t, false, nil)
   332  	defer srv.Shutdown()
   333  	testutil.WaitForLeader(t, srv.Agent.RPC)
   334  
   335  	ui := cli.NewMockUi()
   336  	cmd := &OperatorDebugCommand{Meta: Meta{Ui: ui}}
   337  
   338  	code := cmd.Run([]string{
   339  		"-address", url,
   340  		"-output", os.TempDir(),
   341  		"-server-id", "leader",
   342  		"-duration", "1300ms",
   343  		"-interval", "600ms",
   344  	})
   345  
   346  	path := cmd.collectDir
   347  	defer os.Remove(path)
   348  
   349  	require.Empty(t, ui.ErrorWriter.String())
   350  	require.Equal(t, 0, code)
   351  	ui.ErrorWriter.Reset()
   352  
   353  	serverFiles := []string{
   354  		// Version is always captured
   355  		filepath.Join(path, "version", "agent-self.json"),
   356  
   357  		// Consul and Vault contain results or errors
   358  		filepath.Join(path, "version", "consul-agent-self.json"),
   359  		filepath.Join(path, "version", "vault-sys-health.json"),
   360  
   361  		// Monitor files are only created when selected
   362  		filepath.Join(path, "server", "leader", "monitor.log"),
   363  		filepath.Join(path, "server", "leader", "profile.prof"),
   364  		filepath.Join(path, "server", "leader", "trace.prof"),
   365  		filepath.Join(path, "server", "leader", "goroutine.prof"),
   366  		filepath.Join(path, "server", "leader", "goroutine-debug1.txt"),
   367  		filepath.Join(path, "server", "leader", "goroutine-debug2.txt"),
   368  
   369  		// Multiple snapshots are collected, 00 is always created
   370  		filepath.Join(path, "nomad", "0000", "jobs.json"),
   371  		filepath.Join(path, "nomad", "0000", "nodes.json"),
   372  		filepath.Join(path, "nomad", "0000", "metrics.json"),
   373  
   374  		// Multiple snapshots are collected, 01 requires two intervals
   375  		filepath.Join(path, "nomad", "0001", "jobs.json"),
   376  		filepath.Join(path, "nomad", "0001", "nodes.json"),
   377  		filepath.Join(path, "nomad", "0001", "metrics.json"),
   378  	}
   379  
   380  	testutil.WaitForFiles(t, serverFiles)
   381  }
   382  
   383  func TestDebug_ExistingOutput(t *testing.T) {
   384  	ui := cli.NewMockUi()
   385  	cmd := &OperatorDebugCommand{Meta: Meta{Ui: ui}}
   386  
   387  	// Fails existing output
   388  	format := "2006-01-02-150405Z"
   389  	stamped := "nomad-debug-" + time.Now().UTC().Format(format)
   390  	path := filepath.Join(os.TempDir(), stamped)
   391  	os.MkdirAll(path, 0755)
   392  	defer os.Remove(path)
   393  
   394  	code := cmd.Run([]string{"-output", os.TempDir(), "-duration", "50ms"})
   395  	require.Equal(t, 2, code)
   396  }
   397  
   398  func TestDebug_Fail_Pprof(t *testing.T) {
   399  	// Setup agent config with debug endpoints disabled
   400  	agentConfFunc := func(c *agent.Config) {
   401  		c.EnableDebug = false
   402  	}
   403  
   404  	// Start test server and API client
   405  	srv, _, url := testServer(t, false, agentConfFunc)
   406  	defer srv.Shutdown()
   407  
   408  	// Wait for leadership to establish
   409  	testutil.WaitForLeader(t, srv.Agent.RPC)
   410  
   411  	// Setup mock UI
   412  	ui := cli.NewMockUi()
   413  	cmd := &OperatorDebugCommand{Meta: Meta{Ui: ui}}
   414  
   415  	// Debug on client - node class = "clienta"
   416  	code := cmd.Run([]string{"-address", url, "-duration", "250ms", "-server-id", "all"})
   417  
   418  	assert.Equal(t, 0, code) // Pprof failure isn't fatal
   419  	require.Contains(t, ui.OutputWriter.String(), "Starting debugger")
   420  	require.Contains(t, ui.ErrorWriter.String(), "Failed to retrieve pprof") // Should report pprof failure
   421  	require.Contains(t, ui.ErrorWriter.String(), "Permission denied")        // Specifically permission denied
   422  	require.Contains(t, ui.OutputWriter.String(), "Created debug archive")   // Archive should be generated anyway
   423  }
   424  
   425  func TestDebug_Utils(t *testing.T) {
   426  	t.Parallel()
   427  
   428  	xs := argNodes("foo, bar")
   429  	require.Equal(t, []string{"foo", "bar"}, xs)
   430  
   431  	xs = argNodes("")
   432  	require.Len(t, xs, 0)
   433  	require.Empty(t, xs)
   434  
   435  	// address calculation honors CONSUL_HTTP_SSL
   436  	// ssl: true - Correct alignment
   437  	e := &external{addrVal: "https://127.0.0.1:8500", ssl: true}
   438  	addr := e.addr("foo")
   439  	require.Equal(t, "https://127.0.0.1:8500", addr)
   440  
   441  	// ssl: true - protocol incorrect
   442  	e = &external{addrVal: "http://127.0.0.1:8500", ssl: true}
   443  	addr = e.addr("foo")
   444  	require.Equal(t, "https://127.0.0.1:8500", addr)
   445  
   446  	// ssl: true - protocol missing
   447  	e = &external{addrVal: "127.0.0.1:8500", ssl: true}
   448  	addr = e.addr("foo")
   449  	require.Equal(t, "https://127.0.0.1:8500", addr)
   450  
   451  	// ssl: false - correct alignment
   452  	e = &external{addrVal: "http://127.0.0.1:8500", ssl: false}
   453  	addr = e.addr("foo")
   454  	require.Equal(t, "http://127.0.0.1:8500", addr)
   455  
   456  	// ssl: false - protocol incorrect
   457  	e = &external{addrVal: "https://127.0.0.1:8500", ssl: false}
   458  	addr = e.addr("foo")
   459  	require.Equal(t, "http://127.0.0.1:8500", addr)
   460  
   461  	// ssl: false - protocol missing
   462  	e = &external{addrVal: "127.0.0.1:8500", ssl: false}
   463  	addr = e.addr("foo")
   464  	require.Equal(t, "http://127.0.0.1:8500", addr)
   465  }
   466  
   467  func TestDebug_WriteBytes_Nil(t *testing.T) {
   468  	t.Parallel()
   469  
   470  	var testDir, testFile, testPath string
   471  	var testBytes []byte
   472  
   473  	// Setup mock UI
   474  	ui := cli.NewMockUi()
   475  	cmd := &OperatorDebugCommand{Meta: Meta{Ui: ui}}
   476  
   477  	testDir = os.TempDir()
   478  	cmd.collectDir = testDir
   479  
   480  	testFile = "test_nil.json"
   481  	testPath = filepath.Join(testDir, testFile)
   482  	defer os.Remove(testPath)
   483  
   484  	// Write nil file at top level of collect directory
   485  	err := cmd.writeBytes("", testFile, testBytes)
   486  	require.NoError(t, err)
   487  	require.FileExists(t, testPath)
   488  }
   489  
   490  func TestDebug_WriteBytes_PathEscapesSandbox(t *testing.T) {
   491  	t.Parallel()
   492  
   493  	var testDir, testFile string
   494  	var testBytes []byte
   495  
   496  	testDir = os.TempDir()
   497  	defer os.Remove(testDir)
   498  
   499  	testFile = "testing.json"
   500  	testPath := filepath.Join(testDir, testFile)
   501  	defer os.Remove(testPath)
   502  
   503  	// Setup mock UI
   504  	ui := cli.NewMockUi()
   505  	cmd := &OperatorDebugCommand{Meta: Meta{Ui: ui}}
   506  
   507  	// Empty collectDir will always appear to be escaped
   508  	cmd.collectDir = ""
   509  	err := cmd.writeBytes(testDir, testFile, testBytes)
   510  	require.Error(t, err)
   511  }