github.com/Ilhicas/nomad@v1.0.4-0.20210304152020-e86851182bc3/command/operator_debug_test.go (about) 1 package command 2 3 import ( 4 "fmt" 5 "os" 6 "path/filepath" 7 "testing" 8 "time" 9 10 "github.com/hashicorp/nomad/command/agent" 11 "github.com/hashicorp/nomad/helper" 12 "github.com/hashicorp/nomad/nomad/state" 13 "github.com/hashicorp/nomad/testutil" 14 "github.com/mitchellh/cli" 15 "github.com/stretchr/testify/assert" 16 "github.com/stretchr/testify/require" 17 ) 18 19 // NOTE: most of these tests cannot be run in parallel 20 21 type testCase struct { 22 name string 23 args []string 24 expectedCode int 25 expectedOutputs []string 26 expectedError string 27 } 28 29 type testCases []testCase 30 31 func runTestCases(t *testing.T, cases testCases) { 32 t.Helper() 33 for _, c := range cases { 34 t.Run(c.name, func(t *testing.T) { 35 // Setup mock UI 36 ui := cli.NewMockUi() 37 cmd := &OperatorDebugCommand{Meta: Meta{Ui: ui}} 38 39 // Run test case 40 code := cmd.Run(c.args) 41 out := ui.OutputWriter.String() 42 outerr := ui.ErrorWriter.String() 43 44 // Verify case expectations 45 require.Equalf(t, code, c.expectedCode, "expected exit code %d, got: %d: %s", c.expectedCode, code, outerr) 46 for _, expectedOutput := range c.expectedOutputs { 47 require.Contains(t, out, expectedOutput, "expected output %q, got %q", expectedOutput, out) 48 } 49 require.Containsf(t, outerr, c.expectedError, "expected error %q, got %q", c.expectedError, outerr) 50 }) 51 } 52 } 53 54 func TestDebug_NodeClass(t *testing.T) { 55 // Start test server and API client 56 srv, _, url := testServer(t, false, nil) 57 defer srv.Shutdown() 58 59 // Wait for leadership to establish 60 testutil.WaitForLeader(t, srv.Agent.RPC) 61 62 // Retrieve server RPC address to join clients 63 srvRPCAddr := srv.GetConfig().AdvertiseAddrs.RPC 64 t.Logf("[TEST] Leader started, srv.GetConfig().AdvertiseAddrs.RPC: %s", srvRPCAddr) 65 66 // Setup client 1 (nodeclass = clienta) 67 agentConfFunc1 := func(c *agent.Config) { 68 c.Region = "global" 69 c.Server.Enabled = false 70 c.Client.NodeClass = "clienta" 71 c.Client.Enabled = true 72 c.Client.Servers = []string{srvRPCAddr} 73 } 74 75 // Start client 1 76 client1 := agent.NewTestAgent(t, "client1", agentConfFunc1) 77 defer client1.Shutdown() 78 79 // Wait for client1 to connect 80 client1NodeID := client1.Agent.Client().NodeID() 81 testutil.WaitForClient(t, srv.Agent.RPC, client1NodeID) 82 t.Logf("[TEST] Client1 ready, id: %s", client1NodeID) 83 84 // Setup client 2 (nodeclass = clientb) 85 agentConfFunc2 := func(c *agent.Config) { 86 c.Region = "global" 87 c.Server.Enabled = false 88 c.Client.NodeClass = "clientb" 89 c.Client.Enabled = true 90 c.Client.Servers = []string{srvRPCAddr} 91 } 92 93 // Start client 2 94 client2 := agent.NewTestAgent(t, "client2", agentConfFunc2) 95 defer client2.Shutdown() 96 97 // Wait for client2 to connect 98 client2NodeID := client2.Agent.Client().NodeID() 99 testutil.WaitForClient(t, srv.Agent.RPC, client2NodeID) 100 t.Logf("[TEST] Client2 ready, id: %s", client2NodeID) 101 102 // Setup client 3 (nodeclass = clienta) 103 agentConfFunc3 := func(c *agent.Config) { 104 c.Server.Enabled = false 105 c.Client.NodeClass = "clienta" 106 c.Client.Servers = []string{srvRPCAddr} 107 } 108 109 // Start client 3 110 client3 := agent.NewTestAgent(t, "client3", agentConfFunc3) 111 defer client3.Shutdown() 112 113 // Wait for client3 to connect 114 client3NodeID := client3.Agent.Client().NodeID() 115 testutil.WaitForClient(t, srv.Agent.RPC, client3NodeID) 116 t.Logf("[TEST] Client3 ready, id: %s", client3NodeID) 117 118 // Setup test cases 119 cases := testCases{ 120 { 121 name: "address=api, node-class=clienta, max-nodes=2", 122 args: []string{"-address", url, "-duration", "250ms", "-server-id", "all", "-node-id", "all", "-node-class", "clienta", "-max-nodes", "2"}, 123 expectedCode: 0, 124 expectedOutputs: []string{ 125 "Servers: (1/1)", 126 "Clients: (2/3)", 127 "Max node count reached (2)", 128 "Node Class: clienta", 129 "Created debug archive", 130 }, 131 expectedError: "", 132 }, 133 { 134 name: "address=api, node-class=clientb, max-nodes=2", 135 args: []string{"-address", url, "-duration", "250ms", "-server-id", "all", "-node-id", "all", "-node-class", "clientb", "-max-nodes", "2"}, 136 expectedCode: 0, 137 expectedOutputs: []string{ 138 "Servers: (1/1)", 139 "Clients: (1/3)", 140 "Node Class: clientb", 141 "Created debug archive", 142 }, 143 expectedError: "", 144 }, 145 } 146 147 runTestCases(t, cases) 148 } 149 150 func TestDebug_ClientToServer(t *testing.T) { 151 // Start test server and API client 152 srv, _, url := testServer(t, false, nil) 153 defer srv.Shutdown() 154 155 // Wait for leadership to establish 156 testutil.WaitForLeader(t, srv.Agent.RPC) 157 158 // Retrieve server RPC address to join client 159 srvRPCAddr := srv.GetConfig().AdvertiseAddrs.RPC 160 t.Logf("[TEST] Leader started, srv.GetConfig().AdvertiseAddrs.RPC: %s", srvRPCAddr) 161 162 // Setup client 1 (nodeclass = clienta) 163 agentConfFunc1 := func(c *agent.Config) { 164 c.Region = "global" 165 c.Server.Enabled = false 166 c.Client.NodeClass = "clienta" 167 c.Client.Enabled = true 168 c.Client.Servers = []string{srvRPCAddr} 169 } 170 171 // Start client 1 172 client1 := agent.NewTestAgent(t, "client1", agentConfFunc1) 173 defer client1.Shutdown() 174 175 // Wait for client 1 to connect 176 client1NodeID := client1.Agent.Client().NodeID() 177 testutil.WaitForClient(t, srv.Agent.RPC, client1NodeID) 178 t.Logf("[TEST] Client1 ready, id: %s", client1NodeID) 179 180 // Get API addresses 181 addrServer := srv.HTTPAddr() 182 addrClient1 := client1.HTTPAddr() 183 184 t.Logf("[TEST] testAgent api address: %s", url) 185 t.Logf("[TEST] Server api address: %s", addrServer) 186 t.Logf("[TEST] Client1 api address: %s", addrClient1) 187 188 // Setup test cases 189 var cases = testCases{ 190 { 191 name: "testAgent api server", 192 args: []string{"-address", url, "-duration", "250ms", "-server-id", "all", "-node-id", "all"}, 193 expectedCode: 0, 194 expectedOutputs: []string{"Created debug archive"}, 195 }, 196 { 197 name: "server address", 198 args: []string{"-address", addrServer, "-duration", "250ms", "-server-id", "all", "-node-id", "all"}, 199 expectedCode: 0, 200 expectedOutputs: []string{"Created debug archive"}, 201 }, 202 { 203 name: "client1 address - verify no SIGSEGV panic", 204 args: []string{"-address", addrClient1, "-duration", "250ms", "-server-id", "all", "-node-id", "all"}, 205 expectedCode: 0, 206 expectedOutputs: []string{"Created debug archive"}, 207 }, 208 } 209 210 runTestCases(t, cases) 211 } 212 213 func TestDebug_SingleServer(t *testing.T) { 214 srv, _, url := testServer(t, false, nil) 215 defer srv.Shutdown() 216 testutil.WaitForLeader(t, srv.Agent.RPC) 217 218 var cases = testCases{ 219 { 220 name: "address=api, server-id=leader", 221 args: []string{"-address", url, "-duration", "250ms", "-server-id", "leader"}, 222 expectedCode: 0, 223 expectedOutputs: []string{ 224 "Servers: (1/1)", 225 "Clients: (0/0)", 226 "Created debug archive", 227 }, 228 expectedError: "", 229 }, 230 { 231 name: "address=api, server-id=all", 232 args: []string{"-address", url, "-duration", "250ms", "-server-id", "all"}, 233 expectedCode: 0, 234 expectedOutputs: []string{ 235 "Servers: (1/1)", 236 "Clients: (0/0)", 237 "Created debug archive", 238 }, 239 expectedError: "", 240 }, 241 } 242 243 runTestCases(t, cases) 244 } 245 246 func TestDebug_Failures(t *testing.T) { 247 srv, _, url := testServer(t, false, nil) 248 defer srv.Shutdown() 249 testutil.WaitForLeader(t, srv.Agent.RPC) 250 251 var cases = testCases{ 252 { 253 name: "fails incorrect args", 254 args: []string{"some", "bad", "args"}, 255 expectedCode: 1, 256 }, 257 { 258 name: "Fails illegal node ids", 259 args: []string{"-node-id", "foo:bar"}, 260 expectedCode: 1, 261 }, 262 { 263 name: "Fails missing node ids", 264 args: []string{"-node-id", "abc,def", "-duration", "250ms"}, 265 expectedCode: 1, 266 }, 267 { 268 name: "Fails bad durations", 269 args: []string{"-duration", "foo"}, 270 expectedCode: 1, 271 }, 272 { 273 name: "Fails bad intervals", 274 args: []string{"-interval", "bar"}, 275 expectedCode: 1, 276 }, 277 { 278 name: "Fails bad address", 279 args: []string{"-address", url + "bogus"}, 280 expectedCode: 1, 281 expectedError: "invalid address", 282 }, 283 } 284 285 runTestCases(t, cases) 286 } 287 288 func TestDebug_Bad_CSIPlugin_Names(t *testing.T) { 289 // Start test server and API client 290 srv, _, url := testServer(t, false, nil) 291 defer srv.Shutdown() 292 293 // Wait for leadership to establish 294 testutil.WaitForLeader(t, srv.Agent.RPC) 295 296 cases := []string{ 297 "aws/ebs", 298 "gcp-*-1", 299 } 300 for _, pluginName := range cases { 301 cleanup := state.CreateTestCSIPlugin(srv.Agent.Server().State(), pluginName) 302 defer cleanup() 303 } 304 305 // Setup mock UI 306 ui := cli.NewMockUi() 307 cmd := &OperatorDebugCommand{Meta: Meta{Ui: ui}} 308 309 // Debug on the leader and all client nodes 310 code := cmd.Run([]string{"-address", url, "-duration", "250ms", "-server-id", "leader", "-node-id", "all", "-output", os.TempDir()}) 311 assert.Equal(t, 0, code) 312 313 // Bad plugin name should be escaped before it reaches the sandbox test 314 require.NotContains(t, ui.ErrorWriter.String(), "file path escapes capture directory") 315 require.Contains(t, ui.OutputWriter.String(), "Starting debugger") 316 317 path := cmd.collectDir 318 defer os.Remove(path) 319 320 var pluginFiles []string 321 for _, pluginName := range cases { 322 pluginFile := fmt.Sprintf("csi-plugin-id-%s.json", helper.CleanFilename(pluginName, "_")) 323 pluginFile = filepath.Join(path, "nomad", "0000", pluginFile) 324 pluginFiles = append(pluginFiles, pluginFile) 325 } 326 327 testutil.WaitForFiles(t, pluginFiles) 328 } 329 330 func TestDebug_CapturedFiles(t *testing.T) { 331 srv, _, url := testServer(t, false, nil) 332 defer srv.Shutdown() 333 testutil.WaitForLeader(t, srv.Agent.RPC) 334 335 ui := cli.NewMockUi() 336 cmd := &OperatorDebugCommand{Meta: Meta{Ui: ui}} 337 338 code := cmd.Run([]string{ 339 "-address", url, 340 "-output", os.TempDir(), 341 "-server-id", "leader", 342 "-duration", "1300ms", 343 "-interval", "600ms", 344 }) 345 346 path := cmd.collectDir 347 defer os.Remove(path) 348 349 require.Empty(t, ui.ErrorWriter.String()) 350 require.Equal(t, 0, code) 351 ui.ErrorWriter.Reset() 352 353 serverFiles := []string{ 354 // Version is always captured 355 filepath.Join(path, "version", "agent-self.json"), 356 357 // Consul and Vault contain results or errors 358 filepath.Join(path, "version", "consul-agent-self.json"), 359 filepath.Join(path, "version", "vault-sys-health.json"), 360 361 // Monitor files are only created when selected 362 filepath.Join(path, "server", "leader", "monitor.log"), 363 filepath.Join(path, "server", "leader", "profile.prof"), 364 filepath.Join(path, "server", "leader", "trace.prof"), 365 filepath.Join(path, "server", "leader", "goroutine.prof"), 366 filepath.Join(path, "server", "leader", "goroutine-debug1.txt"), 367 filepath.Join(path, "server", "leader", "goroutine-debug2.txt"), 368 369 // Multiple snapshots are collected, 00 is always created 370 filepath.Join(path, "nomad", "0000", "jobs.json"), 371 filepath.Join(path, "nomad", "0000", "nodes.json"), 372 filepath.Join(path, "nomad", "0000", "metrics.json"), 373 374 // Multiple snapshots are collected, 01 requires two intervals 375 filepath.Join(path, "nomad", "0001", "jobs.json"), 376 filepath.Join(path, "nomad", "0001", "nodes.json"), 377 filepath.Join(path, "nomad", "0001", "metrics.json"), 378 } 379 380 testutil.WaitForFiles(t, serverFiles) 381 } 382 383 func TestDebug_ExistingOutput(t *testing.T) { 384 ui := cli.NewMockUi() 385 cmd := &OperatorDebugCommand{Meta: Meta{Ui: ui}} 386 387 // Fails existing output 388 format := "2006-01-02-150405Z" 389 stamped := "nomad-debug-" + time.Now().UTC().Format(format) 390 path := filepath.Join(os.TempDir(), stamped) 391 os.MkdirAll(path, 0755) 392 defer os.Remove(path) 393 394 code := cmd.Run([]string{"-output", os.TempDir(), "-duration", "50ms"}) 395 require.Equal(t, 2, code) 396 } 397 398 func TestDebug_Fail_Pprof(t *testing.T) { 399 // Setup agent config with debug endpoints disabled 400 agentConfFunc := func(c *agent.Config) { 401 c.EnableDebug = false 402 } 403 404 // Start test server and API client 405 srv, _, url := testServer(t, false, agentConfFunc) 406 defer srv.Shutdown() 407 408 // Wait for leadership to establish 409 testutil.WaitForLeader(t, srv.Agent.RPC) 410 411 // Setup mock UI 412 ui := cli.NewMockUi() 413 cmd := &OperatorDebugCommand{Meta: Meta{Ui: ui}} 414 415 // Debug on client - node class = "clienta" 416 code := cmd.Run([]string{"-address", url, "-duration", "250ms", "-server-id", "all"}) 417 418 assert.Equal(t, 0, code) // Pprof failure isn't fatal 419 require.Contains(t, ui.OutputWriter.String(), "Starting debugger") 420 require.Contains(t, ui.ErrorWriter.String(), "Failed to retrieve pprof") // Should report pprof failure 421 require.Contains(t, ui.ErrorWriter.String(), "Permission denied") // Specifically permission denied 422 require.Contains(t, ui.OutputWriter.String(), "Created debug archive") // Archive should be generated anyway 423 } 424 425 func TestDebug_Utils(t *testing.T) { 426 t.Parallel() 427 428 xs := argNodes("foo, bar") 429 require.Equal(t, []string{"foo", "bar"}, xs) 430 431 xs = argNodes("") 432 require.Len(t, xs, 0) 433 require.Empty(t, xs) 434 435 // address calculation honors CONSUL_HTTP_SSL 436 // ssl: true - Correct alignment 437 e := &external{addrVal: "https://127.0.0.1:8500", ssl: true} 438 addr := e.addr("foo") 439 require.Equal(t, "https://127.0.0.1:8500", addr) 440 441 // ssl: true - protocol incorrect 442 e = &external{addrVal: "http://127.0.0.1:8500", ssl: true} 443 addr = e.addr("foo") 444 require.Equal(t, "https://127.0.0.1:8500", addr) 445 446 // ssl: true - protocol missing 447 e = &external{addrVal: "127.0.0.1:8500", ssl: true} 448 addr = e.addr("foo") 449 require.Equal(t, "https://127.0.0.1:8500", addr) 450 451 // ssl: false - correct alignment 452 e = &external{addrVal: "http://127.0.0.1:8500", ssl: false} 453 addr = e.addr("foo") 454 require.Equal(t, "http://127.0.0.1:8500", addr) 455 456 // ssl: false - protocol incorrect 457 e = &external{addrVal: "https://127.0.0.1:8500", ssl: false} 458 addr = e.addr("foo") 459 require.Equal(t, "http://127.0.0.1:8500", addr) 460 461 // ssl: false - protocol missing 462 e = &external{addrVal: "127.0.0.1:8500", ssl: false} 463 addr = e.addr("foo") 464 require.Equal(t, "http://127.0.0.1:8500", addr) 465 } 466 467 func TestDebug_WriteBytes_Nil(t *testing.T) { 468 t.Parallel() 469 470 var testDir, testFile, testPath string 471 var testBytes []byte 472 473 // Setup mock UI 474 ui := cli.NewMockUi() 475 cmd := &OperatorDebugCommand{Meta: Meta{Ui: ui}} 476 477 testDir = os.TempDir() 478 cmd.collectDir = testDir 479 480 testFile = "test_nil.json" 481 testPath = filepath.Join(testDir, testFile) 482 defer os.Remove(testPath) 483 484 // Write nil file at top level of collect directory 485 err := cmd.writeBytes("", testFile, testBytes) 486 require.NoError(t, err) 487 require.FileExists(t, testPath) 488 } 489 490 func TestDebug_WriteBytes_PathEscapesSandbox(t *testing.T) { 491 t.Parallel() 492 493 var testDir, testFile string 494 var testBytes []byte 495 496 testDir = os.TempDir() 497 defer os.Remove(testDir) 498 499 testFile = "testing.json" 500 testPath := filepath.Join(testDir, testFile) 501 defer os.Remove(testPath) 502 503 // Setup mock UI 504 ui := cli.NewMockUi() 505 cmd := &OperatorDebugCommand{Meta: Meta{Ui: ui}} 506 507 // Empty collectDir will always appear to be escaped 508 cmd.collectDir = "" 509 err := cmd.writeBytes(testDir, testFile, testBytes) 510 require.Error(t, err) 511 }