gopkg.in/hashicorp/nomad.v0@v0.11.8/nomad/client_agent_endpoint_test.go (about) 1 package nomad 2 3 import ( 4 "context" 5 "encoding/json" 6 "fmt" 7 "io" 8 "net" 9 "strings" 10 "testing" 11 "time" 12 13 "github.com/hashicorp/go-hclog" 14 "github.com/hashicorp/go-msgpack/codec" 15 "github.com/hashicorp/nomad/acl" 16 "github.com/hashicorp/nomad/client" 17 "github.com/hashicorp/nomad/client/config" 18 sframer "github.com/hashicorp/nomad/client/lib/streamframer" 19 cstructs "github.com/hashicorp/nomad/client/structs" 20 "github.com/hashicorp/nomad/command/agent/pprof" 21 "github.com/hashicorp/nomad/helper/uuid" 22 "github.com/hashicorp/nomad/nomad/mock" 23 "github.com/hashicorp/nomad/nomad/structs" 24 "github.com/hashicorp/nomad/testutil" 25 "github.com/stretchr/testify/assert" 26 "github.com/stretchr/testify/require" 27 ) 28 29 func TestMonitor_Monitor_Remote_Client(t *testing.T) { 30 t.Parallel() 31 require := require.New(t) 32 33 // start server and client 34 s1, cleanupS1 := TestServer(t, func(c *Config) { 35 c.BootstrapExpect = 2 36 }) 37 defer cleanupS1() 38 s2, cleanupS2 := TestServer(t, func(c *Config) { 39 c.BootstrapExpect = 2 40 }) 41 defer cleanupS2() 42 TestJoin(t, s1, s2) 43 testutil.WaitForLeader(t, s1.RPC) 44 testutil.WaitForLeader(t, s2.RPC) 45 46 c, cleanupC := client.TestClient(t, func(c *config.Config) { 47 c.Servers = []string{s2.GetConfig().RPCAddr.String()} 48 }) 49 defer cleanupC() 50 51 testutil.WaitForResult(func() (bool, error) { 52 nodes := s2.connectedNodes() 53 return len(nodes) == 1, nil 54 }, func(err error) { 55 t.Fatalf("should have a clients") 56 }) 57 58 // No node ID to monitor the remote server 59 req := cstructs.MonitorRequest{ 60 LogLevel: "debug", 61 NodeID: c.NodeID(), 62 } 63 64 handler, err := s1.StreamingRpcHandler("Agent.Monitor") 65 require.Nil(err) 66 67 // create pipe 68 p1, p2 := net.Pipe() 69 defer p1.Close() 70 defer p2.Close() 71 72 errCh := make(chan error) 73 streamMsg := make(chan *cstructs.StreamErrWrapper) 74 75 go handler(p2) 76 77 // Start decoder 78 go func() { 79 decoder := codec.NewDecoder(p1, structs.MsgpackHandle) 80 for { 81 var msg cstructs.StreamErrWrapper 82 if err := decoder.Decode(&msg); err != nil { 83 if err == io.EOF || strings.Contains(err.Error(), "closed") { 84 return 85 } 86 errCh <- fmt.Errorf("error decoding: %v", err) 87 } 88 89 streamMsg <- &msg 90 } 91 }() 92 93 // send request 94 encoder := codec.NewEncoder(p1, structs.MsgpackHandle) 95 require.Nil(encoder.Encode(req)) 96 97 timeout := time.After(3 * time.Second) 98 expected := "[DEBUG]" 99 received := "" 100 101 OUTER: 102 for { 103 select { 104 case <-timeout: 105 t.Fatal("timeout waiting for logs") 106 case err := <-errCh: 107 t.Fatal(err) 108 case msg := <-streamMsg: 109 if msg.Error != nil { 110 t.Fatalf("Got error: %v", msg.Error.Error()) 111 } 112 113 var frame sframer.StreamFrame 114 err := json.Unmarshal(msg.Payload, &frame) 115 assert.NoError(t, err) 116 117 received += string(frame.Data) 118 if strings.Contains(received, expected) { 119 require.Nil(p2.Close()) 120 break OUTER 121 } 122 } 123 } 124 } 125 126 func TestMonitor_Monitor_RemoteServer(t *testing.T) { 127 t.Parallel() 128 foreignRegion := "foo" 129 130 // start servers 131 s1, cleanupS1 := TestServer(t, func(c *Config) { 132 c.BootstrapExpect = 2 133 }) 134 defer cleanupS1() 135 s2, cleanupS2 := TestServer(t, func(c *Config) { 136 c.BootstrapExpect = 2 137 }) 138 defer cleanupS2() 139 140 s3, cleanupS3 := TestServer(t, func(c *Config) { 141 c.Region = foreignRegion 142 }) 143 defer cleanupS3() 144 145 TestJoin(t, s1, s2, s3) 146 testutil.WaitForLeader(t, s1.RPC) 147 testutil.WaitForLeader(t, s2.RPC) 148 testutil.WaitForLeader(t, s3.RPC) 149 150 // determine leader and nonleader 151 servers := []*Server{s1, s2} 152 var nonLeader *Server 153 var leader *Server 154 for _, s := range servers { 155 if !s.IsLeader() { 156 nonLeader = s 157 } else { 158 leader = s 159 } 160 } 161 162 cases := []struct { 163 desc string 164 serverID string 165 expectedLog string 166 logger hclog.InterceptLogger 167 origin *Server 168 region string 169 expectedErr string 170 }{ 171 { 172 desc: "remote leader", 173 serverID: "leader", 174 expectedLog: "leader log", 175 logger: leader.logger, 176 origin: nonLeader, 177 region: "global", 178 }, 179 { 180 desc: "remote server, server name", 181 serverID: nonLeader.serf.LocalMember().Name, 182 expectedLog: "nonleader log", 183 logger: nonLeader.logger, 184 origin: leader, 185 region: "global", 186 }, 187 { 188 desc: "remote server, server UUID", 189 serverID: nonLeader.serf.LocalMember().Tags["id"], 190 expectedLog: "nonleader log", 191 logger: nonLeader.logger, 192 origin: leader, 193 region: "global", 194 }, 195 { 196 desc: "serverID is current leader", 197 serverID: "leader", 198 expectedLog: "leader log", 199 logger: leader.logger, 200 origin: leader, 201 region: "global", 202 }, 203 { 204 desc: "serverID is current server", 205 serverID: nonLeader.serf.LocalMember().Name, 206 expectedLog: "non leader log", 207 logger: nonLeader.logger, 208 origin: nonLeader, 209 region: "global", 210 }, 211 { 212 desc: "remote server, different region", 213 serverID: s3.serf.LocalMember().Name, 214 expectedLog: "remote region logger", 215 logger: s3.logger, 216 origin: nonLeader, 217 region: foreignRegion, 218 }, 219 { 220 desc: "different region, region mismatch", 221 serverID: s3.serf.LocalMember().Name, 222 expectedLog: "remote region logger", 223 logger: s3.logger, 224 origin: nonLeader, 225 region: "bar", 226 expectedErr: "No path to region", 227 }, 228 } 229 230 for _, tc := range cases { 231 t.Run(tc.desc, func(t *testing.T) { 232 require := require.New(t) 233 234 // send some specific logs 235 ctx, cancel := context.WithCancel(context.Background()) 236 defer cancel() 237 238 go func() { 239 for { 240 select { 241 case <-ctx.Done(): 242 return 243 default: 244 tc.logger.Warn(tc.expectedLog) 245 time.Sleep(10 * time.Millisecond) 246 } 247 } 248 }() 249 250 req := cstructs.MonitorRequest{ 251 LogLevel: "warn", 252 ServerID: tc.serverID, 253 QueryOptions: structs.QueryOptions{ 254 Region: tc.region, 255 }, 256 } 257 258 handler, err := tc.origin.StreamingRpcHandler("Agent.Monitor") 259 require.Nil(err) 260 261 // create pipe 262 p1, p2 := net.Pipe() 263 defer p1.Close() 264 defer p2.Close() 265 266 errCh := make(chan error) 267 streamMsg := make(chan *cstructs.StreamErrWrapper) 268 269 go handler(p2) 270 271 // Start decoder 272 go func() { 273 decoder := codec.NewDecoder(p1, structs.MsgpackHandle) 274 for { 275 var msg cstructs.StreamErrWrapper 276 if err := decoder.Decode(&msg); err != nil { 277 if err == io.EOF || strings.Contains(err.Error(), "closed") { 278 return 279 } 280 errCh <- fmt.Errorf("error decoding: %v", err) 281 } 282 283 streamMsg <- &msg 284 } 285 }() 286 287 // send request 288 encoder := codec.NewEncoder(p1, structs.MsgpackHandle) 289 require.Nil(encoder.Encode(req)) 290 291 timeout := time.After(2 * time.Second) 292 received := "" 293 294 OUTER: 295 for { 296 select { 297 case <-timeout: 298 require.Fail("timeout waiting for logs") 299 case err := <-errCh: 300 require.Fail(err.Error()) 301 case msg := <-streamMsg: 302 if msg.Error != nil { 303 if tc.expectedErr != "" { 304 require.Contains(msg.Error.Error(), tc.expectedErr) 305 break OUTER 306 } else { 307 require.Failf("Got error: %v", msg.Error.Error()) 308 } 309 } else { 310 var frame sframer.StreamFrame 311 err := json.Unmarshal(msg.Payload, &frame) 312 assert.NoError(t, err) 313 314 received += string(frame.Data) 315 if strings.Contains(received, tc.expectedLog) { 316 cancel() 317 require.Nil(p2.Close()) 318 break OUTER 319 } 320 } 321 } 322 } 323 }) 324 } 325 } 326 327 func TestMonitor_MonitorServer(t *testing.T) { 328 t.Parallel() 329 require := require.New(t) 330 331 // start server 332 s, cleanupS := TestServer(t, nil) 333 defer cleanupS() 334 testutil.WaitForLeader(t, s.RPC) 335 336 // No node ID to monitor the remote server 337 req := cstructs.MonitorRequest{ 338 LogLevel: "debug", 339 QueryOptions: structs.QueryOptions{ 340 Region: "global", 341 }, 342 } 343 344 handler, err := s.StreamingRpcHandler("Agent.Monitor") 345 require.Nil(err) 346 347 // create pipe 348 p1, p2 := net.Pipe() 349 defer p1.Close() 350 defer p2.Close() 351 352 errCh := make(chan error) 353 streamMsg := make(chan *cstructs.StreamErrWrapper) 354 355 go handler(p2) 356 357 // Start decoder 358 go func() { 359 decoder := codec.NewDecoder(p1, structs.MsgpackHandle) 360 for { 361 var msg cstructs.StreamErrWrapper 362 if err := decoder.Decode(&msg); err != nil { 363 if err == io.EOF || strings.Contains(err.Error(), "closed") { 364 return 365 } 366 errCh <- fmt.Errorf("error decoding: %v", err) 367 } 368 369 streamMsg <- &msg 370 } 371 }() 372 373 // send request 374 encoder := codec.NewEncoder(p1, structs.MsgpackHandle) 375 require.Nil(encoder.Encode(req)) 376 377 timeout := time.After(1 * time.Second) 378 expected := "[DEBUG]" 379 received := "" 380 381 done := make(chan struct{}) 382 defer close(done) 383 384 // send logs 385 go func() { 386 for { 387 select { 388 case <-time.After(100 * time.Millisecond): 389 s.logger.Debug("test log") 390 case <-done: 391 return 392 } 393 } 394 }() 395 396 OUTER: 397 for { 398 select { 399 case <-timeout: 400 t.Fatal("timeout waiting for logs") 401 case err := <-errCh: 402 t.Fatal(err) 403 case msg := <-streamMsg: 404 if msg.Error != nil { 405 t.Fatalf("Got error: %v", msg.Error.Error()) 406 } 407 408 var frame sframer.StreamFrame 409 err := json.Unmarshal(msg.Payload, &frame) 410 assert.NoError(t, err) 411 412 received += string(frame.Data) 413 if strings.Contains(received, expected) { 414 require.Nil(p2.Close()) 415 break OUTER 416 } 417 } 418 } 419 } 420 421 func TestMonitor_Monitor_ACL(t *testing.T) { 422 t.Parallel() 423 require := require.New(t) 424 425 // start server 426 s, root, cleanupS := TestACLServer(t, nil) 427 defer cleanupS() 428 testutil.WaitForLeader(t, s.RPC) 429 430 policyBad := mock.NamespacePolicy("other", "", []string{acl.NamespaceCapabilityReadFS}) 431 tokenBad := mock.CreatePolicyAndToken(t, s.State(), 1005, "invalid", policyBad) 432 433 policyGood := mock.AgentPolicy(acl.PolicyRead) 434 tokenGood := mock.CreatePolicyAndToken(t, s.State(), 1009, "valid", policyGood) 435 436 cases := []struct { 437 Name string 438 Token string 439 ExpectedErr string 440 }{ 441 { 442 Name: "bad token", 443 Token: tokenBad.SecretID, 444 ExpectedErr: structs.ErrPermissionDenied.Error(), 445 }, 446 { 447 Name: "good token", 448 Token: tokenGood.SecretID, 449 ExpectedErr: "Unknown log level", 450 }, 451 { 452 Name: "root token", 453 Token: root.SecretID, 454 ExpectedErr: "Unknown log level", 455 }, 456 } 457 458 for _, tc := range cases { 459 t.Run(tc.Name, func(t *testing.T) { 460 req := &cstructs.MonitorRequest{ 461 LogLevel: "unknown", 462 QueryOptions: structs.QueryOptions{ 463 Namespace: structs.DefaultNamespace, 464 Region: "global", 465 AuthToken: tc.Token, 466 }, 467 } 468 469 handler, err := s.StreamingRpcHandler("Agent.Monitor") 470 require.Nil(err) 471 472 // create pipe 473 p1, p2 := net.Pipe() 474 defer p1.Close() 475 defer p2.Close() 476 477 errCh := make(chan error) 478 streamMsg := make(chan *cstructs.StreamErrWrapper) 479 480 go handler(p2) 481 482 // Start decoder 483 go func() { 484 decoder := codec.NewDecoder(p1, structs.MsgpackHandle) 485 for { 486 var msg cstructs.StreamErrWrapper 487 if err := decoder.Decode(&msg); err != nil { 488 if err == io.EOF || strings.Contains(err.Error(), "closed") { 489 return 490 } 491 errCh <- fmt.Errorf("error decoding: %v", err) 492 } 493 494 streamMsg <- &msg 495 } 496 }() 497 498 // send request 499 encoder := codec.NewEncoder(p1, structs.MsgpackHandle) 500 require.Nil(encoder.Encode(req)) 501 502 timeout := time.After(5 * time.Second) 503 OUTER: 504 for { 505 select { 506 case <-timeout: 507 t.Fatal("timeout") 508 case err := <-errCh: 509 t.Fatal(err) 510 case msg := <-streamMsg: 511 if msg.Error == nil { 512 continue 513 } 514 515 if strings.Contains(msg.Error.Error(), tc.ExpectedErr) { 516 break OUTER 517 } else { 518 t.Fatalf("Bad error: %v", msg.Error) 519 } 520 } 521 } 522 }) 523 } 524 } 525 526 func TestAgentProfile_RemoteClient(t *testing.T) { 527 t.Parallel() 528 require := require.New(t) 529 530 // start server and client 531 s1, cleanup := TestServer(t, func(c *Config) { 532 c.BootstrapExpect = 2 533 }) 534 defer cleanup() 535 536 s2, cleanup := TestServer(t, func(c *Config) { 537 c.BootstrapExpect = 2 538 }) 539 defer cleanup() 540 541 TestJoin(t, s1, s2) 542 testutil.WaitForLeader(t, s1.RPC) 543 testutil.WaitForLeader(t, s2.RPC) 544 545 c, cleanupC := client.TestClient(t, func(c *config.Config) { 546 c.Servers = []string{s2.GetConfig().RPCAddr.String()} 547 c.EnableDebug = true 548 }) 549 defer cleanupC() 550 551 testutil.WaitForResult(func() (bool, error) { 552 nodes := s2.connectedNodes() 553 return len(nodes) == 1, nil 554 }, func(err error) { 555 t.Fatalf("should have a clients") 556 }) 557 558 req := structs.AgentPprofRequest{ 559 ReqType: pprof.CPUReq, 560 NodeID: c.NodeID(), 561 QueryOptions: structs.QueryOptions{Region: "global"}, 562 } 563 564 reply := structs.AgentPprofResponse{} 565 566 err := s1.RPC("Agent.Profile", &req, &reply) 567 require.NoError(err) 568 569 require.NotNil(reply.Payload) 570 require.Equal(c.NodeID(), reply.AgentID) 571 } 572 573 // Test that we prevent a forwarding loop if the requested 574 // serverID does not exist in the requested region 575 func TestAgentProfile_RemoteRegionMisMatch(t *testing.T) { 576 t.Parallel() 577 require := require.New(t) 578 579 // start server and client 580 s1, cleanupS1 := TestServer(t, func(c *Config) { 581 c.NumSchedulers = 0 582 c.Region = "foo" 583 c.EnableDebug = true 584 }) 585 defer cleanupS1() 586 587 s2, cleanup := TestServer(t, func(c *Config) { 588 c.NumSchedulers = 0 589 c.Region = "bar" 590 c.EnableDebug = true 591 }) 592 defer cleanup() 593 594 TestJoin(t, s1, s2) 595 testutil.WaitForLeader(t, s1.RPC) 596 597 req := structs.AgentPprofRequest{ 598 ReqType: pprof.CPUReq, 599 ServerID: s1.serf.LocalMember().Name, 600 QueryOptions: structs.QueryOptions{ 601 Region: "bar", 602 }, 603 } 604 605 reply := structs.AgentPprofResponse{} 606 607 err := s1.RPC("Agent.Profile", &req, &reply) 608 require.Contains(err.Error(), "unknown Nomad server") 609 require.Nil(reply.Payload) 610 } 611 612 // Test that Agent.Profile can forward to a different region 613 func TestAgentProfile_RemoteRegion(t *testing.T) { 614 t.Parallel() 615 require := require.New(t) 616 617 // start server and client 618 s1, cleanupS1 := TestServer(t, func(c *Config) { 619 c.NumSchedulers = 0 620 c.Region = "foo" 621 }) 622 defer cleanupS1() 623 624 s2, cleanup := TestServer(t, func(c *Config) { 625 c.NumSchedulers = 0 626 c.Region = "bar" 627 c.EnableDebug = true 628 }) 629 defer cleanup() 630 631 TestJoin(t, s1, s2) 632 testutil.WaitForLeader(t, s1.RPC) 633 634 req := structs.AgentPprofRequest{ 635 ReqType: pprof.CPUReq, 636 ServerID: s2.serf.LocalMember().Name, 637 QueryOptions: structs.QueryOptions{ 638 Region: "bar", 639 }, 640 } 641 642 reply := structs.AgentPprofResponse{} 643 644 err := s1.RPC("Agent.Profile", &req, &reply) 645 require.NoError(err) 646 647 require.NotNil(reply.Payload) 648 require.Equal(s2.serf.LocalMember().Name, reply.AgentID) 649 } 650 651 func TestAgentProfile_Server(t *testing.T) { 652 t.Parallel() 653 654 // start servers 655 s1, cleanup := TestServer(t, func(c *Config) { 656 c.BootstrapExpect = 2 657 c.EnableDebug = true 658 }) 659 defer cleanup() 660 661 s2, cleanup := TestServer(t, func(c *Config) { 662 c.BootstrapExpect = 2 663 c.EnableDebug = true 664 }) 665 defer cleanup() 666 667 TestJoin(t, s1, s2) 668 testutil.WaitForLeader(t, s1.RPC) 669 testutil.WaitForLeader(t, s2.RPC) 670 671 // determine leader and nonleader 672 servers := []*Server{s1, s2} 673 var nonLeader *Server 674 var leader *Server 675 for _, s := range servers { 676 if !s.IsLeader() { 677 nonLeader = s 678 } else { 679 leader = s 680 } 681 } 682 683 cases := []struct { 684 desc string 685 serverID string 686 origin *Server 687 expectedErr string 688 expectedAgentID string 689 reqType pprof.ReqType 690 }{ 691 { 692 desc: "remote leader", 693 serverID: "leader", 694 origin: nonLeader, 695 reqType: pprof.CmdReq, 696 expectedAgentID: leader.serf.LocalMember().Name, 697 }, 698 { 699 desc: "remote server", 700 serverID: nonLeader.serf.LocalMember().Name, 701 origin: leader, 702 reqType: pprof.CmdReq, 703 expectedAgentID: nonLeader.serf.LocalMember().Name, 704 }, 705 { 706 desc: "serverID is current leader", 707 serverID: "leader", 708 origin: leader, 709 reqType: pprof.CmdReq, 710 expectedAgentID: leader.serf.LocalMember().Name, 711 }, 712 { 713 desc: "serverID is current server", 714 serverID: nonLeader.serf.LocalMember().Name, 715 origin: nonLeader, 716 reqType: pprof.CPUReq, 717 expectedAgentID: nonLeader.serf.LocalMember().Name, 718 }, 719 { 720 desc: "serverID is unknown", 721 serverID: uuid.Generate(), 722 origin: nonLeader, 723 reqType: pprof.CmdReq, 724 expectedErr: "unknown Nomad server", 725 expectedAgentID: "", 726 }, 727 } 728 729 for _, tc := range cases { 730 t.Run(tc.desc, func(t *testing.T) { 731 require := require.New(t) 732 733 req := structs.AgentPprofRequest{ 734 ReqType: tc.reqType, 735 ServerID: tc.serverID, 736 QueryOptions: structs.QueryOptions{Region: "global"}, 737 } 738 739 reply := structs.AgentPprofResponse{} 740 741 err := tc.origin.RPC("Agent.Profile", &req, &reply) 742 if tc.expectedErr != "" { 743 require.Contains(err.Error(), tc.expectedErr) 744 } else { 745 require.Nil(err) 746 require.NotNil(reply.Payload) 747 } 748 749 require.Equal(tc.expectedAgentID, reply.AgentID) 750 }) 751 } 752 } 753 754 func TestAgentProfile_ACL(t *testing.T) { 755 t.Parallel() 756 require := require.New(t) 757 758 // start server 759 s, root, cleanupS := TestACLServer(t, nil) 760 defer cleanupS() 761 testutil.WaitForLeader(t, s.RPC) 762 763 policyBad := mock.NamespacePolicy("other", "", []string{acl.NamespaceCapabilityReadFS}) 764 tokenBad := mock.CreatePolicyAndToken(t, s.State(), 1005, "invalid", policyBad) 765 766 policyGood := mock.AgentPolicy(acl.PolicyWrite) 767 tokenGood := mock.CreatePolicyAndToken(t, s.State(), 1009, "valid", policyGood) 768 769 cases := []struct { 770 Name string 771 Token string 772 ExpectedErr string 773 }{ 774 { 775 Name: "bad token", 776 Token: tokenBad.SecretID, 777 ExpectedErr: "Permission denied", 778 }, 779 { 780 Name: "good token", 781 Token: tokenGood.SecretID, 782 }, 783 { 784 Name: "root token", 785 Token: root.SecretID, 786 }, 787 } 788 789 for _, tc := range cases { 790 t.Run(tc.Name, func(t *testing.T) { 791 req := &structs.AgentPprofRequest{ 792 ReqType: pprof.CmdReq, 793 QueryOptions: structs.QueryOptions{ 794 Namespace: structs.DefaultNamespace, 795 Region: "global", 796 AuthToken: tc.Token, 797 }, 798 } 799 800 reply := &structs.AgentPprofResponse{} 801 802 err := s.RPC("Agent.Profile", req, reply) 803 if tc.ExpectedErr != "" { 804 require.Equal(tc.ExpectedErr, err.Error()) 805 } else { 806 require.NoError(err) 807 require.NotNil(reply.Payload) 808 } 809 }) 810 } 811 }