github.com/manicqin/nomad@v0.9.5/nomad/client_agent_endpoint_test.go (about) 1 package nomad 2 3 import ( 4 "encoding/json" 5 "fmt" 6 "io" 7 "net" 8 "strings" 9 "testing" 10 "time" 11 12 "github.com/hashicorp/go-hclog" 13 "github.com/hashicorp/nomad/acl" 14 "github.com/hashicorp/nomad/client" 15 "github.com/hashicorp/nomad/client/config" 16 sframer "github.com/hashicorp/nomad/client/lib/streamframer" 17 cstructs "github.com/hashicorp/nomad/client/structs" 18 "github.com/hashicorp/nomad/command/agent/pprof" 19 "github.com/hashicorp/nomad/helper/uuid" 20 "github.com/hashicorp/nomad/nomad/mock" 21 "github.com/hashicorp/nomad/nomad/structs" 22 "github.com/hashicorp/nomad/testutil" 23 "github.com/stretchr/testify/assert" 24 "github.com/stretchr/testify/require" 25 "github.com/ugorji/go/codec" 26 ) 27 28 func TestMonitor_Monitor_Remote_Client(t *testing.T) { 29 t.Parallel() 30 require := require.New(t) 31 32 // start server and client 33 s1, cleanupS1 := TestServer(t, nil) 34 defer cleanupS1() 35 s2, cleanupS2 := TestServer(t, func(c *Config) { 36 c.DevDisableBootstrap = true 37 }) 38 defer cleanupS2() 39 TestJoin(t, s1, s2) 40 testutil.WaitForLeader(t, s1.RPC) 41 testutil.WaitForLeader(t, s2.RPC) 42 43 c, cleanupC := client.TestClient(t, func(c *config.Config) { 44 c.Servers = []string{s2.GetConfig().RPCAddr.String()} 45 }) 46 defer cleanupC() 47 48 testutil.WaitForResult(func() (bool, error) { 49 nodes := s2.connectedNodes() 50 return len(nodes) == 1, nil 51 }, func(err error) { 52 t.Fatalf("should have a clients") 53 }) 54 55 // No node ID to monitor the remote server 56 req := cstructs.MonitorRequest{ 57 LogLevel: "debug", 58 NodeID: c.NodeID(), 59 } 60 61 handler, err := s1.StreamingRpcHandler("Agent.Monitor") 62 require.Nil(err) 63 64 // create pipe 65 p1, p2 := net.Pipe() 66 defer p1.Close() 67 defer p2.Close() 68 69 errCh := make(chan error) 70 streamMsg := make(chan *cstructs.StreamErrWrapper) 71 72 go handler(p2) 73 74 // Start decoder 75 go func() { 76 decoder := codec.NewDecoder(p1, structs.MsgpackHandle) 77 for { 78 var msg cstructs.StreamErrWrapper 79 if err := decoder.Decode(&msg); err != nil { 80 if err == io.EOF || strings.Contains(err.Error(), "closed") { 81 return 82 } 83 errCh <- fmt.Errorf("error decoding: %v", err) 84 } 85 86 streamMsg <- &msg 87 } 88 }() 89 90 // send request 91 encoder := codec.NewEncoder(p1, structs.MsgpackHandle) 92 require.Nil(encoder.Encode(req)) 93 94 timeout := time.After(3 * time.Second) 95 expected := "[DEBUG]" 96 received := "" 97 98 OUTER: 99 for { 100 select { 101 case <-timeout: 102 t.Fatal("timeout waiting for logs") 103 case err := <-errCh: 104 t.Fatal(err) 105 case msg := <-streamMsg: 106 if msg.Error != nil { 107 t.Fatalf("Got error: %v", msg.Error.Error()) 108 } 109 110 var frame sframer.StreamFrame 111 err := json.Unmarshal(msg.Payload, &frame) 112 assert.NoError(t, err) 113 114 received += string(frame.Data) 115 if strings.Contains(received, expected) { 116 require.Nil(p2.Close()) 117 break OUTER 118 } 119 } 120 } 121 } 122 123 func TestMonitor_Monitor_RemoteServer(t *testing.T) { 124 t.Parallel() 125 126 // start servers 127 s1, cleanupS1 := TestServer(t, nil) 128 defer cleanupS1() 129 s2, cleanupS2 := TestServer(t, func(c *Config) { 130 c.DevDisableBootstrap = true 131 }) 132 defer cleanupS2() 133 TestJoin(t, s1, s2) 134 testutil.WaitForLeader(t, s1.RPC) 135 testutil.WaitForLeader(t, s2.RPC) 136 137 // determine leader and nonleader 138 servers := []*Server{s1, s2} 139 var nonLeader *Server 140 var leader *Server 141 for _, s := range servers { 142 if !s.IsLeader() { 143 nonLeader = s 144 } else { 145 leader = s 146 } 147 } 148 149 cases := []struct { 150 desc string 151 serverID string 152 expectedLog string 153 logger hclog.InterceptLogger 154 origin *Server 155 }{ 156 { 157 desc: "remote leader", 158 serverID: "leader", 159 expectedLog: "leader log", 160 logger: leader.logger, 161 origin: nonLeader, 162 }, 163 { 164 desc: "remote server", 165 serverID: nonLeader.serf.LocalMember().Name, 166 expectedLog: "nonleader log", 167 logger: nonLeader.logger, 168 origin: leader, 169 }, 170 { 171 desc: "serverID is current leader", 172 serverID: "leader", 173 expectedLog: "leader log", 174 logger: leader.logger, 175 origin: leader, 176 }, 177 { 178 desc: "serverID is current server", 179 serverID: nonLeader.serf.LocalMember().Name, 180 expectedLog: "non leader log", 181 logger: nonLeader.logger, 182 origin: nonLeader, 183 }, 184 } 185 186 for _, tc := range cases { 187 t.Run(tc.desc, func(t *testing.T) { 188 require := require.New(t) 189 190 // send some specific logs 191 doneCh := make(chan struct{}) 192 go func() { 193 for { 194 select { 195 case <-doneCh: 196 return 197 default: 198 tc.logger.Warn(tc.expectedLog) 199 time.Sleep(10 * time.Millisecond) 200 } 201 } 202 }() 203 204 req := cstructs.MonitorRequest{ 205 LogLevel: "warn", 206 ServerID: tc.serverID, 207 } 208 209 handler, err := tc.origin.StreamingRpcHandler("Agent.Monitor") 210 require.Nil(err) 211 212 // create pipe 213 p1, p2 := net.Pipe() 214 defer p1.Close() 215 defer p2.Close() 216 217 errCh := make(chan error) 218 streamMsg := make(chan *cstructs.StreamErrWrapper) 219 220 go handler(p2) 221 222 // Start decoder 223 go func() { 224 decoder := codec.NewDecoder(p1, structs.MsgpackHandle) 225 for { 226 var msg cstructs.StreamErrWrapper 227 if err := decoder.Decode(&msg); err != nil { 228 if err == io.EOF || strings.Contains(err.Error(), "closed") { 229 return 230 } 231 errCh <- fmt.Errorf("error decoding: %v", err) 232 } 233 234 streamMsg <- &msg 235 } 236 }() 237 238 // send request 239 encoder := codec.NewEncoder(p1, structs.MsgpackHandle) 240 require.Nil(encoder.Encode(req)) 241 242 timeout := time.After(2 * time.Second) 243 received := "" 244 245 OUTER: 246 for { 247 select { 248 case <-timeout: 249 t.Fatal("timeout waiting for logs") 250 case err := <-errCh: 251 t.Fatal(err) 252 case msg := <-streamMsg: 253 if msg.Error != nil { 254 t.Fatalf("Got error: %v", msg.Error.Error()) 255 } 256 257 var frame sframer.StreamFrame 258 err := json.Unmarshal(msg.Payload, &frame) 259 assert.NoError(t, err) 260 261 received += string(frame.Data) 262 if strings.Contains(received, tc.expectedLog) { 263 close(doneCh) 264 require.Nil(p2.Close()) 265 break OUTER 266 } 267 } 268 } 269 }) 270 } 271 } 272 273 func TestMonitor_MonitorServer(t *testing.T) { 274 t.Parallel() 275 require := require.New(t) 276 277 // start server 278 s, cleanupS := TestServer(t, nil) 279 defer cleanupS() 280 testutil.WaitForLeader(t, s.RPC) 281 282 // No node ID to monitor the remote server 283 req := cstructs.MonitorRequest{ 284 LogLevel: "debug", 285 } 286 287 handler, err := s.StreamingRpcHandler("Agent.Monitor") 288 require.Nil(err) 289 290 // create pipe 291 p1, p2 := net.Pipe() 292 defer p1.Close() 293 defer p2.Close() 294 295 errCh := make(chan error) 296 streamMsg := make(chan *cstructs.StreamErrWrapper) 297 298 go handler(p2) 299 300 // Start decoder 301 go func() { 302 decoder := codec.NewDecoder(p1, structs.MsgpackHandle) 303 for { 304 var msg cstructs.StreamErrWrapper 305 if err := decoder.Decode(&msg); err != nil { 306 if err == io.EOF || strings.Contains(err.Error(), "closed") { 307 return 308 } 309 errCh <- fmt.Errorf("error decoding: %v", err) 310 } 311 312 streamMsg <- &msg 313 } 314 }() 315 316 // send request 317 encoder := codec.NewEncoder(p1, structs.MsgpackHandle) 318 require.Nil(encoder.Encode(req)) 319 320 timeout := time.After(1 * time.Second) 321 expected := "[DEBUG]" 322 received := "" 323 324 // send logs 325 go func() { 326 for { 327 s.logger.Debug("test log") 328 time.Sleep(100 * time.Millisecond) 329 } 330 }() 331 332 OUTER: 333 for { 334 select { 335 case <-timeout: 336 t.Fatal("timeout waiting for logs") 337 case err := <-errCh: 338 t.Fatal(err) 339 case msg := <-streamMsg: 340 if msg.Error != nil { 341 t.Fatalf("Got error: %v", msg.Error.Error()) 342 } 343 344 var frame sframer.StreamFrame 345 err := json.Unmarshal(msg.Payload, &frame) 346 assert.NoError(t, err) 347 348 received += string(frame.Data) 349 if strings.Contains(received, expected) { 350 require.Nil(p2.Close()) 351 break OUTER 352 } 353 } 354 } 355 } 356 357 func TestMonitor_Monitor_ACL(t *testing.T) { 358 t.Parallel() 359 require := require.New(t) 360 361 // start server 362 s, root, cleanupS := TestACLServer(t, nil) 363 defer cleanupS() 364 testutil.WaitForLeader(t, s.RPC) 365 366 policyBad := mock.NamespacePolicy("other", "", []string{acl.NamespaceCapabilityReadFS}) 367 tokenBad := mock.CreatePolicyAndToken(t, s.State(), 1005, "invalid", policyBad) 368 369 policyGood := mock.AgentPolicy(acl.PolicyRead) 370 tokenGood := mock.CreatePolicyAndToken(t, s.State(), 1009, "valid", policyGood) 371 372 cases := []struct { 373 Name string 374 Token string 375 ExpectedErr string 376 }{ 377 { 378 Name: "bad token", 379 Token: tokenBad.SecretID, 380 ExpectedErr: structs.ErrPermissionDenied.Error(), 381 }, 382 { 383 Name: "good token", 384 Token: tokenGood.SecretID, 385 ExpectedErr: "Unknown log level", 386 }, 387 { 388 Name: "root token", 389 Token: root.SecretID, 390 ExpectedErr: "Unknown log level", 391 }, 392 } 393 394 for _, tc := range cases { 395 t.Run(tc.Name, func(t *testing.T) { 396 req := &cstructs.MonitorRequest{ 397 LogLevel: "unknown", 398 QueryOptions: structs.QueryOptions{ 399 Namespace: structs.DefaultNamespace, 400 Region: "global", 401 AuthToken: tc.Token, 402 }, 403 } 404 405 handler, err := s.StreamingRpcHandler("Agent.Monitor") 406 require.Nil(err) 407 408 // create pipe 409 p1, p2 := net.Pipe() 410 defer p1.Close() 411 defer p2.Close() 412 413 errCh := make(chan error) 414 streamMsg := make(chan *cstructs.StreamErrWrapper) 415 416 go handler(p2) 417 418 // Start decoder 419 go func() { 420 decoder := codec.NewDecoder(p1, structs.MsgpackHandle) 421 for { 422 var msg cstructs.StreamErrWrapper 423 if err := decoder.Decode(&msg); err != nil { 424 if err == io.EOF || strings.Contains(err.Error(), "closed") { 425 return 426 } 427 errCh <- fmt.Errorf("error decoding: %v", err) 428 } 429 430 streamMsg <- &msg 431 } 432 }() 433 434 // send request 435 encoder := codec.NewEncoder(p1, structs.MsgpackHandle) 436 require.Nil(encoder.Encode(req)) 437 438 timeout := time.After(5 * time.Second) 439 OUTER: 440 for { 441 select { 442 case <-timeout: 443 t.Fatal("timeout") 444 case err := <-errCh: 445 t.Fatal(err) 446 case msg := <-streamMsg: 447 if msg.Error == nil { 448 continue 449 } 450 451 if strings.Contains(msg.Error.Error(), tc.ExpectedErr) { 452 break OUTER 453 } else { 454 t.Fatalf("Bad error: %v", msg.Error) 455 } 456 } 457 } 458 }) 459 } 460 } 461 462 func TestAgentProfile_RemoteClient(t *testing.T) { 463 t.Parallel() 464 require := require.New(t) 465 466 // start server and client 467 s1, cleanup := TestServer(t, func(c *Config) { 468 c.DevDisableBootstrap = true 469 }) 470 defer cleanup() 471 472 s2, cleanup := TestServer(t, func(c *Config) { 473 c.DevDisableBootstrap = true 474 }) 475 defer cleanup() 476 477 TestJoin(t, s1, s2) 478 testutil.WaitForLeader(t, s1.RPC) 479 testutil.WaitForLeader(t, s2.RPC) 480 481 c, cleanupC := client.TestClient(t, func(c *config.Config) { 482 c.Servers = []string{s2.GetConfig().RPCAddr.String()} 483 c.EnableDebug = true 484 }) 485 defer cleanupC() 486 487 testutil.WaitForResult(func() (bool, error) { 488 nodes := s2.connectedNodes() 489 return len(nodes) == 1, nil 490 }, func(err error) { 491 t.Fatalf("should have a clients") 492 }) 493 494 req := structs.AgentPprofRequest{ 495 ReqType: pprof.CPUReq, 496 NodeID: c.NodeID(), 497 QueryOptions: structs.QueryOptions{Region: "global"}, 498 } 499 500 reply := structs.AgentPprofResponse{} 501 502 err := s1.RPC("Agent.Profile", &req, &reply) 503 require.NoError(err) 504 505 require.NotNil(reply.Payload) 506 require.Equal(c.NodeID(), reply.AgentID) 507 } 508 509 // Test that we prevent a forwarding loop if the requested 510 // serverID does not exist in the requested region 511 func TestAgentProfile_RemoteRegionMisMatch(t *testing.T) { 512 t.Parallel() 513 require := require.New(t) 514 515 // start server and client 516 s1, cleanupS1 := TestServer(t, func(c *Config) { 517 c.NumSchedulers = 0 518 c.Region = "foo" 519 c.EnableDebug = true 520 }) 521 defer cleanupS1() 522 523 s2, cleanup := TestServer(t, func(c *Config) { 524 c.NumSchedulers = 0 525 c.Region = "bar" 526 c.EnableDebug = true 527 }) 528 defer cleanup() 529 530 TestJoin(t, s1, s2) 531 testutil.WaitForLeader(t, s1.RPC) 532 533 req := structs.AgentPprofRequest{ 534 ReqType: pprof.CPUReq, 535 ServerID: s1.serf.LocalMember().Name, 536 QueryOptions: structs.QueryOptions{ 537 Region: "bar", 538 }, 539 } 540 541 reply := structs.AgentPprofResponse{} 542 543 err := s1.RPC("Agent.Profile", &req, &reply) 544 require.Contains(err.Error(), "does not exist in requested region") 545 require.Nil(reply.Payload) 546 } 547 548 // Test that Agent.Profile can forward to a different region 549 func TestAgentProfile_RemoteRegion(t *testing.T) { 550 t.Parallel() 551 require := require.New(t) 552 553 // start server and client 554 s1, cleanupS1 := TestServer(t, func(c *Config) { 555 c.NumSchedulers = 0 556 c.Region = "foo" 557 }) 558 defer cleanupS1() 559 560 s2, cleanup := TestServer(t, func(c *Config) { 561 c.NumSchedulers = 0 562 c.Region = "bar" 563 c.EnableDebug = true 564 }) 565 defer cleanup() 566 567 TestJoin(t, s1, s2) 568 testutil.WaitForLeader(t, s1.RPC) 569 570 req := structs.AgentPprofRequest{ 571 ReqType: pprof.CPUReq, 572 ServerID: s2.serf.LocalMember().Name, 573 QueryOptions: structs.QueryOptions{ 574 Region: "bar", 575 }, 576 } 577 578 reply := structs.AgentPprofResponse{} 579 580 err := s1.RPC("Agent.Profile", &req, &reply) 581 require.NoError(err) 582 583 require.NotNil(reply.Payload) 584 require.Equal(s2.serf.LocalMember().Name, reply.AgentID) 585 } 586 587 func TestAgentProfile_Server(t *testing.T) { 588 t.Parallel() 589 590 // start servers 591 s1, cleanup := TestServer(t, func(c *Config) { 592 c.EnableDebug = true 593 }) 594 defer cleanup() 595 596 s2, cleanup := TestServer(t, func(c *Config) { 597 c.DevDisableBootstrap = true 598 c.EnableDebug = true 599 }) 600 defer cleanup() 601 602 TestJoin(t, s1, s2) 603 testutil.WaitForLeader(t, s1.RPC) 604 testutil.WaitForLeader(t, s2.RPC) 605 606 // determine leader and nonleader 607 servers := []*Server{s1, s2} 608 var nonLeader *Server 609 var leader *Server 610 for _, s := range servers { 611 if !s.IsLeader() { 612 nonLeader = s 613 } else { 614 leader = s 615 } 616 } 617 618 cases := []struct { 619 desc string 620 serverID string 621 origin *Server 622 expectedErr string 623 expectedAgentID string 624 reqType pprof.ReqType 625 }{ 626 { 627 desc: "remote leader", 628 serverID: "leader", 629 origin: nonLeader, 630 reqType: pprof.CmdReq, 631 expectedAgentID: leader.serf.LocalMember().Name, 632 }, 633 { 634 desc: "remote server", 635 serverID: nonLeader.serf.LocalMember().Name, 636 origin: leader, 637 reqType: pprof.CmdReq, 638 expectedAgentID: nonLeader.serf.LocalMember().Name, 639 }, 640 { 641 desc: "serverID is current leader", 642 serverID: "leader", 643 origin: leader, 644 reqType: pprof.CmdReq, 645 expectedAgentID: leader.serf.LocalMember().Name, 646 }, 647 { 648 desc: "serverID is current server", 649 serverID: nonLeader.serf.LocalMember().Name, 650 origin: nonLeader, 651 reqType: pprof.CPUReq, 652 expectedAgentID: nonLeader.serf.LocalMember().Name, 653 }, 654 { 655 desc: "serverID is unknown", 656 serverID: uuid.Generate(), 657 origin: nonLeader, 658 reqType: pprof.CmdReq, 659 expectedErr: "unknown nomad server", 660 expectedAgentID: "", 661 }, 662 } 663 664 for _, tc := range cases { 665 t.Run(tc.desc, func(t *testing.T) { 666 require := require.New(t) 667 668 req := structs.AgentPprofRequest{ 669 ReqType: tc.reqType, 670 ServerID: tc.serverID, 671 QueryOptions: structs.QueryOptions{Region: "global"}, 672 } 673 674 reply := structs.AgentPprofResponse{} 675 676 err := tc.origin.RPC("Agent.Profile", &req, &reply) 677 if tc.expectedErr != "" { 678 require.Contains(err.Error(), tc.expectedErr) 679 } else { 680 require.Nil(err) 681 require.NotNil(reply.Payload) 682 } 683 684 require.Equal(tc.expectedAgentID, reply.AgentID) 685 }) 686 } 687 } 688 689 func TestAgentProfile_ACL(t *testing.T) { 690 t.Parallel() 691 require := require.New(t) 692 693 // start server 694 s, root, cleanupS := TestACLServer(t, nil) 695 defer cleanupS() 696 testutil.WaitForLeader(t, s.RPC) 697 698 policyBad := mock.NamespacePolicy("other", "", []string{acl.NamespaceCapabilityReadFS}) 699 tokenBad := mock.CreatePolicyAndToken(t, s.State(), 1005, "invalid", policyBad) 700 701 policyGood := mock.AgentPolicy(acl.PolicyWrite) 702 tokenGood := mock.CreatePolicyAndToken(t, s.State(), 1009, "valid", policyGood) 703 704 cases := []struct { 705 Name string 706 Token string 707 ExpectedErr string 708 }{ 709 { 710 Name: "bad token", 711 Token: tokenBad.SecretID, 712 ExpectedErr: "Permission denied", 713 }, 714 { 715 Name: "good token", 716 Token: tokenGood.SecretID, 717 }, 718 { 719 Name: "root token", 720 Token: root.SecretID, 721 }, 722 } 723 724 for _, tc := range cases { 725 t.Run(tc.Name, func(t *testing.T) { 726 req := &structs.AgentPprofRequest{ 727 ReqType: pprof.CmdReq, 728 QueryOptions: structs.QueryOptions{ 729 Namespace: structs.DefaultNamespace, 730 Region: "global", 731 AuthToken: tc.Token, 732 }, 733 } 734 735 reply := &structs.AgentPprofResponse{} 736 737 err := s.RPC("Agent.Profile", req, reply) 738 if tc.ExpectedErr != "" { 739 require.Equal(tc.ExpectedErr, err.Error()) 740 } else { 741 require.NoError(err) 742 require.NotNil(reply.Payload) 743 } 744 }) 745 } 746 }