github.com/hernad/nomad@v1.6.112/nomad/server_test.go

// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: MPL-2.0

package nomad

import (
	"context"
	"fmt"
	"path"
	"strings"
	"testing"
	"time"

	msgpackrpc "github.com/hashicorp/net-rpc-msgpackrpc"
	"github.com/hernad/nomad/ci"
	"github.com/hernad/nomad/helper/testlog"
	"github.com/hernad/nomad/helper/uuid"
	"github.com/hernad/nomad/nomad/mock"
	"github.com/hernad/nomad/nomad/structs"
	"github.com/hernad/nomad/nomad/structs/config"
	"github.com/hernad/nomad/testutil"
	"github.com/shoenig/test/must"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
)

func TestServer_RPC(t *testing.T) {
	ci.Parallel(t)

	s1, cleanupS1 := TestServer(t, nil)
	defer cleanupS1()

	var out struct{}
	if err := s1.RPC("Status.Ping", &structs.GenericRequest{}, &out); err != nil {
		t.Fatalf("err: %v", err)
	}
}

func TestServer_RPC_TLS(t *testing.T) {
	ci.Parallel(t)

	const (
		cafile  = "../helper/tlsutil/testdata/nomad-agent-ca.pem"
		foocert = "../helper/tlsutil/testdata/regionFoo-server-nomad.pem"
		fookey  = "../helper/tlsutil/testdata/regionFoo-server-nomad-key.pem"
	)
	dir := t.TempDir()

	s1, cleanupS1 := TestServer(t, func(c *Config) {
		c.Region = "regionFoo"
		c.BootstrapExpect = 3
		c.DevMode = false
		c.DataDir = path.Join(dir, "node1")
		c.TLSConfig = &config.TLSConfig{
			EnableHTTP:           true,
			EnableRPC:            true,
			VerifyServerHostname: true,
			CAFile:               cafile,
			CertFile:             foocert,
			KeyFile:              fookey,
		}
	})
	defer cleanupS1()

	s2, cleanupS2 := TestServer(t, func(c *Config) {
		c.Region = "regionFoo"
		c.BootstrapExpect = 3
		c.DevMode = false
		c.DataDir = path.Join(dir, "node2")
		c.TLSConfig = &config.TLSConfig{
			EnableHTTP:           true,
			EnableRPC:            true,
			VerifyServerHostname: true,
			CAFile:               cafile,
			CertFile:             foocert,
			KeyFile:              fookey,
		}
	})
	defer cleanupS2()

	s3, cleanupS3 := TestServer(t, func(c *Config) {
		c.Region = "regionFoo"
		c.BootstrapExpect = 3
		c.DevMode = false
		c.DataDir = path.Join(dir, "node3")
		c.TLSConfig = &config.TLSConfig{
			EnableHTTP:           true,
			EnableRPC:            true,
			VerifyServerHostname: true,
			CAFile:               cafile,
			CertFile:             foocert,
			KeyFile:              fookey,
		}
	})
	defer cleanupS3()

	TestJoin(t, s1, s2, s3)
	testutil.WaitForLeader(t, s1.RPC)

	// Part of a server joining is making an RPC request, so just by testing
	// that there is a leader we verify that the RPCs are working over TLS.
}
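// The three server configurations above differ only in their data
// directories. As a sketch (hypothetical helper, not part of the original
// file), the shared TLS settings could be built in one place:
func newRegionFooTLSConfig(cafile, cert, key string) *config.TLSConfig {
	// Mirrors the literal repeated for s1, s2, and s3 above.
	return &config.TLSConfig{
		EnableHTTP:           true,
		EnableRPC:            true,
		VerifyServerHostname: true,
		CAFile:               cafile,
		CertFile:             cert,
		KeyFile:              key,
	}
}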
func TestServer_RPC_MixedTLS(t *testing.T) {
	ci.Parallel(t)

	const (
		cafile  = "../helper/tlsutil/testdata/nomad-agent-ca.pem"
		foocert = "../helper/tlsutil/testdata/regionFoo-server-nomad.pem"
		fookey  = "../helper/tlsutil/testdata/regionFoo-server-nomad-key.pem"
	)
	dir := t.TempDir()

	s1, cleanupS1 := TestServer(t, func(c *Config) {
		c.Region = "regionFoo"
		c.BootstrapExpect = 3
		c.DevMode = false
		c.DataDir = path.Join(dir, "node1")
		c.TLSConfig = &config.TLSConfig{
			EnableHTTP:           true,
			EnableRPC:            true,
			VerifyServerHostname: true,
			CAFile:               cafile,
			CertFile:             foocert,
			KeyFile:              fookey,
		}
	})
	defer cleanupS1()

	s2, cleanupS2 := TestServer(t, func(c *Config) {
		c.Region = "regionFoo"
		c.BootstrapExpect = 3
		c.DevMode = false
		c.DataDir = path.Join(dir, "node2")
		c.TLSConfig = &config.TLSConfig{
			EnableHTTP:           true,
			EnableRPC:            true,
			VerifyServerHostname: true,
			CAFile:               cafile,
			CertFile:             foocert,
			KeyFile:              fookey,
		}
	})
	defer cleanupS2()

	s3, cleanupS3 := TestServer(t, func(c *Config) {
		c.Region = "regionFoo"
		c.BootstrapExpect = 3
		c.DevMode = false
		c.DataDir = path.Join(dir, "node3")
	})
	defer cleanupS3()

	TestJoin(t, s1, s2, s3)

	// Ensure that we do not form a quorum
	start := time.Now()
	for {
		if time.Now().After(start.Add(2 * time.Second)) {
			break
		}

		args := &structs.GenericRequest{}
		var leader string
		err := s1.RPC("Status.Leader", args, &leader)
		if err == nil || leader != "" {
			t.Fatalf("Got leader or no error: %q %v", leader, err)
		}
	}
}

func TestServer_Regions(t *testing.T) {
	ci.Parallel(t)

	// Make the servers
	s1, cleanupS1 := TestServer(t, func(c *Config) {
		c.Region = "region1"
	})
	defer cleanupS1()

	s2, cleanupS2 := TestServer(t, func(c *Config) {
		c.Region = "region2"
	})
	defer cleanupS2()

	// Join them together
	s2Addr := fmt.Sprintf("127.0.0.1:%d",
		s2.config.SerfConfig.MemberlistConfig.BindPort)
	if n, err := s1.Join([]string{s2Addr}); err != nil || n != 1 {
		t.Fatalf("Failed joining: %v (%d joined)", err, n)
	}

	// Try listing the regions
	testutil.WaitForResult(func() (bool, error) {
		out := s1.Regions()
		if len(out) != 2 || out[0] != "region1" || out[1] != "region2" {
			return false, fmt.Errorf("unexpected regions: %v", out)
		}
		return true, nil
	}, func(err error) {
		t.Fatalf("err: %v", err)
	})
}

func TestServer_Reload_Vault(t *testing.T) {
	ci.Parallel(t)

	s1, cleanupS1 := TestServer(t, func(c *Config) {
		c.Region = "global"
	})
	defer cleanupS1()

	if s1.vault.Running() {
		t.Fatalf("Vault client should not be running")
	}

	tr := true
	config := DefaultConfig()
	config.VaultConfig.Enabled = &tr
	config.VaultConfig.Token = uuid.Generate()
	config.VaultConfig.Namespace = "nondefault"

	if err := s1.Reload(config); err != nil {
		t.Fatalf("Reload failed: %v", err)
	}

	if !s1.vault.Running() {
		t.Fatalf("Vault client should be running")
	}

	if s1.vault.GetConfig().Namespace != "nondefault" {
		t.Fatalf("Vault client did not get new namespace")
	}
}

// connectionReset reports whether msg looks like a dropped connection. A
// plaintext client talking to a TLS listener (or the reverse) surfaces as an
// EOF or "connection reset by peer" error rather than a descriptive handshake
// failure, so the TLS reload tests below match on those substrings.
func connectionReset(msg string) bool {
	return strings.Contains(msg, "EOF") || strings.Contains(msg, "connection reset by peer")
}
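// tryNodeRegister captures the Node.Register round trip that the TLS reload
// tests below repeat inline: register a mock node over a fresh msgpack codec
// and return the resulting error for the caller to assert on. This is a
// hypothetical convenience helper sketched for illustration; it is not part
// of the original file.
func tryNodeRegister(t *testing.T, s *Server) error {
	codec := rpcClient(t, s)
	req := &structs.NodeRegisterRequest{
		Node:         mock.Node(),
		WriteRequest: structs.WriteRequest{Region: "global"},
	}
	var resp structs.GenericResponse
	return msgpackrpc.CallWithCodec(codec, "Node.Register", req, &resp)
}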
// Tests that the server will successfully reload its network connections,
// upgrading from plaintext to TLS if the server's TLS configuration changes.
func TestServer_Reload_TLSConnections_PlaintextToTLS(t *testing.T) {
	ci.Parallel(t)
	assert := assert.New(t)

	const (
		cafile  = "../helper/tlsutil/testdata/nomad-agent-ca.pem"
		foocert = "../helper/tlsutil/testdata/regionFoo-client-nomad.pem"
		fookey  = "../helper/tlsutil/testdata/regionFoo-client-nomad-key.pem"
	)
	dir := t.TempDir()

	s1, cleanupS1 := TestServer(t, func(c *Config) {
		c.DataDir = path.Join(dir, "nodeA")
	})
	defer cleanupS1()

	// assert that the server started in plaintext mode
	assert.Equal(s1.config.TLSConfig.CertFile, "")

	newTLSConfig := &config.TLSConfig{
		EnableHTTP:           true,
		EnableRPC:            true,
		VerifyServerHostname: true,
		CAFile:               cafile,
		CertFile:             foocert,
		KeyFile:              fookey,
	}

	err := s1.reloadTLSConnections(newTLSConfig)
	assert.Nil(err)
	assert.True(s1.config.TLSConfig.CertificateInfoIsEqual(newTLSConfig))

	codec := rpcClient(t, s1)

	node := mock.Node()
	req := &structs.NodeRegisterRequest{
		Node:         node,
		WriteRequest: structs.WriteRequest{Region: "global"},
	}

	var resp structs.GenericResponse
	err = msgpackrpc.CallWithCodec(codec, "Node.Register", req, &resp)
	assert.NotNil(err)
	assert.True(connectionReset(err.Error()))
}

// Tests that the server will successfully reload its network connections,
// downgrading from TLS to plaintext if the server's TLS configuration changes.
func TestServer_Reload_TLSConnections_TLSToPlaintext_RPC(t *testing.T) {
	ci.Parallel(t)
	assert := assert.New(t)

	const (
		cafile  = "../helper/tlsutil/testdata/nomad-agent-ca.pem"
		foocert = "../helper/tlsutil/testdata/regionFoo-client-nomad.pem"
		fookey  = "../helper/tlsutil/testdata/regionFoo-client-nomad-key.pem"
	)

	dir := t.TempDir()

	s1, cleanupS1 := TestServer(t, func(c *Config) {
		c.DataDir = path.Join(dir, "nodeB")
		c.TLSConfig = &config.TLSConfig{
			EnableHTTP:           true,
			EnableRPC:            true,
			VerifyServerHostname: true,
			CAFile:               cafile,
			CertFile:             foocert,
			KeyFile:              fookey,
		}
	})
	defer cleanupS1()

	newTLSConfig := &config.TLSConfig{}

	err := s1.reloadTLSConnections(newTLSConfig)
	assert.Nil(err)
	assert.True(s1.config.TLSConfig.CertificateInfoIsEqual(newTLSConfig))

	codec := rpcClient(t, s1)

	node := mock.Node()
	req := &structs.NodeRegisterRequest{
		Node:         node,
		WriteRequest: structs.WriteRequest{Region: "global"},
	}

	var resp structs.GenericResponse
	err = msgpackrpc.CallWithCodec(codec, "Node.Register", req, &resp)
	assert.Nil(err)
}
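// With the hypothetical tryNodeRegister helper above, the assertion pattern
// shared by these reload tests reads (sketch only):
//
//	err := tryNodeRegister(t, s1)
//	assert.NotNil(err)                        // transport mismatch: the RPC fails...
//	assert.True(connectionReset(err.Error())) // ...with an EOF or reset error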
// Tests that the server will successfully reload its network connections,
// downgrading only RPC connections
func TestServer_Reload_TLSConnections_TLSToPlaintext_OnlyRPC(t *testing.T) {
	ci.Parallel(t)
	assert := assert.New(t)

	const (
		cafile  = "../helper/tlsutil/testdata/nomad-agent-ca.pem"
		foocert = "../helper/tlsutil/testdata/regionFoo-client-nomad.pem"
		fookey  = "../helper/tlsutil/testdata/regionFoo-client-nomad-key.pem"
	)

	dir := t.TempDir()

	s1, cleanupS1 := TestServer(t, func(c *Config) {
		c.DataDir = path.Join(dir, "nodeB")
		c.TLSConfig = &config.TLSConfig{
			EnableHTTP:           true,
			EnableRPC:            true,
			VerifyServerHostname: true,
			CAFile:               cafile,
			CertFile:             foocert,
			KeyFile:              fookey,
		}
	})
	defer cleanupS1()

	newTLSConfig := &config.TLSConfig{
		EnableHTTP:           true,
		EnableRPC:            false,
		VerifyServerHostname: true,
		CAFile:               cafile,
		CertFile:             foocert,
		KeyFile:              fookey,
	}

	err := s1.reloadTLSConnections(newTLSConfig)
	assert.Nil(err)
	assert.True(s1.config.TLSConfig.CertificateInfoIsEqual(newTLSConfig))

	codec := rpcClient(t, s1)

	node := mock.Node()
	req := &structs.NodeRegisterRequest{
		Node:         node,
		WriteRequest: structs.WriteRequest{Region: "global"},
	}

	var resp structs.GenericResponse
	err = msgpackrpc.CallWithCodec(codec, "Node.Register", req, &resp)
	assert.Nil(err)
}

// Tests that the server will successfully reload its network connections,
// upgrading only RPC connections
func TestServer_Reload_TLSConnections_PlaintextToTLS_OnlyRPC(t *testing.T) {
	ci.Parallel(t)
	assert := assert.New(t)

	const (
		cafile  = "../helper/tlsutil/testdata/nomad-agent-ca.pem"
		foocert = "../helper/tlsutil/testdata/regionFoo-client-nomad.pem"
		fookey  = "../helper/tlsutil/testdata/regionFoo-client-nomad-key.pem"
	)

	dir := t.TempDir()

	s1, cleanupS1 := TestServer(t, func(c *Config) {
		c.DataDir = path.Join(dir, "nodeB")
		c.TLSConfig = &config.TLSConfig{
			EnableHTTP:           true,
			EnableRPC:            false,
			VerifyServerHostname: true,
			CAFile:               cafile,
			CertFile:             foocert,
			KeyFile:              fookey,
		}
	})
	defer cleanupS1()

	newTLSConfig := &config.TLSConfig{
		EnableHTTP:           true,
		EnableRPC:            true,
		VerifyServerHostname: true,
		CAFile:               cafile,
		CertFile:             foocert,
		KeyFile:              fookey,
	}

	err := s1.reloadTLSConnections(newTLSConfig)
	assert.Nil(err)
	assert.True(s1.config.TLSConfig.EnableRPC)
	assert.True(s1.config.TLSConfig.CertificateInfoIsEqual(newTLSConfig))

	codec := rpcClient(t, s1)

	node := mock.Node()
	req := &structs.NodeRegisterRequest{
		Node:         node,
		WriteRequest: structs.WriteRequest{Region: "global"},
	}

	var resp structs.GenericResponse
	err = msgpackrpc.CallWithCodec(codec, "Node.Register", req, &resp)
	assert.NotNil(err)
	assert.True(connectionReset(err.Error()))
}
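// waitForNoLeader mirrors the inline loop in the Raft reload test below: poll
// Status.Leader on each server and fail if a leader is still reported.
// Hypothetical helper, shown only to make the test's intent explicit.
func waitForNoLeader(t *testing.T, servers ...*Server) {
	for _, serv := range servers {
		testutil.WaitForResult(func() (bool, error) {
			var leader string
			err := serv.RPC("Status.Leader", &structs.GenericRequest{}, &leader)
			if leader != "" && err == nil {
				// A reachable leader means quorum survived the TLS mismatch.
				return false, fmt.Errorf("should not have found leader but got %s", leader)
			}
			return true, nil
		}, func(err error) {
			t.Fatalf("err: %v", err)
		})
	}
}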
// Test that Raft connections are reloaded as expected when a Nomad server is
// upgraded from plaintext to TLS
func TestServer_Reload_TLSConnections_Raft(t *testing.T) {
	ci.Parallel(t)
	assert := assert.New(t)

	const (
		cafile  = "../helper/tlsutil/testdata/nomad-agent-ca.pem"
		foocert = "../helper/tlsutil/testdata/regionFoo-client-nomad.pem"
		fookey  = "../helper/tlsutil/testdata/regionFoo-client-nomad-key.pem"
		barcert = "../dev/tls_cluster/certs/nomad.pem"
		barkey  = "../dev/tls_cluster/certs/nomad-key.pem"
	)
	dir := t.TempDir()

	s1, cleanupS1 := TestServer(t, func(c *Config) {
		c.BootstrapExpect = 2
		c.DevMode = false
		c.DataDir = path.Join(dir, "node1")
		c.NodeName = "node1"
		c.Region = "regionFoo"
	})
	defer cleanupS1()

	s2, cleanupS2 := TestServer(t, func(c *Config) {
		c.BootstrapExpect = 2
		c.DevMode = false
		c.DataDir = path.Join(dir, "node2")
		c.NodeName = "node2"
		c.Region = "regionFoo"
	})
	defer cleanupS2()

	TestJoin(t, s1, s2)
	servers := []*Server{s1, s2}

	testutil.WaitForLeader(t, s1.RPC)

	newTLSConfig := &config.TLSConfig{
		EnableHTTP:        true,
		VerifyHTTPSClient: true,
		CAFile:            cafile,
		CertFile:          foocert,
		KeyFile:           fookey,
	}

	err := s1.reloadTLSConnections(newTLSConfig)
	assert.Nil(err)

	// After only s1 transitions to TLS the servers can no longer
	// communicate, so the cluster should lose its leader.
	for _, serv := range servers {
		testutil.WaitForResult(func() (bool, error) {
			args := &structs.GenericRequest{}
			var leader string
			err := serv.RPC("Status.Leader", args, &leader)
			if leader != "" && err == nil {
				return false, fmt.Errorf("Should not have found leader but got %s", leader)
			}
			return true, nil
		}, func(err error) {
			t.Fatalf("err: %v", err)
		})
	}

	secondNewTLSConfig := &config.TLSConfig{
		EnableHTTP:        true,
		VerifyHTTPSClient: true,
		CAFile:            cafile,
		CertFile:          barcert,
		KeyFile:           barkey,
	}

	// Now, transition the other server to TLS, which should restore their
	// ability to communicate.
	err = s2.reloadTLSConnections(secondNewTLSConfig)
	assert.Nil(err)

	testutil.WaitForLeader(t, s2.RPC)
}

func TestServer_ReloadRaftConfig(t *testing.T) {
	ci.Parallel(t)

	s1, cleanupS1 := TestServer(t, func(c *Config) {
		c.NumSchedulers = 0
		c.RaftConfig.TrailingLogs = 10
	})
	defer cleanupS1()

	testutil.WaitForLeader(t, s1.RPC)
	rc := s1.raft.ReloadableConfig()
	must.Eq(t, rc.TrailingLogs, uint64(10))
	cfg := s1.GetConfig()
	cfg.RaftConfig.TrailingLogs = 100

	// Hot-reload the configuration
	s1.Reload(cfg)

	// Check it from the raft library
	rc = s1.raft.ReloadableConfig()
	must.Eq(t, rc.TrailingLogs, uint64(100))
}

func TestServer_InvalidSchedulers(t *testing.T) {
	ci.Parallel(t)
	require := require.New(t)

	// Set the config to not have the core scheduler
	config := DefaultConfig()
	logger := testlog.HCLogger(t)
	s := &Server{
		config: config,
		logger: logger,
	}

	config.EnabledSchedulers = []string{"batch"}
	err := s.setupWorkers(s.shutdownCtx)
	require.NotNil(err)
	require.Contains(err.Error(), "scheduler not enabled")

	// Set the config to have an unknown scheduler
	config.EnabledSchedulers = []string{"batch", structs.JobTypeCore, "foo"}
	err = s.setupWorkers(s.shutdownCtx)
	require.NotNil(err)
	require.Contains(err.Error(), "foo")
}

func TestServer_RPCNameAndRegionValidation(t *testing.T) {
	ci.Parallel(t)
	for _, tc := range []struct {
		name     string
		region   string
		expected bool
	}{
		// OK
		{name: "client.global.nomad", region: "global", expected: true},
		{name: "server.global.nomad", region: "global", expected: true},
		{name: "server.other.nomad", region: "global", expected: true},
		{name: "server.other.region.nomad", region: "other.region", expected: true},

		// Bad
		{name: "client.other.nomad", region: "global", expected: false},
		{name: "client.global.nomad.other", region: "global", expected: false},
		{name: "server.global.nomad.other", region: "global", expected: false},
		{name: "other.global.nomad", region: "global", expected: false},
		{name: "server.nomad", region: "global", expected: false},
		{name: "localhost", region: "global", expected: false},
	} {
		assert.Equal(t, tc.expected, validateRPCRegionPeer(tc.name, tc.region),
			"expected %q in region %q to validate as %v",
			tc.name, tc.region, tc.expected)
	}
}
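// The cases above suggest the rule validateRPCRegionPeer implements:
// "server.<region>.nomad" is accepted for any region (servers federate across
// regions), "client.<region>.nomad" is accepted only when <region> matches
// the local region, and anything else, including extra suffixes and bare
// hostnames, is rejected.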
func TestServer_ReloadSchedulers_NumSchedulers(t *testing.T) {
	ci.Parallel(t)

	s1, cleanupS1 := TestServer(t, func(c *Config) {
		c.NumSchedulers = 8
	})
	defer cleanupS1()

	require.Equal(t, s1.config.NumSchedulers, len(s1.workers))

	config := DefaultConfig()
	config.NumSchedulers = 4
	require.NoError(t, s1.Reload(config))

	time.Sleep(1 * time.Second)
	require.Equal(t, config.NumSchedulers, len(s1.workers))
}

func TestServer_ReloadSchedulers_EnabledSchedulers(t *testing.T) {
	ci.Parallel(t)

	s1, cleanupS1 := TestServer(t, func(c *Config) {
		c.EnabledSchedulers = []string{structs.JobTypeCore, structs.JobTypeSystem}
	})
	defer cleanupS1()

	require.Equal(t, s1.config.NumSchedulers, len(s1.workers))

	config := DefaultConfig()
	config.EnabledSchedulers = []string{structs.JobTypeCore, structs.JobTypeSystem, structs.JobTypeBatch}
	require.NoError(t, s1.Reload(config))

	time.Sleep(1 * time.Second)
	require.Equal(t, config.NumSchedulers, len(s1.workers))
	require.ElementsMatch(t, config.EnabledSchedulers, s1.GetSchedulerWorkerConfig().EnabledSchedulers)
}

func TestServer_ReloadSchedulers_InvalidSchedulers(t *testing.T) {
	ci.Parallel(t)

	// Set the config to not have the core scheduler
	config := DefaultConfig()
	logger := testlog.HCLogger(t)
	s := &Server{
		config: config,
		logger: logger,
	}
	s.config.NumSchedulers = 0
	s.shutdownCtx, s.shutdownCancel = context.WithCancel(context.Background())
	s.shutdownCh = s.shutdownCtx.Done()

	config.EnabledSchedulers = []string{"_core", "batch"}
	err := s.setupWorkers(s.shutdownCtx)
	require.Nil(t, err)
	origWC := s.GetSchedulerWorkerConfig()
	reloadSchedulers(s, &SchedulerWorkerPoolArgs{NumSchedulers: config.NumSchedulers, EnabledSchedulers: []string{"batch"}})
	currentWC := s.GetSchedulerWorkerConfig()
	require.Equal(t, origWC, currentWC)

	// Set the config to have an unknown scheduler
	reloadSchedulers(s, &SchedulerWorkerPoolArgs{NumSchedulers: config.NumSchedulers, EnabledSchedulers: []string{"_core", "foo"}})
	currentWC = s.GetSchedulerWorkerConfig()
	require.Equal(t, origWC, currentWC)
}

func TestServer_PreventRaftDowngrade(t *testing.T) {
	ci.Parallel(t)

	dir := t.TempDir()
	_, cleanupv3 := TestServer(t, func(c *Config) {
		c.DevMode = false
		c.DataDir = dir
		c.RaftConfig.ProtocolVersion = 3
	})
	cleanupv3()

	_, cleanupv2, err := TestServerErr(t, func(c *Config) {
		c.DevMode = false
		c.DataDir = dir
		c.RaftConfig.ProtocolVersion = 2
	})
	if cleanupv2 != nil {
		defer cleanupv2()
	}

	// Downgrading Raft should prevent the server from starting.
	require.Error(t, err)
}
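// Note on TestServer_PreventRaftDowngrade: both TestServer calls share the
// same DataDir, so the second start observes the Raft state written by the
// protocol-version-3 server and must refuse to come up at version 2; that
// refusal is the error the final assertion checks.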