github.com/Ilhicas/nomad@v1.0.4-0.20210304152020-e86851182bc3/client/client_test.go

package client

import (
	"fmt"
	"io/ioutil"
	"net"
	"os"
	"path/filepath"
	"runtime"
	"sort"
	"testing"
	"time"

	memdb "github.com/hashicorp/go-memdb"
	trstate "github.com/hashicorp/nomad/client/allocrunner/taskrunner/state"
	"github.com/hashicorp/nomad/client/config"
	consulApi "github.com/hashicorp/nomad/client/consul"
	"github.com/hashicorp/nomad/client/fingerprint"
	"github.com/hashicorp/nomad/client/state"
	"github.com/hashicorp/nomad/command/agent/consul"
	"github.com/hashicorp/nomad/helper/pluginutils/catalog"
	"github.com/hashicorp/nomad/helper/pluginutils/singleton"
	"github.com/hashicorp/nomad/helper/testlog"
	"github.com/hashicorp/nomad/helper/uuid"
	"github.com/hashicorp/nomad/nomad"
	"github.com/hashicorp/nomad/nomad/mock"
	"github.com/hashicorp/nomad/nomad/structs"
	nconfig "github.com/hashicorp/nomad/nomad/structs/config"
	"github.com/hashicorp/nomad/plugins/device"
	psstructs "github.com/hashicorp/nomad/plugins/shared/structs"
	"github.com/hashicorp/nomad/testutil"
	"github.com/stretchr/testify/assert"

	cstate "github.com/hashicorp/nomad/client/state"
	"github.com/stretchr/testify/require"
)

func testACLServer(t *testing.T, cb func(*nomad.Config)) (*nomad.Server, string, *structs.ACLToken, func()) {
	server, token, cleanup := nomad.TestACLServer(t, cb)
	return server, server.GetConfig().RPCAddr.String(), token, cleanup
}

func testServer(t *testing.T, cb func(*nomad.Config)) (*nomad.Server, string, func()) {
	server, cleanup := nomad.TestServer(t, cb)
	return server, server.GetConfig().RPCAddr.String(), cleanup
}

func TestClient_StartStop(t *testing.T) {
	t.Parallel()
	client, cleanup := TestClient(t, nil)
	defer cleanup()
	if err := client.Shutdown(); err != nil {
		t.Fatalf("err: %v", err)
	}
}

// Certain labels for metrics are dependent on client initial setup. This tests
// that the client has properly initialized before we assign values to labels
func TestClient_BaseLabels(t *testing.T) {
	t.Parallel()
	assert := assert.New(t)

	client, cleanup := TestClient(t, nil)
	if err := client.Shutdown(); err != nil {
		t.Fatalf("err: %v", err)
	}
	defer cleanup()

	// directly invoke this function, as otherwise this will fail on a CI build
	// due to a race condition
	client.emitStats()

	baseLabels := client.baseLabels
	assert.NotEqual(0, len(baseLabels))

	nodeID := client.Node().ID
	for _, e := range baseLabels {
		if e.Name == "node_id" {
			assert.Equal(nodeID, e.Value)
		}
	}
}

func TestClient_RPC(t *testing.T) {
	t.Parallel()

	_, addr, cleanupS1 := testServer(t, nil)
	defer cleanupS1()

	c1, cleanupC1 := TestClient(t, func(c *config.Config) {
		c.Servers = []string{addr}
	})
	defer cleanupC1()

	// RPC should succeed
	testutil.WaitForResult(func() (bool, error) {
		var out struct{}
		err := c1.RPC("Status.Ping", struct{}{}, &out)
		return err == nil, err
	}, func(err error) {
		t.Fatalf("err: %v", err)
	})
}

func TestClient_RPC_FireRetryWatchers(t *testing.T) {
	t.Parallel()

	_, addr, cleanupS1 := testServer(t, nil)
	defer cleanupS1()

	c1, cleanupC1 := TestClient(t, func(c *config.Config) {
		c.Servers = []string{addr}
	})
	defer cleanupC1()

	watcher := c1.rpcRetryWatcher()

	// RPC should succeed
	testutil.WaitForResult(func() (bool, error) {
		var out struct{}
		err := c1.RPC("Status.Ping", struct{}{}, &out)
		return err == nil, err
	}, func(err error) {
		t.Fatalf("err: %v", err)
	})

	select {
	case <-watcher:
	default:
		t.Fatal("watcher should be fired")
	}
}

func TestClient_RPC_Passthrough(t *testing.T) {
	t.Parallel()

	s1, _, cleanupS1 := testServer(t, nil)
	defer cleanupS1()

	c1, cleanupC1 := TestClient(t, func(c *config.Config) {
		c.RPCHandler = s1
	})
	defer cleanupC1()

	// RPC should succeed
	testutil.WaitForResult(func() (bool, error) {
		var out struct{}
		err := c1.RPC("Status.Ping", struct{}{}, &out)
		return err == nil, err
	}, func(err error) {
		t.Fatalf("err: %v", err)
	})
}

func TestClient_Fingerprint(t *testing.T) {
	t.Parallel()

	c, cleanup := TestClient(t, nil)
	defer cleanup()

	// Ensure we are fingerprinting
	testutil.WaitForResult(func() (bool, error) {
		node := c.Node()
		if _, ok := node.Attributes["kernel.name"]; !ok {
			return false, fmt.Errorf("Expected value for kernel.name")
		}
		if _, ok := node.Attributes["cpu.arch"]; !ok {
			return false, fmt.Errorf("Expected value for cpu.arch")
		}
		return true, nil
	}, func(err error) {
		t.Fatalf("err: %v", err)
	})
}

// TestClient_Fingerprint_Periodic asserts that driver node attributes are
// periodically fingerprinted.
func TestClient_Fingerprint_Periodic(t *testing.T) {
	t.Parallel()

	c1, cleanup := TestClient(t, func(c *config.Config) {
		confs := []*nconfig.PluginConfig{
			{
				Name: "mock_driver",
				Config: map[string]interface{}{
					"shutdown_periodic_after":    true,
					"shutdown_periodic_duration": time.Second,
				},
			},
		}
		c.PluginLoader = catalog.TestPluginLoaderWithOptions(t, "", nil, confs)
	})
	defer cleanup()

	node := c1.config.Node
	{
		// Ensure the mock driver is registered on the client
		testutil.WaitForResult(func() (bool, error) {
			c1.configLock.Lock()
			defer c1.configLock.Unlock()

			// assert that the driver is set on the node attributes
			mockDriverInfoAttr := node.Attributes["driver.mock_driver"]
			if mockDriverInfoAttr == "" {
				return false, fmt.Errorf("mock driver is empty when it should be set on the node attributes")
			}

			mockDriverInfo := node.Drivers["mock_driver"]

			// assert that the Driver information for the node is also set correctly
			if mockDriverInfo == nil {
				return false, fmt.Errorf("mock driver is nil when it should be set on node Drivers")
			}
			if !mockDriverInfo.Detected {
				return false, fmt.Errorf("mock driver should be set as detected")
			}
			if !mockDriverInfo.Healthy {
				return false, fmt.Errorf("mock driver should be set as healthy")
			}
			if mockDriverInfo.HealthDescription == "" {
				return false, fmt.Errorf("mock driver description should not be empty")
			}
			return true, nil
		}, func(err error) {
			t.Fatalf("err: %v", err)
		})
	}

	{
		testutil.WaitForResult(func() (bool, error) {
			c1.configLock.Lock()
			defer c1.configLock.Unlock()
			mockDriverInfo := node.Drivers["mock_driver"]
			// assert that the Driver information for the node is also set correctly
			if mockDriverInfo == nil {
				return false, fmt.Errorf("mock driver is nil when it should be set on node Drivers")
			}
			if mockDriverInfo.Detected {
				return false, fmt.Errorf("mock driver should not be set as detected")
			}
			if mockDriverInfo.Healthy {
				return false, fmt.Errorf("mock driver should not be set as healthy")
			}
			if mockDriverInfo.HealthDescription == "" {
				return false, fmt.Errorf("mock driver description should not be empty")
			}
			return true, nil
		}, func(err error) {
			t.Fatalf("err: %v", err)
		})
	}
}

// TestClient_MixedTLS asserts that when a server is running with TLS enabled
// it will reject any RPC connections from clients that lack TLS. See #2525
func TestClient_MixedTLS(t *testing.T) {
	t.Parallel()
	const (
		cafile  = "../helper/tlsutil/testdata/ca.pem"
		foocert = "../helper/tlsutil/testdata/nomad-foo.pem"
		fookey  = "../helper/tlsutil/testdata/nomad-foo-key.pem"
	)
	s1, addr, cleanupS1 := testServer(t, func(c *nomad.Config) {
		c.TLSConfig = &nconfig.TLSConfig{
			EnableHTTP:           true,
			EnableRPC:            true,
			VerifyServerHostname: true,
			CAFile:               cafile,
			CertFile:             foocert,
			KeyFile:              fookey,
		}
	})
	defer cleanupS1()
	testutil.WaitForLeader(t, s1.RPC)

	c1, cleanup := TestClient(t, func(c *config.Config) {
		c.Servers = []string{addr}
	})
	defer cleanup()

	req := structs.NodeSpecificRequest{
		NodeID:       c1.Node().ID,
		QueryOptions: structs.QueryOptions{Region: "global"},
	}
	var out structs.SingleNodeResponse
	testutil.AssertUntil(100*time.Millisecond,
		func() (bool, error) {
			err := c1.RPC("Node.GetNode", &req, &out)
			if err == nil {
				return false, fmt.Errorf("client RPC succeeded when it should have failed:\n%+v", out)
			}
			return true, nil
		},
		func(err error) {
			t.Fatalf(err.Error())
		},
	)
}

// TestClient_BadTLS asserts that when a client and server are running with TLS
// enabled -- but their certificates are signed by different CAs -- they're
// unable to communicate.
func TestClient_BadTLS(t *testing.T) {
	t.Parallel()

	const (
		cafile  = "../helper/tlsutil/testdata/ca.pem"
		foocert = "../helper/tlsutil/testdata/nomad-foo.pem"
		fookey  = "../helper/tlsutil/testdata/nomad-foo-key.pem"
		badca   = "../helper/tlsutil/testdata/ca-bad.pem"
		badcert = "../helper/tlsutil/testdata/nomad-bad.pem"
		badkey  = "../helper/tlsutil/testdata/nomad-bad-key.pem"
	)
	s1, addr, cleanupS1 := testServer(t, func(c *nomad.Config) {
		c.TLSConfig = &nconfig.TLSConfig{
			EnableHTTP:           true,
			EnableRPC:            true,
			VerifyServerHostname: true,
			CAFile:               cafile,
			CertFile:             foocert,
			KeyFile:              fookey,
		}
	})
	defer cleanupS1()
	testutil.WaitForLeader(t, s1.RPC)

	c1, cleanupC1 := TestClient(t, func(c *config.Config) {
		c.Servers = []string{addr}
		c.TLSConfig = &nconfig.TLSConfig{
			EnableHTTP:           true,
			EnableRPC:            true,
			VerifyServerHostname: true,
			CAFile:               badca,
			CertFile:             badcert,
			KeyFile:              badkey,
		}
	})
	defer cleanupC1()

	req := structs.NodeSpecificRequest{
		NodeID:       c1.Node().ID,
		QueryOptions: structs.QueryOptions{Region: "global"},
	}
	var out structs.SingleNodeResponse
	testutil.AssertUntil(100*time.Millisecond,
		func() (bool, error) {
			err := c1.RPC("Node.GetNode", &req, &out)
			if err == nil {
				return false, fmt.Errorf("client RPC succeeded when it should have failed:\n%+v", out)
			}
			return true, nil
		},
		func(err error) {
			t.Fatalf(err.Error())
		},
	)
}

func TestClient_Register(t *testing.T) {
	t.Parallel()

	s1, _, cleanupS1 := testServer(t, nil)
	defer cleanupS1()
	testutil.WaitForLeader(t, s1.RPC)

	c1, cleanupC1 := TestClient(t, func(c *config.Config) {
		c.RPCHandler = s1
	})
	defer cleanupC1()

	req := structs.NodeSpecificRequest{
		NodeID:       c1.Node().ID,
		QueryOptions: structs.QueryOptions{Region: "global"},
	}
	var out structs.SingleNodeResponse

	// Register should succeed
	testutil.WaitForResult(func() (bool, error) {
		err := s1.RPC("Node.GetNode", &req, &out)
		if err != nil {
			return false, err
		}
		if out.Node == nil {
			return false, fmt.Errorf("missing reg")
		}
		return out.Node.ID == req.NodeID, nil
	}, func(err error) {
		t.Fatalf("err: %v", err)
	})
}

func TestClient_Heartbeat(t *testing.T) {
	t.Parallel()

	s1, _, cleanupS1 := testServer(t, func(c *nomad.Config) {
		c.MinHeartbeatTTL = 50 * time.Millisecond
	})
	defer cleanupS1()
	testutil.WaitForLeader(t, s1.RPC)

	c1, cleanupC1 := TestClient(t, func(c *config.Config) {
		c.RPCHandler = s1
	})
	defer cleanupC1()

	req := structs.NodeSpecificRequest{
		NodeID:       c1.Node().ID,
		QueryOptions: structs.QueryOptions{Region: "global"},
	}
	var out structs.SingleNodeResponse

	// Register should succeed
	testutil.WaitForResult(func() (bool, error) {
		err := s1.RPC("Node.GetNode", &req, &out)
		if err != nil {
			return false, err
		}
		if out.Node == nil {
			return false, fmt.Errorf("missing reg")
		}
		return out.Node.Status == structs.NodeStatusReady, nil
	}, func(err error) {
		t.Fatalf("err: %v", err)
	})
}

// TestClient_UpdateAllocStatus asserts that running allocations send updates to
// the server.
func TestClient_UpdateAllocStatus(t *testing.T) {
	t.Parallel()

	s1, _, cleanupS1 := testServer(t, nil)
	defer cleanupS1()

	_, cleanup := TestClient(t, func(c *config.Config) {
		c.RPCHandler = s1
	})
	defer cleanup()

	job := mock.Job()
	// allow running the job on any node, including the self client, which may not be a Linux box
	job.Constraints = nil
	job.TaskGroups[0].Count = 1
	task := job.TaskGroups[0].Tasks[0]
	task.Driver = "mock_driver"
	task.Config = map[string]interface{}{
		"run_for": "10s",
	}
	task.Services = nil

	// WaitForRunning polls the server until the ClientStatus is running
	testutil.WaitForRunning(t, s1.RPC, job)
}

func TestClient_WatchAllocs(t *testing.T) {
	t.Parallel()

	s1, _, cleanupS1 := testServer(t, nil)
	defer cleanupS1()
	testutil.WaitForLeader(t, s1.RPC)

	c1, cleanup := TestClient(t, func(c *config.Config) {
		c.RPCHandler = s1
	})
	defer cleanup()

	// Wait until the node is ready
	waitTilNodeReady(c1, t)

	// Create mock allocations
	job := mock.Job()
	job.TaskGroups[0].Count = 3
	job.TaskGroups[0].Tasks[0].Driver = "mock_driver"
	job.TaskGroups[0].Tasks[0].Config = map[string]interface{}{
		"run_for": "10s",
	}
	alloc1 := mock.Alloc()
	alloc1.JobID = job.ID
	alloc1.Job = job
	alloc1.NodeID = c1.Node().ID
	alloc2 := mock.Alloc()
	alloc2.NodeID = c1.Node().ID
	alloc2.JobID = job.ID
	alloc2.Job = job

	state := s1.State()
	if err := state.UpsertJob(structs.MsgTypeTestSetup, 100, job); err != nil {
		t.Fatal(err)
	}
	if err := state.UpsertJobSummary(101, mock.JobSummary(alloc1.JobID)); err != nil {
		t.Fatal(err)
	}
	err := state.UpsertAllocs(structs.MsgTypeTestSetup, 102, []*structs.Allocation{alloc1, alloc2})
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	// Both allocations should get registered
	testutil.WaitForResult(func() (bool, error) {
		c1.allocLock.RLock()
		num := len(c1.allocs)
		c1.allocLock.RUnlock()
		return num == 2, nil
	}, func(err error) {
		t.Fatalf("err: %v", err)
	})

	// Delete one allocation
	if err := state.DeleteEval(103, nil, []string{alloc1.ID}); err != nil {
		t.Fatalf("err: %v", err)
	}

	// Update the other allocation. Have to make a copy because the allocs are
	// shared in memory in the test and the modify index would be updated in the
	// alloc runner.
	alloc2_2 := alloc2.Copy()
	alloc2_2.DesiredStatus = structs.AllocDesiredStatusStop
	if err := state.UpsertAllocs(structs.MsgTypeTestSetup, 104, []*structs.Allocation{alloc2_2}); err != nil {
		t.Fatalf("err upserting stopped alloc: %v", err)
	}

	// One allocation should get GC'd and removed
	testutil.WaitForResult(func() (bool, error) {
		c1.allocLock.RLock()
		num := len(c1.allocs)
		c1.allocLock.RUnlock()
		return num == 1, nil
	}, func(err error) {
		t.Fatalf("err: %v", err)
	})

	// One allocation should get updated
	testutil.WaitForResult(func() (bool, error) {
		c1.allocLock.RLock()
		ar := c1.allocs[alloc2.ID]
		c1.allocLock.RUnlock()
		return ar.Alloc().DesiredStatus == structs.AllocDesiredStatusStop, nil
	}, func(err error) {
		t.Fatalf("err: %v", err)
	})
}

func waitTilNodeReady(client *Client, t *testing.T) {
	testutil.WaitForResult(func() (bool, error) {
		n := client.Node()
		if n.Status != structs.NodeStatusReady {
			return false, fmt.Errorf("node not registered")
		}
		return true, nil
	}, func(err error) {
		t.Fatalf("err: %v", err)
	})
}

func TestClient_SaveRestoreState(t *testing.T) {
	t.Parallel()

	s1, _, cleanupS1 := testServer(t, nil)
	defer cleanupS1()
	testutil.WaitForLeader(t, s1.RPC)

	c1, cleanupC1 := TestClient(t, func(c *config.Config) {
		c.DevMode = false
		c.RPCHandler = s1
	})
	defer cleanupC1()

	// Wait until the node is ready
	waitTilNodeReady(c1, t)

	// Create mock allocations
	job := mock.Job()
	alloc1 := mock.Alloc()
	alloc1.NodeID = c1.Node().ID
	alloc1.Job = job
	alloc1.JobID = job.ID
	alloc1.Job.TaskGroups[0].Tasks[0].Driver = "mock_driver"
	alloc1.Job.TaskGroups[0].Tasks[0].Config = map[string]interface{}{
		"run_for": "10s",
	}
	alloc1.ClientStatus = structs.AllocClientStatusRunning

	state := s1.State()
	if err := state.UpsertJob(structs.MsgTypeTestSetup, 100, job); err != nil {
		t.Fatal(err)
	}
	if err := state.UpsertJobSummary(101, mock.JobSummary(alloc1.JobID)); err != nil {
		t.Fatal(err)
	}
	if err := state.UpsertAllocs(structs.MsgTypeTestSetup, 102, []*structs.Allocation{alloc1}); err != nil {
		t.Fatalf("err: %v", err)
	}

	// Allocations should get registered
	testutil.WaitForResult(func() (bool, error) {
		c1.allocLock.RLock()
		ar := c1.allocs[alloc1.ID]
		c1.allocLock.RUnlock()
		if ar == nil {
			return false, fmt.Errorf("nil alloc runner")
		}
		if ar.Alloc().ClientStatus != structs.AllocClientStatusRunning {
			return false, fmt.Errorf("client status: got %v; want %v", ar.Alloc().ClientStatus, structs.AllocClientStatusRunning)
		}
		return true, nil
	}, func(err error) {
		t.Fatalf("err: %v", err)
	})

	// Shutdown the client, saves state
	if err := c1.Shutdown(); err != nil {
		t.Fatalf("err: %v", err)
	}

	// Create a new client
	logger := testlog.HCLogger(t)
	c1.config.Logger = logger
	consulCatalog := consul.NewMockCatalog(logger)
	mockService := consulApi.NewMockConsulServiceClient(t, logger)

	// ensure we use non-shutdown driver instances
	c1.config.PluginLoader = catalog.TestPluginLoaderWithOptions(t, "", c1.config.Options, nil)
	c1.config.PluginSingletonLoader = singleton.NewSingletonLoader(logger, c1.config.PluginLoader)

	c2, err := NewClient(c1.config, consulCatalog, nil, mockService)
	if err != nil {
		t.Fatalf("err: %v", err)
	}
	defer c2.Shutdown()

	// Ensure the allocation is running
	testutil.WaitForResult(func() (bool, error) {
		c2.allocLock.RLock()
		ar := c2.allocs[alloc1.ID]
		c2.allocLock.RUnlock()
		status := ar.Alloc().ClientStatus
		alive := status == structs.AllocClientStatusRunning || status == structs.AllocClientStatusPending
		if !alive {
			return false, fmt.Errorf("incorrect client status: %#v", ar.Alloc())
		}
		return true, nil
	}, func(err error) {
		t.Fatalf("err: %v", err)
	})

	// Destroy all the allocations
	for _, ar := range c2.getAllocRunners() {
		ar.Destroy()
	}

	for _, ar := range c2.getAllocRunners() {
		<-ar.DestroyCh()
	}
}

func TestClient_AddAllocError(t *testing.T) {
	t.Parallel()
	require := require.New(t)

	s1, _, cleanupS1 := testServer(t, nil)
	defer cleanupS1()
	testutil.WaitForLeader(t, s1.RPC)

	c1, cleanupC1 := TestClient(t, func(c *config.Config) {
		c.DevMode = false
		c.RPCHandler = s1
	})
	defer cleanupC1()

	// Wait until the node is ready
	waitTilNodeReady(c1, t)

	// Create mock allocation with invalid task group name
	job := mock.Job()
	alloc1 := mock.Alloc()
	alloc1.NodeID = c1.Node().ID
	alloc1.Job = job
	alloc1.JobID = job.ID
	alloc1.Job.TaskGroups[0].Tasks[0].Driver = "mock_driver"
	alloc1.Job.TaskGroups[0].Tasks[0].Config = map[string]interface{}{
		"run_for": "10s",
	}
	alloc1.ClientStatus = structs.AllocClientStatusPending

	// Set these two fields to nil to cause alloc runner creation to fail
	alloc1.AllocatedResources = nil
	alloc1.TaskResources = nil

	state := s1.State()
	err := state.UpsertJob(structs.MsgTypeTestSetup, 100, job)
	require.Nil(err)

	err = state.UpsertJobSummary(101, mock.JobSummary(alloc1.JobID))
	require.Nil(err)

	err = state.UpsertAllocs(structs.MsgTypeTestSetup, 102, []*structs.Allocation{alloc1})
	require.Nil(err)

	// Push this alloc update to the client
	allocUpdates := &allocUpdates{
		pulled: map[string]*structs.Allocation{
			alloc1.ID: alloc1,
		},
	}
	c1.runAllocs(allocUpdates)

	// Ensure the allocation has been marked as invalid and failed on the server
	testutil.WaitForResult(func() (bool, error) {
		c1.allocLock.RLock()
		ar := c1.allocs[alloc1.ID]
		_, isInvalid := c1.invalidAllocs[alloc1.ID]
		c1.allocLock.RUnlock()
		if ar != nil {
			return false, fmt.Errorf("expected nil alloc runner")
		}
		if !isInvalid {
			return false, fmt.Errorf("expected alloc to be marked as invalid")
		}
		alloc, err := s1.State().AllocByID(nil, alloc1.ID)
		require.Nil(err)
		failed := alloc.ClientStatus == structs.AllocClientStatusFailed
		if !failed {
			return false, fmt.Errorf("Expected failed client status, but got %v", alloc.ClientStatus)
		}
		return true, nil
	}, func(err error) {
		require.NoError(err)
	})

}

func TestClient_Init(t *testing.T) {
	t.Parallel()
	dir, err := ioutil.TempDir("", "nomad")
	if err != nil {
		t.Fatalf("err: %s", err)
	}
	defer os.RemoveAll(dir)
	allocDir := filepath.Join(dir, "alloc")

	client := &Client{
		config: &config.Config{
			AllocDir:       allocDir,
			StateDBFactory: cstate.GetStateDBFactory(true),
		},
		logger: testlog.HCLogger(t),
	}

	if err := client.init(); err != nil {
		t.Fatalf("err: %s", err)
	}

	if _, err := os.Stat(allocDir); err != nil {
		t.Fatalf("err: %s", err)
	}
}

func TestClient_BlockedAllocations(t *testing.T) {
	t.Parallel()

	s1, _, cleanupS1 := testServer(t, nil)
	defer cleanupS1()
	testutil.WaitForLeader(t, s1.RPC)

	c1, cleanup := TestClient(t, func(c *config.Config) {
		c.RPCHandler = s1
	})
	defer cleanup()

	// Wait for the node to be ready
	state := s1.State()
	testutil.WaitForResult(func() (bool, error) {
		ws := memdb.NewWatchSet()
		out, err := state.NodeByID(ws, c1.Node().ID)
		if err != nil {
			return false, err
		}
		if out == nil || out.Status != structs.NodeStatusReady {
			return false, fmt.Errorf("bad node: %#v", out)
		}
		return true, nil
	}, func(err error) {
		t.Fatalf("err: %v", err)
	})

	// Add an allocation
	alloc := mock.Alloc()
	alloc.NodeID = c1.Node().ID
	alloc.Job.TaskGroups[0].Tasks[0].Driver = "mock_driver"
	alloc.Job.TaskGroups[0].Tasks[0].Config = map[string]interface{}{
		"kill_after":  "1s",
		"run_for":     "100s",
		"exit_code":   0,
		"exit_signal": 0,
	}

	state.UpsertJobSummary(99, mock.JobSummary(alloc.JobID))
	state.UpsertAllocs(structs.MsgTypeTestSetup, 100, []*structs.Allocation{alloc})

	// Wait until the client downloads and starts the allocation
	testutil.WaitForResult(func() (bool, error) {
		ws := memdb.NewWatchSet()
		out, err := state.AllocByID(ws, alloc.ID)
		if err != nil {
			return false, err
		}
		if out == nil || out.ClientStatus != structs.AllocClientStatusRunning {
			return false, fmt.Errorf("bad alloc: %#v", out)
		}
		return true, nil
	}, func(err error) {
		t.Fatalf("err: %v", err)
	})

	// Add a new chained alloc
	alloc2 := alloc.Copy()
	alloc2.ID = uuid.Generate()
	alloc2.Job = alloc.Job
	alloc2.JobID = alloc.JobID
	alloc2.PreviousAllocation = alloc.ID
	if err := state.UpsertAllocs(structs.MsgTypeTestSetup, 200, []*structs.Allocation{alloc2}); err != nil {
		t.Fatalf("err: %v", err)
	}

	// Ensure that the chained allocation is being tracked as blocked
	testutil.WaitForResult(func() (bool, error) {
		ar := c1.getAllocRunners()[alloc2.ID]
		if ar == nil {
			return false, fmt.Errorf("alloc 2's alloc runner does not exist")
		}
		if !ar.IsWaiting() {
			return false, fmt.Errorf("alloc 2 is not blocked")
		}
		return true, nil
	}, func(err error) {
		t.Fatalf("err: %v", err)
	})

	// Change the desired state of the parent alloc to stop
	alloc1 := alloc.Copy()
	alloc1.DesiredStatus = structs.AllocDesiredStatusStop
	if err := state.UpsertAllocs(structs.MsgTypeTestSetup, 300, []*structs.Allocation{alloc1}); err != nil {
		t.Fatalf("err: %v", err)
	}

	// Ensure that there are no blocked allocations
	testutil.WaitForResult(func() (bool, error) {
		for id, ar := range c1.getAllocRunners() {
			if ar.IsWaiting() {
				return false, fmt.Errorf("%q still blocked", id)
			}
			if ar.IsMigrating() {
				return false, fmt.Errorf("%q still migrating", id)
			}
		}
		return true, nil
	}, func(err error) {
		t.Fatalf("err: %v", err)
	})

	// Destroy all the allocations
	for _, ar := range c1.getAllocRunners() {
		ar.Destroy()
	}

	for _, ar := range c1.getAllocRunners() {
		<-ar.DestroyCh()
	}
}

func TestClient_ValidateMigrateToken_ValidToken(t *testing.T) {
	t.Parallel()
	assert := assert.New(t)

	c, cleanup := TestClient(t, func(c *config.Config) {
		c.ACLEnabled = true
	})
	defer cleanup()

	alloc := mock.Alloc()
	validToken, err := structs.GenerateMigrateToken(alloc.ID, c.secretNodeID())
	assert.Nil(err)

	assert.Equal(c.ValidateMigrateToken(alloc.ID, validToken), true)
}

func TestClient_ValidateMigrateToken_InvalidToken(t *testing.T) {
	t.Parallel()
	assert := assert.New(t)

	c, cleanup := TestClient(t, func(c *config.Config) {
		c.ACLEnabled = true
	})
	defer cleanup()

	assert.Equal(c.ValidateMigrateToken("", ""), false)

	alloc := mock.Alloc()
	assert.Equal(c.ValidateMigrateToken(alloc.ID, alloc.ID), false)
	assert.Equal(c.ValidateMigrateToken(alloc.ID, ""), false)
}

func TestClient_ValidateMigrateToken_ACLDisabled(t *testing.T) {
	t.Parallel()
	assert := assert.New(t)

	c, cleanup := TestClient(t, func(c *config.Config) {})
	defer cleanup()

	assert.Equal(c.ValidateMigrateToken("", ""), true)
}

func TestClient_ReloadTLS_UpgradePlaintextToTLS(t *testing.T) {
	t.Parallel()
	assert := assert.New(t)

	s1, addr, cleanupS1 := testServer(t, func(c *nomad.Config) {
		c.Region = "global"
	})
	defer cleanupS1()
	testutil.WaitForLeader(t, s1.RPC)

	const (
		cafile  = "../helper/tlsutil/testdata/ca.pem"
		foocert = "../helper/tlsutil/testdata/nomad-foo.pem"
		fookey  = "../helper/tlsutil/testdata/nomad-foo-key.pem"
	)

	c1, cleanup := TestClient(t, func(c *config.Config) {
		c.Servers = []string{addr}
	})
	defer cleanup()

	// Registering a node over plaintext should succeed
	{
		req := structs.NodeSpecificRequest{
			NodeID:       c1.Node().ID,
			QueryOptions: structs.QueryOptions{Region: "global"},
		}

		testutil.WaitForResult(func() (bool, error) {
			var out structs.SingleNodeResponse
			err := c1.RPC("Node.GetNode", &req, &out)
			if err != nil {
				return false, fmt.Errorf("client RPC failed when it should have succeeded:\n%+v", err)
			}
			return true, nil
		},
			func(err error) {
				t.Fatalf(err.Error())
			},
		)
	}

	newConfig := &nconfig.TLSConfig{
		EnableHTTP:           true,
		EnableRPC:            true,
		VerifyServerHostname: true,
		CAFile:               cafile,
		CertFile:             foocert,
		KeyFile:              fookey,
	}

	err := c1.reloadTLSConnections(newConfig)
	assert.Nil(err)

	// Registering a node over plaintext should fail after the node has upgraded
	// to TLS
	{
		req := structs.NodeSpecificRequest{
			NodeID:       c1.Node().ID,
			QueryOptions: structs.QueryOptions{Region: "global"},
		}
		testutil.WaitForResult(func() (bool, error) {
			var out structs.SingleNodeResponse
			err := c1.RPC("Node.GetNode", &req, &out)
			if err == nil {
				return false, fmt.Errorf("client RPC succeeded when it should have failed:\n%+v", err)
			}
			return true, nil
		},
			func(err error) {
				t.Fatalf(err.Error())
			},
		)
	}
}

func TestClient_ReloadTLS_DowngradeTLSToPlaintext(t *testing.T) {
	t.Parallel()
	assert := assert.New(t)

	s1, addr, cleanupS1 := testServer(t, func(c *nomad.Config) {
		c.Region = "global"
	})
	defer cleanupS1()
	testutil.WaitForLeader(t, s1.RPC)

	const (
		cafile  = "../helper/tlsutil/testdata/ca.pem"
		foocert = "../helper/tlsutil/testdata/nomad-foo.pem"
		fookey  = "../helper/tlsutil/testdata/nomad-foo-key.pem"
	)

	c1, cleanup := TestClient(t, func(c *config.Config) {
		c.Servers = []string{addr}
		c.TLSConfig = &nconfig.TLSConfig{
			EnableHTTP:           true,
			EnableRPC:            true,
			VerifyServerHostname: true,
			CAFile:               cafile,
			CertFile:             foocert,
			KeyFile:              fookey,
		}
	})
	defer cleanup()

	// assert that when one node is running in encrypted mode, an RPC request to a
	// node running in plaintext mode should fail
	{
		req := structs.NodeSpecificRequest{
			NodeID:       c1.Node().ID,
			QueryOptions: structs.QueryOptions{Region: "global"},
		}
		testutil.WaitForResult(func() (bool, error) {
			var out structs.SingleNodeResponse
			err := c1.RPC("Node.GetNode", &req, &out)
			if err == nil {
				return false, fmt.Errorf("client RPC succeeded when it should have failed:\n%+v", err)
			}
			return true, nil
		}, func(err error) {
			t.Fatalf(err.Error())
		},
		)
	}

	newConfig := &nconfig.TLSConfig{}

	err := c1.reloadTLSConnections(newConfig)
	assert.Nil(err)

	// assert that when both nodes are in plaintext mode, an RPC request should
	// succeed
	{
		req := structs.NodeSpecificRequest{
			NodeID:       c1.Node().ID,
			QueryOptions: structs.QueryOptions{Region: "global"},
		}
		testutil.WaitForResult(func() (bool, error) {
			var out structs.SingleNodeResponse
			err := c1.RPC("Node.GetNode", &req, &out)
			if err != nil {
				return false, fmt.Errorf("client RPC failed when it should have succeeded:\n%+v", err)
			}
			return true, nil
		}, func(err error) {
			t.Fatalf(err.Error())
		},
		)
	}
}

// TestClient_ServerList tests client methods that interact with the internal
// nomad server list.
func TestClient_ServerList(t *testing.T) {
	t.Parallel()
	client, cleanup := TestClient(t, func(c *config.Config) {})
	defer cleanup()

	if s := client.GetServers(); len(s) != 0 {
		t.Fatalf("expected server list to be empty but found: %+q", s)
	}
	if _, err := client.SetServers(nil); err != noServersErr {
		t.Fatalf("expected setting an empty list to return a 'no servers' error but received %v", err)
	}
	if _, err := client.SetServers([]string{"123.456.13123.123.13:80"}); err == nil {
		t.Fatalf("expected setting a bad server to return an error")
	}
	if _, err := client.SetServers([]string{"123.456.13123.123.13:80", "127.0.0.1:1234", "127.0.0.1"}); err == nil {
		t.Fatalf("expected setting at least one good server to succeed but received: %v", err)
	}
	s := client.GetServers()
	if len(s) != 0 {
		t.Fatalf("expected 2 servers but received: %+q", s)
	}
}

func TestClient_UpdateNodeFromDevicesAccumulates(t *testing.T) {
	t.Parallel()
	client, cleanup := TestClient(t, func(c *config.Config) {})
	defer cleanup()

	client.updateNodeFromFingerprint(&fingerprint.FingerprintResponse{
		NodeResources: &structs.NodeResources{
			Cpu: structs.NodeCpuResources{CpuShares: 123},
		},
	})

	client.updateNodeFromFingerprint(&fingerprint.FingerprintResponse{
		NodeResources: &structs.NodeResources{
			Memory: structs.NodeMemoryResources{MemoryMB: 1024},
		},
	})

	client.updateNodeFromDevices([]*structs.NodeDeviceResource{
		{
			Vendor: "vendor",
			Type:   "type",
		},
	})

	// initial check
	expectedResources := &structs.NodeResources{
		// computed through test client initialization
		Networks:     client.configCopy.Node.NodeResources.Networks,
		NodeNetworks: client.configCopy.Node.NodeResources.NodeNetworks,
		Disk:         client.configCopy.Node.NodeResources.Disk,

		// injected
		Cpu:    structs.NodeCpuResources{CpuShares: 123},
		Memory: structs.NodeMemoryResources{MemoryMB: 1024},
		Devices: []*structs.NodeDeviceResource{
			{
				Vendor: "vendor",
				Type:   "type",
			},
		},
	}

	assert.EqualValues(t, expectedResources, client.configCopy.Node.NodeResources)

	// overrides of values

	client.updateNodeFromFingerprint(&fingerprint.FingerprintResponse{
		NodeResources: &structs.NodeResources{
			Memory: structs.NodeMemoryResources{MemoryMB: 2048},
		},
	})

	client.updateNodeFromDevices([]*structs.NodeDeviceResource{
		{
			Vendor: "vendor",
			Type:   "type",
		},
		{
			Vendor: "vendor2",
			Type:   "type2",
		},
	})

	expectedResources2 := &structs.NodeResources{
		// computed through test client initialization
		Networks:     client.configCopy.Node.NodeResources.Networks,
		NodeNetworks: client.configCopy.Node.NodeResources.NodeNetworks,
		Disk:         client.configCopy.Node.NodeResources.Disk,

		// injected
		Cpu:    structs.NodeCpuResources{CpuShares: 123},
		Memory: structs.NodeMemoryResources{MemoryMB: 2048},
		Devices: []*structs.NodeDeviceResource{
			{
				Vendor: "vendor",
				Type:   "type",
			},
			{
				Vendor: "vendor2",
				Type:   "type2",
			},
		},
	}

	assert.EqualValues(t, expectedResources2, client.configCopy.Node.NodeResources)

}

// TestClient_UpdateNodeFromFingerprintKeepsConfig asserts manually configured
// network interfaces take precedence over fingerprinted ones.
func TestClient_UpdateNodeFromFingerprintKeepsConfig(t *testing.T) {
	t.Parallel()

	// Client without network configured updates to match fingerprint
	client, cleanup := TestClient(t, nil)
	defer cleanup()

	client.updateNodeFromFingerprint(&fingerprint.FingerprintResponse{
		NodeResources: &structs.NodeResources{
			Cpu:      structs.NodeCpuResources{CpuShares: 123},
			Networks: []*structs.NetworkResource{{Mode: "host", Device: "any-interface"}},
		},
		Resources: &structs.Resources{
			CPU: 80,
		},
	})
	idx := len(client.config.Node.NodeResources.Networks) - 1
	require.Equal(t, int64(123), client.config.Node.NodeResources.Cpu.CpuShares)
	require.Equal(t, "any-interface", client.config.Node.NodeResources.Networks[idx].Device)
	require.Equal(t, 80, client.config.Node.Resources.CPU)

	// lookup an interface. client.Node starts with a hardcoded value, eth0,
	// and is only updated async through the fingerprinter.
	// Let's just look up a network device; any one will do for this test
	interfaces, err := net.Interfaces()
	require.NoError(t, err)
	require.NotEmpty(t, interfaces)
	dev := interfaces[0].Name

	// Client with network interface configured keeps the config
	// setting on update
	name := "TestClient_UpdateNodeFromFingerprintKeepsConfig2"
	client, cleanup = TestClient(t, func(c *config.Config) {
		c.NetworkInterface = dev
		c.Node.Name = name
		c.Options["fingerprint.denylist"] = "network"
		// Node is already a mock.Node, with a device
		c.Node.NodeResources.Networks[0].Device = dev
	})
	defer cleanup()
	client.updateNodeFromFingerprint(&fingerprint.FingerprintResponse{
		NodeResources: &structs.NodeResources{
			Cpu: structs.NodeCpuResources{CpuShares: 123},
			Networks: []*structs.NetworkResource{
				{Mode: "host", Device: "any-interface", MBits: 20},
			},
		},
	})
	require.Equal(t, int64(123), client.config.Node.NodeResources.Cpu.CpuShares)
	// only the configured device is kept
	require.Equal(t, 2, len(client.config.Node.NodeResources.Networks))
	require.Equal(t, dev, client.config.Node.NodeResources.Networks[0].Device)
	require.Equal(t, "bridge", client.config.Node.NodeResources.Networks[1].Mode)

	// Network speed is applied to all NetworkResources
	client.config.NetworkInterface = ""
	client.config.NetworkSpeed = 100
	client.updateNodeFromFingerprint(&fingerprint.FingerprintResponse{
		NodeResources: &structs.NodeResources{
			Cpu: structs.NodeCpuResources{CpuShares: 123},
			Networks: []*structs.NetworkResource{
				{Mode: "host", Device: "any-interface", MBits: 20},
			},
		},
		Resources: &structs.Resources{
			CPU: 80,
		},
	})
	assert.Equal(t, 3, len(client.config.Node.NodeResources.Networks))
	assert.Equal(t, "any-interface", client.config.Node.NodeResources.Networks[2].Device)
	assert.Equal(t, 100, client.config.Node.NodeResources.Networks[2].MBits)
	assert.Equal(t, 0, client.config.Node.NodeResources.Networks[1].MBits)
}

// Test_UpdateNodeFromFingerprintMultiIP asserts that multiple IP addresses
// (e.g. IPv4 and IPv6) are supported on the configured network interface.
func Test_UpdateNodeFromFingerprintMultiIP(t *testing.T) {
	t.Parallel()

	var dev string
	switch runtime.GOOS {
	case "linux":
		dev = "lo"
	case "darwin":
		dev = "lo0"
	}

	// Client without network configured updates to match fingerprint
	client, cleanup := TestClient(t, func(c *config.Config) {
		c.NetworkInterface = dev
		c.Options["fingerprint.denylist"] = "network,cni,bridge"
		c.Node.Resources.Networks = c.Node.NodeResources.Networks
	})
	defer cleanup()

	client.updateNodeFromFingerprint(&fingerprint.FingerprintResponse{
		NodeResources: &structs.NodeResources{
			Cpu: structs.NodeCpuResources{CpuShares: 123},
			Networks: []*structs.NetworkResource{
				{Device: dev, IP: "127.0.0.1"},
				{Device: dev, IP: "::1"},
			},
		},
	})

	nets := structs.Networks{
		mock.Node().NodeResources.Networks[0],
		{Device: dev, IP: "127.0.0.1"},
		{Device: dev, IP: "::1"},
	}

	require.Equal(t, nets, client.config.Node.NodeResources.Networks)
}

func TestClient_computeAllocatedDeviceStats(t *testing.T) {
	logger := testlog.HCLogger(t)
	c := &Client{logger: logger}

	newDeviceStats := func(strValue string) *device.DeviceStats {
		return &device.DeviceStats{
			Summary: &psstructs.StatValue{
				StringVal: &strValue,
			},
		}
	}

	allocatedDevices := []*structs.AllocatedDeviceResource{
		{
			Vendor:    "vendor",
			Type:      "type",
			Name:      "name",
			DeviceIDs: []string{"d2", "d3", "notfoundid"},
		},
		{
			Vendor:    "vendor2",
			Type:      "type2",
			Name:      "name2",
			DeviceIDs: []string{"a2"},
		},
		{
			Vendor:    "vendor_notfound",
			Type:      "type_notfound",
			Name:      "name_notfound",
			DeviceIDs: []string{"d3"},
		},
	}

	hostDeviceGroupStats := []*device.DeviceGroupStats{
		{
			Vendor: "vendor",
			Type:   "type",
			Name:   "name",
			InstanceStats: map[string]*device.DeviceStats{
				"unallocated": newDeviceStats("unallocated"),
				"d2":          newDeviceStats("d2"),
				"d3":          newDeviceStats("d3"),
			},
		},
		{
			Vendor: "vendor2",
			Type:   "type2",
			Name:   "name2",
			InstanceStats: map[string]*device.DeviceStats{
				"a2": newDeviceStats("a2"),
			},
		},
		{
			Vendor: "vendor_unused",
			Type:   "type_unused",
			Name:   "name_unused",
			InstanceStats: map[string]*device.DeviceStats{
				"unallocated_unused": newDeviceStats("unallocated_unused"),
			},
		},
	}

	// test some edge conditions
	assert.Empty(t, c.computeAllocatedDeviceGroupStats(nil, nil))
	assert.Empty(t, c.computeAllocatedDeviceGroupStats(nil, hostDeviceGroupStats))
	assert.Empty(t, c.computeAllocatedDeviceGroupStats(allocatedDevices, nil))

	// actual test
	result := c.computeAllocatedDeviceGroupStats(allocatedDevices, hostDeviceGroupStats)
	sort.Slice(result, func(i, j int) bool {
		return result[i].Vendor < result[j].Vendor
	})

	expected := []*device.DeviceGroupStats{
		{
			Vendor: "vendor",
			Type:   "type",
			Name:   "name",
			InstanceStats: map[string]*device.DeviceStats{
				"d2": newDeviceStats("d2"),
				"d3": newDeviceStats("d3"),
			},
		},
		{
			Vendor: "vendor2",
			Type:   "type2",
			Name:   "name2",
			InstanceStats: map[string]*device.DeviceStats{
				"a2": newDeviceStats("a2"),
			},
		},
	}

	assert.EqualValues(t, expected, result)
}

func TestClient_getAllocatedResources(t *testing.T) {
	t.Parallel()
	require := require.New(t)
	client, cleanup := TestClient(t, nil)
	defer cleanup()

	allocStops := mock.BatchAlloc()
	allocStops.Job.TaskGroups[0].Count = 1
	allocStops.Job.TaskGroups[0].Tasks[0].Driver = "mock_driver"
	allocStops.Job.TaskGroups[0].Tasks[0].Config = map[string]interface{}{
		"run_for":   "1ms",
		"exit_code": "0",
	}
	allocStops.Job.TaskGroups[0].RestartPolicy.Attempts = 0
	allocStops.AllocatedResources.Shared.DiskMB = 64
	allocStops.AllocatedResources.Tasks["web"].Cpu = structs.AllocatedCpuResources{CpuShares: 64}
	allocStops.AllocatedResources.Tasks["web"].Memory = structs.AllocatedMemoryResources{MemoryMB: 64}
	require.Nil(client.addAlloc(allocStops, ""))

	allocFails := mock.BatchAlloc()
	allocFails.Job.TaskGroups[0].Count = 1
	allocFails.Job.TaskGroups[0].Tasks[0].Driver = "mock_driver"
	allocFails.Job.TaskGroups[0].Tasks[0].Config = map[string]interface{}{
		"run_for":   "1ms",
		"exit_code": "1",
	}
	allocFails.Job.TaskGroups[0].RestartPolicy.Attempts = 0
	allocFails.AllocatedResources.Shared.DiskMB = 128
	allocFails.AllocatedResources.Tasks["web"].Cpu = structs.AllocatedCpuResources{CpuShares: 128}
	allocFails.AllocatedResources.Tasks["web"].Memory = structs.AllocatedMemoryResources{MemoryMB: 128}
	require.Nil(client.addAlloc(allocFails, ""))

	allocRuns := mock.Alloc()
	allocRuns.Job.TaskGroups[0].Count = 1
	allocRuns.Job.TaskGroups[0].Tasks[0].Driver = "mock_driver"
	allocRuns.Job.TaskGroups[0].Tasks[0].Config = map[string]interface{}{
		"run_for": "3s",
	}
	allocRuns.AllocatedResources.Shared.DiskMB = 256
	allocRuns.AllocatedResources.Tasks["web"].Cpu = structs.AllocatedCpuResources{CpuShares: 256}
	allocRuns.AllocatedResources.Tasks["web"].Memory = structs.AllocatedMemoryResources{MemoryMB: 256}
	require.Nil(client.addAlloc(allocRuns, ""))

	allocPends := mock.Alloc()
	allocPends.Job.TaskGroups[0].Count = 1
	allocPends.Job.TaskGroups[0].Tasks[0].Driver = "mock_driver"
	allocPends.Job.TaskGroups[0].Tasks[0].Config = map[string]interface{}{
		"run_for":         "5s",
		"start_block_for": "10s",
	}
	allocPends.AllocatedResources.Shared.DiskMB = 512
	allocPends.AllocatedResources.Tasks["web"].Cpu = structs.AllocatedCpuResources{CpuShares: 512}
	allocPends.AllocatedResources.Tasks["web"].Memory = structs.AllocatedMemoryResources{MemoryMB: 512}
	require.Nil(client.addAlloc(allocPends, ""))

	// wait for allocStops to stop running and for allocRuns to be pending/running
	testutil.WaitForResult(func() (bool, error) {
		as, err := client.GetAllocState(allocPends.ID)
		if err != nil {
			return false, err
		} else if as.ClientStatus != structs.AllocClientStatusPending {
			return false, fmt.Errorf("allocPends not yet pending: %#v", as)
		}

		as, err = client.GetAllocState(allocRuns.ID)
		if err != nil {
			return false, err
		} else if as.ClientStatus != structs.AllocClientStatusRunning {
			return false, fmt.Errorf("allocRuns not yet running: %#v", as)
		}

		as, err = client.GetAllocState(allocStops.ID)
		if err != nil {
			return false, err
		} else if as.ClientStatus != structs.AllocClientStatusComplete {
			return false, fmt.Errorf("allocStops not yet complete: %#v", as)
		}

		as, err = client.GetAllocState(allocFails.ID)
		if err != nil {
			return false, err
		} else if as.ClientStatus != structs.AllocClientStatusFailed {
			return false, fmt.Errorf("allocFails not yet failed: %#v", as)
		}

		return true, nil
	}, func(err error) {
		require.NoError(err)
	})

	result := client.getAllocatedResources(client.config.Node)

	// Ignore comparing networks for now
	result.Flattened.Networks = nil

	expected := structs.ComparableResources{
		Flattened: structs.AllocatedTaskResources{
			Cpu: structs.AllocatedCpuResources{
				CpuShares: 768,
			},
			Memory: structs.AllocatedMemoryResources{
				MemoryMB: 768,
			},
			Networks: nil,
		},
		Shared: structs.AllocatedSharedResources{
			DiskMB: 768,
		},
	}

	assert.EqualValues(t, expected, *result)
}

func TestClient_updateNodeFromDriverUpdatesAll(t *testing.T) {
	t.Parallel()
	client, cleanup := TestClient(t, nil)
	defer cleanup()

	// initial update
	{
		info := &structs.DriverInfo{
			Detected:          true,
			Healthy:           false,
			HealthDescription: "not healthy at start",
			Attributes: map[string]string{
				"node.mock.testattr1": "val1",
			},
		}
		client.updateNodeFromDriver("mock", info)
		n := client.config.Node

		updatedInfo := *n.Drivers["mock"]
		// compare without update time
		updatedInfo.UpdateTime = info.UpdateTime
		assert.EqualValues(t, updatedInfo, *info)

		// check node attributes
		assert.Equal(t, "val1", n.Attributes["node.mock.testattr1"])
	}

	// second update
	{
		info := &structs.DriverInfo{
			Detected:          true,
			Healthy:           true,
			HealthDescription: "healthy",
			Attributes: map[string]string{
				"node.mock.testattr1": "val2",
			},
		}
		client.updateNodeFromDriver("mock", info)
		n := client.Node()

		updatedInfo := *n.Drivers["mock"]
		// compare without update time
		updatedInfo.UpdateTime = info.UpdateTime
		assert.EqualValues(t, updatedInfo, *info)

		// check node attributes are updated
		assert.Equal(t, "val2", n.Attributes["node.mock.testattr1"])

		// update once more with the same info, updateTime shouldn't change
		client.updateNodeFromDriver("mock", info)
		un := client.Node()
		assert.EqualValues(t, n, un)
	}

	// update once more to unhealthy because why not
	{
		info := &structs.DriverInfo{
			Detected:          true,
			Healthy:           false,
			HealthDescription: "lost track",
			Attributes: map[string]string{
				"node.mock.testattr1": "",
			},
		}
		client.updateNodeFromDriver("mock", info)
		n := client.Node()

		updatedInfo := *n.Drivers["mock"]
		// compare without update time
		updatedInfo.UpdateTime = info.UpdateTime
		assert.EqualValues(t, updatedInfo, *info)

		// check node attributes are updated
		assert.Equal(t, "", n.Attributes["node.mock.testattr1"])

		// update once more with the same info, updateTime shouldn't change
		client.updateNodeFromDriver("mock", info)
		un := client.Node()
		assert.EqualValues(t, n, un)
	}
}

// COMPAT(0.12): remove once upgrading from 0.9.5 is no longer supported
func TestClient_hasLocalState(t *testing.T) {
	t.Parallel()

	c, cleanup := TestClient(t, nil)
	defer cleanup()

	c.stateDB = state.NewMemDB(c.logger)

	t.Run("plain alloc", func(t *testing.T) {
		alloc := mock.BatchAlloc()
		c.stateDB.PutAllocation(alloc)

		require.False(t, c.hasLocalState(alloc))
	})

	t.Run("alloc with a task with local state", func(t *testing.T) {
		alloc := mock.BatchAlloc()
		taskName := alloc.Job.LookupTaskGroup(alloc.TaskGroup).Tasks[0].Name
		ls := &trstate.LocalState{}

		c.stateDB.PutAllocation(alloc)
		c.stateDB.PutTaskRunnerLocalState(alloc.ID, taskName, ls)

		require.True(t, c.hasLocalState(alloc))
	})

	t.Run("alloc with a task with task state", func(t *testing.T) {
		alloc := mock.BatchAlloc()
		taskName := alloc.Job.LookupTaskGroup(alloc.TaskGroup).Tasks[0].Name
		ts := &structs.TaskState{
			State: structs.TaskStateRunning,
		}

		c.stateDB.PutAllocation(alloc)
		c.stateDB.PutTaskState(alloc.ID, taskName, ts)

		require.True(t, c.hasLocalState(alloc))
	})
}

func Test_verifiedTasks(t *testing.T) {
	t.Parallel()
	logger := testlog.HCLogger(t)

	// produce a result and check against expected tasks and/or error output
	try := func(t *testing.T, a *structs.Allocation, tasks, expTasks []string, expErr string) {
		result, err := verifiedTasks(logger, a, tasks)
		if expErr != "" {
			require.EqualError(t, err, expErr)
		} else {
			require.NoError(t, err)
			require.Equal(t, expTasks, result)
		}
	}

	// create an alloc with TaskGroup=g1, tasks configured given g1Tasks
	alloc := func(g1Tasks []string) *structs.Allocation {
		var tasks []*structs.Task
		for _, taskName := range g1Tasks {
			tasks = append(tasks, &structs.Task{Name: taskName})
		}

		return &structs.Allocation{
			Job: &structs.Job{
				TaskGroups: []*structs.TaskGroup{
					{Name: "g0", Tasks: []*structs.Task{{Name: "g0t1"}}},
					{Name: "g1", Tasks: tasks},
				},
			},
			TaskGroup: "g1",
		}
	}

	t.Run("nil alloc", func(t *testing.T) {
		tasks := []string{"g1t1"}
		try(t, nil, tasks, nil, "nil allocation")
	})

	t.Run("missing task names", func(t *testing.T) {
		var tasks []string
		tgTasks := []string{"g1t1"}
		try(t, alloc(tgTasks), tasks, nil, "missing task names")
	})

	t.Run("missing group", func(t *testing.T) {
		tasks := []string{"g1t1"}
		a := alloc(tasks)
		a.TaskGroup = "other"
		try(t, a, tasks, nil, "group name in allocation is not present in job")
	})

	t.Run("nonexistent task", func(t *testing.T) {
		tasks := []string{"missing"}
		try(t, alloc([]string{"task1"}), tasks, nil, `task "missing" not found in allocation`)
	})

	t.Run("matching task", func(t *testing.T) {
		tasks := []string{"g1t1"}
		try(t, alloc(tasks), tasks, tasks, "")
	})

	t.Run("matching task subset", func(t *testing.T) {
		tasks := []string{"g1t1", "g1t3"}
		tgTasks := []string{"g1t1", "g1t2", "g1t3"}
		try(t, alloc(tgTasks), tasks, tasks, "")
	})
}