github.com/manicqin/nomad@v0.9.5/nomad/client_alloc_endpoint_test.go (about) 1 package nomad 2 3 import ( 4 "encoding/json" 5 "fmt" 6 "io" 7 "net" 8 "strings" 9 "testing" 10 "time" 11 12 msgpackrpc "github.com/hashicorp/net-rpc-msgpackrpc" 13 "github.com/hashicorp/nomad/acl" 14 "github.com/hashicorp/nomad/client" 15 "github.com/hashicorp/nomad/client/config" 16 cstructs "github.com/hashicorp/nomad/client/structs" 17 "github.com/hashicorp/nomad/helper/uuid" 18 "github.com/hashicorp/nomad/nomad/mock" 19 "github.com/hashicorp/nomad/nomad/structs" 20 nstructs "github.com/hashicorp/nomad/nomad/structs" 21 "github.com/hashicorp/nomad/plugins/drivers" 22 "github.com/hashicorp/nomad/testutil" 23 "github.com/kr/pretty" 24 "github.com/stretchr/testify/require" 25 "github.com/ugorji/go/codec" 26 ) 27 28 func TestClientAllocations_GarbageCollectAll_Local(t *testing.T) { 29 t.Parallel() 30 require := require.New(t) 31 32 // Start a server and client 33 s, cleanupS := TestServer(t, nil) 34 defer cleanupS() 35 codec := rpcClient(t, s) 36 testutil.WaitForLeader(t, s.RPC) 37 38 c, cleanupC := client.TestClient(t, func(c *config.Config) { 39 c.Servers = []string{s.config.RPCAddr.String()} 40 }) 41 defer cleanupC() 42 43 testutil.WaitForResult(func() (bool, error) { 44 nodes := s.connectedNodes() 45 return len(nodes) == 1, nil 46 }, func(err error) { 47 t.Fatalf("should have a clients") 48 }) 49 50 // Make the request without having a node-id 51 req := &structs.NodeSpecificRequest{ 52 QueryOptions: structs.QueryOptions{Region: "global"}, 53 } 54 55 // Fetch the response 56 var resp structs.GenericResponse 57 err := msgpackrpc.CallWithCodec(codec, "ClientAllocations.GarbageCollectAll", req, &resp) 58 require.NotNil(err) 59 require.Contains(err.Error(), "missing") 60 61 // Fetch the response setting the node id 62 req.NodeID = c.NodeID() 63 var resp2 structs.GenericResponse 64 err = msgpackrpc.CallWithCodec(codec, "ClientAllocations.GarbageCollectAll", req, &resp2) 65 require.Nil(err) 
66 } 67 68 func TestClientAllocations_GarbageCollectAll_Local_ACL(t *testing.T) { 69 t.Parallel() 70 require := require.New(t) 71 72 // Start a server 73 s, root, cleanupS := TestACLServer(t, nil) 74 defer cleanupS() 75 codec := rpcClient(t, s) 76 testutil.WaitForLeader(t, s.RPC) 77 78 // Create a bad token 79 policyBad := mock.NamespacePolicy("other", "", []string{acl.NamespaceCapabilityReadFS}) 80 tokenBad := mock.CreatePolicyAndToken(t, s.State(), 1005, "invalid", policyBad) 81 82 policyGood := mock.NodePolicy(acl.PolicyWrite) 83 tokenGood := mock.CreatePolicyAndToken(t, s.State(), 1009, "valid2", policyGood) 84 85 cases := []struct { 86 Name string 87 Token string 88 ExpectedError string 89 }{ 90 { 91 Name: "bad token", 92 Token: tokenBad.SecretID, 93 ExpectedError: structs.ErrPermissionDenied.Error(), 94 }, 95 { 96 Name: "good token", 97 Token: tokenGood.SecretID, 98 ExpectedError: "Unknown node", 99 }, 100 { 101 Name: "root token", 102 Token: root.SecretID, 103 ExpectedError: "Unknown node", 104 }, 105 } 106 107 for _, c := range cases { 108 t.Run(c.Name, func(t *testing.T) { 109 110 // Make the request without having a node-id 111 req := &structs.NodeSpecificRequest{ 112 NodeID: uuid.Generate(), 113 QueryOptions: structs.QueryOptions{ 114 AuthToken: c.Token, 115 Region: "global", 116 }, 117 } 118 119 // Fetch the response 120 var resp structs.GenericResponse 121 err := msgpackrpc.CallWithCodec(codec, "ClientAllocations.GarbageCollectAll", req, &resp) 122 require.NotNil(err) 123 require.Contains(err.Error(), c.ExpectedError) 124 }) 125 } 126 } 127 128 func TestClientAllocations_GarbageCollectAll_NoNode(t *testing.T) { 129 t.Parallel() 130 require := require.New(t) 131 132 // Start a server and client 133 s, cleanupS := TestServer(t, nil) 134 defer cleanupS() 135 codec := rpcClient(t, s) 136 testutil.WaitForLeader(t, s.RPC) 137 138 // Make the request without having a node-id 139 req := &structs.NodeSpecificRequest{ 140 NodeID: uuid.Generate(), 141 
QueryOptions: structs.QueryOptions{Region: "global"}, 142 } 143 144 // Fetch the response 145 var resp structs.GenericResponse 146 err := msgpackrpc.CallWithCodec(codec, "ClientAllocations.GarbageCollectAll", req, &resp) 147 require.NotNil(err) 148 require.Contains(err.Error(), "Unknown node") 149 } 150 151 func TestClientAllocations_GarbageCollectAll_OldNode(t *testing.T) { 152 t.Parallel() 153 require := require.New(t) 154 155 // Start a server and fake an old client 156 s, cleanupS := TestServer(t, nil) 157 defer cleanupS() 158 state := s.State() 159 codec := rpcClient(t, s) 160 testutil.WaitForLeader(t, s.RPC) 161 162 // Test for an old version error 163 node := mock.Node() 164 node.Attributes["nomad.version"] = "0.7.1" 165 require.Nil(state.UpsertNode(1005, node)) 166 167 req := &structs.NodeSpecificRequest{ 168 NodeID: node.ID, 169 QueryOptions: structs.QueryOptions{Region: "global"}, 170 } 171 172 var resp structs.GenericResponse 173 err := msgpackrpc.CallWithCodec(codec, "ClientAllocations.GarbageCollectAll", req, &resp) 174 require.True(structs.IsErrNodeLacksRpc(err)) 175 176 // Test for a missing version error 177 delete(node.Attributes, "nomad.version") 178 require.Nil(state.UpsertNode(1006, node)) 179 180 err = msgpackrpc.CallWithCodec(codec, "ClientAllocations.GarbageCollectAll", req, &resp) 181 require.True(structs.IsErrUnknownNomadVersion(err)) 182 } 183 184 func TestClientAllocations_GarbageCollectAll_Remote(t *testing.T) { 185 t.Parallel() 186 require := require.New(t) 187 188 // Start a server and client 189 s1, cleanupS1 := TestServer(t, nil) 190 defer cleanupS1() 191 s2, cleanupS2 := TestServer(t, func(c *Config) { 192 c.DevDisableBootstrap = true 193 }) 194 defer cleanupS2() 195 TestJoin(t, s1, s2) 196 testutil.WaitForLeader(t, s1.RPC) 197 testutil.WaitForLeader(t, s2.RPC) 198 codec := rpcClient(t, s2) 199 200 c, cleanupC := client.TestClient(t, func(c *config.Config) { 201 c.Servers = []string{s2.config.RPCAddr.String()} 202 
c.GCDiskUsageThreshold = 100.0 203 }) 204 defer cleanupC() 205 206 testutil.WaitForResult(func() (bool, error) { 207 nodes := s2.connectedNodes() 208 if len(nodes) != 1 { 209 return false, fmt.Errorf("should have 1 client. found %d", len(nodes)) 210 } 211 req := &structs.NodeSpecificRequest{ 212 NodeID: c.NodeID(), 213 QueryOptions: structs.QueryOptions{Region: "global"}, 214 } 215 resp := structs.SingleNodeResponse{} 216 if err := msgpackrpc.CallWithCodec(codec, "Node.GetNode", req, &resp); err != nil { 217 return false, err 218 } 219 return resp.Node != nil && resp.Node.Status == structs.NodeStatusReady, fmt.Errorf( 220 "expected ready but found %s", pretty.Sprint(resp.Node)) 221 }, func(err error) { 222 t.Fatalf("should have a clients") 223 }) 224 225 // Force remove the connection locally in case it exists 226 s1.nodeConnsLock.Lock() 227 delete(s1.nodeConns, c.NodeID()) 228 s1.nodeConnsLock.Unlock() 229 230 // Make the request 231 req := &structs.NodeSpecificRequest{ 232 NodeID: c.NodeID(), 233 QueryOptions: structs.QueryOptions{Region: "global"}, 234 } 235 236 // Fetch the response 237 var resp cstructs.ClientStatsResponse 238 err := msgpackrpc.CallWithCodec(codec, "ClientAllocations.GarbageCollectAll", req, &resp) 239 require.Nil(err) 240 } 241 242 func TestClientAllocations_GarbageCollect_OldNode(t *testing.T) { 243 t.Parallel() 244 require := require.New(t) 245 246 // Start a server and fake an old client 247 s, cleanupS := TestServer(t, nil) 248 defer cleanupS() 249 state := s.State() 250 codec := rpcClient(t, s) 251 testutil.WaitForLeader(t, s.RPC) 252 253 // Test for an old version error 254 node := mock.Node() 255 node.Attributes["nomad.version"] = "0.7.1" 256 require.Nil(state.UpsertNode(1005, node)) 257 258 alloc := mock.Alloc() 259 alloc.NodeID = node.ID 260 require.Nil(state.UpsertAllocs(1006, []*structs.Allocation{alloc})) 261 262 req := &structs.AllocSpecificRequest{ 263 AllocID: alloc.ID, 264 QueryOptions: structs.QueryOptions{ 265 Region: 
"global", 266 Namespace: structs.DefaultNamespace, 267 }, 268 } 269 270 var resp structs.GenericResponse 271 err := msgpackrpc.CallWithCodec(codec, "ClientAllocations.GarbageCollect", req, &resp) 272 require.True(structs.IsErrNodeLacksRpc(err), err.Error()) 273 274 // Test for a missing version error 275 delete(node.Attributes, "nomad.version") 276 require.Nil(state.UpsertNode(1007, node)) 277 278 err = msgpackrpc.CallWithCodec(codec, "ClientAllocations.GarbageCollect", req, &resp) 279 require.True(structs.IsErrUnknownNomadVersion(err), err.Error()) 280 } 281 282 func TestClientAllocations_GarbageCollect_Local(t *testing.T) { 283 t.Parallel() 284 require := require.New(t) 285 286 // Start a server and client 287 s, cleanupS := TestServer(t, nil) 288 defer cleanupS() 289 codec := rpcClient(t, s) 290 testutil.WaitForLeader(t, s.RPC) 291 292 c, cleanupC := client.TestClient(t, func(c *config.Config) { 293 c.Servers = []string{s.config.RPCAddr.String()} 294 c.GCDiskUsageThreshold = 100.0 295 }) 296 defer cleanupC() 297 298 // Force an allocation onto the node 299 a := mock.Alloc() 300 a.Job.Type = structs.JobTypeBatch 301 a.NodeID = c.NodeID() 302 a.Job.TaskGroups[0].Count = 1 303 a.Job.TaskGroups[0].Tasks[0] = &structs.Task{ 304 Name: "web", 305 Driver: "mock_driver", 306 Config: map[string]interface{}{ 307 "run_for": "2s", 308 }, 309 LogConfig: structs.DefaultLogConfig(), 310 Resources: &structs.Resources{ 311 CPU: 500, 312 MemoryMB: 256, 313 }, 314 } 315 316 testutil.WaitForResult(func() (bool, error) { 317 nodes := s.connectedNodes() 318 return len(nodes) == 1, nil 319 }, func(err error) { 320 t.Fatalf("should have a clients") 321 }) 322 323 // Upsert the allocation 324 state := s.State() 325 require.Nil(state.UpsertJob(999, a.Job)) 326 require.Nil(state.UpsertAllocs(1003, []*structs.Allocation{a})) 327 328 // Wait for the client to run the allocation 329 testutil.WaitForResult(func() (bool, error) { 330 alloc, err := state.AllocByID(nil, a.ID) 331 if err != nil { 
332 return false, err 333 } 334 if alloc == nil { 335 return false, fmt.Errorf("unknown alloc") 336 } 337 if alloc.ClientStatus != structs.AllocClientStatusComplete { 338 return false, fmt.Errorf("alloc client status: %v", alloc.ClientStatus) 339 } 340 341 return true, nil 342 }, func(err error) { 343 t.Fatalf("Alloc on node %q not finished: %v", c.NodeID(), err) 344 }) 345 346 // Make the request without having an alloc id 347 req := &structs.AllocSpecificRequest{ 348 QueryOptions: structs.QueryOptions{Region: "global"}, 349 } 350 351 // Fetch the response 352 var resp structs.GenericResponse 353 err := msgpackrpc.CallWithCodec(codec, "ClientAllocations.GarbageCollect", req, &resp) 354 require.NotNil(err) 355 require.Contains(err.Error(), "missing") 356 357 // Fetch the response setting the node id 358 req.AllocID = a.ID 359 var resp2 structs.GenericResponse 360 err = msgpackrpc.CallWithCodec(codec, "ClientAllocations.GarbageCollect", req, &resp2) 361 require.Nil(err) 362 } 363 364 func TestClientAllocations_GarbageCollect_Local_ACL(t *testing.T) { 365 t.Parallel() 366 367 // Start a server 368 s, root, cleanupS := TestACLServer(t, nil) 369 defer cleanupS() 370 codec := rpcClient(t, s) 371 testutil.WaitForLeader(t, s.RPC) 372 373 // Create a bad token 374 policyBad := mock.NamespacePolicy("other", "", []string{acl.NamespaceCapabilityReadFS}) 375 tokenBad := mock.CreatePolicyAndToken(t, s.State(), 1005, "invalid", policyBad) 376 377 policyGood := mock.NamespacePolicy(structs.DefaultNamespace, "", []string{acl.NamespaceCapabilitySubmitJob}) 378 tokenGood := mock.CreatePolicyAndToken(t, s.State(), 1009, "valid2", policyGood) 379 380 // Upsert the allocation 381 state := s.State() 382 alloc := mock.Alloc() 383 require.NoError(t, state.UpsertJob(1010, alloc.Job)) 384 require.NoError(t, state.UpsertAllocs(1011, []*structs.Allocation{alloc})) 385 386 cases := []struct { 387 Name string 388 Token string 389 ExpectedError string 390 }{ 391 { 392 Name: "bad token", 393 
Token: tokenBad.SecretID, 394 ExpectedError: structs.ErrPermissionDenied.Error(), 395 }, 396 { 397 Name: "good token", 398 Token: tokenGood.SecretID, 399 ExpectedError: structs.ErrUnknownNodePrefix, 400 }, 401 { 402 Name: "root token", 403 Token: root.SecretID, 404 ExpectedError: structs.ErrUnknownNodePrefix, 405 }, 406 } 407 408 for _, c := range cases { 409 t.Run(c.Name, func(t *testing.T) { 410 411 // Make the request without having a node-id 412 req := &structs.AllocSpecificRequest{ 413 AllocID: alloc.ID, 414 QueryOptions: structs.QueryOptions{ 415 AuthToken: c.Token, 416 Region: "global", 417 Namespace: structs.DefaultNamespace, 418 }, 419 } 420 421 // Fetch the response 422 var resp structs.GenericResponse 423 err := msgpackrpc.CallWithCodec(codec, "ClientAllocations.GarbageCollect", req, &resp) 424 require.NotNil(t, err) 425 require.Contains(t, err.Error(), c.ExpectedError) 426 }) 427 } 428 } 429 430 func TestClientAllocations_GarbageCollect_Remote(t *testing.T) { 431 t.Parallel() 432 require := require.New(t) 433 434 // Start a server and client 435 s1, cleanupS1 := TestServer(t, nil) 436 defer cleanupS1() 437 s2, cleanupS2 := TestServer(t, func(c *Config) { 438 c.DevDisableBootstrap = true 439 }) 440 defer cleanupS2() 441 TestJoin(t, s1, s2) 442 testutil.WaitForLeader(t, s1.RPC) 443 testutil.WaitForLeader(t, s2.RPC) 444 codec := rpcClient(t, s2) 445 446 c, cleanup := client.TestClient(t, func(c *config.Config) { 447 c.Servers = []string{s2.config.RPCAddr.String()} 448 c.GCDiskUsageThreshold = 100.0 449 }) 450 defer cleanup() 451 452 // Force an allocation onto the node 453 a := mock.Alloc() 454 a.Job.Type = structs.JobTypeBatch 455 a.NodeID = c.NodeID() 456 a.Job.TaskGroups[0].Count = 1 457 a.Job.TaskGroups[0].Tasks[0] = &structs.Task{ 458 Name: "web", 459 Driver: "mock_driver", 460 Config: map[string]interface{}{ 461 "run_for": "2s", 462 }, 463 LogConfig: structs.DefaultLogConfig(), 464 Resources: &structs.Resources{ 465 CPU: 500, 466 MemoryMB: 256, 467 
}, 468 } 469 testutil.WaitForResult(func() (bool, error) { 470 nodes := s2.connectedNodes() 471 if len(nodes) != 1 { 472 return false, fmt.Errorf("should have 1 client. found %d", len(nodes)) 473 } 474 req := &structs.NodeSpecificRequest{ 475 NodeID: c.NodeID(), 476 QueryOptions: structs.QueryOptions{Region: "global"}, 477 } 478 resp := structs.SingleNodeResponse{} 479 if err := msgpackrpc.CallWithCodec(codec, "Node.GetNode", req, &resp); err != nil { 480 return false, err 481 } 482 return resp.Node != nil && resp.Node.Status == structs.NodeStatusReady, fmt.Errorf( 483 "expected ready but found %s", pretty.Sprint(resp.Node)) 484 }, func(err error) { 485 t.Fatalf("should have a clients") 486 }) 487 488 // Upsert the allocation 489 state1 := s1.State() 490 state2 := s2.State() 491 require.Nil(state1.UpsertJob(999, a.Job)) 492 require.Nil(state1.UpsertAllocs(1003, []*structs.Allocation{a})) 493 require.Nil(state2.UpsertJob(999, a.Job)) 494 require.Nil(state2.UpsertAllocs(1003, []*structs.Allocation{a})) 495 496 // Wait for the client to run the allocation 497 testutil.WaitForResult(func() (bool, error) { 498 alloc, err := state2.AllocByID(nil, a.ID) 499 if err != nil { 500 return false, err 501 } 502 if alloc == nil { 503 return false, fmt.Errorf("unknown alloc") 504 } 505 if alloc.ClientStatus != structs.AllocClientStatusComplete { 506 return false, fmt.Errorf("alloc client status: %v", alloc.ClientStatus) 507 } 508 509 return true, nil 510 }, func(err error) { 511 t.Fatalf("Alloc on node %q not finished: %v", c.NodeID(), err) 512 }) 513 514 // Force remove the connection locally in case it exists 515 s1.nodeConnsLock.Lock() 516 delete(s1.nodeConns, c.NodeID()) 517 s1.nodeConnsLock.Unlock() 518 519 // Make the request 520 req := &structs.AllocSpecificRequest{ 521 AllocID: a.ID, 522 QueryOptions: structs.QueryOptions{Region: "global"}, 523 } 524 525 // Fetch the response 526 var resp cstructs.ClientStatsResponse 527 err := msgpackrpc.CallWithCodec(codec, 
"ClientAllocations.GarbageCollect", req, &resp) 528 require.Nil(err) 529 } 530 531 func TestClientAllocations_Stats_OldNode(t *testing.T) { 532 t.Parallel() 533 require := require.New(t) 534 535 // Start a server and fake an old client 536 s, cleanupS := TestServer(t, nil) 537 defer cleanupS() 538 state := s.State() 539 codec := rpcClient(t, s) 540 testutil.WaitForLeader(t, s.RPC) 541 542 // Test for an old version error 543 node := mock.Node() 544 node.Attributes["nomad.version"] = "0.7.1" 545 require.Nil(state.UpsertNode(1005, node)) 546 547 alloc := mock.Alloc() 548 alloc.NodeID = node.ID 549 require.Nil(state.UpsertAllocs(1006, []*structs.Allocation{alloc})) 550 551 req := &structs.AllocSpecificRequest{ 552 AllocID: alloc.ID, 553 QueryOptions: structs.QueryOptions{ 554 Region: "global", 555 }, 556 } 557 558 var resp structs.GenericResponse 559 err := msgpackrpc.CallWithCodec(codec, "ClientAllocations.Stats", req, &resp) 560 require.True(structs.IsErrNodeLacksRpc(err), err.Error()) 561 562 // Test for a missing version error 563 delete(node.Attributes, "nomad.version") 564 require.Nil(state.UpsertNode(1007, node)) 565 566 err = msgpackrpc.CallWithCodec(codec, "ClientAllocations.Stats", req, &resp) 567 require.True(structs.IsErrUnknownNomadVersion(err), err.Error()) 568 } 569 570 func TestClientAllocations_Stats_Local(t *testing.T) { 571 t.Parallel() 572 require := require.New(t) 573 574 // Start a server and client 575 s, cleanupS := TestServer(t, nil) 576 defer cleanupS() 577 codec := rpcClient(t, s) 578 testutil.WaitForLeader(t, s.RPC) 579 580 c, cleanupC := client.TestClient(t, func(c *config.Config) { 581 c.Servers = []string{s.config.RPCAddr.String()} 582 }) 583 defer cleanupC() 584 585 // Force an allocation onto the node 586 a := mock.Alloc() 587 a.Job.Type = structs.JobTypeBatch 588 a.NodeID = c.NodeID() 589 a.Job.TaskGroups[0].Count = 1 590 a.Job.TaskGroups[0].Tasks[0] = &structs.Task{ 591 Name: "web", 592 Driver: "mock_driver", 593 Config: 
map[string]interface{}{ 594 "run_for": "2s", 595 }, 596 LogConfig: structs.DefaultLogConfig(), 597 Resources: &structs.Resources{ 598 CPU: 500, 599 MemoryMB: 256, 600 }, 601 } 602 603 testutil.WaitForResult(func() (bool, error) { 604 nodes := s.connectedNodes() 605 return len(nodes) == 1, nil 606 }, func(err error) { 607 t.Fatalf("should have a clients") 608 }) 609 610 // Upsert the allocation 611 state := s.State() 612 require.Nil(state.UpsertJob(999, a.Job)) 613 require.Nil(state.UpsertAllocs(1003, []*structs.Allocation{a})) 614 615 // Wait for the client to run the allocation 616 testutil.WaitForResult(func() (bool, error) { 617 alloc, err := state.AllocByID(nil, a.ID) 618 if err != nil { 619 return false, err 620 } 621 if alloc == nil { 622 return false, fmt.Errorf("unknown alloc") 623 } 624 if alloc.ClientStatus != structs.AllocClientStatusComplete { 625 return false, fmt.Errorf("alloc client status: %v", alloc.ClientStatus) 626 } 627 628 return true, nil 629 }, func(err error) { 630 t.Fatalf("Alloc on node %q not finished: %v", c.NodeID(), err) 631 }) 632 633 // Make the request without having an alloc id 634 req := &structs.AllocSpecificRequest{ 635 QueryOptions: structs.QueryOptions{Region: "global"}, 636 } 637 638 // Fetch the response 639 var resp cstructs.AllocStatsResponse 640 err := msgpackrpc.CallWithCodec(codec, "ClientAllocations.Stats", req, &resp) 641 require.NotNil(err) 642 require.EqualError(err, structs.ErrMissingAllocID.Error(), "(%T) %v") 643 644 // Fetch the response setting the node id 645 req.AllocID = a.ID 646 var resp2 cstructs.AllocStatsResponse 647 err = msgpackrpc.CallWithCodec(codec, "ClientAllocations.Stats", req, &resp2) 648 require.Nil(err) 649 require.NotNil(resp2.Stats) 650 } 651 652 func TestClientAllocations_Stats_Local_ACL(t *testing.T) { 653 t.Parallel() 654 655 // Start a server 656 s, root, cleanupS := TestACLServer(t, nil) 657 defer cleanupS() 658 codec := rpcClient(t, s) 659 testutil.WaitForLeader(t, s.RPC) 660 661 // 
Create a bad token 662 policyBad := mock.NamespacePolicy("other", "", []string{acl.NamespaceCapabilityReadFS}) 663 tokenBad := mock.CreatePolicyAndToken(t, s.State(), 1005, "invalid", policyBad) 664 665 policyGood := mock.NamespacePolicy(structs.DefaultNamespace, "", []string{acl.NamespaceCapabilityReadJob}) 666 tokenGood := mock.CreatePolicyAndToken(t, s.State(), 1009, "valid2", policyGood) 667 668 // Upsert the allocation 669 state := s.State() 670 alloc := mock.Alloc() 671 require.NoError(t, state.UpsertJob(1010, alloc.Job)) 672 require.NoError(t, state.UpsertAllocs(1011, []*structs.Allocation{alloc})) 673 674 cases := []struct { 675 Name string 676 Token string 677 ExpectedError string 678 }{ 679 { 680 Name: "bad token", 681 Token: tokenBad.SecretID, 682 ExpectedError: structs.ErrPermissionDenied.Error(), 683 }, 684 { 685 Name: "good token", 686 Token: tokenGood.SecretID, 687 ExpectedError: structs.ErrUnknownNodePrefix, 688 }, 689 { 690 Name: "root token", 691 Token: root.SecretID, 692 ExpectedError: structs.ErrUnknownNodePrefix, 693 }, 694 } 695 696 for _, c := range cases { 697 t.Run(c.Name, func(t *testing.T) { 698 699 // Make the request without having a node-id 700 req := &structs.AllocSpecificRequest{ 701 AllocID: alloc.ID, 702 QueryOptions: structs.QueryOptions{ 703 AuthToken: c.Token, 704 Region: "global", 705 Namespace: structs.DefaultNamespace, 706 }, 707 } 708 709 // Fetch the response 710 var resp cstructs.AllocStatsResponse 711 err := msgpackrpc.CallWithCodec(codec, "ClientAllocations.Stats", req, &resp) 712 require.NotNil(t, err) 713 require.Contains(t, err.Error(), c.ExpectedError) 714 }) 715 } 716 } 717 718 func TestClientAllocations_Stats_Remote(t *testing.T) { 719 t.Parallel() 720 require := require.New(t) 721 722 // Start a server and client 723 s1, cleanupS1 := TestServer(t, nil) 724 defer cleanupS1() 725 s2, cleanupS2 := TestServer(t, func(c *Config) { 726 c.DevDisableBootstrap = true 727 }) 728 defer cleanupS2() 729 TestJoin(t, s1, s2) 730 
testutil.WaitForLeader(t, s1.RPC) 731 testutil.WaitForLeader(t, s2.RPC) 732 codec := rpcClient(t, s2) 733 734 c, cleanupC := client.TestClient(t, func(c *config.Config) { 735 c.Servers = []string{s2.config.RPCAddr.String()} 736 }) 737 defer cleanupC() 738 739 // Force an allocation onto the node 740 a := mock.Alloc() 741 a.Job.Type = structs.JobTypeBatch 742 a.NodeID = c.NodeID() 743 a.Job.TaskGroups[0].Count = 1 744 a.Job.TaskGroups[0].Tasks[0] = &structs.Task{ 745 Name: "web", 746 Driver: "mock_driver", 747 Config: map[string]interface{}{ 748 "run_for": "2s", 749 }, 750 LogConfig: structs.DefaultLogConfig(), 751 Resources: &structs.Resources{ 752 CPU: 500, 753 MemoryMB: 256, 754 }, 755 } 756 testutil.WaitForResult(func() (bool, error) { 757 nodes := s2.connectedNodes() 758 return len(nodes) == 1, nil 759 }, func(err error) { 760 t.Fatalf("should have a clients") 761 }) 762 763 // Upsert the allocation 764 state1 := s1.State() 765 state2 := s2.State() 766 require.Nil(state1.UpsertJob(999, a.Job)) 767 require.Nil(state1.UpsertAllocs(1003, []*structs.Allocation{a})) 768 require.Nil(state2.UpsertJob(999, a.Job)) 769 require.Nil(state2.UpsertAllocs(1003, []*structs.Allocation{a})) 770 771 // Wait for the client to run the allocation 772 testutil.WaitForResult(func() (bool, error) { 773 alloc, err := state2.AllocByID(nil, a.ID) 774 if err != nil { 775 return false, err 776 } 777 if alloc == nil { 778 return false, fmt.Errorf("unknown alloc") 779 } 780 if alloc.ClientStatus != structs.AllocClientStatusComplete { 781 return false, fmt.Errorf("alloc client status: %v", alloc.ClientStatus) 782 } 783 784 return true, nil 785 }, func(err error) { 786 t.Fatalf("Alloc on node %q not finished: %v", c.NodeID(), err) 787 }) 788 789 // Force remove the connection locally in case it exists 790 s1.nodeConnsLock.Lock() 791 delete(s1.nodeConns, c.NodeID()) 792 s1.nodeConnsLock.Unlock() 793 794 // Make the request 795 req := &structs.AllocSpecificRequest{ 796 AllocID: a.ID, 797 
QueryOptions: structs.QueryOptions{Region: "global"}, 798 } 799 800 // Fetch the response 801 var resp cstructs.AllocStatsResponse 802 err := msgpackrpc.CallWithCodec(codec, "ClientAllocations.Stats", req, &resp) 803 require.Nil(err) 804 require.NotNil(resp.Stats) 805 } 806 807 func TestClientAllocations_Restart_Local(t *testing.T) { 808 t.Parallel() 809 require := require.New(t) 810 811 // Start a server and client 812 s, cleanupS := TestServer(t, nil) 813 defer cleanupS() 814 codec := rpcClient(t, s) 815 testutil.WaitForLeader(t, s.RPC) 816 817 c, cleanupC := client.TestClient(t, func(c *config.Config) { 818 c.Servers = []string{s.config.RPCAddr.String()} 819 c.GCDiskUsageThreshold = 100.0 820 }) 821 defer cleanupC() 822 823 // Force an allocation onto the node 824 a := mock.Alloc() 825 a.Job.Type = structs.JobTypeService 826 a.NodeID = c.NodeID() 827 a.Job.TaskGroups[0].Count = 1 828 a.Job.TaskGroups[0].Tasks[0] = &structs.Task{ 829 Name: "web", 830 Driver: "mock_driver", 831 Config: map[string]interface{}{ 832 "run_for": "10s", 833 }, 834 LogConfig: structs.DefaultLogConfig(), 835 Resources: &structs.Resources{ 836 CPU: 500, 837 MemoryMB: 256, 838 }, 839 } 840 841 testutil.WaitForResult(func() (bool, error) { 842 nodes := s.connectedNodes() 843 return len(nodes) == 1, nil 844 }, func(err error) { 845 t.Fatalf("should have a client") 846 }) 847 848 // Upsert the allocation 849 state := s.State() 850 require.Nil(state.UpsertJob(999, a.Job)) 851 require.Nil(state.UpsertAllocs(1003, []*structs.Allocation{a})) 852 853 // Wait for the client to run the allocation 854 testutil.WaitForResult(func() (bool, error) { 855 alloc, err := state.AllocByID(nil, a.ID) 856 if err != nil { 857 return false, err 858 } 859 if alloc == nil { 860 return false, fmt.Errorf("unknown alloc") 861 } 862 if alloc.ClientStatus != structs.AllocClientStatusRunning { 863 return false, fmt.Errorf("alloc client status: %v", alloc.ClientStatus) 864 } 865 866 return true, nil 867 }, func(err error) 
{ 868 t.Fatalf("Alloc on node %q not running: %v", c.NodeID(), err) 869 }) 870 871 // Make the request without having an alloc id 872 req := &structs.AllocRestartRequest{ 873 QueryOptions: structs.QueryOptions{Region: "global"}, 874 } 875 876 // Fetch the response 877 var resp structs.GenericResponse 878 err := msgpackrpc.CallWithCodec(codec, "ClientAllocations.Restart", req, &resp) 879 require.NotNil(err) 880 require.EqualError(err, structs.ErrMissingAllocID.Error(), "(%T) %v") 881 882 // Fetch the response setting the alloc id - This should not error because the 883 // alloc is running. 884 req.AllocID = a.ID 885 var resp2 structs.GenericResponse 886 err = msgpackrpc.CallWithCodec(codec, "ClientAllocations.Restart", req, &resp2) 887 require.Nil(err) 888 889 testutil.WaitForResult(func() (bool, error) { 890 alloc, err := state.AllocByID(nil, a.ID) 891 if err != nil { 892 return false, err 893 } 894 if alloc == nil { 895 return false, fmt.Errorf("unknown alloc") 896 } 897 898 taskState := alloc.TaskStates["web"] 899 if taskState == nil { 900 return false, fmt.Errorf("could not find task state") 901 } 902 903 if taskState.Restarts != 1 { 904 return false, fmt.Errorf("expected task 'web' to have 1 restart, got: %d", taskState.Restarts) 905 } 906 907 return true, nil 908 }, func(err error) { 909 t.Fatalf("Alloc on node %q not running: %v", c.NodeID(), err) 910 }) 911 } 912 913 func TestClientAllocations_Restart_Remote(t *testing.T) { 914 t.Parallel() 915 require := require.New(t) 916 917 // Start a server and client 918 s1, cleanupS1 := TestServer(t, nil) 919 defer cleanupS1() 920 s2, cleanupS2 := TestServer(t, func(c *Config) { 921 c.DevDisableBootstrap = true 922 }) 923 defer cleanupS2() 924 TestJoin(t, s1, s2) 925 testutil.WaitForLeader(t, s1.RPC) 926 testutil.WaitForLeader(t, s2.RPC) 927 codec := rpcClient(t, s2) 928 929 c, cleanupC := client.TestClient(t, func(c *config.Config) { 930 c.Servers = []string{s2.config.RPCAddr.String()} 931 }) 932 defer cleanupC() 933 
934 // Force an allocation onto the node 935 a := mock.Alloc() 936 a.Job.Type = structs.JobTypeService 937 a.NodeID = c.NodeID() 938 a.Job.TaskGroups[0].Count = 1 939 a.Job.TaskGroups[0].Tasks[0] = &structs.Task{ 940 Name: "web", 941 Driver: "mock_driver", 942 Config: map[string]interface{}{ 943 "run_for": "10s", 944 }, 945 LogConfig: structs.DefaultLogConfig(), 946 Resources: &structs.Resources{ 947 CPU: 500, 948 MemoryMB: 256, 949 }, 950 } 951 952 testutil.WaitForResult(func() (bool, error) { 953 nodes := s2.connectedNodes() 954 return len(nodes) == 1, nil 955 }, func(err error) { 956 t.Fatalf("should have a client") 957 }) 958 959 // Upsert the allocation 960 state1 := s1.State() 961 state2 := s2.State() 962 require.Nil(state1.UpsertJob(999, a.Job)) 963 require.Nil(state1.UpsertAllocs(1003, []*structs.Allocation{a})) 964 require.Nil(state2.UpsertJob(999, a.Job)) 965 require.Nil(state2.UpsertAllocs(1003, []*structs.Allocation{a})) 966 967 // Wait for the client to run the allocation 968 testutil.WaitForResult(func() (bool, error) { 969 alloc, err := state2.AllocByID(nil, a.ID) 970 if err != nil { 971 return false, err 972 } 973 if alloc == nil { 974 return false, fmt.Errorf("unknown alloc") 975 } 976 if alloc.ClientStatus != structs.AllocClientStatusRunning { 977 return false, fmt.Errorf("alloc client status: %v", alloc.ClientStatus) 978 } 979 980 return true, nil 981 }, func(err error) { 982 t.Fatalf("Alloc on node %q not running: %v", c.NodeID(), err) 983 }) 984 985 // Make the request without having an alloc id 986 req := &structs.AllocRestartRequest{ 987 QueryOptions: structs.QueryOptions{Region: "global"}, 988 } 989 990 // Fetch the response 991 var resp structs.GenericResponse 992 err := msgpackrpc.CallWithCodec(codec, "ClientAllocations.Restart", req, &resp) 993 require.NotNil(err) 994 require.EqualError(err, structs.ErrMissingAllocID.Error(), "(%T) %v") 995 996 // Fetch the response setting the alloc id - This should succeed because the 997 // alloc is 
running 998 req.AllocID = a.ID 999 var resp2 structs.GenericResponse 1000 err = msgpackrpc.CallWithCodec(codec, "ClientAllocations.Restart", req, &resp2) 1001 require.NoError(err) 1002 }

// TestClientAllocations_Restart_ACL asserts that ClientAllocations.Restart
// rejects a token that lacks access to the allocation's namespace with a
// permission-denied error, while a namespace-write token and the root token
// pass the ACL check and fail later with an "Unknown node" error.
func TestClientAllocations_Restart_ACL(t *testing.T) {
	// Start a server
	s, root, cleanupS := TestACLServer(t, nil)
	defer cleanupS()
	codec := rpcClient(t, s)
	testutil.WaitForLeader(t, s.RPC)

	// Create a bad token: it only grants read-fs on a different namespace.
	policyBad := mock.NamespacePolicy("other", "", []string{acl.NamespaceCapabilityReadFS})
	tokenBad := mock.CreatePolicyAndToken(t, s.State(), 1005, "invalid", policyBad)

	// Create a good token with write access to the default namespace.
	policyGood := mock.NamespacePolicy(structs.DefaultNamespace, acl.PolicyWrite, nil)
	tokenGood := mock.CreatePolicyAndToken(t, s.State(), 1009, "valid2", policyGood)

	// Upsert the allocation
	state := s.State()
	alloc := mock.Alloc()
	require.NoError(t, state.UpsertJob(1010, alloc.Job))
	require.NoError(t, state.UpsertAllocs(1011, []*structs.Allocation{alloc}))

	cases := []struct {
		Name          string
		Token         string
		ExpectedError string
	}{
		{
			Name:          "bad token",
			Token:         tokenBad.SecretID,
			ExpectedError: structs.ErrPermissionDenied.Error(),
		},
		{
			Name:          "good token",
			Token:         tokenGood.SecretID,
			ExpectedError: "Unknown node",
		},
		{
			Name:          "root token",
			Token:         root.SecretID,
			ExpectedError: "Unknown node",
		},
	}

	for _, c := range cases {
		t.Run(c.Name, func(t *testing.T) {

			// Make the request without having a node-id
			req := &structs.AllocRestartRequest{
				AllocID: alloc.ID,
				QueryOptions: structs.QueryOptions{
					Namespace: structs.DefaultNamespace,
					AuthToken: c.Token,
					Region:    "global",
				},
			}

			// Fetch the response; every case is expected to fail, only the
			// reason differs.
			var resp structs.GenericResponse
			err := msgpackrpc.CallWithCodec(codec, "ClientAllocations.Restart", req, &resp)
			require.Error(t, err)
			require.Contains(t, err.Error(), c.ExpectedError)
		})
	}
}

// TestAlloc_ExecStreaming asserts that exec task requests are forwarded
// to appropriate server or remote regions
func TestAlloc_ExecStreaming(t *testing.T) {
	t.Parallel()

	// Nomad clusters topology - not specific to test
	localServer, cleanupLS := TestServer(t, nil)
	defer cleanupLS()

	remoteServer, cleanupRS := TestServer(t, func(c *Config) {
		c.DevDisableBootstrap = true
	})
	defer cleanupRS()

	remoteRegionServer, cleanupRRS := TestServer(t, func(c *Config) {
		c.Region = "two"
	})
	defer cleanupRRS()

	TestJoin(t, localServer, remoteServer)
	TestJoin(t, localServer, remoteRegionServer)
	testutil.WaitForLeader(t, localServer.RPC)
	testutil.WaitForLeader(t, remoteServer.RPC)
	testutil.WaitForLeader(t, remoteRegionServer.RPC)

	c, cleanup := client.TestClient(t, func(c *config.Config) {
		c.Servers = []string{localServer.config.RPCAddr.String()}
	})
	defer cleanup()

	// Wait for the client to connect. The poll returns an error while the
	// condition is unmet because the failure callback only fails the test
	// when it is handed a non-nil error.
	testutil.WaitForResult(func() (bool, error) {
		nodes := remoteServer.connectedNodes()
		if len(nodes) != 1 {
			return false, fmt.Errorf("expected 1 connected node, found %d", len(nodes))
		}
		return true, nil
	}, func(err error) {
		require.NoError(t, err, "failed to have a client")
	})

	// Force remove the connection locally in case it exists
	remoteServer.nodeConnsLock.Lock()
	delete(remoteServer.nodeConns, c.NodeID())
	remoteServer.nodeConnsLock.Unlock()

	// Start task
	a := mock.BatchAlloc()
	a.NodeID = c.NodeID()
	a.Job.Type = structs.JobTypeBatch
	a.Job.TaskGroups[0].Count = 1
	a.Job.TaskGroups[0].Tasks[0].Config = map[string]interface{}{
		"run_for": "20s",
		"exec_command": map[string]interface{}{
			"run_for":       "1ms",
			"stdout_string": "expected output",
			"exit_code":     3,
		},
	}

	// Upsert the allocation
	localState := localServer.State()
	require.NoError(t, localState.UpsertJob(999, a.Job))
	require.NoError(t, localState.UpsertAllocs(1003, []*structs.Allocation{a}))
	remoteState := remoteServer.State()
	require.NoError(t, remoteState.UpsertJob(999, a.Job))
	require.NoError(t, remoteState.UpsertAllocs(1003, []*structs.Allocation{a}))

	// Wait for the client to run the allocation
	testutil.WaitForResult(func() (bool, error) {
		alloc, err := localState.AllocByID(nil, a.ID)
		if err != nil {
			return false, err
		}
		if alloc == nil {
			return false, fmt.Errorf("unknown alloc")
		}
		if alloc.ClientStatus != structs.AllocClientStatusRunning {
			return false, fmt.Errorf("alloc client status: %v", alloc.ClientStatus)
		}

		return true, nil
	}, func(err error) {
		require.NoError(t, err, "task didn't start yet")
	})

	// Actually run query now
	cases := []struct {
		name string
		rpc  func(string) (structs.StreamingRpcHandler, error)
	}{
		{"client", c.StreamingRpcHandler},
		{"local_server", localServer.StreamingRpcHandler},
		{"remote_server", remoteServer.StreamingRpcHandler},
		{"remote_region", remoteRegionServer.StreamingRpcHandler},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {

			// Make the request
			req := &cstructs.AllocExecRequest{
				AllocID:      a.ID,
				Task:         a.Job.TaskGroups[0].Tasks[0].Name,
				Tty:          true,
				Cmd:          []string{"placeholder command"},
				QueryOptions: nstructs.QueryOptions{Region: "global"},
			}

			// Get the handler
			handler, err := tc.rpc("Allocations.Exec")
			require.NoError(t, err)

			// Create a pipe
			p1, p2 := net.Pipe()
			defer p1.Close()
			defer p2.Close()

			errCh := make(chan error)
			frames := make(chan *drivers.ExecTaskStreamingResponseMsg)

			// Start the handler
			go handler(p2)
			go decodeFrames(t, p1, frames, errCh)

			// Send the request
			encoder := codec.NewEncoder(p1, nstructs.MsgpackHandle)
			require.NoError(t, encoder.Encode(req))

			timeout := time.After(3 * time.Second)

		OUTER:
			for {
				select {
				case <-timeout:
					require.FailNow(t, "timed out before getting exit code")
				case err := <-errCh:
					require.NoError(t, err)
				case f := <-frames:
					// Only the frame carrying the exit result ends the loop.
					if f.Exited && f.Result != nil {
						code := int(f.Result.ExitCode)
						require.Equal(t, 3, code)
						break OUTER
					}
				}
			}
		})
	}
}

// decodeFrames decodes msgpack-encoded StreamErrWrapper messages from p1 in a
// loop. A wrapper carrying an error is forwarded on errCh; otherwise its
// payload is JSON-decoded into an ExecTaskStreamingResponseMsg and sent on
// frames. The loop returns quietly on io.EOF or an error mentioning "closed",
// and returns after reporting any other decode error on errCh.
func decodeFrames(t *testing.T, p1 net.Conn, frames chan<- *drivers.ExecTaskStreamingResponseMsg, errCh chan<- error) {
	// Start the decoder
	decoder := codec.NewDecoder(p1, nstructs.MsgpackHandle)

	for {
		var msg cstructs.StreamErrWrapper
		if err := decoder.Decode(&msg); err != nil {
			if err == io.EOF || strings.Contains(err.Error(), "closed") {
				return
			}
			t.Logf("received error decoding: %#v", err)

			errCh <- fmt.Errorf("error decoding: %v", err)
			return
		}

		if msg.Error != nil {
			errCh <- msg.Error
			continue
		}

		// Report a malformed payload instead of forwarding a zero-valued
		// frame that would hide the problem from the caller.
		var frame drivers.ExecTaskStreamingResponseMsg
		if err := json.Unmarshal(msg.Payload, &frame); err != nil {
			t.Logf("received error unmarshalling payload: %#v", err)
			errCh <- fmt.Errorf("error unmarshalling frame payload: %v", err)
			continue
		}
		t.Logf("received message: %#v", msg)
		frames <- &frame
	}
}