github.com/hashicorp/nomad/api@v0.0.0-20240306165712-3193ac204f65/nodes_test.go

// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: MPL-2.0

package api

import (
	"context"
	"fmt"
	"sort"
	"testing"
	"time"

	"github.com/hashicorp/nomad/api/internal/testutil"
	"github.com/shoenig/test/must"
	"github.com/shoenig/test/wait"
)

// queryNodeList lists nodes, retrying until at least one node is returned or
// the wait times out, and returns the node list stubs plus query metadata.
func queryNodeList(t *testing.T, nodes *Nodes) ([]*NodeListStub, *QueryMeta) {
	t.Helper()
	var (
		nodeListStub []*NodeListStub
		queryMeta    *QueryMeta
		err          error
	)

	f := func() error {
		nodeListStub, queryMeta, err = nodes.List(nil)
		if err != nil {
			return fmt.Errorf("failed to list nodes: %w", err)
		}
		if len(nodeListStub) == 0 {
			return fmt.Errorf("no nodes yet")
		}
		return nil
	}

	must.Wait(t, wait.InitialSuccess(
		wait.ErrorFunc(f),
		wait.Timeout(10*time.Second),
		wait.Gap(1*time.Second),
	))

	return nodeListStub, queryMeta
}

// oneNodeFromNodeList waits for the node list to contain exactly one node and
// returns it.
func oneNodeFromNodeList(t *testing.T, nodes *Nodes) *NodeListStub {
	nodeListStub, _ := queryNodeList(t, nodes)
	must.Len(t, 1, nodeListStub, must.Sprint("expected 1 node"))
	return nodeListStub[0]
}

func TestNodes_List(t *testing.T) {
	testutil.Parallel(t)

	c, s := makeClient(t, nil, func(c *testutil.TestServerConfig) {
		c.DevMode = true
	})
	defer s.Stop()
	nodes := c.Nodes()

	nodeListStub, queryMeta := queryNodeList(t, nodes)
	must.Len(t, 1, nodeListStub)
	must.Eq(t, NodePoolDefault, nodeListStub[0].NodePool)

	// Check that we got valid QueryMeta.
	assertQueryMeta(t, queryMeta)
}

func TestNodes_PrefixList(t *testing.T) {
	testutil.Parallel(t)

	c, s := makeClient(t, nil, func(c *testutil.TestServerConfig) {
		c.DevMode = true
	})
	defer s.Stop()
	nodes := c.Nodes()

	// Get the node ID
	nodeID := oneNodeFromNodeList(t, nodes).ID

	// Find node based on four character prefix
	out, qm, err := nodes.PrefixList(nodeID[:4])
	must.NoError(t, err)
	must.Len(t, 1, out, must.Sprint("expected only 1 node"))

	// Check that we got valid QueryMeta.
	assertQueryMeta(t, qm)
}

// TestNodes_List_Resources asserts that ?resources=true includes allocated and
// reserved resources in the response.
func TestNodes_List_Resources(t *testing.T) {
	testutil.Parallel(t)

	c, s := makeClient(t, nil, func(c *testutil.TestServerConfig) {
		c.DevMode = true
	})
	defer s.Stop()
	nodes := c.Nodes()

	node := oneNodeFromNodeList(t, nodes)

	// By default resources should *not* be included
	must.Nil(t, node.NodeResources)
	must.Nil(t, node.ReservedResources)

	qo := &QueryOptions{
		Params: map[string]string{"resources": "true"},
	}

	out, _, err := nodes.List(qo)
	must.NoError(t, err)
	must.NotNil(t, out[0].NodeResources)
	must.NotNil(t, out[0].ReservedResources)
}

func TestNodes_Info(t *testing.T) {
	testutil.Parallel(t)

	startTime := time.Now().Unix()
	c, s := makeClient(t, nil, func(c *testutil.TestServerConfig) {
		c.DevMode = true
	})
	defer s.Stop()
	nodes := c.Nodes()

	// Retrieving a nonexistent node returns error
	_, _, infoErr := nodes.Info("12345678-abcd-efab-cdef-123456789abc", nil)
	must.ErrorContains(t, infoErr, "not found")

	// Get the node ID and DC
	node := oneNodeFromNodeList(t, nodes)
	nodeID, dc := node.ID, node.Datacenter

	// Querying for existing nodes returns properly
	result, qm, err := nodes.Info(nodeID, nil)
	must.NoError(t, err)

	assertQueryMeta(t, qm)

	// Check that the result is what we expect
	must.Eq(t, nodeID, result.ID)
	must.Eq(t, dc, result.Datacenter)
	must.Eq(t, NodePoolDefault, result.NodePool)

	must.Eq(t, 20000, result.NodeResources.MinDynamicPort)
	must.Eq(t, 32000, result.NodeResources.MaxDynamicPort)

	// Check that the StatusUpdatedAt field is being populated correctly
	must.Less(t, result.StatusUpdatedAt, startTime)

	// check we have at least one event
	must.GreaterEq(t, 1, len(result.Events))
}

func TestNodes_NoSecretID(t *testing.T) {
	testutil.Parallel(t)

	c, s := makeClient(t, nil, func(c *testutil.TestServerConfig) {
		c.DevMode = true
	})
	defer s.Stop()
	nodes := c.Nodes()

	// Get the node ID
	nodeID := oneNodeFromNodeList(t, nodes).ID

	// perform a raw http call and make sure that:
	// - "ID" to make sure that raw decoding is working correctly
	// - "SecretID" to make sure it's not present
	resp := make(map[string]interface{})
	_, err := c.query("/v1/node/"+nodeID, &resp, nil)
	must.NoError(t, err)
	must.Eq(t, nodeID, resp["ID"].(string))
	must.Eq(t, "", resp["SecretID"])
}

func TestNodes_ToggleDrain(t *testing.T) {
	testutil.Parallel(t)

	c, s := makeClient(t, nil, func(c *testutil.TestServerConfig) {
		c.DevMode = true
	})
	defer s.Stop()
	nodes := c.Nodes()

	// Wait for node registration and get the ID
	nodeID := oneNodeFromNodeList(t, nodes).ID

	// Check for drain mode
	out, _, err := nodes.Info(nodeID, nil)
	must.NoError(t, err)
	must.False(t, out.Drain)
	must.Nil(t, out.LastDrain)

	// Toggle it on
	timeBeforeDrain := time.Now().Add(-1 * time.Second)
	spec := &DrainSpec{
		Deadline: 10 * time.Second,
	}
	drainMeta := map[string]string{
		"reason": "this node needs to go",
	}
	drainOut, err := nodes.UpdateDrainOpts(nodeID, &DrainOptions{
		DrainSpec:    spec,
		MarkEligible: false,
		Meta:         drainMeta,
	}, nil)
	must.NoError(t, err)
	assertWriteMeta(t, &drainOut.WriteMeta)

	// Drain may have completed before we can check, use event stream
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	streamCh, err := c.EventStream().Stream(ctx, map[Topic][]string{
		TopicNode: {nodeID},
	}, 0, nil)
	must.NoError(t, err)

	// we expect to see the node change to Drain:true and then back to Drain:false+ineligible
	var sawDraining, sawDrainComplete uint64
	for sawDrainComplete == 0 {
		select {
		case events := <-streamCh:
			must.NoError(t, events.Err)
			for _, e := range events.Events {
				node, err := e.Node()
				must.NoError(t, err)
				must.Eq(t, node.DrainStrategy != nil, node.Drain)
				must.True(t, !node.Drain || node.SchedulingEligibility == NodeSchedulingIneligible) // node.Drain => "ineligible"
				if node.Drain && node.SchedulingEligibility == NodeSchedulingIneligible {
					must.NotNil(t, node.LastDrain)
					must.Eq(t, DrainStatusDraining, node.LastDrain.Status)
					now := time.Now()
					must.False(t, node.LastDrain.StartedAt.Before(timeBeforeDrain))
					must.False(t, node.LastDrain.StartedAt.After(now))
					must.Eq(t, drainMeta, node.LastDrain.Meta)
					sawDraining = node.ModifyIndex
				} else if sawDraining != 0 && !node.Drain && node.SchedulingEligibility == NodeSchedulingIneligible {
					must.NotNil(t, node.LastDrain)
					must.Eq(t, DrainStatusComplete, node.LastDrain.Status)
					must.True(t, !node.LastDrain.UpdatedAt.Before(node.LastDrain.StartedAt))
					must.Eq(t, drainMeta, node.LastDrain.Meta)
					sawDrainComplete = node.ModifyIndex
				}
			}
		case <-time.After(5 * time.Second):
			must.Unreachable(t, must.Sprint("waiting on stream event that never happened"))
		}
	}

	// Toggle off again
	drainOut, err = nodes.UpdateDrain(nodeID, nil, true, nil)
	must.NoError(t, err)
	assertWriteMeta(t, &drainOut.WriteMeta)

	// Check again
	out, _, err = nodes.Info(nodeID, nil)
	must.NoError(t, err)
	must.False(t, out.Drain)
	must.Nil(t, out.DrainStrategy)
	must.Eq(t, NodeSchedulingEligible, out.SchedulingEligibility)
}

func TestNodes_ToggleEligibility(t *testing.T) {
	testutil.Parallel(t)

	c, s := makeClient(t, nil, func(c *testutil.TestServerConfig) {
		c.DevMode = true
	})
	defer s.Stop()
	nodes := c.Nodes()

	// Get node ID
	nodeID := oneNodeFromNodeList(t, nodes).ID

	// Check for eligibility
	out, _, err := nodes.Info(nodeID, nil)
	must.NoError(t, err)
	must.Eq(t, NodeSchedulingEligible, out.SchedulingEligibility)

	// Toggle it off
	eligOut, err := nodes.ToggleEligibility(nodeID, false, nil)
	must.NoError(t, err)
	assertWriteMeta(t, &eligOut.WriteMeta)

	// Check again
	out, _, err = nodes.Info(nodeID, nil)
	must.NoError(t, err)
	must.Eq(t, NodeSchedulingIneligible, out.SchedulingEligibility)

	// Toggle on
	eligOut, err = nodes.ToggleEligibility(nodeID, true, nil)
	must.NoError(t, err)
	assertWriteMeta(t, &eligOut.WriteMeta)

	// Check again
	out, _, err = nodes.Info(nodeID, nil)
	must.NoError(t, err)
	must.Eq(t, NodeSchedulingEligible, out.SchedulingEligibility)
	must.Nil(t, out.DrainStrategy)
}

func TestNodes_Allocations(t *testing.T) {
	testutil.Parallel(t)

	c, s := makeClient(t, nil, func(c *testutil.TestServerConfig) {
		c.DevMode = true
	})
	defer s.Stop()
	nodes := c.Nodes()

	// Looking up by a nonexistent node returns nothing. We don't
	// check the index here because it's possible the node has already
	// registered, in which case we will get a non-zero result anyway.
	allocations, _, err := nodes.Allocations("nope", nil)
	must.NoError(t, err)
	must.Len(t, 0, allocations)
}

func TestNodes_ForceEvaluate(t *testing.T) {
	testutil.Parallel(t)

	c, s := makeClient(t, nil, func(c *testutil.TestServerConfig) {
		c.DevMode = true
	})
	defer s.Stop()
	nodes := c.Nodes()

	// Force-eval on a nonexistent node fails
	_, _, err := nodes.ForceEvaluate("12345678-abcd-efab-cdef-123456789abc", nil)
	must.ErrorContains(t, err, "not found")

	// Wait for node registration and get the ID
	nodeID := oneNodeFromNodeList(t, nodes).ID

	// Try force-eval again. We don't check the WriteMeta because
	// there are no allocations to process, so we would get an index
	// of zero. Same goes for the eval ID.
	_, _, err = nodes.ForceEvaluate(nodeID, nil)
	must.NoError(t, err)
}

func TestNodes_Sort(t *testing.T) {
	testutil.Parallel(t)

	nodes := []*NodeListStub{
		{CreateIndex: 2},
		{CreateIndex: 1},
		{CreateIndex: 5},
	}
	sort.Sort(NodeIndexSort(nodes))

	expect := []*NodeListStub{
		{CreateIndex: 5},
		{CreateIndex: 2},
		{CreateIndex: 1},
	}
	must.Eq(t, expect, nodes)
}

// Unittest monitorDrainMultiplex when an error occurs
func TestNodes_MonitorDrain_Multiplex_Bad(t *testing.T) {
	testutil.Parallel(t)

	ctx := context.Background()
	multiplexCtx, cancel := context.WithCancel(ctx)

	// monitorDrainMultiplex doesn't require anything on *Nodes, so we
	// don't need to use a full Client
	var nodeClient *Nodes

	outCh := make(chan *MonitorMessage, 8)
	nodeCh := make(chan *MonitorMessage, 1)
	allocCh := make(chan *MonitorMessage, 8)
	exitedCh := make(chan struct{})
	go func() {
		defer close(exitedCh)
		nodeClient.monitorDrainMultiplex(ctx, cancel, outCh, nodeCh, allocCh)
	}()

	// Fake an alloc update
	msg := Messagef(0, "alloc update")
	allocCh <- msg
	must.Eq(t, msg, <-outCh)

	// Fake a node update
	msg = Messagef(0, "node update")
	nodeCh <- msg
	must.Eq(t, msg, <-outCh)

	// Fake an error that should shut everything down
	msg = Messagef(MonitorMsgLevelError, "fake error")
	nodeCh <- msg
	must.Eq(t, msg, <-outCh)

	_, ok := <-exitedCh
	must.False(t, ok)

	_, ok = <-outCh
	must.False(t, ok)

	// Exiting should also cancel the context that would be passed to the
	// node & alloc watchers
	select {
	case <-multiplexCtx.Done():
	case <-time.After(100 * time.Millisecond):
		must.Unreachable(t, must.Sprint("multiplex context was not cancelled"))
	}
}

// Unittest monitorDrainMultiplex when drain finishes
func TestNodes_MonitorDrain_Multiplex_Good(t *testing.T) {
	testutil.Parallel(t)

	ctx := context.Background()
	multiplexCtx, cancel := context.WithCancel(ctx)

	// monitorDrainMultiplex doesn't require anything on *Nodes, so we
	// don't need to use a full Client
	var nodeClient *Nodes

	outCh := make(chan *MonitorMessage, 8)
	nodeCh := make(chan *MonitorMessage, 1)
	allocCh := make(chan *MonitorMessage, 8)
	exitedCh := make(chan struct{})
	go func() {
		defer close(exitedCh)
		nodeClient.monitorDrainMultiplex(ctx, cancel, outCh, nodeCh, allocCh)
	}()

	// Fake a node updating and finishing
	msg := Messagef(MonitorMsgLevelInfo, "node update")
	nodeCh <- msg
	close(nodeCh)
	must.Eq(t, msg, <-outCh)

	// Nothing else should have exited yet
	select {
	case badMsg, ok := <-outCh:
		must.False(t, ok, must.Sprintf("unexpected output %v", badMsg))
		must.Unreachable(t, must.Sprint("out channel closed unexpectedly"))
	case <-exitedCh:
		must.Unreachable(t, must.Sprint("multiplexer exited unexpectedly"))
	case <-multiplexCtx.Done():
		must.Unreachable(t, must.Sprint("multiplexer context canceled unexpectedly"))
	case <-time.After(10 * time.Millisecond):
		t.Logf("multiplexer still running as expected")
	}

	// Fake an alloc update coming in after the node monitor has finished
	msg = Messagef(0, "alloc update")
	allocCh <- msg
	must.Eq(t, msg, <-outCh)

	// Closing the allocCh should cause everything to exit
	close(allocCh)

	_, ok := <-exitedCh
	must.False(t, ok)

	_, ok = <-outCh
	must.False(t, ok)

	// Exiting should also cancel the context that would be passed to the
	// node & alloc watchers
	select {
	case <-multiplexCtx.Done():
	case <-time.After(100 * time.Millisecond):
		must.Unreachable(t, must.Sprint("context was not cancelled"))
	}
}

func TestNodes_DrainStrategy_Equal(t *testing.T) {
	testutil.Parallel(t)

	// nil
	var d *DrainStrategy
	must.Equal(t, nil, d)

	o := &DrainStrategy{}
	must.NotEqual(t, d, o)
	must.NotEqual(t, o, d)

	d = &DrainStrategy{}
	must.Equal(t, d, o)
	must.Equal(t, o, d)

	// ForceDeadline
	d.ForceDeadline = time.Now()
	must.NotEqual(t, d, o)

	o.ForceDeadline = d.ForceDeadline
	must.Equal(t, d, o)

	// Deadline
	d.Deadline = 1
	must.NotEqual(t, d, o)

	o.Deadline = 1
	must.Equal(t, d, o)

	// IgnoreSystemJobs
	d.IgnoreSystemJobs = true
	must.NotEqual(t, d, o)

	o.IgnoreSystemJobs = true
	must.Equal(t, d, o)
}

func TestNodes_Purge(t *testing.T) {
	testutil.Parallel(t)

	c, s := makeClient(t, nil, func(c *testutil.TestServerConfig) {
		c.DevMode = true
	})
	defer s.Stop()
	nodes := c.Nodes()

	// Purge on a nonexistent node fails.
	_, _, err := c.Nodes().Purge("12345678-abcd-efab-cdef-123456789abc", nil)
	must.ErrorContains(t, err, "not found")

	// Wait for nodeID
	nodeID := oneNodeFromNodeList(t, nodes).ID

	// Perform the node purge and check the response objects.
	out, meta, err := c.Nodes().Purge(nodeID, nil)
	must.NoError(t, err)
	must.NotNil(t, out)

	// We can't use assertQueryMeta here, as the RPC response does not populate
	// the known leader field.
	must.Positive(t, meta.LastIndex)
}

func TestNodeStatValueFormatting(t *testing.T) {
	testutil.Parallel(t)

	cases := []struct {
		expected string
		value    StatValue
	}{
		{
			"true",
			StatValue{BoolVal: pointerOf(true)},
		},
		{
			"false",
			StatValue{BoolVal: pointerOf(false)},
		},
		{
			"myvalue",
			StatValue{StringVal: pointerOf("myvalue")},
		},
		{
			"2.718",
			StatValue{
				FloatNumeratorVal: float64ToPtr(2.718),
			},
		},
		{
			"2.718 / 3.14",
			StatValue{
				FloatNumeratorVal:   float64ToPtr(2.718),
				FloatDenominatorVal: float64ToPtr(3.14),
			},
		},
		{
			"2.718 MHz",
			StatValue{
				FloatNumeratorVal: float64ToPtr(2.718),
				Unit:              "MHz",
			},
		},
		{
			"2.718 / 3.14 MHz",
			StatValue{
				FloatNumeratorVal:   float64ToPtr(2.718),
				FloatDenominatorVal: float64ToPtr(3.14),
				Unit:                "MHz",
			},
		},
		{
			"2",
			StatValue{
				IntNumeratorVal: pointerOf(int64(2)),
			},
		},
		{
			"2 / 3",
			StatValue{
				IntNumeratorVal:   pointerOf(int64(2)),
				IntDenominatorVal: pointerOf(int64(3)),
			},
		},
		{
			"2 MHz",
			StatValue{
				IntNumeratorVal: pointerOf(int64(2)),
				Unit:            "MHz",
			},
		},
		{
			"2 / 3 MHz",
			StatValue{
				IntNumeratorVal:   pointerOf(int64(2)),
				IntDenominatorVal: pointerOf(int64(3)),
				Unit:              "MHz",
			},
		},
	}

	for i, c := range cases {
		t.Run(fmt.Sprintf("case %d %v", i, c.expected), func(t *testing.T) {
			formatted := c.value.String()
			must.Eq(t, c.expected, formatted)
		})
	}
}