github.com/dkerwin/nomad@v0.3.3-0.20160525181927-74554135514b/nomad/node_endpoint.go (about) 1 package nomad 2 3 import ( 4 "fmt" 5 "sync" 6 "time" 7 8 "github.com/armon/go-metrics" 9 "github.com/hashicorp/go-memdb" 10 "github.com/hashicorp/nomad/nomad/structs" 11 "github.com/hashicorp/nomad/nomad/watch" 12 ) 13 14 const ( 15 // batchUpdateInterval is how long we wait to batch updates 16 batchUpdateInterval = 50 * time.Millisecond 17 ) 18 19 // Node endpoint is used for client interactions 20 type Node struct { 21 srv *Server 22 23 // updates holds pending client status updates for allocations 24 updates []*structs.Allocation 25 26 // updateFuture is used to wait for the pending batch update 27 // to complete. This may be nil if no batch is pending. 28 updateFuture *batchFuture 29 30 // updateTimer is the timer that will trigger the next batch 31 // update, and may be nil if there is no batch pending. 32 updateTimer *time.Timer 33 34 // updatesLock synchronizes access to the updates list, 35 // the future and the timer. 36 updatesLock sync.Mutex 37 } 38 39 // Register is used to upsert a client that is available for scheduling 40 func (n *Node) Register(args *structs.NodeRegisterRequest, reply *structs.NodeUpdateResponse) error { 41 if done, err := n.srv.forward("Node.Register", args, args, reply); done { 42 return err 43 } 44 defer metrics.MeasureSince([]string{"nomad", "client", "register"}, time.Now()) 45 46 // Validate the arguments 47 if args.Node == nil { 48 return fmt.Errorf("missing node for client registration") 49 } 50 if args.Node.ID == "" { 51 return fmt.Errorf("missing node ID for client registration") 52 } 53 if args.Node.Datacenter == "" { 54 return fmt.Errorf("missing datacenter for client registration") 55 } 56 if args.Node.Name == "" { 57 return fmt.Errorf("missing node name for client registration") 58 } 59 60 // Default the status if none is given 61 if args.Node.Status == "" { 62 args.Node.Status = structs.NodeStatusInit 63 } 64 if !structs.ValidNodeStatus(args.Node.Status) { 65 return fmt.Errorf("invalid status for node") 66 } 67 68 // Compute the node class 69 if err := args.Node.ComputeClass(); err != nil { 70 return fmt.Errorf("failed to computed node class: %v", err) 71 } 72 73 // Commit this update via Raft 74 _, index, err := n.srv.raftApply(structs.NodeRegisterRequestType, args) 75 if err != nil { 76 n.srv.logger.Printf("[ERR] nomad.client: Register failed: %v", err) 77 return err 78 } 79 reply.NodeModifyIndex = index 80 81 // Check if we should trigger evaluations 82 if structs.ShouldDrainNode(args.Node.Status) { 83 evalIDs, evalIndex, err := n.createNodeEvals(args.Node.ID, index) 84 if err != nil { 85 n.srv.logger.Printf("[ERR] nomad.client: eval creation failed: %v", err) 86 return err 87 } 88 reply.EvalIDs = evalIDs 89 reply.EvalCreateIndex = evalIndex 90 } 91 92 // Check if we need to setup a heartbeat 93 if !args.Node.TerminalStatus() { 94 ttl, err := n.srv.resetHeartbeatTimer(args.Node.ID) 95 if err != nil { 96 n.srv.logger.Printf("[ERR] nomad.client: heartbeat reset failed: %v", err) 97 return err 98 } 99 reply.HeartbeatTTL = ttl 100 } 101 102 // Set the reply index 103 reply.Index = index 104 return nil 105 } 106 107 // Deregister is used to remove a client from the client. If a client should 108 // just be made unavailable for scheduling, a status update is preferred. 109 func (n *Node) Deregister(args *structs.NodeDeregisterRequest, reply *structs.NodeUpdateResponse) error { 110 if done, err := n.srv.forward("Node.Deregister", args, args, reply); done { 111 return err 112 } 113 defer metrics.MeasureSince([]string{"nomad", "client", "deregister"}, time.Now()) 114 115 // Verify the arguments 116 if args.NodeID == "" { 117 return fmt.Errorf("missing node ID for client deregistration") 118 } 119 120 // Commit this update via Raft 121 _, index, err := n.srv.raftApply(structs.NodeDeregisterRequestType, args) 122 if err != nil { 123 n.srv.logger.Printf("[ERR] nomad.client: Deregister failed: %v", err) 124 return err 125 } 126 127 // Clear the heartbeat timer if any 128 n.srv.clearHeartbeatTimer(args.NodeID) 129 130 // Create the evaluations for this node 131 evalIDs, evalIndex, err := n.createNodeEvals(args.NodeID, index) 132 if err != nil { 133 n.srv.logger.Printf("[ERR] nomad.client: eval creation failed: %v", err) 134 return err 135 } 136 137 // Setup the reply 138 reply.EvalIDs = evalIDs 139 reply.EvalCreateIndex = evalIndex 140 reply.NodeModifyIndex = index 141 reply.Index = index 142 return nil 143 } 144 145 // UpdateStatus is used to update the status of a client node 146 func (n *Node) UpdateStatus(args *structs.NodeUpdateStatusRequest, reply *structs.NodeUpdateResponse) error { 147 if done, err := n.srv.forward("Node.UpdateStatus", args, args, reply); done { 148 return err 149 } 150 defer metrics.MeasureSince([]string{"nomad", "client", "update_status"}, time.Now()) 151 152 // Verify the arguments 153 if args.NodeID == "" { 154 return fmt.Errorf("missing node ID for client deregistration") 155 } 156 if !structs.ValidNodeStatus(args.Status) { 157 return fmt.Errorf("invalid status for node") 158 } 159 160 // Look for the node 161 snap, err := n.srv.fsm.State().Snapshot() 162 if err != nil { 163 return err 164 } 165 node, err := snap.NodeByID(args.NodeID) 166 if err != nil { 167 return err 168 } 169 if node == nil { 170 return fmt.Errorf("node not found") 171 } 172 173 // Commit this update via Raft 174 var index uint64 175 if node.Status != args.Status { 176 _, index, err = n.srv.raftApply(structs.NodeUpdateStatusRequestType, args) 177 if err != nil { 178 n.srv.logger.Printf("[ERR] nomad.client: status update failed: %v", err) 179 return err 180 } 181 reply.NodeModifyIndex = index 182 } 183 184 // Check if we should trigger evaluations 185 initToReady := node.Status == structs.NodeStatusInit && args.Status == structs.NodeStatusReady 186 terminalToReady := node.Status == structs.NodeStatusDown && args.Status == structs.NodeStatusReady 187 transitionToReady := initToReady || terminalToReady 188 if structs.ShouldDrainNode(args.Status) || transitionToReady { 189 evalIDs, evalIndex, err := n.createNodeEvals(args.NodeID, index) 190 if err != nil { 191 n.srv.logger.Printf("[ERR] nomad.client: eval creation failed: %v", err) 192 return err 193 } 194 reply.EvalIDs = evalIDs 195 reply.EvalCreateIndex = evalIndex 196 } 197 198 // Check if we need to setup a heartbeat 199 if args.Status != structs.NodeStatusDown { 200 ttl, err := n.srv.resetHeartbeatTimer(args.NodeID) 201 if err != nil { 202 n.srv.logger.Printf("[ERR] nomad.client: heartbeat reset failed: %v", err) 203 return err 204 } 205 reply.HeartbeatTTL = ttl 206 } 207 208 // Set the reply index 209 reply.Index = index 210 return nil 211 } 212 213 // UpdateDrain is used to update the drain mode of a client node 214 func (n *Node) UpdateDrain(args *structs.NodeUpdateDrainRequest, 215 reply *structs.NodeDrainUpdateResponse) error { 216 if done, err := n.srv.forward("Node.UpdateDrain", args, args, reply); done { 217 return err 218 } 219 defer metrics.MeasureSince([]string{"nomad", "client", "update_drain"}, time.Now()) 220 221 // Verify the arguments 222 if args.NodeID == "" { 223 return fmt.Errorf("missing node ID for drain update") 224 } 225 226 // Look for the node 227 snap, err := n.srv.fsm.State().Snapshot() 228 if err != nil { 229 return err 230 } 231 node, err := snap.NodeByID(args.NodeID) 232 if err != nil { 233 return err 234 } 235 if node == nil { 236 return fmt.Errorf("node not found") 237 } 238 239 // Commit this update via Raft 240 var index uint64 241 if node.Drain != args.Drain { 242 _, index, err = n.srv.raftApply(structs.NodeUpdateDrainRequestType, args) 243 if err != nil { 244 n.srv.logger.Printf("[ERR] nomad.client: drain update failed: %v", err) 245 return err 246 } 247 reply.NodeModifyIndex = index 248 } 249 250 // Always attempt to create Node evaluations because there may be a System 251 // job registered that should be evaluated. 252 evalIDs, evalIndex, err := n.createNodeEvals(args.NodeID, index) 253 if err != nil { 254 n.srv.logger.Printf("[ERR] nomad.client: eval creation failed: %v", err) 255 return err 256 } 257 reply.EvalIDs = evalIDs 258 reply.EvalCreateIndex = evalIndex 259 260 // Set the reply index 261 reply.Index = index 262 return nil 263 } 264 265 // Evaluate is used to force a re-evaluation of the node 266 func (n *Node) Evaluate(args *structs.NodeEvaluateRequest, reply *structs.NodeUpdateResponse) error { 267 if done, err := n.srv.forward("Node.Evaluate", args, args, reply); done { 268 return err 269 } 270 defer metrics.MeasureSince([]string{"nomad", "client", "evaluate"}, time.Now()) 271 272 // Verify the arguments 273 if args.NodeID == "" { 274 return fmt.Errorf("missing node ID for evaluation") 275 } 276 277 // Look for the node 278 snap, err := n.srv.fsm.State().Snapshot() 279 if err != nil { 280 return err 281 } 282 node, err := snap.NodeByID(args.NodeID) 283 if err != nil { 284 return err 285 } 286 if node == nil { 287 return fmt.Errorf("node not found") 288 } 289 290 // Create the evaluation 291 evalIDs, evalIndex, err := n.createNodeEvals(args.NodeID, node.ModifyIndex) 292 if err != nil { 293 n.srv.logger.Printf("[ERR] nomad.client: eval creation failed: %v", err) 294 return err 295 } 296 reply.EvalIDs = evalIDs 297 reply.EvalCreateIndex = evalIndex 298 299 // Set the reply index 300 reply.Index = evalIndex 301 return nil 302 } 303 304 // GetNode is used to request information about a specific node 305 func (n *Node) GetNode(args *structs.NodeSpecificRequest, 306 reply *structs.SingleNodeResponse) error { 307 if done, err := n.srv.forward("Node.GetNode", args, args, reply); done { 308 return err 309 } 310 defer metrics.MeasureSince([]string{"nomad", "client", "get_node"}, time.Now()) 311 312 // Setup the blocking query 313 opts := blockingOptions{ 314 queryOpts: &args.QueryOptions, 315 queryMeta: &reply.QueryMeta, 316 watch: watch.NewItems(watch.Item{Node: args.NodeID}), 317 run: func() error { 318 // Verify the arguments 319 if args.NodeID == "" { 320 return fmt.Errorf("missing node ID") 321 } 322 323 // Look for the node 324 snap, err := n.srv.fsm.State().Snapshot() 325 if err != nil { 326 return err 327 } 328 out, err := snap.NodeByID(args.NodeID) 329 if err != nil { 330 return err 331 } 332 333 // Setup the output 334 reply.Node = out 335 if out != nil { 336 reply.Index = out.ModifyIndex 337 } else { 338 // Use the last index that affected the nodes table 339 index, err := snap.Index("nodes") 340 if err != nil { 341 return err 342 } 343 reply.Index = index 344 } 345 346 // Set the query response 347 n.srv.setQueryMeta(&reply.QueryMeta) 348 return nil 349 }} 350 return n.srv.blockingRPC(&opts) 351 } 352 353 // GetAllocs is used to request allocations for a specific node 354 func (n *Node) GetAllocs(args *structs.NodeSpecificRequest, 355 reply *structs.NodeAllocsResponse) error { 356 if done, err := n.srv.forward("Node.GetAllocs", args, args, reply); done { 357 return err 358 } 359 defer metrics.MeasureSince([]string{"nomad", "client", "get_allocs"}, time.Now()) 360 361 // Verify the arguments 362 if args.NodeID == "" { 363 return fmt.Errorf("missing node ID") 364 } 365 366 // Setup the blocking query 367 opts := blockingOptions{ 368 queryOpts: &args.QueryOptions, 369 queryMeta: &reply.QueryMeta, 370 watch: watch.NewItems(watch.Item{AllocNode: args.NodeID}), 371 run: func() error { 372 // Look for the node 373 snap, err := n.srv.fsm.State().Snapshot() 374 if err != nil { 375 return err 376 } 377 allocs, err := snap.AllocsByNode(args.NodeID) 378 if err != nil { 379 return err 380 } 381 382 // Setup the output 383 if len(allocs) != 0 { 384 reply.Allocs = allocs 385 for _, alloc := range allocs { 386 reply.Index = maxUint64(reply.Index, alloc.ModifyIndex) 387 } 388 } else { 389 reply.Allocs = nil 390 391 // Use the last index that affected the nodes table 392 index, err := snap.Index("allocs") 393 if err != nil { 394 return err 395 } 396 397 // Must provide non-zero index to prevent blocking 398 // Index 1 is impossible anyways (due to Raft internals) 399 if index == 0 { 400 reply.Index = 1 401 } else { 402 reply.Index = index 403 } 404 } 405 return nil 406 }} 407 return n.srv.blockingRPC(&opts) 408 } 409 410 // GetClientAllocs is used to request a lightweight list of alloc modify indexes 411 // per allocation. 412 func (n *Node) GetClientAllocs(args *structs.NodeSpecificRequest, 413 reply *structs.NodeClientAllocsResponse) error { 414 if done, err := n.srv.forward("Node.GetClientAllocs", args, args, reply); done { 415 return err 416 } 417 defer metrics.MeasureSince([]string{"nomad", "client", "get_client_allocs"}, time.Now()) 418 419 // Verify the arguments 420 if args.NodeID == "" { 421 return fmt.Errorf("missing node ID") 422 } 423 424 // Setup the blocking query 425 opts := blockingOptions{ 426 queryOpts: &args.QueryOptions, 427 queryMeta: &reply.QueryMeta, 428 watch: watch.NewItems(watch.Item{AllocNode: args.NodeID}), 429 run: func() error { 430 // Look for the node 431 snap, err := n.srv.fsm.State().Snapshot() 432 if err != nil { 433 return err 434 } 435 allocs, err := snap.AllocsByNode(args.NodeID) 436 if err != nil { 437 return err 438 } 439 440 reply.Allocs = make(map[string]uint64) 441 // Setup the output 442 if len(allocs) != 0 { 443 for _, alloc := range allocs { 444 reply.Allocs[alloc.ID] = alloc.AllocModifyIndex 445 reply.Index = maxUint64(reply.Index, alloc.ModifyIndex) 446 } 447 } else { 448 // Use the last index that affected the nodes table 449 index, err := snap.Index("allocs") 450 if err != nil { 451 return err 452 } 453 454 // Must provide non-zero index to prevent blocking 455 // Index 1 is impossible anyways (due to Raft internals) 456 if index == 0 { 457 reply.Index = 1 458 } else { 459 reply.Index = index 460 } 461 } 462 return nil 463 }} 464 return n.srv.blockingRPC(&opts) 465 } 466 467 // UpdateAlloc is used to update the client status of an allocation 468 func (n *Node) UpdateAlloc(args *structs.AllocUpdateRequest, reply *structs.GenericResponse) error { 469 if done, err := n.srv.forward("Node.UpdateAlloc", args, args, reply); done { 470 return err 471 } 472 defer metrics.MeasureSince([]string{"nomad", "client", "update_alloc"}, time.Now()) 473 474 // Ensure at least a single alloc 475 if len(args.Alloc) == 0 { 476 return fmt.Errorf("must update at least one allocation") 477 } 478 479 // Add this to the batch 480 n.updatesLock.Lock() 481 n.updates = append(n.updates, args.Alloc...) 482 483 // Start a new batch if none 484 future := n.updateFuture 485 if future == nil { 486 future = NewBatchFuture() 487 n.updateFuture = future 488 n.updateTimer = time.AfterFunc(batchUpdateInterval, func() { 489 // Get the pending updates 490 n.updatesLock.Lock() 491 updates := n.updates 492 future := n.updateFuture 493 n.updates = nil 494 n.updateFuture = nil 495 n.updateTimer = nil 496 n.updatesLock.Unlock() 497 498 // Perform the batch update 499 n.batchUpdate(future, updates) 500 }) 501 } 502 n.updatesLock.Unlock() 503 504 // Wait for the future 505 if err := future.Wait(); err != nil { 506 return err 507 } 508 509 // Setup the response 510 reply.Index = future.Index() 511 return nil 512 } 513 514 // batchUpdate is used to update all the allocations 515 func (n *Node) batchUpdate(future *batchFuture, updates []*structs.Allocation) { 516 // Prepare the batch update 517 batch := &structs.AllocUpdateRequest{ 518 Alloc: updates, 519 WriteRequest: structs.WriteRequest{Region: n.srv.config.Region}, 520 } 521 522 // Commit this update via Raft 523 _, index, err := n.srv.raftApply(structs.AllocClientUpdateRequestType, batch) 524 if err != nil { 525 n.srv.logger.Printf("[ERR] nomad.client: alloc update failed: %v", err) 526 } 527 528 // Respond to the future 529 future.Respond(index, err) 530 } 531 532 // List is used to list the available nodes 533 func (n *Node) List(args *structs.NodeListRequest, 534 reply *structs.NodeListResponse) error { 535 if done, err := n.srv.forward("Node.List", args, args, reply); done { 536 return err 537 } 538 defer metrics.MeasureSince([]string{"nomad", "client", "list"}, time.Now()) 539 540 // Setup the blocking query 541 opts := blockingOptions{ 542 queryOpts: &args.QueryOptions, 543 queryMeta: &reply.QueryMeta, 544 watch: watch.NewItems(watch.Item{Table: "nodes"}), 545 run: func() error { 546 // Capture all the nodes 547 snap, err := n.srv.fsm.State().Snapshot() 548 if err != nil { 549 return err 550 } 551 var iter memdb.ResultIterator 552 if prefix := args.QueryOptions.Prefix; prefix != "" { 553 iter, err = snap.NodesByIDPrefix(prefix) 554 } else { 555 iter, err = snap.Nodes() 556 } 557 if err != nil { 558 return err 559 } 560 561 var nodes []*structs.NodeListStub 562 for { 563 raw := iter.Next() 564 if raw == nil { 565 break 566 } 567 node := raw.(*structs.Node) 568 nodes = append(nodes, node.Stub()) 569 } 570 reply.Nodes = nodes 571 572 // Use the last index that affected the jobs table 573 index, err := snap.Index("nodes") 574 if err != nil { 575 return err 576 } 577 reply.Index = index 578 579 // Set the query response 580 n.srv.setQueryMeta(&reply.QueryMeta) 581 return nil 582 }} 583 return n.srv.blockingRPC(&opts) 584 } 585 586 // createNodeEvals is used to create evaluations for each alloc on a node. 587 // Each Eval is scoped to a job, so we need to potentially trigger many evals. 588 func (n *Node) createNodeEvals(nodeID string, nodeIndex uint64) ([]string, uint64, error) { 589 // Snapshot the state 590 snap, err := n.srv.fsm.State().Snapshot() 591 if err != nil { 592 return nil, 0, fmt.Errorf("failed to snapshot state: %v", err) 593 } 594 595 // Find all the allocations for this node 596 allocs, err := snap.AllocsByNode(nodeID) 597 if err != nil { 598 return nil, 0, fmt.Errorf("failed to find allocs for '%s': %v", nodeID, err) 599 } 600 601 sysJobsIter, err := snap.JobsByScheduler("system") 602 if err != nil { 603 return nil, 0, fmt.Errorf("failed to find system jobs for '%s': %v", nodeID, err) 604 } 605 606 var sysJobs []*structs.Job 607 for job := sysJobsIter.Next(); job != nil; job = sysJobsIter.Next() { 608 sysJobs = append(sysJobs, job.(*structs.Job)) 609 } 610 611 // Fast-path if nothing to do 612 if len(allocs) == 0 && len(sysJobs) == 0 { 613 return nil, 0, nil 614 } 615 616 // Create an eval for each JobID affected 617 var evals []*structs.Evaluation 618 var evalIDs []string 619 jobIDs := make(map[string]struct{}) 620 621 for _, alloc := range allocs { 622 // Deduplicate on JobID 623 if _, ok := jobIDs[alloc.JobID]; ok { 624 continue 625 } 626 jobIDs[alloc.JobID] = struct{}{} 627 628 // Create a new eval 629 eval := &structs.Evaluation{ 630 ID: structs.GenerateUUID(), 631 Priority: alloc.Job.Priority, 632 Type: alloc.Job.Type, 633 TriggeredBy: structs.EvalTriggerNodeUpdate, 634 JobID: alloc.JobID, 635 NodeID: nodeID, 636 NodeModifyIndex: nodeIndex, 637 Status: structs.EvalStatusPending, 638 } 639 evals = append(evals, eval) 640 evalIDs = append(evalIDs, eval.ID) 641 } 642 643 // Create an evaluation for each system job. 644 for _, job := range sysJobs { 645 // Still dedup on JobID as the node may already have the system job. 646 if _, ok := jobIDs[job.ID]; ok { 647 continue 648 } 649 jobIDs[job.ID] = struct{}{} 650 651 // Create a new eval 652 eval := &structs.Evaluation{ 653 ID: structs.GenerateUUID(), 654 Priority: job.Priority, 655 Type: job.Type, 656 TriggeredBy: structs.EvalTriggerNodeUpdate, 657 JobID: job.ID, 658 NodeID: nodeID, 659 NodeModifyIndex: nodeIndex, 660 Status: structs.EvalStatusPending, 661 } 662 evals = append(evals, eval) 663 evalIDs = append(evalIDs, eval.ID) 664 } 665 666 // Create the Raft transaction 667 update := &structs.EvalUpdateRequest{ 668 Evals: evals, 669 WriteRequest: structs.WriteRequest{Region: n.srv.config.Region}, 670 } 671 672 // Commit this evaluation via Raft 673 // XXX: There is a risk of partial failure where the node update succeeds 674 // but that the EvalUpdate does not. 675 _, evalIndex, err := n.srv.raftApply(structs.EvalUpdateRequestType, update) 676 if err != nil { 677 return nil, 0, err 678 } 679 return evalIDs, evalIndex, nil 680 } 681 682 // batchFuture is used to wait on a batch update to complete 683 type batchFuture struct { 684 doneCh chan struct{} 685 err error 686 index uint64 687 } 688 689 // NewBatchFuture creates a new batch future 690 func NewBatchFuture() *batchFuture { 691 return &batchFuture{ 692 doneCh: make(chan struct{}), 693 } 694 } 695 696 // Wait is used to block for the future to complete and returns the error 697 func (b *batchFuture) Wait() error { 698 <-b.doneCh 699 return b.err 700 } 701 702 // Index is used to return the index of the batch, only after Wait() 703 func (b *batchFuture) Index() uint64 { 704 return b.index 705 } 706 707 // Respond is used to unblock the future 708 func (b *batchFuture) Respond(index uint64, err error) { 709 b.index = index 710 b.err = err 711 close(b.doneCh) 712 }