// github.com/blixtra/nomad@v0.7.2-0.20171221000451-da9a1d7bb050/nomad/node_endpoint.go

package nomad

import (
	"context"
	"crypto/subtle"
	"encoding/base64"
	"fmt"
	"strings"
	"sync"
	"time"

	"golang.org/x/crypto/blake2b"
	"golang.org/x/sync/errgroup"

	"github.com/armon/go-metrics"
	"github.com/hashicorp/go-memdb"
	"github.com/hashicorp/go-multierror"
	"github.com/hashicorp/nomad/acl"
	"github.com/hashicorp/nomad/helper/uuid"
	"github.com/hashicorp/nomad/nomad/state"
	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/hashicorp/raft"
	vapi "github.com/hashicorp/vault/api"
)

const (
	// batchUpdateInterval is how long we wait to batch updates
	batchUpdateInterval = 50 * time.Millisecond

	// maxParallelRequestsPerDerive is the maximum number of parallel Vault
	// create token requests that may be outstanding per derive request
	maxParallelRequestsPerDerive = 16
)

// Node endpoint is used for client interactions
type Node struct {
	srv *Server

	// updates holds pending client status updates for allocations
	updates []*structs.Allocation

	// updateFuture is used to wait for the pending batch update
	// to complete. This may be nil if no batch is pending.
	updateFuture *batchFuture

	// updateTimer is the timer that will trigger the next batch
	// update, and may be nil if there is no batch pending.
	updateTimer *time.Timer

	// updatesLock synchronizes access to the updates list,
	// the future and the timer.
	updatesLock sync.Mutex
}

// Register is used to upsert a client that is available for scheduling
func (n *Node) Register(args *structs.NodeRegisterRequest, reply *structs.NodeUpdateResponse) error {
	if done, err := n.srv.forward("Node.Register", args, args, reply); done {
		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "register"}, time.Now())

	// Validate the arguments
	if args.Node == nil {
		return fmt.Errorf("missing node for client registration")
	}
	if args.Node.ID == "" {
		return fmt.Errorf("missing node ID for client registration")
	}
	if args.Node.Datacenter == "" {
		return fmt.Errorf("missing datacenter for client registration")
	}
	if args.Node.Name == "" {
		return fmt.Errorf("missing node name for client registration")
	}
	if len(args.Node.Attributes) == 0 {
		return fmt.Errorf("missing attributes for client registration")
	}
	if args.Node.SecretID == "" {
		return fmt.Errorf("missing node secret ID for client registration")
	}

	// Default the status if none is given
	if args.Node.Status == "" {
		args.Node.Status = structs.NodeStatusInit
	}
	if !structs.ValidNodeStatus(args.Node.Status) {
		return fmt.Errorf("invalid status for node")
	}

	// Set the timestamp when the node is registered
	args.Node.StatusUpdatedAt = time.Now().Unix()

	// Compute the node class
	if err := args.Node.ComputeClass(); err != nil {
		return fmt.Errorf("failed to compute node class: %v", err)
	}

	// Look for the node so we can detect a state transition
	snap, err := n.srv.fsm.State().Snapshot()
	if err != nil {
		return err
	}

	ws := memdb.NewWatchSet()
	originalNode, err := snap.NodeByID(ws, args.Node.ID)
	if err != nil {
		return err
	}

	// Check if the SecretID has been tampered with
	if originalNode != nil {
		if args.Node.SecretID != originalNode.SecretID && originalNode.SecretID != "" {
			return fmt.Errorf("node secret ID does not match; not registering node")
		}
	}

	// Commit this update via Raft
	_, index, err := n.srv.raftApply(structs.NodeRegisterRequestType, args)
	if err != nil {
		n.srv.logger.Printf("[ERR] nomad.client: Register failed: %v", err)
		return err
	}
	reply.NodeModifyIndex = index

	// Check if we should trigger evaluations
	originalStatus := structs.NodeStatusInit
	if originalNode != nil {
		originalStatus = originalNode.Status
	}
	transitionToReady := transitionedToReady(args.Node.Status, originalStatus)
	if structs.ShouldDrainNode(args.Node.Status) || transitionToReady {
		evalIDs, evalIndex, err := n.createNodeEvals(args.Node.ID, index)
		if err != nil {
			n.srv.logger.Printf("[ERR] nomad.client: eval creation failed: %v", err)
			return err
		}
		reply.EvalIDs = evalIDs
		reply.EvalCreateIndex = evalIndex
	}

	// Check if we need to setup a heartbeat
	if !args.Node.TerminalStatus() {
		ttl, err := n.srv.resetHeartbeatTimer(args.Node.ID)
		if err != nil {
			n.srv.logger.Printf("[ERR] nomad.client: heartbeat reset failed: %v", err)
			return err
		}
		reply.HeartbeatTTL = ttl
	}

	// Set the reply index
	reply.Index = index
	snap, err = n.srv.fsm.State().Snapshot()
	if err != nil {
		return err
	}

	n.srv.peerLock.RLock()
	defer n.srv.peerLock.RUnlock()
	if err := n.constructNodeServerInfoResponse(snap, reply); err != nil {
		n.srv.logger.Printf("[ERR] nomad.client: failed to populate NodeUpdateResponse: %v", err)
		return err
	}

	return nil
}

// constructNodeServerInfoResponse assumes the n.srv.peerLock is held for reading.
func (n *Node) constructNodeServerInfoResponse(snap *state.StateSnapshot, reply *structs.NodeUpdateResponse) error {
	reply.LeaderRPCAddr = string(n.srv.raft.Leader())

	// Reply with config information required for future RPC requests
	reply.Servers = make([]*structs.NodeServerInfo, 0, len(n.srv.localPeers))
	for k, v := range n.srv.localPeers {
		reply.Servers = append(reply.Servers,
			&structs.NodeServerInfo{
				RPCAdvertiseAddr: string(k),
				RPCMajorVersion:  int32(v.MajorVersion),
				RPCMinorVersion:  int32(v.MinorVersion),
				Datacenter:       v.Datacenter,
			})
	}

	// TODO(sean@): Use an indexed node count instead
	//
	// Snapshot is used only to iterate over all nodes to create a node
	// count to send back to Nomad Clients in their heartbeat so Clients
	// can estimate the size of the cluster.
	ws := memdb.NewWatchSet()
	iter, err := snap.Nodes(ws)
	if err == nil {
		for {
			raw := iter.Next()
			if raw == nil {
				break
			}
			reply.NumNodes++
		}
	}

	return nil
}

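// A client receiving the reply above can, for instance, keep reply.Servers as
// its fail-over list and use reply.NumNodes as a rough cluster-size estimate.
// A minimal sketch, not actual client code:
//
//	for _, s := range reply.Servers {
//		fmt.Printf("server %s (dc %s)\n", s.RPCAdvertiseAddr, s.Datacenter)
//	}
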
// Deregister is used to remove a client from the cluster. If a client should
// just be made unavailable for scheduling, a status update is preferred.
func (n *Node) Deregister(args *structs.NodeDeregisterRequest, reply *structs.NodeUpdateResponse) error {
	if done, err := n.srv.forward("Node.Deregister", args, args, reply); done {
		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "deregister"}, time.Now())

	// Check node permissions
	if aclObj, err := n.srv.ResolveToken(args.AuthToken); err != nil {
		return err
	} else if aclObj != nil && !aclObj.AllowNodeWrite() {
		return structs.ErrPermissionDenied
	}

	// Verify the arguments
	if args.NodeID == "" {
		return fmt.Errorf("missing node ID for client deregistration")
	}

	// Look for the node
	snap, err := n.srv.fsm.State().Snapshot()
	if err != nil {
		return err
	}

	ws := memdb.NewWatchSet()
	node, err := snap.NodeByID(ws, args.NodeID)
	if err != nil {
		return err
	}
	if node == nil {
		return fmt.Errorf("node not found")
	}

	// Commit this update via Raft
	_, index, err := n.srv.raftApply(structs.NodeDeregisterRequestType, args)
	if err != nil {
		n.srv.logger.Printf("[ERR] nomad.client: Deregister failed: %v", err)
		return err
	}

	// Clear the heartbeat timer if any
	n.srv.clearHeartbeatTimer(args.NodeID)

	// Create the evaluations for this node
	evalIDs, evalIndex, err := n.createNodeEvals(args.NodeID, index)
	if err != nil {
		n.srv.logger.Printf("[ERR] nomad.client: eval creation failed: %v", err)
		return err
	}

	// Determine if there are any Vault accessors on the node
	accessors, err := snap.VaultAccessorsByNode(ws, args.NodeID)
	if err != nil {
		n.srv.logger.Printf("[ERR] nomad.client: looking up accessors for node %q failed: %v", args.NodeID, err)
		return err
	}

	if l := len(accessors); l != 0 {
		n.srv.logger.Printf("[DEBUG] nomad.client: revoking %d accessors on node %q due to deregister", l, args.NodeID)
		if err := n.srv.vault.RevokeTokens(context.Background(), accessors, true); err != nil {
			n.srv.logger.Printf("[ERR] nomad.client: revoking accessors for node %q failed: %v", args.NodeID, err)
			return err
		}
	}

	// Setup the reply
	reply.EvalIDs = evalIDs
	reply.EvalCreateIndex = evalIndex
	reply.NodeModifyIndex = index
	reply.Index = index
	return nil
}

// UpdateStatus is used to update the status of a client node
func (n *Node) UpdateStatus(args *structs.NodeUpdateStatusRequest, reply *structs.NodeUpdateResponse) error {
	if done, err := n.srv.forward("Node.UpdateStatus", args, args, reply); done {
		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "update_status"}, time.Now())

	// Verify the arguments
	if args.NodeID == "" {
		return fmt.Errorf("missing node ID for client status update")
	}
	if !structs.ValidNodeStatus(args.Status) {
		return fmt.Errorf("invalid status for node")
	}

	// Look for the node
	snap, err := n.srv.fsm.State().Snapshot()
	if err != nil {
		return err
	}

	ws := memdb.NewWatchSet()
	node, err := snap.NodeByID(ws, args.NodeID)
	if err != nil {
		return err
	}
	if node == nil {
		return fmt.Errorf("node not found")
	}

	// XXX: Could use the SecretID here but have to update the heartbeat system
	// to track SecretIDs.

	// Update the timestamp of when the node status was updated
	node.StatusUpdatedAt = time.Now().Unix()

	// Commit this update via Raft
	var index uint64
	if node.Status != args.Status {
		_, index, err = n.srv.raftApply(structs.NodeUpdateStatusRequestType, args)
		if err != nil {
			n.srv.logger.Printf("[ERR] nomad.client: status update failed: %v", err)
			return err
		}
		reply.NodeModifyIndex = index
	}

	// Check if we should trigger evaluations
	transitionToReady := transitionedToReady(args.Status, node.Status)
	if structs.ShouldDrainNode(args.Status) || transitionToReady {
		evalIDs, evalIndex, err := n.createNodeEvals(args.NodeID, index)
		if err != nil {
			n.srv.logger.Printf("[ERR] nomad.client: eval creation failed: %v", err)
			return err
		}
		reply.EvalIDs = evalIDs
		reply.EvalCreateIndex = evalIndex
	}

	// Check if we need to setup a heartbeat
	switch args.Status {
	case structs.NodeStatusDown:
		// Determine if there are any Vault accessors on the node
		accessors, err := n.srv.State().VaultAccessorsByNode(ws, args.NodeID)
		if err != nil {
			n.srv.logger.Printf("[ERR] nomad.client: looking up accessors for node %q failed: %v", args.NodeID, err)
			return err
		}

		if l := len(accessors); l != 0 {
			n.srv.logger.Printf("[DEBUG] nomad.client: revoking %d accessors on node %q due to down state", l, args.NodeID)
			if err := n.srv.vault.RevokeTokens(context.Background(), accessors, true); err != nil {
				n.srv.logger.Printf("[ERR] nomad.client: revoking accessors for node %q failed: %v", args.NodeID, err)
				return err
			}
		}
	default:
		ttl, err := n.srv.resetHeartbeatTimer(args.NodeID)
		if err != nil {
			n.srv.logger.Printf("[ERR] nomad.client: heartbeat reset failed: %v", err)
			return err
		}
		reply.HeartbeatTTL = ttl
	}

	// Set the reply index and leader
	reply.Index = index
	n.srv.peerLock.RLock()
	defer n.srv.peerLock.RUnlock()
	if err := n.constructNodeServerInfoResponse(snap, reply); err != nil {
		n.srv.logger.Printf("[ERR] nomad.client: failed to populate NodeUpdateResponse: %v", err)
		return err
	}

	return nil
}

// transitionedToReady is a helper that takes a node's new and old status and
// returns whether it has transitioned to ready.
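// Only two transitions qualify as becoming ready: init -> ready and
// down -> ready.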
func transitionedToReady(newStatus, oldStatus string) bool {
	initToReady := oldStatus == structs.NodeStatusInit && newStatus == structs.NodeStatusReady
	terminalToReady := oldStatus == structs.NodeStatusDown && newStatus == structs.NodeStatusReady
	return initToReady || terminalToReady
}

// UpdateDrain is used to update the drain mode of a client node
func (n *Node) UpdateDrain(args *structs.NodeUpdateDrainRequest,
	reply *structs.NodeDrainUpdateResponse) error {
	if done, err := n.srv.forward("Node.UpdateDrain", args, args, reply); done {
		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "update_drain"}, time.Now())

	// Check node write permissions
	if aclObj, err := n.srv.ResolveToken(args.AuthToken); err != nil {
		return err
	} else if aclObj != nil && !aclObj.AllowNodeWrite() {
		return structs.ErrPermissionDenied
	}

	// Verify the arguments
	if args.NodeID == "" {
		return fmt.Errorf("missing node ID for drain update")
	}

	// Look for the node
	snap, err := n.srv.fsm.State().Snapshot()
	if err != nil {
		return err
	}
	ws := memdb.NewWatchSet()
	node, err := snap.NodeByID(ws, args.NodeID)
	if err != nil {
		return err
	}
	if node == nil {
		return fmt.Errorf("node not found")
	}

	// Update the timestamp of when the node status was updated
	node.StatusUpdatedAt = time.Now().Unix()

	// Commit this update via Raft
	var index uint64
	if node.Drain != args.Drain {
		_, index, err = n.srv.raftApply(structs.NodeUpdateDrainRequestType, args)
		if err != nil {
			n.srv.logger.Printf("[ERR] nomad.client: drain update failed: %v", err)
			return err
		}
		reply.NodeModifyIndex = index
	}

	// Always attempt to create Node evaluations because there may be a System
	// job registered that should be evaluated.
	evalIDs, evalIndex, err := n.createNodeEvals(args.NodeID, index)
	if err != nil {
		n.srv.logger.Printf("[ERR] nomad.client: eval creation failed: %v", err)
		return err
	}
	reply.EvalIDs = evalIDs
	reply.EvalCreateIndex = evalIndex

	// Set the reply index
	reply.Index = index
	return nil
}

// Evaluate is used to force a re-evaluation of the node
func (n *Node) Evaluate(args *structs.NodeEvaluateRequest, reply *structs.NodeUpdateResponse) error {
	if done, err := n.srv.forward("Node.Evaluate", args, args, reply); done {
		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "evaluate"}, time.Now())

	// Check node write permissions
	if aclObj, err := n.srv.ResolveToken(args.AuthToken); err != nil {
		return err
	} else if aclObj != nil && !aclObj.AllowNodeWrite() {
		return structs.ErrPermissionDenied
	}

	// Verify the arguments
	if args.NodeID == "" {
		return fmt.Errorf("missing node ID for evaluation")
	}

	// Look for the node
	snap, err := n.srv.fsm.State().Snapshot()
	if err != nil {
		return err
	}
	ws := memdb.NewWatchSet()
	node, err := snap.NodeByID(ws, args.NodeID)
	if err != nil {
		return err
	}
	if node == nil {
		return fmt.Errorf("node not found")
	}

	// Create the evaluation
	evalIDs, evalIndex, err := n.createNodeEvals(args.NodeID, node.ModifyIndex)
	if err != nil {
		n.srv.logger.Printf("[ERR] nomad.client: eval creation failed: %v", err)
		return err
	}
	reply.EvalIDs = evalIDs
	reply.EvalCreateIndex = evalIndex

	// Set the reply index
	reply.Index = evalIndex

	n.srv.peerLock.RLock()
	defer n.srv.peerLock.RUnlock()
	if err := n.constructNodeServerInfoResponse(snap, reply); err != nil {
		n.srv.logger.Printf("[ERR] nomad.client: failed to populate NodeUpdateResponse: %v", err)
		return err
	}
	return nil
}

// GetNode is used to request information about a specific node
func (n *Node) GetNode(args *structs.NodeSpecificRequest,
	reply *structs.SingleNodeResponse) error {
	if done, err := n.srv.forward("Node.GetNode", args, args, reply); done {
		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "get_node"}, time.Now())

	// Check node read permissions
	if aclObj, err := n.srv.ResolveToken(args.AuthToken); err != nil {
		// If ResolveToken had an unexpected error return that
		if err != structs.ErrTokenNotFound {
			return err
		}

		// Attempt to lookup AuthToken as a Node.SecretID since nodes
		// call this endpoint and don't have an ACL token.
		node, stateErr := n.srv.fsm.State().NodeBySecretID(nil, args.AuthToken)
		if stateErr != nil {
			// Return the original ResolveToken error with this err
			var merr multierror.Error
			merr.Errors = append(merr.Errors, err, stateErr)
			return merr.ErrorOrNil()
		}

		// Not a node or a valid ACL token
		if node == nil {
			return structs.ErrTokenNotFound
		}
	} else if aclObj != nil && !aclObj.AllowNodeRead() {
		return structs.ErrPermissionDenied
	}

	// Setup the blocking query
	opts := blockingOptions{
		queryOpts: &args.QueryOptions,
		queryMeta: &reply.QueryMeta,
		run: func(ws memdb.WatchSet, state *state.StateStore) error {
			// Verify the arguments
			if args.NodeID == "" {
				return fmt.Errorf("missing node ID")
			}

			// Look for the node
			out, err := state.NodeByID(ws, args.NodeID)
			if err != nil {
				return err
			}

			// Setup the output
			if out != nil {
				// Clear the secret ID
				reply.Node = out.Copy()
				reply.Node.SecretID = ""
				reply.Index = out.ModifyIndex
			} else {
				// Use the last index that affected the nodes table
				index, err := state.Index("nodes")
				if err != nil {
					return err
				}
				reply.Node = nil
				reply.Index = index
			}

			// Set the query response
			n.srv.setQueryMeta(&reply.QueryMeta)
			return nil
		}}
	return n.srv.blockingRPC(&opts)
}

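// The blocking-query pattern above is shared by the read endpoints that
// follow: blockingRPC re-runs the run callback whenever the watch set fires,
// and the callback must always set reply.Index so the client's next request
// blocks at the right point.
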
// GetAllocs is used to request allocations for a specific node
func (n *Node) GetAllocs(args *structs.NodeSpecificRequest,
	reply *structs.NodeAllocsResponse) error {
	if done, err := n.srv.forward("Node.GetAllocs", args, args, reply); done {
		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "get_allocs"}, time.Now())

	// Check node read and namespace job read permissions
	aclObj, err := n.srv.ResolveToken(args.AuthToken)
	if err != nil {
		return err
	}
	if aclObj != nil && !aclObj.AllowNodeRead() {
		return structs.ErrPermissionDenied
	}

	// cache namespace perms
	readableNamespaces := map[string]bool{}

	// readNS is a caching namespace read-job helper
	readNS := func(ns string) bool {
		if aclObj == nil {
			// ACLs are disabled; everything is readable
			return true
		}

		if readable, ok := readableNamespaces[ns]; ok {
			// cache hit
			return readable
		}

		// cache miss
		readable := aclObj.AllowNsOp(ns, acl.NamespaceCapabilityReadJob)
		readableNamespaces[ns] = readable
		return readable
	}

	// Verify the arguments
	if args.NodeID == "" {
		return fmt.Errorf("missing node ID")
	}

	// Setup the blocking query
	opts := blockingOptions{
		queryOpts: &args.QueryOptions,
		queryMeta: &reply.QueryMeta,
		run: func(ws memdb.WatchSet, state *state.StateStore) error {
			// Look for the node
			allocs, err := state.AllocsByNode(ws, args.NodeID)
			if err != nil {
				return err
			}

			// Setup the output
			if n := len(allocs); n != 0 {
				reply.Allocs = make([]*structs.Allocation, 0, n)
				for _, alloc := range allocs {
					if readNS(alloc.Namespace) {
						reply.Allocs = append(reply.Allocs, alloc)
					}

					// Get the max of all allocs since
					// subsequent requests need to start
					// from the latest index
					reply.Index = maxUint64(reply.Index, alloc.ModifyIndex)
				}
			} else {
				reply.Allocs = nil

				// Use the last index that affected the allocs table
				index, err := state.Index("allocs")
				if err != nil {
					return err
				}

				// Must provide non-zero index to prevent blocking
				// Index 1 is impossible anyway (due to Raft internals)
				if index == 0 {
					reply.Index = 1
				} else {
					reply.Index = index
				}
			}
			return nil
		}}
	return n.srv.blockingRPC(&opts)
}

// GenerateMigrateToken will create a token for a client to access an
// authenticated volume of another client to migrate data for sticky volumes.
func GenerateMigrateToken(allocID, nodeSecretID string) (string, error) {
	h, err := blake2b.New512([]byte(nodeSecretID))
	if err != nil {
		return "", err
	}
	h.Write([]byte(allocID))
	return base64.URLEncoding.EncodeToString(h.Sum(nil)), nil
}

// CompareMigrateToken returns true if two migration tokens can be computed and
// are equal.
func CompareMigrateToken(allocID, nodeSecretID, otherMigrateToken string) bool {
	h, err := blake2b.New512([]byte(nodeSecretID))
	if err != nil {
		return false
	}
	h.Write([]byte(allocID))

	otherBytes, err := base64.URLEncoding.DecodeString(otherMigrateToken)
	if err != nil {
		return false
	}
	return subtle.ConstantTimeCompare(h.Sum(nil), otherBytes) == 1
}

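// A minimal round-trip sketch of the two helpers above (the alloc and secret
// IDs are placeholder values). The token is a keyed BLAKE2b-512 hash of the
// alloc ID, so only a party holding the node's SecretID can mint or verify it:
//
//	token, _ := GenerateMigrateToken("alloc-id", "node-secret")
//	ok := CompareMigrateToken("alloc-id", "node-secret", token) // ok == true
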
// GetClientAllocs is used to request a lightweight list of alloc modify indexes
// per allocation.
func (n *Node) GetClientAllocs(args *structs.NodeSpecificRequest,
	reply *structs.NodeClientAllocsResponse) error {
	if done, err := n.srv.forward("Node.GetClientAllocs", args, args, reply); done {
		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "get_client_allocs"}, time.Now())

	// Verify the arguments
	if args.NodeID == "" {
		return fmt.Errorf("missing node ID")
	}

	// numOldAllocs is used to detect if there is a garbage collection event
	// that affects the node. When an allocation is garbage collected, the
	// modify indexes of the remaining allocations do not change and thus the
	// query won't unblock, even though the set of allocations on the node has
	// changed.
	var numOldAllocs int

	// Setup the blocking query
	opts := blockingOptions{
		queryOpts: &args.QueryOptions,
		queryMeta: &reply.QueryMeta,
		run: func(ws memdb.WatchSet, state *state.StateStore) error {
			// Look for the node
			node, err := state.NodeByID(ws, args.NodeID)
			if err != nil {
				return err
			}

			var allocs []*structs.Allocation
			if node != nil {
				if args.SecretID == "" {
					return fmt.Errorf("missing node secret ID for client status update")
				} else if args.SecretID != node.SecretID {
					return fmt.Errorf("node secret ID does not match")
				}

				var err error
				allocs, err = state.AllocsByNode(ws, args.NodeID)
				if err != nil {
					return err
				}
			}

			reply.Allocs = make(map[string]uint64)
			reply.MigrateTokens = make(map[string]string)

			// preferTableIndex is used to determine whether we should build the
			// response index based on the full table indexes versus the modify
			// indexes of the allocations on the specific node. This is
			// preferred in the case that the node doesn't yet have allocations
			// or when we detect a GC that affects the node.
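			// For example, when an allocation is garbage collected the node's
			// alloc set shrinks without raising any remaining alloc's
			// ModifyIndex, so only the table index reflects the change.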
			preferTableIndex := true

			// Setup the output
			if numAllocs := len(allocs); numAllocs != 0 {
				preferTableIndex = false

				for _, alloc := range allocs {
					reply.Allocs[alloc.ID] = alloc.AllocModifyIndex

					// If the allocation is going to do a migration, create a
					// migration token so that the client can authenticate with
					// the node hosting the previous allocation.
					if alloc.ShouldMigrate() {
						prevAllocation, err := state.AllocByID(ws, alloc.PreviousAllocation)
						if err != nil {
							return err
						}

						if prevAllocation != nil && prevAllocation.NodeID != alloc.NodeID {
							allocNode, err := state.NodeByID(ws, prevAllocation.NodeID)
							if err != nil {
								return err
							}
							if allocNode == nil {
								// Node must have been GC'd so skip the token
								continue
							}

							token, err := GenerateMigrateToken(prevAllocation.ID, allocNode.SecretID)
							if err != nil {
								return err
							}
							reply.MigrateTokens[alloc.ID] = token
						}
					}

					reply.Index = maxUint64(reply.Index, alloc.ModifyIndex)
				}

				// Determine if we have fewer allocations than before. This
				// indicates there was a garbage collection
				if numAllocs < numOldAllocs {
					preferTableIndex = true
				}

				// Store the new number of allocations
				numOldAllocs = numAllocs
			}

			if preferTableIndex {
				// Use the last index that affected the allocs table
				index, err := state.Index("allocs")
				if err != nil {
					return err
				}

				// Must provide non-zero index to prevent blocking
				// Index 1 is impossible anyway (due to Raft internals)
				if index == 0 {
					reply.Index = 1
				} else {
					reply.Index = index
				}
			}
			return nil
		}}
	return n.srv.blockingRPC(&opts)
}

// UpdateAlloc is used to update the client status of an allocation
func (n *Node) UpdateAlloc(args *structs.AllocUpdateRequest, reply *structs.GenericResponse) error {
	if done, err := n.srv.forward("Node.UpdateAlloc", args, args, reply); done {
		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "update_alloc"}, time.Now())

	// Ensure at least a single alloc
	if len(args.Alloc) == 0 {
		return fmt.Errorf("must update at least one allocation")
	}

	// Update modified timestamp for client initiated allocation updates
	now := time.Now().UTC().UnixNano()
	for _, alloc := range args.Alloc {
		alloc.ModifyTime = now
	}

	// Add this to the batch
	n.updatesLock.Lock()
	n.updates = append(n.updates, args.Alloc...)
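
	// Every update that arrives within the same batchUpdateInterval window
	// shares the pending batch's future, so all callers observe the same
	// Raft index and commit error.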

	// Start a new batch if none
	future := n.updateFuture
	if future == nil {
		future = NewBatchFuture()
		n.updateFuture = future
		n.updateTimer = time.AfterFunc(batchUpdateInterval, func() {
			// Get the pending updates
			n.updatesLock.Lock()
			updates := n.updates
			future := n.updateFuture
			n.updates = nil
			n.updateFuture = nil
			n.updateTimer = nil
			n.updatesLock.Unlock()

			// Perform the batch update
			n.batchUpdate(future, updates)
		})
	}
	n.updatesLock.Unlock()

	// Wait for the future
	if err := future.Wait(); err != nil {
		return err
	}

	// Setup the response
	reply.Index = future.Index()
	return nil
}

// batchUpdate is used to update all the allocations
func (n *Node) batchUpdate(future *batchFuture, updates []*structs.Allocation) {
	// Prepare the batch update
	batch := &structs.AllocUpdateRequest{
		Alloc:        updates,
		WriteRequest: structs.WriteRequest{Region: n.srv.config.Region},
	}

	// Commit this update via Raft
	var mErr multierror.Error
	_, index, err := n.srv.raftApply(structs.AllocClientUpdateRequestType, batch)
	if err != nil {
		n.srv.logger.Printf("[ERR] nomad.client: alloc update failed: %v", err)
		mErr.Errors = append(mErr.Errors, err)
	}

	// For each allocation we are updating check if we should revoke any
	// Vault Accessors
	var revoke []*structs.VaultAccessor
	for _, alloc := range updates {
		// Skip any allocation that isn't dead on the client
		if !alloc.Terminated() {
			continue
		}

		// Determine if there are any Vault accessors for the allocation
		ws := memdb.NewWatchSet()
		accessors, err := n.srv.State().VaultAccessorsByAlloc(ws, alloc.ID)
		if err != nil {
			n.srv.logger.Printf("[ERR] nomad.client: looking up accessors for alloc %q failed: %v", alloc.ID, err)
			mErr.Errors = append(mErr.Errors, err)
		}

		revoke = append(revoke, accessors...)
	}

	if l := len(revoke); l != 0 {
		n.srv.logger.Printf("[DEBUG] nomad.client: revoking %d accessors due to terminal allocations", l)
		if err := n.srv.vault.RevokeTokens(context.Background(), revoke, true); err != nil {
			n.srv.logger.Printf("[ERR] nomad.client: batched accessor revocation failed: %v", err)
			mErr.Errors = append(mErr.Errors, err)
		}
	}

	// Respond to the future
	future.Respond(index, mErr.ErrorOrNil())
}

// List is used to list the available nodes
func (n *Node) List(args *structs.NodeListRequest,
	reply *structs.NodeListResponse) error {
	if done, err := n.srv.forward("Node.List", args, args, reply); done {
		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "list"}, time.Now())

	// Check node read permissions
	if aclObj, err := n.srv.ResolveToken(args.AuthToken); err != nil {
		return err
	} else if aclObj != nil && !aclObj.AllowNodeRead() {
		return structs.ErrPermissionDenied
	}

	// Setup the blocking query
	opts := blockingOptions{
		queryOpts: &args.QueryOptions,
		queryMeta: &reply.QueryMeta,
		run: func(ws memdb.WatchSet, state *state.StateStore) error {
			// Capture all the nodes
			var err error
			var iter memdb.ResultIterator
			if prefix := args.QueryOptions.Prefix; prefix != "" {
				iter, err = state.NodesByIDPrefix(ws, prefix)
			} else {
				iter, err = state.Nodes(ws)
			}
			if err != nil {
				return err
			}

			var nodes []*structs.NodeListStub
			for {
				raw := iter.Next()
				if raw == nil {
					break
				}
				node := raw.(*structs.Node)
				nodes = append(nodes, node.Stub())
			}
			reply.Nodes = nodes

			// Use the last index that affected the nodes table
			index, err := state.Index("nodes")
			if err != nil {
				return err
			}
			reply.Index = index

			// Set the query response
			n.srv.setQueryMeta(&reply.QueryMeta)
			return nil
		}}
	return n.srv.blockingRPC(&opts)
}

// createNodeEvals is used to create evaluations for each alloc on a node.
// Each Eval is scoped to a job, so we need to potentially trigger many evals.
func (n *Node) createNodeEvals(nodeID string, nodeIndex uint64) ([]string, uint64, error) {
	// Snapshot the state
	snap, err := n.srv.fsm.State().Snapshot()
	if err != nil {
		return nil, 0, fmt.Errorf("failed to snapshot state: %v", err)
	}

	// Find all the allocations for this node
	ws := memdb.NewWatchSet()
	allocs, err := snap.AllocsByNode(ws, nodeID)
	if err != nil {
		return nil, 0, fmt.Errorf("failed to find allocs for '%s': %v", nodeID, err)
	}

	sysJobsIter, err := snap.JobsByScheduler(ws, "system")
	if err != nil {
		return nil, 0, fmt.Errorf("failed to find system jobs for '%s': %v", nodeID, err)
	}

	var sysJobs []*structs.Job
	for job := sysJobsIter.Next(); job != nil; job = sysJobsIter.Next() {
		sysJobs = append(sysJobs, job.(*structs.Job))
	}

	// Fast-path if nothing to do
	if len(allocs) == 0 && len(sysJobs) == 0 {
		return nil, 0, nil
	}

	// Create an eval for each JobID affected
	var evals []*structs.Evaluation
	var evalIDs []string
	jobIDs := make(map[string]struct{})

	for _, alloc := range allocs {
		// Deduplicate on JobID
		if _, ok := jobIDs[alloc.JobID]; ok {
			continue
		}
		jobIDs[alloc.JobID] = struct{}{}

		// Create a new eval
		eval := &structs.Evaluation{
			ID:              uuid.Generate(),
			Namespace:       alloc.Namespace,
			Priority:        alloc.Job.Priority,
			Type:            alloc.Job.Type,
			TriggeredBy:     structs.EvalTriggerNodeUpdate,
			JobID:           alloc.JobID,
			NodeID:          nodeID,
			NodeModifyIndex: nodeIndex,
			Status:          structs.EvalStatusPending,
		}
		evals = append(evals, eval)
		evalIDs = append(evalIDs, eval.ID)
	}

	// Create an evaluation for each system job.
	for _, job := range sysJobs {
		// Still dedup on JobID as the node may already have the system job.
		if _, ok := jobIDs[job.ID]; ok {
			continue
		}
		jobIDs[job.ID] = struct{}{}

		// Create a new eval
		eval := &structs.Evaluation{
			ID:              uuid.Generate(),
			Namespace:       job.Namespace,
			Priority:        job.Priority,
			Type:            job.Type,
			TriggeredBy:     structs.EvalTriggerNodeUpdate,
			JobID:           job.ID,
			NodeID:          nodeID,
			NodeModifyIndex: nodeIndex,
			Status:          structs.EvalStatusPending,
		}
		evals = append(evals, eval)
		evalIDs = append(evalIDs, eval.ID)
	}

	// Create the Raft transaction
	update := &structs.EvalUpdateRequest{
		Evals:        evals,
		WriteRequest: structs.WriteRequest{Region: n.srv.config.Region},
	}

	// Commit this evaluation via Raft
	// XXX: There is a risk of partial failure where the node update succeeds
	// but the EvalUpdate does not.
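	// If that happens, the node change is committed but no evaluations exist
	// for it, so the scheduler will not react until a later node event
	// triggers new evaluations.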
	_, evalIndex, err := n.srv.raftApply(structs.EvalUpdateRequestType, update)
	if err != nil {
		return nil, 0, err
	}
	return evalIDs, evalIndex, nil
}

// batchFuture is used to wait on a batch update to complete
type batchFuture struct {
	doneCh chan struct{}
	err    error
	index  uint64
}

// NewBatchFuture creates a new batch future
func NewBatchFuture() *batchFuture {
	return &batchFuture{
		doneCh: make(chan struct{}),
	}
}

// Wait is used to block for the future to complete and returns the error
func (b *batchFuture) Wait() error {
	<-b.doneCh
	return b.err
}

// Index is used to return the index of the batch, only valid after Wait()
func (b *batchFuture) Index() uint64 {
	return b.index
}

// Respond is used to unblock the future
func (b *batchFuture) Respond(index uint64, err error) {
	b.index = index
	b.err = err
	close(b.doneCh)
}

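// A minimal usage sketch of batchFuture (the index value is illustrative):
//
//	bf := NewBatchFuture()
//	go bf.Respond(100, nil) // unblocks every waiter with index 100
//	if err := bf.Wait(); err == nil {
//		_ = bf.Index() // 100
//	}
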
// DeriveVaultToken is used by the clients to request wrapped Vault tokens for
// tasks
func (n *Node) DeriveVaultToken(args *structs.DeriveVaultTokenRequest,
	reply *structs.DeriveVaultTokenResponse) error {

	// setErr is a helper for setting the recoverable error on the reply and
	// logging it
	setErr := func(e error, recoverable bool) {
		if e == nil {
			return
		}
		reply.Error = structs.NewRecoverableError(e, recoverable).(*structs.RecoverableError)
		n.srv.logger.Printf("[ERR] nomad.client: DeriveVaultToken failed (recoverable %v): %v", recoverable, e)
	}

	if done, err := n.srv.forward("Node.DeriveVaultToken", args, args, reply); done {
		setErr(err, structs.IsRecoverable(err) || err == structs.ErrNoLeader)
		return nil
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "derive_vault_token"}, time.Now())

	// Verify the arguments
	if args.NodeID == "" {
		setErr(fmt.Errorf("missing node ID"), false)
		return nil
	}
	if args.SecretID == "" {
		setErr(fmt.Errorf("missing node SecretID"), false)
		return nil
	}
	if args.AllocID == "" {
		setErr(fmt.Errorf("missing allocation ID"), false)
		return nil
	}
	if len(args.Tasks) == 0 {
		setErr(fmt.Errorf("no tasks specified"), false)
		return nil
	}

	// Verify the following:
	// * The Node exists and has the correct SecretID
	// * The Allocation exists on the specified node
	// * The allocation contains the given tasks and they each require Vault
	//   tokens
	snap, err := n.srv.fsm.State().Snapshot()
	if err != nil {
		setErr(err, false)
		return nil
	}
	ws := memdb.NewWatchSet()
	node, err := snap.NodeByID(ws, args.NodeID)
	if err != nil {
		setErr(err, false)
		return nil
	}
	if node == nil {
		setErr(fmt.Errorf("Node %q does not exist", args.NodeID), false)
		return nil
	}
	if node.SecretID != args.SecretID {
		setErr(fmt.Errorf("SecretID mismatch"), false)
		return nil
	}

	alloc, err := snap.AllocByID(ws, args.AllocID)
	if err != nil {
		setErr(err, false)
		return nil
	}
	if alloc == nil {
		setErr(fmt.Errorf("Allocation %q does not exist", args.AllocID), false)
		return nil
	}
	if alloc.NodeID != args.NodeID {
		setErr(fmt.Errorf("Allocation %q not running on Node %q", args.AllocID, args.NodeID), false)
		return nil
	}
	if alloc.TerminalStatus() {
		setErr(fmt.Errorf("Can't request Vault token for terminal allocation"), false)
		return nil
	}

	// Check the policies
	policies := alloc.Job.VaultPolicies()
	if policies == nil {
		setErr(fmt.Errorf("Job doesn't require Vault policies"), false)
		return nil
	}
	tg, ok := policies[alloc.TaskGroup]
	if !ok {
		setErr(fmt.Errorf("Task group does not require Vault policies"), false)
		return nil
	}

	var unneeded []string
	for _, task := range args.Tasks {
		taskVault := tg[task]
		if taskVault == nil || len(taskVault.Policies) == 0 {
			unneeded = append(unneeded, task)
		}
	}

	if len(unneeded) != 0 {
		e := fmt.Errorf("Requested Vault tokens for tasks without defined Vault policies: %s",
			strings.Join(unneeded, ", "))
		setErr(e, false)
		return nil
	}

	// At this point the request is valid and we should contact Vault for
	// tokens.

	// Create an error group where we will spin up a fixed set of goroutines to
	// handle deriving tokens but where if any fails the whole group is
	// canceled.
	g, ctx := errgroup.WithContext(context.Background())

	// Cap the handlers
	handlers := len(args.Tasks)
	if handlers > maxParallelRequestsPerDerive {
		handlers = maxParallelRequestsPerDerive
	}

	// Create the Vault Tokens. The results map is written by multiple handler
	// goroutines, so guard it with a mutex.
	input := make(chan string, handlers)
	results := make(map[string]*vapi.Secret, len(args.Tasks))
	var resultsLock sync.Mutex
	for i := 0; i < handlers; i++ {
		g.Go(func() error {
			for {
				select {
				case task, ok := <-input:
					if !ok {
						return nil
					}

					secret, err := n.srv.vault.CreateToken(ctx, alloc, task)
					if err != nil {
						wrapped := fmt.Sprintf("failed to create token for task %q on alloc %q: %v", task, alloc.ID, err)
						return structs.WrapRecoverable(wrapped, err)
					}

					resultsLock.Lock()
					results[task] = secret
					resultsLock.Unlock()
				case <-ctx.Done():
					return nil
				}
			}
		})
	}

	// Send the input
	go func() {
		defer close(input)
		for _, task := range args.Tasks {
			select {
			case <-ctx.Done():
				return
			case input <- task:
			}
		}
	}()

	// Wait for everything to complete or for an error
	createErr := g.Wait()

	// Retrieve the results
	accessors := make([]*structs.VaultAccessor, 0, len(results))
	tokens := make(map[string]string, len(results))
	for task, secret := range results {
		w := secret.WrapInfo
		if w == nil {
			return fmt.Errorf("Vault returned Secret without WrapInfo")
		}

		tokens[task] = w.Token
		accessor := &structs.VaultAccessor{
			Accessor:    w.WrappedAccessor,
			Task:        task,
			NodeID:      alloc.NodeID,
			AllocID:     alloc.ID,
			CreationTTL: w.TTL,
		}

		accessors = append(accessors, accessor)
	}

	// If there was an error revoke the created tokens
	if createErr != nil {
		n.srv.logger.Printf("[ERR] nomad.node: Vault token creation for alloc %q failed: %v", alloc.ID, createErr)

		if revokeErr := n.srv.vault.RevokeTokens(context.Background(), accessors, false); revokeErr != nil {
			n.srv.logger.Printf("[ERR] nomad.node: Vault token revocation for alloc %q failed: %v", alloc.ID, revokeErr)
		}

		if rerr, ok := createErr.(*structs.RecoverableError); ok {
			reply.Error = rerr
		} else {
			reply.Error = structs.NewRecoverableError(createErr, false).(*structs.RecoverableError)
		}

		return nil
	}

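	// A failure after this point must not leak unrecorded tokens: the
	// accessors are persisted via Raft first so the servers can always
	// revoke what was created.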
	// Commit to Raft before returning any of the tokens
	req := structs.VaultAccessorsRequest{Accessors: accessors}
	_, index, err := n.srv.raftApply(structs.VaultAccessorRegisterRequestType, &req)
	if err != nil {
		n.srv.logger.Printf("[ERR] nomad.client: Register Vault accessors for alloc %q failed: %v", alloc.ID, err)

		// Determine if we can recover from the error
		retry := false
		switch err {
		case raft.ErrNotLeader, raft.ErrLeadershipLost, raft.ErrRaftShutdown, raft.ErrEnqueueTimeout:
			retry = true
		}

		setErr(err, retry)
		return nil
	}

	reply.Index = index
	reply.Tasks = tokens
	n.srv.setQueryMeta(&reply.QueryMeta)
	return nil
}