github.com/anuvu/nomad@v0.8.7-atom1/api/nodes.go

package api

import (
	"context"
	"fmt"
	"sort"
	"time"

	"github.com/hashicorp/nomad/nomad/structs"
)

// Nodes is used to query node-related API endpoints
type Nodes struct {
	client *Client
}

// Nodes returns a handle on the node endpoints.
func (c *Client) Nodes() *Nodes {
	return &Nodes{client: c}
}

// List is used to list out all of the nodes
func (n *Nodes) List(q *QueryOptions) ([]*NodeListStub, *QueryMeta, error) {
	var resp NodeIndexSort
	qm, err := n.client.query("/v1/nodes", &resp, q)
	if err != nil {
		return nil, nil, err
	}
	sort.Sort(resp)
	return resp, qm, nil
}

// PrefixList lists all nodes whose IDs start with the given prefix.
func (n *Nodes) PrefixList(prefix string) ([]*NodeListStub, *QueryMeta, error) {
	return n.List(&QueryOptions{Prefix: prefix})
}

// Info is used to query a specific node by its ID.
func (n *Nodes) Info(nodeID string, q *QueryOptions) (*Node, *QueryMeta, error) {
	var resp Node
	qm, err := n.client.query("/v1/node/"+nodeID, &resp, q)
	if err != nil {
		return nil, nil, err
	}
	return &resp, qm, nil
}

// NodeUpdateDrainRequest is used to update the drain specification for a node.
type NodeUpdateDrainRequest struct {
	// NodeID is the node to update the drain specification for.
	NodeID string

	// DrainSpec is the drain specification to set for the node. A nil DrainSpec
	// will disable draining.
	DrainSpec *DrainSpec

	// MarkEligible marks the node as eligible for scheduling if removing
	// the drain strategy.
	MarkEligible bool
}

// NodeDrainUpdateResponse is used to respond to a node drain update
type NodeDrainUpdateResponse struct {
	NodeModifyIndex uint64
	EvalIDs         []string
	EvalCreateIndex uint64
	WriteMeta
}

// UpdateDrain is used to update the drain strategy for a given node. If
// markEligible is true and the drain is being removed, the node will be
// marked as eligible for scheduling.
func (n *Nodes) UpdateDrain(nodeID string, spec *DrainSpec, markEligible bool, q *WriteOptions) (*NodeDrainUpdateResponse, error) {
	req := &NodeUpdateDrainRequest{
		NodeID:       nodeID,
		DrainSpec:    spec,
		MarkEligible: markEligible,
	}

	var resp NodeDrainUpdateResponse
	wm, err := n.client.write("/v1/node/"+nodeID+"/drain", req, &resp, q)
	if err != nil {
		return nil, err
	}
	resp.WriteMeta = *wm
	return &resp, nil
}

// MonitorMsgLevel represents the severity log level of a MonitorMessage.
type MonitorMsgLevel int

const (
	MonitorMsgLevelNormal MonitorMsgLevel = 0
	MonitorMsgLevelInfo   MonitorMsgLevel = 1
	MonitorMsgLevelWarn   MonitorMsgLevel = 2
	MonitorMsgLevelError  MonitorMsgLevel = 3
)

// MonitorMessage contains a message and log level.
type MonitorMessage struct {
	Level   MonitorMsgLevel
	Message string
}

// Messagef formats a new MonitorMessage.
func Messagef(lvl MonitorMsgLevel, msg string, args ...interface{}) *MonitorMessage {
	return &MonitorMessage{
		Level:   lvl,
		Message: fmt.Sprintf(msg, args...),
	}
}

func (m *MonitorMessage) String() string {
	return m.Message
}
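// exampleUpdateDrain is an illustrative sketch added by the editor, not part
// of the original file: it shows one plausible way to call UpdateDrain,
// draining a node with a ten minute deadline. The function name, node ID
// parameter, and error handling are hypothetical.
func exampleUpdateDrain(c *Client, nodeID string) error {
	spec := &DrainSpec{
		Deadline:         10 * time.Minute, // force-stop remaining allocs after this
		IgnoreSystemJobs: false,            // drain system jobs too
	}
	resp, err := c.Nodes().UpdateDrain(nodeID, spec, false, nil)
	if err != nil {
		return err
	}
	// The server creates evaluations to migrate the node's allocations.
	fmt.Printf("drain updated at index %d, evals: %v\n", resp.NodeModifyIndex, resp.EvalIDs)
	return nil
}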
// MonitorDrain emits drain-related events on the returned message channel.
// The channel will be closed when all allocations on the draining node have
// stopped or the context is canceled.
func (n *Nodes) MonitorDrain(ctx context.Context, nodeID string, index uint64, ignoreSys bool) <-chan *MonitorMessage {
	outCh := make(chan *MonitorMessage, 8)
	nodeCh := make(chan *MonitorMessage, 1)
	allocCh := make(chan *MonitorMessage, 8)

	// Multiplex node and alloc chans onto outCh. This goroutine closes
	// outCh when the other chans have been closed or the context canceled.
	multiplexCtx, cancel := context.WithCancel(ctx)
	go n.monitorDrainMultiplex(multiplexCtx, cancel, outCh, nodeCh, allocCh)

	// Monitor the node for updates
	go n.monitorDrainNode(multiplexCtx, cancel, nodeID, index, nodeCh)

	// Monitor allocs on the node for updates
	go n.monitorDrainAllocs(multiplexCtx, nodeID, ignoreSys, allocCh)

	return outCh
}

// monitorDrainMultiplex multiplexes node and alloc updates onto the out chan.
// Closes the out chan when either the context is canceled, both update chans
// are closed, or an error occurs.
func (n *Nodes) monitorDrainMultiplex(ctx context.Context, cancel func(),
	outCh chan<- *MonitorMessage, nodeCh, allocCh <-chan *MonitorMessage) {

	defer cancel()
	defer close(outCh)

	nodeOk := true
	allocOk := true
	var msg *MonitorMessage
	for {
		// If both chans have been closed, close the output chan
		if !nodeOk && !allocOk {
			return
		}

		select {
		case msg, nodeOk = <-nodeCh:
			if !nodeOk {
				// nil the chan to prevent further receives
				nodeCh = nil
			}

		case msg, allocOk = <-allocCh:
			if !allocOk {
				// nil the chan to prevent further receives
				allocCh = nil
			}

		case <-ctx.Done():
			return
		}

		if msg == nil {
			continue
		}

		select {
		case outCh <- msg:
		case <-ctx.Done():

			// If we are exiting but we have a message, attempt to send it
			// so we don't lose a message, but do not block.
			select {
			case outCh <- msg:
			default:
			}

			return
		}

		// Abort on error messages
		if msg.Level == MonitorMsgLevelError {
			return
		}
	}
}
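// exampleMonitorDrain is an illustrative sketch added by the editor, not
// part of the original file: it consumes messages from MonitorDrain until
// the channel closes, which happens once all allocations have stopped, an
// error-level message was emitted, or ctx is canceled.
func exampleMonitorDrain(ctx context.Context, c *Client, nodeID string, index uint64) error {
	for msg := range c.Nodes().MonitorDrain(ctx, nodeID, index, false) {
		fmt.Println(msg)
		if msg.Level == MonitorMsgLevelError {
			// The multiplexer stops after forwarding an error message.
			return fmt.Errorf("drain monitoring failed: %s", msg)
		}
	}
	return nil
}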
// monitorDrainNode emits node updates on nodeCh and closes the channel when
// the node has finished draining.
func (n *Nodes) monitorDrainNode(ctx context.Context, cancel func(),
	nodeID string, index uint64, nodeCh chan<- *MonitorMessage) {
	defer close(nodeCh)

	var lastStrategy *DrainStrategy
	var strategyChanged bool
	q := QueryOptions{
		AllowStale: true,
		WaitIndex:  index,
	}
	for {
		node, meta, err := n.Info(nodeID, &q)
		if err != nil {
			msg := Messagef(MonitorMsgLevelError, "Error monitoring node: %v", err)
			select {
			case nodeCh <- msg:
			case <-ctx.Done():
			}
			return
		}

		if node.DrainStrategy == nil {
			var msg *MonitorMessage
			if strategyChanged {
				msg = Messagef(MonitorMsgLevelInfo, "Node %q has marked all allocations for migration", nodeID)
			} else {
				msg = Messagef(MonitorMsgLevelInfo, "No drain strategy set for node %s", nodeID)
				defer cancel()
			}
			select {
			case nodeCh <- msg:
			case <-ctx.Done():
			}
			return
		}

		if node.Status == structs.NodeStatusDown {
			msg := Messagef(MonitorMsgLevelWarn, "Node %q down", nodeID)
			select {
			case nodeCh <- msg:
			case <-ctx.Done():
			}
		}

		// DrainStrategy changed
		if lastStrategy != nil && !node.DrainStrategy.Equal(lastStrategy) {
			msg := Messagef(MonitorMsgLevelInfo, "Node %q drain updated: %s", nodeID, node.DrainStrategy)
			select {
			case nodeCh <- msg:
			case <-ctx.Done():
				return
			}
		}

		lastStrategy = node.DrainStrategy
		strategyChanged = true

		// Drain still ongoing; update the wait index and block for updates
		q.WaitIndex = meta.LastIndex
	}
}
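// exampleBlockingQuery is an illustrative sketch added by the editor, not
// part of the original file: it shows the blocking-query pattern that
// monitorDrainNode (and monitorDrainAllocs below) rely on. Setting WaitIndex
// to the last-seen index makes each Info call block server-side until the
// node changes, so the loop wakes on updates instead of busy-polling. The
// stop channel is a hypothetical way to end the loop.
func exampleBlockingQuery(c *Client, nodeID string, stop <-chan struct{}) error {
	q := QueryOptions{AllowStale: true}
	for {
		select {
		case <-stop:
			return nil
		default:
		}
		node, meta, err := c.Nodes().Info(nodeID, &q)
		if err != nil {
			return err
		}
		fmt.Printf("node %s status: %s eligibility: %s\n",
			node.ID, node.Status, node.SchedulingEligibility)
		// Block the next query until node state moves past this index.
		q.WaitIndex = meta.LastIndex
	}
}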
// monitorDrainAllocs emits alloc updates on allocCh and closes the channel
// when the node has finished draining.
func (n *Nodes) monitorDrainAllocs(ctx context.Context, nodeID string, ignoreSys bool, allocCh chan<- *MonitorMessage) {
	defer close(allocCh)

	q := QueryOptions{AllowStale: true}
	initial := make(map[string]*Allocation, 4)

	for {
		allocs, meta, err := n.Allocations(nodeID, &q)
		if err != nil {
			msg := Messagef(MonitorMsgLevelError, "Error monitoring allocations: %v", err)
			select {
			case allocCh <- msg:
			case <-ctx.Done():
			}
			return
		}

		q.WaitIndex = meta.LastIndex

		runningAllocs := 0
		for _, a := range allocs {
			// Get the previous version of the alloc
			orig, existing := initial[a.ID]

			// Update local alloc state
			initial[a.ID] = a

			migrating := a.DesiredTransition.ShouldMigrate()

			var msg string
			switch {
			case !existing:
				// Should only be possible if the response from the
				// initial Allocations call was stale. No need to output.

			case orig.ClientStatus != a.ClientStatus:
				// Alloc status has changed; output
				msg = fmt.Sprintf("status %s -> %s", orig.ClientStatus, a.ClientStatus)

			case migrating && !orig.DesiredTransition.ShouldMigrate():
				// Alloc was marked for migration
				msg = "marked for migration"

			case migrating && (orig.DesiredStatus != a.DesiredStatus) && a.DesiredStatus == structs.AllocDesiredStatusStop:
				// Alloc has already been marked for migration and is now being stopped
				msg = "draining"
			}

			if msg != "" {
				select {
				case allocCh <- Messagef(MonitorMsgLevelNormal, "Alloc %q %s", a.ID, msg):
				case <-ctx.Done():
					return
				}
			}

			// Ignore malformed allocs
			if a.Job == nil || a.Job.Type == nil {
				continue
			}

			// Skip system jobs if they are being ignored (Job.Type is
			// guaranteed non-nil by the check above)
			if ignoreSys && *a.Job.Type == structs.JobTypeSystem {
				continue
			}

			// Track how many allocs are still running
			switch a.ClientStatus {
			case structs.AllocClientStatusPending, structs.AllocClientStatusRunning:
				runningAllocs++
			}
		}

		// Exit if all allocs are terminal
		if runningAllocs == 0 {
			msg := Messagef(MonitorMsgLevelInfo, "All allocations on node %q have stopped.", nodeID)
			select {
			case allocCh <- msg:
			case <-ctx.Done():
			}
			return
		}
	}
}

// NodeUpdateEligibilityRequest is used to update the scheduling eligibility
// of a node.
type NodeUpdateEligibilityRequest struct {
	// NodeID is the node to update the eligibility of.
	NodeID      string
	Eligibility string
}

// NodeEligibilityUpdateResponse is used to respond to a node eligibility update
type NodeEligibilityUpdateResponse struct {
	NodeModifyIndex uint64
	EvalIDs         []string
	EvalCreateIndex uint64
	WriteMeta
}

// ToggleEligibility is used to update the scheduling eligibility of the node
func (n *Nodes) ToggleEligibility(nodeID string, eligible bool, q *WriteOptions) (*NodeEligibilityUpdateResponse, error) {
	e := structs.NodeSchedulingEligible
	if !eligible {
		e = structs.NodeSchedulingIneligible
	}

	req := &NodeUpdateEligibilityRequest{
		NodeID:      nodeID,
		Eligibility: e,
	}

	var resp NodeEligibilityUpdateResponse
	wm, err := n.client.write("/v1/node/"+nodeID+"/eligibility", req, &resp, q)
	if err != nil {
		return nil, err
	}
	resp.WriteMeta = *wm
	return &resp, nil
}

// Allocations is used to return the allocations associated with a node.
func (n *Nodes) Allocations(nodeID string, q *QueryOptions) ([]*Allocation, *QueryMeta, error) {
	var resp []*Allocation
	qm, err := n.client.query("/v1/node/"+nodeID+"/allocations", &resp, q)
	if err != nil {
		return nil, nil, err
	}
	sort.Sort(AllocationSort(resp))
	return resp, qm, nil
}
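// exampleToggleEligibility is an illustrative sketch added by the editor,
// not part of the original file: it marks a node ineligible for new
// placements without draining it, then reports the evaluations the server
// created. The function name and error handling are hypothetical.
func exampleToggleEligibility(c *Client, nodeID string) error {
	resp, err := c.Nodes().ToggleEligibility(nodeID, false, nil)
	if err != nil {
		return err
	}
	fmt.Printf("eligibility updated at index %d, evals: %v\n",
		resp.NodeModifyIndex, resp.EvalIDs)
	return nil
}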
// ForceEvaluate is used to force-evaluate an existing node.
func (n *Nodes) ForceEvaluate(nodeID string, q *WriteOptions) (string, *WriteMeta, error) {
	var resp nodeEvalResponse
	wm, err := n.client.write("/v1/node/"+nodeID+"/evaluate", nil, &resp, q)
	if err != nil {
		return "", nil, err
	}
	return resp.EvalID, wm, nil
}

// Stats returns resource usage statistics for the host running the given
// node's Nomad client.
func (n *Nodes) Stats(nodeID string, q *QueryOptions) (*HostStats, error) {
	var resp HostStats
	path := fmt.Sprintf("/v1/client/stats?node_id=%s", nodeID)
	if _, err := n.client.query(path, &resp, q); err != nil {
		return nil, err
	}
	return &resp, nil
}

// GC triggers garbage collection on the given node's client.
func (n *Nodes) GC(nodeID string, q *QueryOptions) error {
	var resp struct{}
	path := fmt.Sprintf("/v1/client/gc?node_id=%s", nodeID)
	_, err := n.client.query(path, &resp, q)
	return err
}

// GcAlloc triggers garbage collection of a single allocation.
// TODO Add tests
func (n *Nodes) GcAlloc(allocID string, q *QueryOptions) error {
	var resp struct{}
	path := fmt.Sprintf("/v1/client/allocation/%s/gc", allocID)
	_, err := n.client.query(path, &resp, q)
	return err
}

// DriverInfo is used to deserialize a DriverInfo entry
type DriverInfo struct {
	Attributes        map[string]string
	Detected          bool
	Healthy           bool
	HealthDescription string
	UpdateTime        time.Time
}

// Node is used to deserialize a node entry.
type Node struct {
	ID                    string
	Datacenter            string
	Name                  string
	HTTPAddr              string
	TLSEnabled            bool
	Attributes            map[string]string
	Resources             *Resources
	Reserved              *Resources
	Links                 map[string]string
	Meta                  map[string]string
	NodeClass             string
	Drain                 bool
	DrainStrategy         *DrainStrategy
	SchedulingEligibility string
	Status                string
	StatusDescription     string
	StatusUpdatedAt       int64
	Events                []*NodeEvent
	Drivers               map[string]*DriverInfo
	CreateIndex           uint64
	ModifyIndex           uint64
}

// DrainStrategy describes a Node's drain behavior.
type DrainStrategy struct {
	// DrainSpec is the user declared drain specification
	DrainSpec

	// ForceDeadline is the deadline time for the drain after which drains
	// will be forced
	ForceDeadline time.Time
}

// DrainSpec describes a Node's drain behavior.
type DrainSpec struct {
	// Deadline is the duration after StartTime when the remaining
	// allocations on a draining Node should be told to stop.
	Deadline time.Duration

	// IgnoreSystemJobs allows system jobs to remain on the node even though
	// it has been marked for draining.
	IgnoreSystemJobs bool
}

// Equal reports whether two drain strategies have the same deadlines and
// system-job handling. Nil strategies are only equal to other nil strategies.
func (d *DrainStrategy) Equal(o *DrainStrategy) bool {
	if d == nil || o == nil {
		return d == o
	}

	if d.ForceDeadline != o.ForceDeadline {
		return false
	}
	if d.Deadline != o.Deadline {
		return false
	}
	if d.IgnoreSystemJobs != o.IgnoreSystemJobs {
		return false
	}

	return true
}
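// exampleNodeStats is an illustrative sketch added by the editor, not part
// of the original file: it fetches host-level resource usage for a node via
// the client stats endpoint and prints a few fields.
func exampleNodeStats(c *Client, nodeID string) error {
	stats, err := c.Nodes().Stats(nodeID, nil)
	if err != nil {
		return err
	}
	fmt.Printf("uptime: %d, cpu ticks consumed: %.2f\n", stats.Uptime, stats.CPUTicksConsumed)
	if stats.Memory != nil {
		fmt.Printf("memory used/total: %d/%d\n", stats.Memory.Used, stats.Memory.Total)
	}
	return nil
}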
// String returns a human readable version of the drain strategy.
func (d *DrainStrategy) String() string {
	if d.IgnoreSystemJobs {
		return fmt.Sprintf("drain ignoring system jobs and deadline at %s", d.ForceDeadline)
	}
	return fmt.Sprintf("drain with deadline at %s", d.ForceDeadline)
}

const (
	NodeEventSubsystemDrain     = "Drain"
	NodeEventSubsystemDriver    = "Driver"
	NodeEventSubsystemHeartbeat = "Heartbeat"
	NodeEventSubsystemCluster   = "Cluster"
)

// NodeEvent is a single unit representing a node's state change
type NodeEvent struct {
	Message     string
	Subsystem   string
	Details     map[string]string
	Timestamp   time.Time
	CreateIndex uint64
}

// HostStats represents resource usage stats of the host running a Nomad client
type HostStats struct {
	Memory           *HostMemoryStats
	CPU              []*HostCPUStats
	DiskStats        []*HostDiskStats
	Uptime           uint64
	CPUTicksConsumed float64
}

// HostMemoryStats holds memory usage stats for the host.
type HostMemoryStats struct {
	Total     uint64
	Available uint64
	Used      uint64
	Free      uint64
}

// HostCPUStats holds usage stats for a single CPU on the host.
type HostCPUStats struct {
	CPU    string
	User   float64
	System float64
	Idle   float64
}

// HostDiskStats holds usage stats for a single device on the host.
type HostDiskStats struct {
	Device            string
	Mountpoint        string
	Size              uint64
	Used              uint64
	Available         uint64
	UsedPercent       float64
	InodesUsedPercent float64
}

// NodeListStub is a subset of information returned during
// node list operations.
type NodeListStub struct {
	Address               string
	ID                    string
	Datacenter            string
	Name                  string
	NodeClass             string
	Version               string
	Drain                 bool
	SchedulingEligibility string
	Status                string
	StatusDescription     string
	Drivers               map[string]*DriverInfo
	CreateIndex           uint64
	ModifyIndex           uint64
}

// NodeIndexSort reverse sorts nodes by CreateIndex
type NodeIndexSort []*NodeListStub

func (n NodeIndexSort) Len() int {
	return len(n)
}

func (n NodeIndexSort) Less(i, j int) bool {
	return n[i].CreateIndex > n[j].CreateIndex
}

func (n NodeIndexSort) Swap(i, j int) {
	n[i], n[j] = n[j], n[i]
}

// nodeEvalResponse is used to decode a force-eval.
type nodeEvalResponse struct {
	EvalID string
}

// AllocationSort reverse sorts allocs by CreateIndex.
type AllocationSort []*Allocation

func (a AllocationSort) Len() int {
	return len(a)
}

func (a AllocationSort) Less(i, j int) bool {
	return a[i].CreateIndex > a[j].CreateIndex
}

func (a AllocationSort) Swap(i, j int) {
	a[i], a[j] = a[j], a[i]
}
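// exampleListNodes is an illustrative sketch added by the editor, not part
// of the original file: List returns stubs already reverse-sorted by
// CreateIndex (via NodeIndexSort above), so the most recently registered
// node comes first.
func exampleListNodes(c *Client) error {
	stubs, _, err := c.Nodes().List(nil)
	if err != nil {
		return err
	}
	for _, s := range stubs {
		fmt.Printf("%s  %s  %s\n", s.ID, s.Name, s.Status)
	}
	return nil
}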