github.com/anuvu/nomad@v0.8.7-atom1/api/nodes.go (about)

     1  package api
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"sort"
     7  	"time"
     8  
     9  	"github.com/hashicorp/nomad/nomad/structs"
    10  )
    11  
    12  // Nodes is used to query node-related API endpoints
    13  type Nodes struct {
    14  	client *Client
    15  }
    16  
    17  // Nodes returns a handle on the node endpoints.
    18  func (c *Client) Nodes() *Nodes {
    19  	return &Nodes{client: c}
    20  }
    21  
    22  // List is used to list out all of the nodes
    23  func (n *Nodes) List(q *QueryOptions) ([]*NodeListStub, *QueryMeta, error) {
    24  	var resp NodeIndexSort
    25  	qm, err := n.client.query("/v1/nodes", &resp, q)
    26  	if err != nil {
    27  		return nil, nil, err
    28  	}
    29  	sort.Sort(resp)
    30  	return resp, qm, nil
    31  }
    32  
    33  func (n *Nodes) PrefixList(prefix string) ([]*NodeListStub, *QueryMeta, error) {
    34  	return n.List(&QueryOptions{Prefix: prefix})
    35  }
    36  
    37  // Info is used to query a specific node by its ID.
    38  func (n *Nodes) Info(nodeID string, q *QueryOptions) (*Node, *QueryMeta, error) {
    39  	var resp Node
    40  	qm, err := n.client.query("/v1/node/"+nodeID, &resp, q)
    41  	if err != nil {
    42  		return nil, nil, err
    43  	}
    44  	return &resp, qm, nil
    45  }
    46  
    47  // NodeUpdateDrainRequest is used to update the drain specification for a node.
    48  type NodeUpdateDrainRequest struct {
    49  	// NodeID is the node to update the drain specification for.
    50  	NodeID string
    51  
    52  	// DrainSpec is the drain specification to set for the node. A nil DrainSpec
    53  	// will disable draining.
    54  	DrainSpec *DrainSpec
    55  
    56  	// MarkEligible marks the node as eligible for scheduling if removing
    57  	// the drain strategy.
    58  	MarkEligible bool
    59  }
    60  
    61  // NodeDrainUpdateResponse is used to respond to a node drain update
    62  type NodeDrainUpdateResponse struct {
    63  	NodeModifyIndex uint64
    64  	EvalIDs         []string
    65  	EvalCreateIndex uint64
    66  	WriteMeta
    67  }
    68  
    69  // UpdateDrain is used to update the drain strategy for a given node. If
    70  // markEligible is true and the drain is being removed, the node will be marked
    71  // as having its scheduling being eligible
    72  func (n *Nodes) UpdateDrain(nodeID string, spec *DrainSpec, markEligible bool, q *WriteOptions) (*NodeDrainUpdateResponse, error) {
    73  	req := &NodeUpdateDrainRequest{
    74  		NodeID:       nodeID,
    75  		DrainSpec:    spec,
    76  		MarkEligible: markEligible,
    77  	}
    78  
    79  	var resp NodeDrainUpdateResponse
    80  	wm, err := n.client.write("/v1/node/"+nodeID+"/drain", req, &resp, q)
    81  	if err != nil {
    82  		return nil, err
    83  	}
    84  	resp.WriteMeta = *wm
    85  	return &resp, nil
    86  }
    87  
    88  // MonitorMsgLevels represents the severity log level of a MonitorMessage.
    89  type MonitorMsgLevel int
    90  
    91  const (
    92  	MonitorMsgLevelNormal MonitorMsgLevel = 0
    93  	MonitorMsgLevelInfo   MonitorMsgLevel = 1
    94  	MonitorMsgLevelWarn   MonitorMsgLevel = 2
    95  	MonitorMsgLevelError  MonitorMsgLevel = 3
    96  )
    97  
    98  // MonitorMessage contains a message and log level.
    99  type MonitorMessage struct {
   100  	Level   MonitorMsgLevel
   101  	Message string
   102  }
   103  
   104  // Messagef formats a new MonitorMessage.
   105  func Messagef(lvl MonitorMsgLevel, msg string, args ...interface{}) *MonitorMessage {
   106  	return &MonitorMessage{
   107  		Level:   lvl,
   108  		Message: fmt.Sprintf(msg, args...),
   109  	}
   110  }
   111  
   112  func (m *MonitorMessage) String() string {
   113  	return m.Message
   114  }
   115  
   116  // MonitorDrain emits drain related events on the returned string channel. The
   117  // channel will be closed when all allocations on the draining node have
   118  // stopped or the context is canceled.
   119  func (n *Nodes) MonitorDrain(ctx context.Context, nodeID string, index uint64, ignoreSys bool) <-chan *MonitorMessage {
   120  	outCh := make(chan *MonitorMessage, 8)
   121  	nodeCh := make(chan *MonitorMessage, 1)
   122  	allocCh := make(chan *MonitorMessage, 8)
   123  
   124  	// Multiplex node and alloc chans onto outCh. This goroutine closes
   125  	// outCh when other chans have been closed or context canceled.
   126  	multiplexCtx, cancel := context.WithCancel(ctx)
   127  	go n.monitorDrainMultiplex(multiplexCtx, cancel, outCh, nodeCh, allocCh)
   128  
   129  	// Monitor node for updates
   130  	go n.monitorDrainNode(multiplexCtx, cancel, nodeID, index, nodeCh)
   131  
   132  	// Monitor allocs on node for updates
   133  	go n.monitorDrainAllocs(multiplexCtx, nodeID, ignoreSys, allocCh)
   134  
   135  	return outCh
   136  }
   137  
   138  // monitorDrainMultiplex multiplexes node and alloc updates onto the out chan.
   139  // Closes out chan when either the context is canceled, both update chans are
   140  // closed, or an error occurs.
   141  func (n *Nodes) monitorDrainMultiplex(ctx context.Context, cancel func(),
   142  	outCh chan<- *MonitorMessage, nodeCh, allocCh <-chan *MonitorMessage) {
   143  
   144  	defer cancel()
   145  	defer close(outCh)
   146  
   147  	nodeOk := true
   148  	allocOk := true
   149  	var msg *MonitorMessage
   150  	for {
   151  		// If both chans have been closed, close the output chan
   152  		if !nodeOk && !allocOk {
   153  			return
   154  		}
   155  
   156  		select {
   157  		case msg, nodeOk = <-nodeCh:
   158  			if !nodeOk {
   159  				// nil chan to prevent further recvs
   160  				nodeCh = nil
   161  			}
   162  
   163  		case msg, allocOk = <-allocCh:
   164  			if !allocOk {
   165  				// nil chan to prevent further recvs
   166  				allocCh = nil
   167  			}
   168  
   169  		case <-ctx.Done():
   170  			return
   171  		}
   172  
   173  		if msg == nil {
   174  			continue
   175  		}
   176  
   177  		select {
   178  		case outCh <- msg:
   179  		case <-ctx.Done():
   180  
   181  			// If we are exiting but we have a message, attempt to send it
   182  			// so we don't lose a message but do not block.
   183  			select {
   184  			case outCh <- msg:
   185  			default:
   186  			}
   187  
   188  			return
   189  		}
   190  
   191  		// Abort on error messages
   192  		if msg.Level == MonitorMsgLevelError {
   193  			return
   194  		}
   195  	}
   196  }
   197  
   198  // monitorDrainNode emits node updates on nodeCh and closes the channel when
   199  // the node has finished draining.
   200  func (n *Nodes) monitorDrainNode(ctx context.Context, cancel func(),
   201  	nodeID string, index uint64, nodeCh chan<- *MonitorMessage) {
   202  	defer close(nodeCh)
   203  
   204  	var lastStrategy *DrainStrategy
   205  	var strategyChanged bool
   206  	q := QueryOptions{
   207  		AllowStale: true,
   208  		WaitIndex:  index,
   209  	}
   210  	for {
   211  		node, meta, err := n.Info(nodeID, &q)
   212  		if err != nil {
   213  			msg := Messagef(MonitorMsgLevelError, "Error monitoring node: %v", err)
   214  			select {
   215  			case nodeCh <- msg:
   216  			case <-ctx.Done():
   217  			}
   218  			return
   219  		}
   220  
   221  		if node.DrainStrategy == nil {
   222  			var msg *MonitorMessage
   223  			if strategyChanged {
   224  				msg = Messagef(MonitorMsgLevelInfo, "Node %q has marked all allocations for migration", nodeID)
   225  			} else {
   226  				msg = Messagef(MonitorMsgLevelInfo, "No drain strategy set for node %s", nodeID)
   227  				defer cancel()
   228  			}
   229  			select {
   230  			case nodeCh <- msg:
   231  			case <-ctx.Done():
   232  			}
   233  			return
   234  		}
   235  
   236  		if node.Status == structs.NodeStatusDown {
   237  			msg := Messagef(MonitorMsgLevelWarn, "Node %q down", nodeID)
   238  			select {
   239  			case nodeCh <- msg:
   240  			case <-ctx.Done():
   241  			}
   242  		}
   243  
   244  		// DrainStrategy changed
   245  		if lastStrategy != nil && !node.DrainStrategy.Equal(lastStrategy) {
   246  			msg := Messagef(MonitorMsgLevelInfo, "Node %q drain updated: %s", nodeID, node.DrainStrategy)
   247  			select {
   248  			case nodeCh <- msg:
   249  			case <-ctx.Done():
   250  				return
   251  			}
   252  		}
   253  
   254  		lastStrategy = node.DrainStrategy
   255  		strategyChanged = true
   256  
   257  		// Drain still ongoing, update index and block for updates
   258  		q.WaitIndex = meta.LastIndex
   259  	}
   260  }
   261  
   262  // monitorDrainAllocs emits alloc updates on allocCh and closes the channel
   263  // when the node has finished draining.
   264  func (n *Nodes) monitorDrainAllocs(ctx context.Context, nodeID string, ignoreSys bool, allocCh chan<- *MonitorMessage) {
   265  	defer close(allocCh)
   266  
   267  	q := QueryOptions{AllowStale: true}
   268  	initial := make(map[string]*Allocation, 4)
   269  
   270  	for {
   271  		allocs, meta, err := n.Allocations(nodeID, &q)
   272  		if err != nil {
   273  			msg := Messagef(MonitorMsgLevelError, "Error monitoring allocations: %v", err)
   274  			select {
   275  			case allocCh <- msg:
   276  			case <-ctx.Done():
   277  			}
   278  			return
   279  		}
   280  
   281  		q.WaitIndex = meta.LastIndex
   282  
   283  		runningAllocs := 0
   284  		for _, a := range allocs {
   285  			// Get previous version of alloc
   286  			orig, existing := initial[a.ID]
   287  
   288  			// Update local alloc state
   289  			initial[a.ID] = a
   290  
   291  			migrating := a.DesiredTransition.ShouldMigrate()
   292  
   293  			var msg string
   294  			switch {
   295  			case !existing:
   296  				// Should only be possible if response
   297  				// from initial Allocations call was
   298  				// stale. No need to output
   299  
   300  			case orig.ClientStatus != a.ClientStatus:
   301  				// Alloc status has changed; output
   302  				msg = fmt.Sprintf("status %s -> %s", orig.ClientStatus, a.ClientStatus)
   303  
   304  			case migrating && !orig.DesiredTransition.ShouldMigrate():
   305  				// Alloc was marked for migration
   306  				msg = "marked for migration"
   307  
   308  			case migrating && (orig.DesiredStatus != a.DesiredStatus) && a.DesiredStatus == structs.AllocDesiredStatusStop:
   309  				// Alloc has already been marked for migration and is now being stopped
   310  				msg = "draining"
   311  			}
   312  
   313  			if msg != "" {
   314  				select {
   315  				case allocCh <- Messagef(MonitorMsgLevelNormal, "Alloc %q %s", a.ID, msg):
   316  				case <-ctx.Done():
   317  					return
   318  				}
   319  			}
   320  
   321  			// Ignore malformed allocs
   322  			if a.Job == nil || a.Job.Type == nil {
   323  				continue
   324  			}
   325  
   326  			// Track how many allocs are still running
   327  			if ignoreSys && a.Job.Type != nil && *a.Job.Type == structs.JobTypeSystem {
   328  				continue
   329  			}
   330  
   331  			switch a.ClientStatus {
   332  			case structs.AllocClientStatusPending, structs.AllocClientStatusRunning:
   333  				runningAllocs++
   334  			}
   335  		}
   336  
   337  		// Exit if all allocs are terminal
   338  		if runningAllocs == 0 {
   339  			msg := Messagef(MonitorMsgLevelInfo, "All allocations on node %q have stopped.", nodeID)
   340  			select {
   341  			case allocCh <- msg:
   342  			case <-ctx.Done():
   343  			}
   344  			return
   345  		}
   346  	}
   347  }
   348  
   349  // NodeUpdateEligibilityRequest is used to update the drain specification for a node.
   350  type NodeUpdateEligibilityRequest struct {
   351  	// NodeID is the node to update the drain specification for.
   352  	NodeID      string
   353  	Eligibility string
   354  }
   355  
   356  // NodeEligibilityUpdateResponse is used to respond to a node eligibility update
   357  type NodeEligibilityUpdateResponse struct {
   358  	NodeModifyIndex uint64
   359  	EvalIDs         []string
   360  	EvalCreateIndex uint64
   361  	WriteMeta
   362  }
   363  
   364  // ToggleEligibility is used to update the scheduling eligibility of the node
   365  func (n *Nodes) ToggleEligibility(nodeID string, eligible bool, q *WriteOptions) (*NodeEligibilityUpdateResponse, error) {
   366  	e := structs.NodeSchedulingEligible
   367  	if !eligible {
   368  		e = structs.NodeSchedulingIneligible
   369  	}
   370  
   371  	req := &NodeUpdateEligibilityRequest{
   372  		NodeID:      nodeID,
   373  		Eligibility: e,
   374  	}
   375  
   376  	var resp NodeEligibilityUpdateResponse
   377  	wm, err := n.client.write("/v1/node/"+nodeID+"/eligibility", req, &resp, q)
   378  	if err != nil {
   379  		return nil, err
   380  	}
   381  	resp.WriteMeta = *wm
   382  	return &resp, nil
   383  }
   384  
   385  // Allocations is used to return the allocations associated with a node.
   386  func (n *Nodes) Allocations(nodeID string, q *QueryOptions) ([]*Allocation, *QueryMeta, error) {
   387  	var resp []*Allocation
   388  	qm, err := n.client.query("/v1/node/"+nodeID+"/allocations", &resp, q)
   389  	if err != nil {
   390  		return nil, nil, err
   391  	}
   392  	sort.Sort(AllocationSort(resp))
   393  	return resp, qm, nil
   394  }
   395  
   396  // ForceEvaluate is used to force-evaluate an existing node.
   397  func (n *Nodes) ForceEvaluate(nodeID string, q *WriteOptions) (string, *WriteMeta, error) {
   398  	var resp nodeEvalResponse
   399  	wm, err := n.client.write("/v1/node/"+nodeID+"/evaluate", nil, &resp, q)
   400  	if err != nil {
   401  		return "", nil, err
   402  	}
   403  	return resp.EvalID, wm, nil
   404  }
   405  
   406  func (n *Nodes) Stats(nodeID string, q *QueryOptions) (*HostStats, error) {
   407  	var resp HostStats
   408  	path := fmt.Sprintf("/v1/client/stats?node_id=%s", nodeID)
   409  	if _, err := n.client.query(path, &resp, q); err != nil {
   410  		return nil, err
   411  	}
   412  	return &resp, nil
   413  }
   414  
   415  func (n *Nodes) GC(nodeID string, q *QueryOptions) error {
   416  	var resp struct{}
   417  	path := fmt.Sprintf("/v1/client/gc?node_id=%s", nodeID)
   418  	_, err := n.client.query(path, &resp, q)
   419  	return err
   420  }
   421  
   422  // TODO Add tests
   423  func (n *Nodes) GcAlloc(allocID string, q *QueryOptions) error {
   424  	var resp struct{}
   425  	path := fmt.Sprintf("/v1/client/allocation/%s/gc", allocID)
   426  	_, err := n.client.query(path, &resp, q)
   427  	return err
   428  }
   429  
   430  // DriverInfo is used to deserialize a DriverInfo entry
   431  type DriverInfo struct {
   432  	Attributes        map[string]string
   433  	Detected          bool
   434  	Healthy           bool
   435  	HealthDescription string
   436  	UpdateTime        time.Time
   437  }
   438  
   439  // Node is used to deserialize a node entry.
   440  type Node struct {
   441  	ID                    string
   442  	Datacenter            string
   443  	Name                  string
   444  	HTTPAddr              string
   445  	TLSEnabled            bool
   446  	Attributes            map[string]string
   447  	Resources             *Resources
   448  	Reserved              *Resources
   449  	Links                 map[string]string
   450  	Meta                  map[string]string
   451  	NodeClass             string
   452  	Drain                 bool
   453  	DrainStrategy         *DrainStrategy
   454  	SchedulingEligibility string
   455  	Status                string
   456  	StatusDescription     string
   457  	StatusUpdatedAt       int64
   458  	Events                []*NodeEvent
   459  	Drivers               map[string]*DriverInfo
   460  	CreateIndex           uint64
   461  	ModifyIndex           uint64
   462  }
   463  
   464  // DrainStrategy describes a Node's drain behavior.
   465  type DrainStrategy struct {
   466  	// DrainSpec is the user declared drain specification
   467  	DrainSpec
   468  
   469  	// ForceDeadline is the deadline time for the drain after which drains will
   470  	// be forced
   471  	ForceDeadline time.Time
   472  }
   473  
   474  // DrainSpec describes a Node's drain behavior.
   475  type DrainSpec struct {
   476  	// Deadline is the duration after StartTime when the remaining
   477  	// allocations on a draining Node should be told to stop.
   478  	Deadline time.Duration
   479  
   480  	// IgnoreSystemJobs allows systems jobs to remain on the node even though it
   481  	// has been marked for draining.
   482  	IgnoreSystemJobs bool
   483  }
   484  
   485  func (d *DrainStrategy) Equal(o *DrainStrategy) bool {
   486  	if d == nil || o == nil {
   487  		return d == o
   488  	}
   489  
   490  	if d.ForceDeadline != o.ForceDeadline {
   491  		return false
   492  	}
   493  	if d.Deadline != o.Deadline {
   494  		return false
   495  	}
   496  	if d.IgnoreSystemJobs != o.IgnoreSystemJobs {
   497  		return false
   498  	}
   499  
   500  	return true
   501  }
   502  
   503  // String returns a human readable version of the drain strategy.
   504  func (d *DrainStrategy) String() string {
   505  	if d.IgnoreSystemJobs {
   506  		return fmt.Sprintf("drain ignoring system jobs and deadline at %s", d.ForceDeadline)
   507  	}
   508  	return fmt.Sprintf("drain with deadline at %s", d.ForceDeadline)
   509  }
   510  
   511  const (
   512  	NodeEventSubsystemDrain     = "Drain"
   513  	NodeEventSubsystemDriver    = "Driver"
   514  	NodeEventSubsystemHeartbeat = "Heartbeat"
   515  	NodeEventSubsystemCluster   = "Cluster"
   516  )
   517  
   518  // NodeEvent is a single unit representing a node’s state change
   519  type NodeEvent struct {
   520  	Message     string
   521  	Subsystem   string
   522  	Details     map[string]string
   523  	Timestamp   time.Time
   524  	CreateIndex uint64
   525  }
   526  
   527  // HostStats represents resource usage stats of the host running a Nomad client
   528  type HostStats struct {
   529  	Memory           *HostMemoryStats
   530  	CPU              []*HostCPUStats
   531  	DiskStats        []*HostDiskStats
   532  	Uptime           uint64
   533  	CPUTicksConsumed float64
   534  }
   535  
   536  type HostMemoryStats struct {
   537  	Total     uint64
   538  	Available uint64
   539  	Used      uint64
   540  	Free      uint64
   541  }
   542  
   543  type HostCPUStats struct {
   544  	CPU    string
   545  	User   float64
   546  	System float64
   547  	Idle   float64
   548  }
   549  
   550  type HostDiskStats struct {
   551  	Device            string
   552  	Mountpoint        string
   553  	Size              uint64
   554  	Used              uint64
   555  	Available         uint64
   556  	UsedPercent       float64
   557  	InodesUsedPercent float64
   558  }
   559  
   560  // NodeListStub is a subset of information returned during
   561  // node list operations.
   562  type NodeListStub struct {
   563  	Address               string
   564  	ID                    string
   565  	Datacenter            string
   566  	Name                  string
   567  	NodeClass             string
   568  	Version               string
   569  	Drain                 bool
   570  	SchedulingEligibility string
   571  	Status                string
   572  	StatusDescription     string
   573  	Drivers               map[string]*DriverInfo
   574  	CreateIndex           uint64
   575  	ModifyIndex           uint64
   576  }
   577  
   578  // NodeIndexSort reverse sorts nodes by CreateIndex
   579  type NodeIndexSort []*NodeListStub
   580  
   581  func (n NodeIndexSort) Len() int {
   582  	return len(n)
   583  }
   584  
   585  func (n NodeIndexSort) Less(i, j int) bool {
   586  	return n[i].CreateIndex > n[j].CreateIndex
   587  }
   588  
   589  func (n NodeIndexSort) Swap(i, j int) {
   590  	n[i], n[j] = n[j], n[i]
   591  }
   592  
   593  // nodeEvalResponse is used to decode a force-eval.
   594  type nodeEvalResponse struct {
   595  	EvalID string
   596  }
   597  
   598  // AllocationSort reverse sorts allocs by CreateIndex.
   599  type AllocationSort []*Allocation
   600  
   601  func (a AllocationSort) Len() int {
   602  	return len(a)
   603  }
   604  
   605  func (a AllocationSort) Less(i, j int) bool {
   606  	return a[i].CreateIndex > a[j].CreateIndex
   607  }
   608  
   609  func (a AllocationSort) Swap(i, j int) {
   610  	a[i], a[j] = a[j], a[i]
   611  }