github.com/anth0d/nomad@v0.0.0-20221214183521-ae3a0a2cad06/command/node_drain.go (about)

     1  package command
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"strings"
     7  	"time"
     8  
     9  	"github.com/hashicorp/nomad/api"
    10  	"github.com/hashicorp/nomad/api/contexts"
    11  	flaghelper "github.com/hashicorp/nomad/helper/flags"
    12  
    13  	"github.com/posener/complete"
    14  )
    15  
    16  var (
    17  	// defaultDrainDuration is the default drain duration if it is not specified
    18  	// explicitly
    19  	defaultDrainDuration = 1 * time.Hour
    20  )
    21  
// NodeDrainCommand implements the "nomad node drain" CLI command, which
// enables, disables or monitors draining of a single node. It embeds Meta,
// through which the methods in this file reach the UI, the flag set and the
// API client.
type NodeDrainCommand struct {
	Meta
}
    25  
    26  func (c *NodeDrainCommand) Help() string {
    27  	helpText := `
    28  Usage: nomad node drain [options] <node>
    29  
    30    Toggles node draining on a specified node. It is required that either
    31    -enable or -disable is specified, but not both.  The -self flag is useful to
    32    drain the local node.
    33  
    34    If ACLs are enabled, this option requires a token with the 'node:write'
    35    capability.
    36  
    37  General Options:
    38  
    39    ` + generalOptionsUsage(usageOptsDefault|usageOptsNoNamespace) + `
    40  
    41  Node Drain Options:
    42  
    43    -disable
    44      Disable draining for the specified node.
    45  
    46    -enable
    47      Enable draining for the specified node.
    48  
    49    -deadline <duration>
    50      Set the deadline by which all allocations must be moved off the node.
    51      Remaining allocations after the deadline are forced removed from the node.
    52      If unspecified, a default deadline of one hour is applied.
    53  
    54    -detach
    55      Return immediately instead of entering monitor mode.
    56  
    57    -monitor
    58      Enter monitor mode directly without modifying the drain status.
    59  
    60    -force
    61      Force remove allocations off the node immediately.
    62  
    63    -no-deadline
    64      No deadline allows the allocations to drain off the node without being force
    65      stopped after a certain deadline.
    66  
    67    -ignore-system
    68      Ignore system allows the drain to complete without stopping system job
    69      allocations. By default system jobs are stopped last.
    70  
    71    -keep-ineligible
    72      Keep ineligible will maintain the node's scheduling ineligibility even if
    73      the drain is being disabled. This is useful when an existing drain is being
    74      cancelled but additional scheduling on the node is not desired.
    75  
    76    -m 
    77      Message for the drain update operation. Registered in drain metadata as
    78      "message" during drain enable and "cancel_message" during drain disable.
    79  
    80    -meta <key>=<value>
    81      Custom metadata to store on the drain operation, can be used multiple times.
    82  
    83    -self
    84      Set the drain status of the local node.
    85  
    86    -yes
    87      Automatic yes to prompts.
    88  `
    89  	return strings.TrimSpace(helpText)
    90  }
    91  
    92  func (c *NodeDrainCommand) Synopsis() string {
    93  	return "Toggle drain mode on a given node"
    94  }
    95  
    96  func (c *NodeDrainCommand) AutocompleteFlags() complete.Flags {
    97  	return mergeAutocompleteFlags(c.Meta.AutocompleteFlags(FlagSetClient),
    98  		complete.Flags{
    99  			"-disable":         complete.PredictNothing,
   100  			"-enable":          complete.PredictNothing,
   101  			"-deadline":        complete.PredictAnything,
   102  			"-detach":          complete.PredictNothing,
   103  			"-force":           complete.PredictNothing,
   104  			"-no-deadline":     complete.PredictNothing,
   105  			"-ignore-system":   complete.PredictNothing,
   106  			"-keep-ineligible": complete.PredictNothing,
   107  			"-m":               complete.PredictNothing,
   108  			"-meta":            complete.PredictNothing,
   109  			"-self":            complete.PredictNothing,
   110  			"-yes":             complete.PredictNothing,
   111  		})
   112  }
   113  
   114  func (c *NodeDrainCommand) AutocompleteArgs() complete.Predictor {
   115  	return complete.PredictFunc(func(a complete.Args) []string {
   116  		client, err := c.Meta.Client()
   117  		if err != nil {
   118  			return nil
   119  		}
   120  
   121  		resp, _, err := client.Search().PrefixSearch(a.Last, contexts.Nodes, nil)
   122  		if err != nil {
   123  			return []string{}
   124  		}
   125  		return resp.Matches[contexts.Nodes]
   126  	})
   127  }
   128  
   129  func (c *NodeDrainCommand) Name() string { return "node drain" }
   130  
   131  func (c *NodeDrainCommand) Run(args []string) int {
   132  	var enable, disable, detach, force,
   133  		noDeadline, ignoreSystem, keepIneligible,
   134  		self, autoYes, monitor bool
   135  	var deadline, message string
   136  	var metaVars flaghelper.StringFlag
   137  
   138  	flags := c.Meta.FlagSet(c.Name(), FlagSetClient)
   139  	flags.Usage = func() { c.Ui.Output(c.Help()) }
   140  	flags.BoolVar(&enable, "enable", false, "Enable drain mode")
   141  	flags.BoolVar(&disable, "disable", false, "Disable drain mode")
   142  	flags.StringVar(&deadline, "deadline", "", "Deadline after which allocations are force stopped")
   143  	flags.BoolVar(&detach, "detach", false, "")
   144  	flags.BoolVar(&force, "force", false, "Force immediate drain")
   145  	flags.BoolVar(&noDeadline, "no-deadline", false, "Drain node with no deadline")
   146  	flags.BoolVar(&ignoreSystem, "ignore-system", false, "Do not drain system job allocations from the node")
   147  	flags.BoolVar(&keepIneligible, "keep-ineligible", false, "Do not update the nodes scheduling eligibility")
   148  	flags.BoolVar(&self, "self", false, "")
   149  	flags.BoolVar(&autoYes, "yes", false, "Automatic yes to prompts.")
   150  	flags.BoolVar(&monitor, "monitor", false, "Monitor drain status.")
   151  	flags.StringVar(&message, "m", "", "Drain message")
   152  	flags.Var(&metaVars, "meta", "Drain metadata")
   153  
   154  	if err := flags.Parse(args); err != nil {
   155  		return 1
   156  	}
   157  
   158  	// Check that enable or disable is not set with monitor
   159  	if monitor && (enable || disable) {
   160  		c.Ui.Error("The -monitor flag cannot be used with the '-enable' or '-disable' flags")
   161  		c.Ui.Error(commandErrorText(c))
   162  		return 1
   163  	}
   164  
   165  	// Check that we got either enable or disable, but not both.
   166  	if (enable && disable) || (!monitor && !enable && !disable) {
   167  		c.Ui.Error("Either the '-enable' or '-disable' flag must be set, unless using '-monitor'")
   168  		c.Ui.Error(commandErrorText(c))
   169  		return 1
   170  	}
   171  
   172  	// Check that we got a node ID
   173  	args = flags.Args()
   174  	if l := len(args); self && l != 0 || !self && l != 1 {
   175  		c.Ui.Error("Node ID must be specified if -self isn't being used")
   176  		c.Ui.Error(commandErrorText(c))
   177  		return 1
   178  	}
   179  
   180  	// Validate a compatible set of flags were set
   181  	if disable && (deadline != "" || force || noDeadline || ignoreSystem) {
   182  		c.Ui.Error("-disable can't be combined with flags configuring drain strategy")
   183  		c.Ui.Error(commandErrorText(c))
   184  		return 1
   185  	}
   186  	if deadline != "" && (force || noDeadline) {
   187  		c.Ui.Error("-deadline can't be combined with -force or -no-deadline")
   188  		c.Ui.Error(commandErrorText(c))
   189  		return 1
   190  	}
   191  	if force && noDeadline {
   192  		c.Ui.Error("-force and -no-deadline are mutually exclusive")
   193  		c.Ui.Error(commandErrorText(c))
   194  		return 1
   195  	}
   196  
   197  	// Parse the duration
   198  	var d time.Duration
   199  	if force {
   200  		d = -1 * time.Second
   201  	} else if noDeadline {
   202  		d = 0
   203  	} else if deadline != "" {
   204  		dur, err := time.ParseDuration(deadline)
   205  		if err != nil {
   206  			c.Ui.Error(fmt.Sprintf("Failed to parse deadline %q: %v", deadline, err))
   207  			return 1
   208  		}
   209  		if dur <= 0 {
   210  			c.Ui.Error("A positive drain duration must be given")
   211  			return 1
   212  		}
   213  
   214  		d = dur
   215  	} else {
   216  		d = defaultDrainDuration
   217  	}
   218  
   219  	// Get the HTTP client
   220  	client, err := c.Meta.Client()
   221  	if err != nil {
   222  		c.Ui.Error(fmt.Sprintf("Error initializing client: %s", err))
   223  		return 1
   224  	}
   225  
   226  	// If -self flag is set then determine the current node.
   227  	var nodeID string
   228  	if !self {
   229  		nodeID = args[0]
   230  	} else {
   231  		var err error
   232  		if nodeID, err = getLocalNodeID(client); err != nil {
   233  			c.Ui.Error(err.Error())
   234  			return 1
   235  		}
   236  	}
   237  
   238  	// Check if node exists
   239  	if len(nodeID) == 1 {
   240  		c.Ui.Error("Identifier must contain at least two characters.")
   241  		return 1
   242  	}
   243  
   244  	nodeID = sanitizeUUIDPrefix(nodeID)
   245  	nodes, _, err := client.Nodes().PrefixList(nodeID)
   246  	if err != nil {
   247  		c.Ui.Error(fmt.Sprintf("Error toggling drain mode: %s", err))
   248  		return 1
   249  	}
   250  	// Return error if no nodes are found
   251  	if len(nodes) == 0 {
   252  		c.Ui.Error(fmt.Sprintf("No node(s) with prefix or id %q found", nodeID))
   253  		return 1
   254  	}
   255  	if len(nodes) > 1 {
   256  		c.Ui.Error(fmt.Sprintf("Prefix matched multiple nodes\n\n%s",
   257  			formatNodeStubList(nodes, true)))
   258  		return 1
   259  	}
   260  
   261  	// Prefix lookup matched a single node
   262  	node, meta, err := client.Nodes().Info(nodes[0].ID, nil)
   263  	if err != nil {
   264  		c.Ui.Error(fmt.Sprintf("Error toggling drain mode: %s", err))
   265  		return 1
   266  	}
   267  
   268  	// If monitoring the drain start the monitor and return when done
   269  	if monitor {
   270  		if node.DrainStrategy == nil {
   271  			c.Ui.Warn("No drain strategy set")
   272  			return 0
   273  		}
   274  		c.Ui.Info(fmt.Sprintf("%s: Monitoring node %q: Ctrl-C to detach monitoring", formatTime(time.Now()), node.ID))
   275  		c.monitorDrain(client, context.Background(), node, meta.LastIndex, ignoreSystem)
   276  		return 0
   277  	}
   278  
   279  	// Confirm drain if the node was a prefix match.
   280  	if nodeID != node.ID && !autoYes {
   281  		verb := "enable"
   282  		if disable {
   283  			verb = "disable"
   284  		}
   285  		question := fmt.Sprintf("Are you sure you want to %s drain mode for node %q? [y/N]", verb, node.ID)
   286  		answer, err := c.Ui.Ask(question)
   287  		if err != nil {
   288  			c.Ui.Error(fmt.Sprintf("Failed to parse answer: %v", err))
   289  			return 1
   290  		}
   291  
   292  		if answer == "" || strings.ToLower(answer)[0] == 'n' {
   293  			// No case
   294  			c.Ui.Output("Canceling drain toggle")
   295  			return 0
   296  		} else if strings.ToLower(answer)[0] == 'y' && len(answer) > 1 {
   297  			// Non exact match yes
   298  			c.Ui.Output("For confirmation, an exact ‘y’ is required.")
   299  			return 0
   300  		} else if answer != "y" {
   301  			c.Ui.Output("No confirmation detected. For confirmation, an exact 'y' is required.")
   302  			return 1
   303  		}
   304  	}
   305  
   306  	var spec *api.DrainSpec
   307  	if enable {
   308  		spec = &api.DrainSpec{
   309  			Deadline:         d,
   310  			IgnoreSystemJobs: ignoreSystem,
   311  		}
   312  	}
   313  
   314  	// propagate drain metadata if cancelling
   315  	drainMeta := make(map[string]string)
   316  	if disable && node.LastDrain != nil && node.LastDrain.Meta != nil {
   317  		drainMeta = node.LastDrain.Meta
   318  	}
   319  	if message != "" {
   320  		if enable {
   321  			drainMeta["message"] = message
   322  		} else {
   323  			drainMeta["cancel_message"] = message
   324  		}
   325  	}
   326  	for _, m := range metaVars {
   327  		if len(m) == 0 {
   328  			continue
   329  		}
   330  		kv := strings.SplitN(m, "=", 2)
   331  		if len(kv) == 2 {
   332  			drainMeta[kv[0]] = kv[1]
   333  		} else {
   334  			drainMeta[kv[0]] = ""
   335  		}
   336  	}
   337  
   338  	// Toggle node draining
   339  	drainResponse, err := client.Nodes().UpdateDrainOpts(node.ID,
   340  		&api.DrainOptions{
   341  			DrainSpec:    spec,
   342  			MarkEligible: !keepIneligible,
   343  			Meta:         drainMeta,
   344  		}, nil)
   345  	if err != nil {
   346  		c.Ui.Error(fmt.Sprintf("Error updating drain specification: %s", err))
   347  		return 1
   348  	}
   349  
   350  	if !enable || detach {
   351  		if enable {
   352  			c.Ui.Output(fmt.Sprintf("Node %q drain strategy set", node.ID))
   353  		} else {
   354  			c.Ui.Output(fmt.Sprintf("Node %q drain strategy unset", node.ID))
   355  		}
   356  	}
   357  
   358  	if enable && !detach {
   359  		now := time.Now()
   360  		c.Ui.Info(fmt.Sprintf("%s: Ctrl-C to stop monitoring: will not cancel the node drain", formatTime(now)))
   361  		c.Ui.Output(fmt.Sprintf("%s: Node %q drain strategy set", formatTime(now), node.ID))
   362  		c.monitorDrain(client, context.Background(), node, drainResponse.LastIndex, ignoreSystem)
   363  	}
   364  	return 0
   365  }
   366  
   367  func (c *NodeDrainCommand) monitorDrain(client *api.Client, ctx context.Context, node *api.Node, index uint64, ignoreSystem bool) {
   368  	outCh := client.Nodes().MonitorDrain(ctx, node.ID, index, ignoreSystem)
   369  	for msg := range outCh {
   370  		switch msg.Level {
   371  		case api.MonitorMsgLevelInfo:
   372  			c.Ui.Info(fmt.Sprintf("%s: %s", formatTime(time.Now()), msg))
   373  		case api.MonitorMsgLevelWarn:
   374  			c.Ui.Warn(fmt.Sprintf("%s: %s", formatTime(time.Now()), msg))
   375  		case api.MonitorMsgLevelError:
   376  			c.Ui.Error(fmt.Sprintf("%s: %s", formatTime(time.Now()), msg))
   377  		default:
   378  			c.Ui.Output(fmt.Sprintf("%s: %s", formatTime(time.Now()), msg))
   379  		}
   380  	}
   381  }