github.com/hernad/nomad@v1.6.112/command/node_drain.go (about)

     1  // Copyright (c) HashiCorp, Inc.
     2  // SPDX-License-Identifier: MPL-2.0
     3  
     4  package command
     5  
     6  import (
     7  	"context"
     8  	"fmt"
     9  	"strings"
    10  	"time"
    11  
    12  	"github.com/hernad/nomad/api"
    13  	"github.com/hernad/nomad/api/contexts"
    14  	flaghelper "github.com/hernad/nomad/helper/flags"
    15  
    16  	"github.com/posener/complete"
    17  )
    18  
    19  var (
    20  	// defaultDrainDuration is the default drain duration if it is not specified
    21  	// explicitly
    22  	defaultDrainDuration = 1 * time.Hour
    23  )
    24  
// NodeDrainCommand implements the "node drain" CLI command, which enables,
// disables, or monitors draining of a single node. It embeds Meta, which
// supplies the flag set, API client and UI used by the command's methods.
type NodeDrainCommand struct {
	Meta
}
    28  
    29  func (c *NodeDrainCommand) Help() string {
    30  	helpText := `
    31  Usage: nomad node drain [options] <node>
    32  
    33    Toggles node draining on a specified node. It is required that either
    34    -enable or -disable is specified, but not both.  The -self flag is useful to
    35    drain the local node.
    36  
    37    If ACLs are enabled, this option requires a token with the 'node:write'
    38    capability.
    39  
    40  General Options:
    41  
    42    ` + generalOptionsUsage(usageOptsDefault|usageOptsNoNamespace) + `
    43  
    44  Node Drain Options:
    45  
    46    -disable
    47      Disable draining for the specified node.
    48  
    49    -enable
    50      Enable draining for the specified node.
    51  
    52    -deadline <duration>
    53      Set the deadline by which all allocations must be moved off the node.
    54      Remaining allocations after the deadline are forced removed from the node.
    55      If unspecified, a default deadline of one hour is applied.
    56  
    57    -detach
    58      Return immediately instead of entering monitor mode.
    59  
    60    -monitor
    61      Enter monitor mode directly without modifying the drain status.
    62  
    63    -force
    64      Force remove allocations off the node immediately.
    65  
    66    -no-deadline
    67      No deadline allows the allocations to drain off the node without being force
    68      stopped after a certain deadline.
    69  
    70    -ignore-system
    71      Ignore system allows the drain to complete without stopping system job
    72      allocations. By default system jobs are stopped last.
    73  
    74    -keep-ineligible
    75      Keep ineligible will maintain the node's scheduling ineligibility even if
    76      the drain is being disabled. This is useful when an existing drain is being
    77      cancelled but additional scheduling on the node is not desired.
    78  
    79    -m 
    80      Message for the drain update operation. Registered in drain metadata as
    81      "message" during drain enable and "cancel_message" during drain disable.
    82  
    83    -meta <key>=<value>
    84      Custom metadata to store on the drain operation, can be used multiple times.
    85  
    86    -self
    87      Set the drain status of the local node.
    88  
    89    -yes
    90      Automatic yes to prompts.
    91  `
    92  	return strings.TrimSpace(helpText)
    93  }
    94  
    95  func (c *NodeDrainCommand) Synopsis() string {
    96  	return "Toggle drain mode on a given node"
    97  }
    98  
    99  func (c *NodeDrainCommand) AutocompleteFlags() complete.Flags {
   100  	return mergeAutocompleteFlags(c.Meta.AutocompleteFlags(FlagSetClient),
   101  		complete.Flags{
   102  			"-disable":         complete.PredictNothing,
   103  			"-enable":          complete.PredictNothing,
   104  			"-deadline":        complete.PredictAnything,
   105  			"-detach":          complete.PredictNothing,
   106  			"-force":           complete.PredictNothing,
   107  			"-no-deadline":     complete.PredictNothing,
   108  			"-ignore-system":   complete.PredictNothing,
   109  			"-keep-ineligible": complete.PredictNothing,
   110  			"-m":               complete.PredictNothing,
   111  			"-meta":            complete.PredictNothing,
   112  			"-self":            complete.PredictNothing,
   113  			"-yes":             complete.PredictNothing,
   114  		})
   115  }
   116  
   117  func (c *NodeDrainCommand) AutocompleteArgs() complete.Predictor {
   118  	return complete.PredictFunc(func(a complete.Args) []string {
   119  		client, err := c.Meta.Client()
   120  		if err != nil {
   121  			return nil
   122  		}
   123  
   124  		resp, _, err := client.Search().PrefixSearch(a.Last, contexts.Nodes, nil)
   125  		if err != nil {
   126  			return []string{}
   127  		}
   128  		return resp.Matches[contexts.Nodes]
   129  	})
   130  }
   131  
   132  func (c *NodeDrainCommand) Name() string { return "node drain" }
   133  
   134  func (c *NodeDrainCommand) Run(args []string) int {
   135  	var enable, disable, detach, force,
   136  		noDeadline, ignoreSystem, keepIneligible,
   137  		self, autoYes, monitor bool
   138  	var deadline, message string
   139  	var metaVars flaghelper.StringFlag
   140  
   141  	flags := c.Meta.FlagSet(c.Name(), FlagSetClient)
   142  	flags.Usage = func() { c.Ui.Output(c.Help()) }
   143  	flags.BoolVar(&enable, "enable", false, "Enable drain mode")
   144  	flags.BoolVar(&disable, "disable", false, "Disable drain mode")
   145  	flags.StringVar(&deadline, "deadline", "", "Deadline after which allocations are force stopped")
   146  	flags.BoolVar(&detach, "detach", false, "")
   147  	flags.BoolVar(&force, "force", false, "Force immediate drain")
   148  	flags.BoolVar(&noDeadline, "no-deadline", false, "Drain node with no deadline")
   149  	flags.BoolVar(&ignoreSystem, "ignore-system", false, "Do not drain system job allocations from the node")
   150  	flags.BoolVar(&keepIneligible, "keep-ineligible", false, "Do not update the nodes scheduling eligibility")
   151  	flags.BoolVar(&self, "self", false, "")
   152  	flags.BoolVar(&autoYes, "yes", false, "Automatic yes to prompts.")
   153  	flags.BoolVar(&monitor, "monitor", false, "Monitor drain status.")
   154  	flags.StringVar(&message, "m", "", "Drain message")
   155  	flags.Var(&metaVars, "meta", "Drain metadata")
   156  
   157  	if err := flags.Parse(args); err != nil {
   158  		return 1
   159  	}
   160  
   161  	// Check that enable or disable is not set with monitor
   162  	if monitor && (enable || disable) {
   163  		c.Ui.Error("The -monitor flag cannot be used with the '-enable' or '-disable' flags")
   164  		c.Ui.Error(commandErrorText(c))
   165  		return 1
   166  	}
   167  
   168  	// Check that we got either enable or disable, but not both.
   169  	if (enable && disable) || (!monitor && !enable && !disable) {
   170  		c.Ui.Error("Either the '-enable' or '-disable' flag must be set, unless using '-monitor'")
   171  		c.Ui.Error(commandErrorText(c))
   172  		return 1
   173  	}
   174  
   175  	// Check that we got a node ID
   176  	args = flags.Args()
   177  	if l := len(args); self && l != 0 || !self && l != 1 {
   178  		c.Ui.Error("Node ID must be specified if -self isn't being used")
   179  		c.Ui.Error(commandErrorText(c))
   180  		return 1
   181  	}
   182  
   183  	// Validate a compatible set of flags were set
   184  	if disable && (deadline != "" || force || noDeadline || ignoreSystem) {
   185  		c.Ui.Error("-disable can't be combined with flags configuring drain strategy")
   186  		c.Ui.Error(commandErrorText(c))
   187  		return 1
   188  	}
   189  	if deadline != "" && (force || noDeadline) {
   190  		c.Ui.Error("-deadline can't be combined with -force or -no-deadline")
   191  		c.Ui.Error(commandErrorText(c))
   192  		return 1
   193  	}
   194  	if force && noDeadline {
   195  		c.Ui.Error("-force and -no-deadline are mutually exclusive")
   196  		c.Ui.Error(commandErrorText(c))
   197  		return 1
   198  	}
   199  
   200  	// Parse the duration
   201  	var d time.Duration
   202  	if force {
   203  		d = -1 * time.Second
   204  	} else if noDeadline {
   205  		d = 0
   206  	} else if deadline != "" {
   207  		dur, err := time.ParseDuration(deadline)
   208  		if err != nil {
   209  			c.Ui.Error(fmt.Sprintf("Failed to parse deadline %q: %v", deadline, err))
   210  			return 1
   211  		}
   212  		if dur <= 0 {
   213  			c.Ui.Error("A positive drain duration must be given")
   214  			return 1
   215  		}
   216  
   217  		d = dur
   218  	} else {
   219  		d = defaultDrainDuration
   220  	}
   221  
   222  	// Get the HTTP client
   223  	client, err := c.Meta.Client()
   224  	if err != nil {
   225  		c.Ui.Error(fmt.Sprintf("Error initializing client: %s", err))
   226  		return 1
   227  	}
   228  
   229  	// If -self flag is set then determine the current node.
   230  	var nodeID string
   231  	if !self {
   232  		nodeID = args[0]
   233  	} else {
   234  		var err error
   235  		if nodeID, err = getLocalNodeID(client); err != nil {
   236  			c.Ui.Error(err.Error())
   237  			return 1
   238  		}
   239  	}
   240  
   241  	// Check if node exists
   242  	if len(nodeID) == 1 {
   243  		c.Ui.Error("Identifier must contain at least two characters.")
   244  		return 1
   245  	}
   246  
   247  	nodeID = sanitizeUUIDPrefix(nodeID)
   248  	nodes, _, err := client.Nodes().PrefixList(nodeID)
   249  	if err != nil {
   250  		c.Ui.Error(fmt.Sprintf("Error toggling drain mode: %s", err))
   251  		return 1
   252  	}
   253  	// Return error if no nodes are found
   254  	if len(nodes) == 0 {
   255  		c.Ui.Error(fmt.Sprintf("No node(s) with prefix or id %q found", nodeID))
   256  		return 1
   257  	}
   258  	if len(nodes) > 1 {
   259  		c.Ui.Error(fmt.Sprintf("Prefix matched multiple nodes\n\n%s",
   260  			formatNodeStubList(nodes, true)))
   261  		return 1
   262  	}
   263  
   264  	// Prefix lookup matched a single node
   265  	node, meta, err := client.Nodes().Info(nodes[0].ID, nil)
   266  	if err != nil {
   267  		c.Ui.Error(fmt.Sprintf("Error toggling drain mode: %s", err))
   268  		return 1
   269  	}
   270  
   271  	// If monitoring the drain start the monitor and return when done
   272  	if monitor {
   273  		if node.DrainStrategy == nil {
   274  			c.Ui.Warn("No drain strategy set")
   275  			return 0
   276  		}
   277  		c.Ui.Info(fmt.Sprintf("%s: Monitoring node %q: Ctrl-C to detach monitoring", formatTime(time.Now()), node.ID))
   278  		c.monitorDrain(client, context.Background(), node, meta.LastIndex, ignoreSystem)
   279  		return 0
   280  	}
   281  
   282  	// Confirm drain if the node was a prefix match.
   283  	if nodeID != node.ID && !autoYes {
   284  		verb := "enable"
   285  		if disable {
   286  			verb = "disable"
   287  		}
   288  		question := fmt.Sprintf("Are you sure you want to %s drain mode for node %q? [y/N]", verb, node.ID)
   289  		answer, err := c.Ui.Ask(question)
   290  		if err != nil {
   291  			c.Ui.Error(fmt.Sprintf("Failed to parse answer: %v", err))
   292  			return 1
   293  		}
   294  
   295  		if answer == "" || strings.ToLower(answer)[0] == 'n' {
   296  			// No case
   297  			c.Ui.Output("Canceling drain toggle")
   298  			return 0
   299  		} else if strings.ToLower(answer)[0] == 'y' && len(answer) > 1 {
   300  			// Non exact match yes
   301  			c.Ui.Output("For confirmation, an exact ‘y’ is required.")
   302  			return 0
   303  		} else if answer != "y" {
   304  			c.Ui.Output("No confirmation detected. For confirmation, an exact 'y' is required.")
   305  			return 1
   306  		}
   307  	}
   308  
   309  	var spec *api.DrainSpec
   310  	if enable {
   311  		spec = &api.DrainSpec{
   312  			Deadline:         d,
   313  			IgnoreSystemJobs: ignoreSystem,
   314  		}
   315  	}
   316  
   317  	// propagate drain metadata if cancelling
   318  	drainMeta := make(map[string]string)
   319  	if disable && node.LastDrain != nil && node.LastDrain.Meta != nil {
   320  		drainMeta = node.LastDrain.Meta
   321  	}
   322  	if message != "" {
   323  		if enable {
   324  			drainMeta["message"] = message
   325  		} else {
   326  			drainMeta["cancel_message"] = message
   327  		}
   328  	}
   329  	for _, m := range metaVars {
   330  		if len(m) == 0 {
   331  			continue
   332  		}
   333  		kv := strings.SplitN(m, "=", 2)
   334  		if len(kv) == 2 {
   335  			drainMeta[kv[0]] = kv[1]
   336  		} else {
   337  			drainMeta[kv[0]] = ""
   338  		}
   339  	}
   340  
   341  	// Toggle node draining
   342  	drainResponse, err := client.Nodes().UpdateDrainOpts(node.ID,
   343  		&api.DrainOptions{
   344  			DrainSpec:    spec,
   345  			MarkEligible: !keepIneligible,
   346  			Meta:         drainMeta,
   347  		}, nil)
   348  	if err != nil {
   349  		c.Ui.Error(fmt.Sprintf("Error updating drain specification: %s", err))
   350  		return 1
   351  	}
   352  
   353  	if !enable || detach {
   354  		if enable {
   355  			c.Ui.Output(fmt.Sprintf("Node %q drain strategy set", node.ID))
   356  		} else {
   357  			c.Ui.Output(fmt.Sprintf("Node %q drain strategy unset", node.ID))
   358  		}
   359  	}
   360  
   361  	if enable && !detach {
   362  		now := time.Now()
   363  		c.Ui.Info(fmt.Sprintf("%s: Ctrl-C to stop monitoring: will not cancel the node drain", formatTime(now)))
   364  		c.Ui.Output(fmt.Sprintf("%s: Node %q drain strategy set", formatTime(now), node.ID))
   365  		c.monitorDrain(client, context.Background(), node, drainResponse.LastIndex, ignoreSystem)
   366  	}
   367  	return 0
   368  }
   369  
   370  func (c *NodeDrainCommand) monitorDrain(client *api.Client, ctx context.Context, node *api.Node, index uint64, ignoreSystem bool) {
   371  	outCh := client.Nodes().MonitorDrain(ctx, node.ID, index, ignoreSystem)
   372  	for msg := range outCh {
   373  		switch msg.Level {
   374  		case api.MonitorMsgLevelInfo:
   375  			c.Ui.Info(fmt.Sprintf("%s: %s", formatTime(time.Now()), msg))
   376  		case api.MonitorMsgLevelWarn:
   377  			c.Ui.Warn(fmt.Sprintf("%s: %s", formatTime(time.Now()), msg))
   378  		case api.MonitorMsgLevelError:
   379  			c.Ui.Error(fmt.Sprintf("%s: %s", formatTime(time.Now()), msg))
   380  		default:
   381  			c.Ui.Output(fmt.Sprintf("%s: %s", formatTime(time.Now()), msg))
   382  		}
   383  	}
   384  }