github.com/hernad/nomad@v1.6.112/command/node_drain.go (about) 1 // Copyright (c) HashiCorp, Inc. 2 // SPDX-License-Identifier: MPL-2.0 3 4 package command 5 6 import ( 7 "context" 8 "fmt" 9 "strings" 10 "time" 11 12 "github.com/hernad/nomad/api" 13 "github.com/hernad/nomad/api/contexts" 14 flaghelper "github.com/hernad/nomad/helper/flags" 15 16 "github.com/posener/complete" 17 ) 18 19 var ( 20 // defaultDrainDuration is the default drain duration if it is not specified 21 // explicitly 22 defaultDrainDuration = 1 * time.Hour 23 ) 24 25 type NodeDrainCommand struct { 26 Meta 27 } 28 29 func (c *NodeDrainCommand) Help() string { 30 helpText := ` 31 Usage: nomad node drain [options] <node> 32 33 Toggles node draining on a specified node. It is required that either 34 -enable or -disable is specified, but not both. The -self flag is useful to 35 drain the local node. 36 37 If ACLs are enabled, this option requires a token with the 'node:write' 38 capability. 39 40 General Options: 41 42 ` + generalOptionsUsage(usageOptsDefault|usageOptsNoNamespace) + ` 43 44 Node Drain Options: 45 46 -disable 47 Disable draining for the specified node. 48 49 -enable 50 Enable draining for the specified node. 51 52 -deadline <duration> 53 Set the deadline by which all allocations must be moved off the node. 54 Remaining allocations after the deadline are forced removed from the node. 55 If unspecified, a default deadline of one hour is applied. 56 57 -detach 58 Return immediately instead of entering monitor mode. 59 60 -monitor 61 Enter monitor mode directly without modifying the drain status. 62 63 -force 64 Force remove allocations off the node immediately. 65 66 -no-deadline 67 No deadline allows the allocations to drain off the node without being force 68 stopped after a certain deadline. 69 70 -ignore-system 71 Ignore system allows the drain to complete without stopping system job 72 allocations. By default system jobs are stopped last. 73 74 -keep-ineligible 75 Keep ineligible will maintain the node's scheduling ineligibility even if 76 the drain is being disabled. This is useful when an existing drain is being 77 cancelled but additional scheduling on the node is not desired. 78 79 -m 80 Message for the drain update operation. Registered in drain metadata as 81 "message" during drain enable and "cancel_message" during drain disable. 82 83 -meta <key>=<value> 84 Custom metadata to store on the drain operation, can be used multiple times. 85 86 -self 87 Set the drain status of the local node. 88 89 -yes 90 Automatic yes to prompts. 91 ` 92 return strings.TrimSpace(helpText) 93 } 94 95 func (c *NodeDrainCommand) Synopsis() string { 96 return "Toggle drain mode on a given node" 97 } 98 99 func (c *NodeDrainCommand) AutocompleteFlags() complete.Flags { 100 return mergeAutocompleteFlags(c.Meta.AutocompleteFlags(FlagSetClient), 101 complete.Flags{ 102 "-disable": complete.PredictNothing, 103 "-enable": complete.PredictNothing, 104 "-deadline": complete.PredictAnything, 105 "-detach": complete.PredictNothing, 106 "-force": complete.PredictNothing, 107 "-no-deadline": complete.PredictNothing, 108 "-ignore-system": complete.PredictNothing, 109 "-keep-ineligible": complete.PredictNothing, 110 "-m": complete.PredictNothing, 111 "-meta": complete.PredictNothing, 112 "-self": complete.PredictNothing, 113 "-yes": complete.PredictNothing, 114 }) 115 } 116 117 func (c *NodeDrainCommand) AutocompleteArgs() complete.Predictor { 118 return complete.PredictFunc(func(a complete.Args) []string { 119 client, err := c.Meta.Client() 120 if err != nil { 121 return nil 122 } 123 124 resp, _, err := client.Search().PrefixSearch(a.Last, contexts.Nodes, nil) 125 if err != nil { 126 return []string{} 127 } 128 return resp.Matches[contexts.Nodes] 129 }) 130 } 131 132 func (c *NodeDrainCommand) Name() string { return "node drain" } 133 134 func (c *NodeDrainCommand) Run(args []string) int { 135 var enable, disable, detach, force, 136 noDeadline, ignoreSystem, keepIneligible, 137 self, autoYes, monitor bool 138 var deadline, message string 139 var metaVars flaghelper.StringFlag 140 141 flags := c.Meta.FlagSet(c.Name(), FlagSetClient) 142 flags.Usage = func() { c.Ui.Output(c.Help()) } 143 flags.BoolVar(&enable, "enable", false, "Enable drain mode") 144 flags.BoolVar(&disable, "disable", false, "Disable drain mode") 145 flags.StringVar(&deadline, "deadline", "", "Deadline after which allocations are force stopped") 146 flags.BoolVar(&detach, "detach", false, "") 147 flags.BoolVar(&force, "force", false, "Force immediate drain") 148 flags.BoolVar(&noDeadline, "no-deadline", false, "Drain node with no deadline") 149 flags.BoolVar(&ignoreSystem, "ignore-system", false, "Do not drain system job allocations from the node") 150 flags.BoolVar(&keepIneligible, "keep-ineligible", false, "Do not update the nodes scheduling eligibility") 151 flags.BoolVar(&self, "self", false, "") 152 flags.BoolVar(&autoYes, "yes", false, "Automatic yes to prompts.") 153 flags.BoolVar(&monitor, "monitor", false, "Monitor drain status.") 154 flags.StringVar(&message, "m", "", "Drain message") 155 flags.Var(&metaVars, "meta", "Drain metadata") 156 157 if err := flags.Parse(args); err != nil { 158 return 1 159 } 160 161 // Check that enable or disable is not set with monitor 162 if monitor && (enable || disable) { 163 c.Ui.Error("The -monitor flag cannot be used with the '-enable' or '-disable' flags") 164 c.Ui.Error(commandErrorText(c)) 165 return 1 166 } 167 168 // Check that we got either enable or disable, but not both. 169 if (enable && disable) || (!monitor && !enable && !disable) { 170 c.Ui.Error("Either the '-enable' or '-disable' flag must be set, unless using '-monitor'") 171 c.Ui.Error(commandErrorText(c)) 172 return 1 173 } 174 175 // Check that we got a node ID 176 args = flags.Args() 177 if l := len(args); self && l != 0 || !self && l != 1 { 178 c.Ui.Error("Node ID must be specified if -self isn't being used") 179 c.Ui.Error(commandErrorText(c)) 180 return 1 181 } 182 183 // Validate a compatible set of flags were set 184 if disable && (deadline != "" || force || noDeadline || ignoreSystem) { 185 c.Ui.Error("-disable can't be combined with flags configuring drain strategy") 186 c.Ui.Error(commandErrorText(c)) 187 return 1 188 } 189 if deadline != "" && (force || noDeadline) { 190 c.Ui.Error("-deadline can't be combined with -force or -no-deadline") 191 c.Ui.Error(commandErrorText(c)) 192 return 1 193 } 194 if force && noDeadline { 195 c.Ui.Error("-force and -no-deadline are mutually exclusive") 196 c.Ui.Error(commandErrorText(c)) 197 return 1 198 } 199 200 // Parse the duration 201 var d time.Duration 202 if force { 203 d = -1 * time.Second 204 } else if noDeadline { 205 d = 0 206 } else if deadline != "" { 207 dur, err := time.ParseDuration(deadline) 208 if err != nil { 209 c.Ui.Error(fmt.Sprintf("Failed to parse deadline %q: %v", deadline, err)) 210 return 1 211 } 212 if dur <= 0 { 213 c.Ui.Error("A positive drain duration must be given") 214 return 1 215 } 216 217 d = dur 218 } else { 219 d = defaultDrainDuration 220 } 221 222 // Get the HTTP client 223 client, err := c.Meta.Client() 224 if err != nil { 225 c.Ui.Error(fmt.Sprintf("Error initializing client: %s", err)) 226 return 1 227 } 228 229 // If -self flag is set then determine the current node. 230 var nodeID string 231 if !self { 232 nodeID = args[0] 233 } else { 234 var err error 235 if nodeID, err = getLocalNodeID(client); err != nil { 236 c.Ui.Error(err.Error()) 237 return 1 238 } 239 } 240 241 // Check if node exists 242 if len(nodeID) == 1 { 243 c.Ui.Error("Identifier must contain at least two characters.") 244 return 1 245 } 246 247 nodeID = sanitizeUUIDPrefix(nodeID) 248 nodes, _, err := client.Nodes().PrefixList(nodeID) 249 if err != nil { 250 c.Ui.Error(fmt.Sprintf("Error toggling drain mode: %s", err)) 251 return 1 252 } 253 // Return error if no nodes are found 254 if len(nodes) == 0 { 255 c.Ui.Error(fmt.Sprintf("No node(s) with prefix or id %q found", nodeID)) 256 return 1 257 } 258 if len(nodes) > 1 { 259 c.Ui.Error(fmt.Sprintf("Prefix matched multiple nodes\n\n%s", 260 formatNodeStubList(nodes, true))) 261 return 1 262 } 263 264 // Prefix lookup matched a single node 265 node, meta, err := client.Nodes().Info(nodes[0].ID, nil) 266 if err != nil { 267 c.Ui.Error(fmt.Sprintf("Error toggling drain mode: %s", err)) 268 return 1 269 } 270 271 // If monitoring the drain start the monitor and return when done 272 if monitor { 273 if node.DrainStrategy == nil { 274 c.Ui.Warn("No drain strategy set") 275 return 0 276 } 277 c.Ui.Info(fmt.Sprintf("%s: Monitoring node %q: Ctrl-C to detach monitoring", formatTime(time.Now()), node.ID)) 278 c.monitorDrain(client, context.Background(), node, meta.LastIndex, ignoreSystem) 279 return 0 280 } 281 282 // Confirm drain if the node was a prefix match. 283 if nodeID != node.ID && !autoYes { 284 verb := "enable" 285 if disable { 286 verb = "disable" 287 } 288 question := fmt.Sprintf("Are you sure you want to %s drain mode for node %q? [y/N]", verb, node.ID) 289 answer, err := c.Ui.Ask(question) 290 if err != nil { 291 c.Ui.Error(fmt.Sprintf("Failed to parse answer: %v", err)) 292 return 1 293 } 294 295 if answer == "" || strings.ToLower(answer)[0] == 'n' { 296 // No case 297 c.Ui.Output("Canceling drain toggle") 298 return 0 299 } else if strings.ToLower(answer)[0] == 'y' && len(answer) > 1 { 300 // Non exact match yes 301 c.Ui.Output("For confirmation, an exact ‘y’ is required.") 302 return 0 303 } else if answer != "y" { 304 c.Ui.Output("No confirmation detected. For confirmation, an exact 'y' is required.") 305 return 1 306 } 307 } 308 309 var spec *api.DrainSpec 310 if enable { 311 spec = &api.DrainSpec{ 312 Deadline: d, 313 IgnoreSystemJobs: ignoreSystem, 314 } 315 } 316 317 // propagate drain metadata if cancelling 318 drainMeta := make(map[string]string) 319 if disable && node.LastDrain != nil && node.LastDrain.Meta != nil { 320 drainMeta = node.LastDrain.Meta 321 } 322 if message != "" { 323 if enable { 324 drainMeta["message"] = message 325 } else { 326 drainMeta["cancel_message"] = message 327 } 328 } 329 for _, m := range metaVars { 330 if len(m) == 0 { 331 continue 332 } 333 kv := strings.SplitN(m, "=", 2) 334 if len(kv) == 2 { 335 drainMeta[kv[0]] = kv[1] 336 } else { 337 drainMeta[kv[0]] = "" 338 } 339 } 340 341 // Toggle node draining 342 drainResponse, err := client.Nodes().UpdateDrainOpts(node.ID, 343 &api.DrainOptions{ 344 DrainSpec: spec, 345 MarkEligible: !keepIneligible, 346 Meta: drainMeta, 347 }, nil) 348 if err != nil { 349 c.Ui.Error(fmt.Sprintf("Error updating drain specification: %s", err)) 350 return 1 351 } 352 353 if !enable || detach { 354 if enable { 355 c.Ui.Output(fmt.Sprintf("Node %q drain strategy set", node.ID)) 356 } else { 357 c.Ui.Output(fmt.Sprintf("Node %q drain strategy unset", node.ID)) 358 } 359 } 360 361 if enable && !detach { 362 now := time.Now() 363 c.Ui.Info(fmt.Sprintf("%s: Ctrl-C to stop monitoring: will not cancel the node drain", formatTime(now))) 364 c.Ui.Output(fmt.Sprintf("%s: Node %q drain strategy set", formatTime(now), node.ID)) 365 c.monitorDrain(client, context.Background(), node, drainResponse.LastIndex, ignoreSystem) 366 } 367 return 0 368 } 369 370 func (c *NodeDrainCommand) monitorDrain(client *api.Client, ctx context.Context, node *api.Node, index uint64, ignoreSystem bool) { 371 outCh := client.Nodes().MonitorDrain(ctx, node.ID, index, ignoreSystem) 372 for msg := range outCh { 373 switch msg.Level { 374 case api.MonitorMsgLevelInfo: 375 c.Ui.Info(fmt.Sprintf("%s: %s", formatTime(time.Now()), msg)) 376 case api.MonitorMsgLevelWarn: 377 c.Ui.Warn(fmt.Sprintf("%s: %s", formatTime(time.Now()), msg)) 378 case api.MonitorMsgLevelError: 379 c.Ui.Error(fmt.Sprintf("%s: %s", formatTime(time.Now()), msg)) 380 default: 381 c.Ui.Output(fmt.Sprintf("%s: %s", formatTime(time.Now()), msg)) 382 } 383 } 384 }