github.com/zhizhiboom/nomad@v0.8.5-0.20180907175415-f28fd3a1a056/command/monitor.go (about) 1 package command 2 3 import ( 4 "fmt" 5 "strings" 6 "sync" 7 "time" 8 9 "github.com/hashicorp/nomad/api" 10 "github.com/hashicorp/nomad/nomad/structs" 11 "github.com/mitchellh/cli" 12 ) 13 14 const ( 15 // updateWait is the amount of time to wait between status 16 // updates. Because the monitor is poll-based, we use this 17 // delay to avoid overwhelming the API server. 18 updateWait = time.Second 19 ) 20 21 // evalState is used to store the current "state of the world" 22 // in the context of monitoring an evaluation. 23 type evalState struct { 24 status string 25 desc string 26 node string 27 deployment string 28 job string 29 allocs map[string]*allocState 30 wait time.Duration 31 index uint64 32 } 33 34 // newEvalState creates and initializes a new monitorState 35 func newEvalState() *evalState { 36 return &evalState{ 37 status: structs.EvalStatusPending, 38 allocs: make(map[string]*allocState), 39 } 40 } 41 42 // allocState is used to track the state of an allocation 43 type allocState struct { 44 id string 45 group string 46 node string 47 desired string 48 desiredDesc string 49 client string 50 clientDesc string 51 index uint64 52 } 53 54 // monitor wraps an evaluation monitor and holds metadata and 55 // state information. 56 type monitor struct { 57 ui cli.Ui 58 client *api.Client 59 state *evalState 60 61 // length determines the number of characters for identifiers in the ui. 62 length int 63 64 sync.Mutex 65 } 66 67 // newMonitor returns a new monitor. The returned monitor will 68 // write output information to the provided ui. The length parameter determines 69 // the number of characters for identifiers in the ui. 70 func newMonitor(ui cli.Ui, client *api.Client, length int) *monitor { 71 if colorUi, ok := ui.(*cli.ColoredUi); ok { 72 // Disable Info color for monitored output 73 ui = &cli.ColoredUi{ 74 ErrorColor: colorUi.ErrorColor, 75 WarnColor: colorUi.WarnColor, 76 InfoColor: cli.UiColorNone, 77 Ui: colorUi.Ui, 78 } 79 } 80 mon := &monitor{ 81 ui: &cli.PrefixedUi{ 82 InfoPrefix: "==> ", 83 OutputPrefix: " ", 84 ErrorPrefix: "==> ", 85 Ui: ui, 86 }, 87 client: client, 88 state: newEvalState(), 89 length: length, 90 } 91 return mon 92 } 93 94 // update is used to update our monitor with new state. It can be 95 // called whether the passed information is new or not, and will 96 // only dump update messages when state changes. 97 func (m *monitor) update(update *evalState) { 98 m.Lock() 99 defer m.Unlock() 100 101 existing := m.state 102 103 // Swap in the new state at the end 104 defer func() { 105 m.state = update 106 }() 107 108 // Check if the evaluation was triggered by a node 109 if existing.node == "" && update.node != "" { 110 m.ui.Output(fmt.Sprintf("Evaluation triggered by node %q", 111 limit(update.node, m.length))) 112 } 113 114 // Check if the evaluation was triggered by a job 115 if existing.job == "" && update.job != "" { 116 m.ui.Output(fmt.Sprintf("Evaluation triggered by job %q", update.job)) 117 } 118 119 // Check if the evaluation was triggered by a deployment 120 if existing.deployment == "" && update.deployment != "" { 121 m.ui.Output(fmt.Sprintf("Evaluation within deployment: %q", limit(update.deployment, m.length))) 122 } 123 124 // Check the allocations 125 for allocID, alloc := range update.allocs { 126 if existing, ok := existing.allocs[allocID]; !ok { 127 switch { 128 case alloc.index < update.index: 129 // New alloc with create index lower than the eval 130 // create index indicates modification 131 m.ui.Output(fmt.Sprintf( 132 "Allocation %q modified: node %q, group %q", 133 limit(alloc.id, m.length), limit(alloc.node, m.length), alloc.group)) 134 135 case alloc.desired == structs.AllocDesiredStatusRun: 136 // New allocation with desired status running 137 m.ui.Output(fmt.Sprintf( 138 "Allocation %q created: node %q, group %q", 139 limit(alloc.id, m.length), limit(alloc.node, m.length), alloc.group)) 140 } 141 } else { 142 switch { 143 case existing.client != alloc.client: 144 description := "" 145 if alloc.clientDesc != "" { 146 description = fmt.Sprintf(" (%s)", alloc.clientDesc) 147 } 148 // Allocation status has changed 149 m.ui.Output(fmt.Sprintf( 150 "Allocation %q status changed: %q -> %q%s", 151 limit(alloc.id, m.length), existing.client, alloc.client, description)) 152 } 153 } 154 } 155 156 // Check if the status changed. We skip any transitions to pending status. 157 if existing.status != "" && 158 update.status != structs.AllocClientStatusPending && 159 existing.status != update.status { 160 m.ui.Output(fmt.Sprintf("Evaluation status changed: %q -> %q", 161 existing.status, update.status)) 162 } 163 } 164 165 // monitor is used to start monitoring the given evaluation ID. It 166 // writes output directly to the monitor's ui, and returns the 167 // exit code for the command. If allowPrefix is false, monitor will only accept 168 // exact matching evalIDs. 169 // 170 // The return code will be 0 on successful evaluation. If there are 171 // problems scheduling the job (impossible constraints, resources 172 // exhausted, etc), then the return code will be 2. For any other 173 // failures (API connectivity, internal errors, etc), the return code 174 // will be 1. 175 func (m *monitor) monitor(evalID string, allowPrefix bool) int { 176 // Track if we encounter a scheduling failure. This can only be 177 // detected while querying allocations, so we use this bool to 178 // carry that status into the return code. 179 var schedFailure bool 180 181 // The user may have specified a prefix as eval id. We need to lookup the 182 // full id from the database first. Since we do this in a loop we need a 183 // variable to keep track if we've already written the header message. 184 var headerWritten bool 185 186 // Add the initial pending state 187 m.update(newEvalState()) 188 189 for { 190 // Query the evaluation 191 eval, _, err := m.client.Evaluations().Info(evalID, nil) 192 if err != nil { 193 if !allowPrefix { 194 m.ui.Error(fmt.Sprintf("No evaluation with id %q found", evalID)) 195 return 1 196 } 197 if len(evalID) == 1 { 198 m.ui.Error(fmt.Sprintf("Identifier must contain at least two characters.")) 199 return 1 200 } 201 202 evalID = sanitizeUUIDPrefix(evalID) 203 evals, _, err := m.client.Evaluations().PrefixList(evalID) 204 if err != nil { 205 m.ui.Error(fmt.Sprintf("Error reading evaluation: %s", err)) 206 return 1 207 } 208 if len(evals) == 0 { 209 m.ui.Error(fmt.Sprintf("No evaluation(s) with prefix or id %q found", evalID)) 210 return 1 211 } 212 if len(evals) > 1 { 213 // Format the evaluations 214 out := make([]string, len(evals)+1) 215 out[0] = "ID|Priority|Type|Triggered By|Status" 216 for i, eval := range evals { 217 out[i+1] = fmt.Sprintf("%s|%d|%s|%s|%s", 218 limit(eval.ID, m.length), 219 eval.Priority, 220 eval.Type, 221 eval.TriggeredBy, 222 eval.Status) 223 } 224 m.ui.Output(fmt.Sprintf("Prefix matched multiple evaluations\n\n%s", formatList(out))) 225 return 0 226 } 227 // Prefix lookup matched a single evaluation 228 eval, _, err = m.client.Evaluations().Info(evals[0].ID, nil) 229 if err != nil { 230 m.ui.Error(fmt.Sprintf("Error reading evaluation: %s", err)) 231 } 232 } 233 234 if !headerWritten { 235 m.ui.Info(fmt.Sprintf("Monitoring evaluation %q", limit(eval.ID, m.length))) 236 headerWritten = true 237 } 238 239 // Create the new eval state. 240 state := newEvalState() 241 state.status = eval.Status 242 state.desc = eval.StatusDescription 243 state.node = eval.NodeID 244 state.job = eval.JobID 245 state.deployment = eval.DeploymentID 246 state.wait = eval.Wait 247 state.index = eval.CreateIndex 248 249 // Query the allocations associated with the evaluation 250 allocs, _, err := m.client.Evaluations().Allocations(eval.ID, nil) 251 if err != nil { 252 m.ui.Error(fmt.Sprintf("Error reading allocations: %s", err)) 253 return 1 254 } 255 256 // Add the allocs to the state 257 for _, alloc := range allocs { 258 state.allocs[alloc.ID] = &allocState{ 259 id: alloc.ID, 260 group: alloc.TaskGroup, 261 node: alloc.NodeID, 262 desired: alloc.DesiredStatus, 263 desiredDesc: alloc.DesiredDescription, 264 client: alloc.ClientStatus, 265 clientDesc: alloc.ClientDescription, 266 index: alloc.CreateIndex, 267 } 268 } 269 270 // Update the state 271 m.update(state) 272 273 switch eval.Status { 274 case structs.EvalStatusComplete, structs.EvalStatusFailed, structs.EvalStatusCancelled: 275 if len(eval.FailedTGAllocs) == 0 { 276 m.ui.Info(fmt.Sprintf("Evaluation %q finished with status %q", 277 limit(eval.ID, m.length), eval.Status)) 278 } else { 279 // There were failures making the allocations 280 schedFailure = true 281 m.ui.Info(fmt.Sprintf("Evaluation %q finished with status %q but failed to place all allocations:", 282 limit(eval.ID, m.length), eval.Status)) 283 284 // Print the failures per task group 285 for tg, metrics := range eval.FailedTGAllocs { 286 noun := "allocation" 287 if metrics.CoalescedFailures > 0 { 288 noun += "s" 289 } 290 m.ui.Output(fmt.Sprintf("Task Group %q (failed to place %d %s):", tg, metrics.CoalescedFailures+1, noun)) 291 metrics := formatAllocMetrics(metrics, false, " ") 292 for _, line := range strings.Split(metrics, "\n") { 293 m.ui.Output(line) 294 } 295 } 296 297 if eval.BlockedEval != "" { 298 m.ui.Output(fmt.Sprintf("Evaluation %q waiting for additional capacity to place remainder", 299 limit(eval.BlockedEval, m.length))) 300 } 301 } 302 default: 303 // Wait for the next update 304 time.Sleep(updateWait) 305 continue 306 } 307 308 // Monitor the next eval in the chain, if present 309 if eval.NextEval != "" { 310 if eval.Wait.Nanoseconds() != 0 { 311 m.ui.Info(fmt.Sprintf( 312 "Monitoring next evaluation %q in %s", 313 limit(eval.NextEval, m.length), eval.Wait)) 314 315 // Skip some unnecessary polling 316 time.Sleep(eval.Wait) 317 } 318 319 // Reset the state and monitor the new eval 320 m.state = newEvalState() 321 return m.monitor(eval.NextEval, allowPrefix) 322 } 323 break 324 } 325 326 // Treat scheduling failures specially using a dedicated exit code. 327 // This makes it easier to detect failures from the CLI. 328 if schedFailure { 329 return 2 330 } 331 332 return 0 333 } 334 335 func formatAllocMetrics(metrics *api.AllocationMetric, scores bool, prefix string) string { 336 // Print a helpful message if we have an eligibility problem 337 var out string 338 if metrics.NodesEvaluated == 0 { 339 out += fmt.Sprintf("%s* No nodes were eligible for evaluation\n", prefix) 340 } 341 342 // Print a helpful message if the user has asked for a DC that has no 343 // available nodes. 344 for dc, available := range metrics.NodesAvailable { 345 if available == 0 { 346 out += fmt.Sprintf("%s* No nodes are available in datacenter %q\n", prefix, dc) 347 } 348 } 349 350 // Print filter info 351 for class, num := range metrics.ClassFiltered { 352 out += fmt.Sprintf("%s* Class %q filtered %d nodes\n", prefix, class, num) 353 } 354 for cs, num := range metrics.ConstraintFiltered { 355 out += fmt.Sprintf("%s* Constraint %q filtered %d nodes\n", prefix, cs, num) 356 } 357 358 // Print exhaustion info 359 if ne := metrics.NodesExhausted; ne > 0 { 360 out += fmt.Sprintf("%s* Resources exhausted on %d nodes\n", prefix, ne) 361 } 362 for class, num := range metrics.ClassExhausted { 363 out += fmt.Sprintf("%s* Class %q exhausted on %d nodes\n", prefix, class, num) 364 } 365 for dim, num := range metrics.DimensionExhausted { 366 out += fmt.Sprintf("%s* Dimension %q exhausted on %d nodes\n", prefix, dim, num) 367 } 368 369 // Print quota info 370 for _, dim := range metrics.QuotaExhausted { 371 out += fmt.Sprintf("%s* Quota limit hit %q\n", prefix, dim) 372 } 373 374 // Print scores 375 if scores { 376 if len(metrics.ScoreMetaData) > 0 { 377 scoreOutput := make([]string, len(metrics.ScoreMetaData)+1) 378 379 for i, scoreMeta := range metrics.ScoreMetaData { 380 // Add header as first row 381 if i == 0 { 382 scoreOutput[0] = "Node|" 383 for scorerName := range scoreMeta.Scores { 384 scoreOutput[0] += fmt.Sprintf("%v|", scorerName) 385 } 386 scoreOutput[0] += "Final Score" 387 } 388 scoreOutput[i+1] = fmt.Sprintf("%v|", scoreMeta.NodeID) 389 for _, scoreVal := range scoreMeta.Scores { 390 scoreOutput[i+1] += fmt.Sprintf("%v|", scoreVal) 391 } 392 scoreOutput[i+1] += fmt.Sprintf("%v", scoreMeta.NormScore) 393 } 394 out += formatList(scoreOutput) 395 } else { 396 // Backwards compatibility for old allocs 397 for name, score := range metrics.Scores { 398 out += fmt.Sprintf("%s* Score %q = %f\n", prefix, name, score) 399 } 400 } 401 } 402 403 out = strings.TrimSuffix(out, "\n") 404 return out 405 }