github.com/AliyunContainerService/cli@v0.0.0-20181009023821-814ced4b30d0/cli/command/service/progress/progress.go (about) 1 package progress 2 3 import ( 4 "context" 5 "errors" 6 "fmt" 7 "io" 8 "os" 9 "os/signal" 10 "strings" 11 "time" 12 13 "github.com/docker/docker/api/types" 14 "github.com/docker/docker/api/types/filters" 15 "github.com/docker/docker/api/types/swarm" 16 "github.com/docker/docker/client" 17 "github.com/docker/docker/pkg/progress" 18 "github.com/docker/docker/pkg/streamformatter" 19 "github.com/docker/docker/pkg/stringid" 20 ) 21 22 var ( 23 numberedStates = map[swarm.TaskState]int64{ 24 swarm.TaskStateNew: 1, 25 swarm.TaskStateAllocated: 2, 26 swarm.TaskStatePending: 3, 27 swarm.TaskStateAssigned: 4, 28 swarm.TaskStateAccepted: 5, 29 swarm.TaskStatePreparing: 6, 30 swarm.TaskStateReady: 7, 31 swarm.TaskStateStarting: 8, 32 swarm.TaskStateRunning: 9, 33 34 // The following states are not actually shown in progress 35 // output, but are used internally for ordering. 36 swarm.TaskStateComplete: 10, 37 swarm.TaskStateShutdown: 11, 38 swarm.TaskStateFailed: 12, 39 swarm.TaskStateRejected: 13, 40 } 41 42 longestState int 43 ) 44 45 const ( 46 maxProgress = 9 47 maxProgressBars = 20 48 ) 49 50 type progressUpdater interface { 51 update(service swarm.Service, tasks []swarm.Task, activeNodes map[string]struct{}, rollback bool) (bool, error) 52 } 53 54 func init() { 55 for state := range numberedStates { 56 if !terminalState(state) && len(state) > longestState { 57 longestState = len(state) 58 } 59 } 60 } 61 62 func terminalState(state swarm.TaskState) bool { 63 return numberedStates[state] > numberedStates[swarm.TaskStateRunning] 64 } 65 66 func stateToProgress(state swarm.TaskState, rollback bool) int64 { 67 if !rollback { 68 return numberedStates[state] 69 } 70 return numberedStates[swarm.TaskStateRunning] - numberedStates[state] 71 } 72 73 // ServiceProgress outputs progress information for convergence of a service. 74 // nolint: gocyclo 75 func ServiceProgress(ctx context.Context, client client.APIClient, serviceID string, progressWriter io.WriteCloser) error { 76 defer progressWriter.Close() 77 78 progressOut := streamformatter.NewJSONProgressOutput(progressWriter, false) 79 80 sigint := make(chan os.Signal, 1) 81 signal.Notify(sigint, os.Interrupt) 82 defer signal.Stop(sigint) 83 84 taskFilter := filters.NewArgs() 85 taskFilter.Add("service", serviceID) 86 taskFilter.Add("_up-to-date", "true") 87 88 getUpToDateTasks := func() ([]swarm.Task, error) { 89 return client.TaskList(ctx, types.TaskListOptions{Filters: taskFilter}) 90 } 91 92 var ( 93 updater progressUpdater 94 converged bool 95 convergedAt time.Time 96 monitor = 5 * time.Second 97 rollback bool 98 ) 99 100 for { 101 service, _, err := client.ServiceInspectWithRaw(ctx, serviceID, types.ServiceInspectOptions{}) 102 if err != nil { 103 return err 104 } 105 106 if service.Spec.UpdateConfig != nil && service.Spec.UpdateConfig.Monitor != 0 { 107 monitor = service.Spec.UpdateConfig.Monitor 108 } 109 110 if updater == nil { 111 updater, err = initializeUpdater(service, progressOut) 112 if err != nil { 113 return err 114 } 115 } 116 117 if service.UpdateStatus != nil { 118 switch service.UpdateStatus.State { 119 case swarm.UpdateStateUpdating: 120 rollback = false 121 case swarm.UpdateStateCompleted: 122 if !converged { 123 return nil 124 } 125 case swarm.UpdateStatePaused: 126 return fmt.Errorf("service update paused: %s", service.UpdateStatus.Message) 127 case swarm.UpdateStateRollbackStarted: 128 if !rollback && service.UpdateStatus.Message != "" { 129 progressOut.WriteProgress(progress.Progress{ 130 ID: "rollback", 131 Action: service.UpdateStatus.Message, 132 }) 133 } 134 rollback = true 135 case swarm.UpdateStateRollbackPaused: 136 return fmt.Errorf("service rollback paused: %s", service.UpdateStatus.Message) 137 case swarm.UpdateStateRollbackCompleted: 138 if !converged { 139 return fmt.Errorf("service rolled back: %s", service.UpdateStatus.Message) 140 } 141 } 142 } 143 if converged && time.Since(convergedAt) >= monitor { 144 progressOut.WriteProgress(progress.Progress{ 145 ID: "verify", 146 Action: "Service converged", 147 }) 148 149 return nil 150 } 151 152 tasks, err := getUpToDateTasks() 153 if err != nil { 154 return err 155 } 156 157 activeNodes, err := getActiveNodes(ctx, client) 158 if err != nil { 159 return err 160 } 161 162 converged, err = updater.update(service, tasks, activeNodes, rollback) 163 if err != nil { 164 return err 165 } 166 if converged { 167 if convergedAt.IsZero() { 168 convergedAt = time.Now() 169 } 170 wait := monitor - time.Since(convergedAt) 171 if wait >= 0 { 172 progressOut.WriteProgress(progress.Progress{ 173 // Ideally this would have no ID, but 174 // the progress rendering code behaves 175 // poorly on an "action" with no ID. It 176 // returns the cursor to the beginning 177 // of the line, so the first character 178 // may be difficult to read. Then the 179 // output is overwritten by the shell 180 // prompt when the command finishes. 181 ID: "verify", 182 Action: fmt.Sprintf("Waiting %d seconds to verify that tasks are stable...", wait/time.Second+1), 183 }) 184 } 185 } else { 186 if !convergedAt.IsZero() { 187 progressOut.WriteProgress(progress.Progress{ 188 ID: "verify", 189 Action: "Detected task failure", 190 }) 191 } 192 convergedAt = time.Time{} 193 } 194 195 select { 196 case <-time.After(200 * time.Millisecond): 197 case <-sigint: 198 if !converged { 199 progress.Message(progressOut, "", "Operation continuing in background.") 200 progress.Messagef(progressOut, "", "Use `docker service ps %s` to check progress.", serviceID) 201 } 202 return nil 203 } 204 } 205 } 206 207 func getActiveNodes(ctx context.Context, client client.APIClient) (map[string]struct{}, error) { 208 nodes, err := client.NodeList(ctx, types.NodeListOptions{}) 209 if err != nil { 210 return nil, err 211 } 212 213 activeNodes := make(map[string]struct{}) 214 for _, n := range nodes { 215 if n.Status.State != swarm.NodeStateDown { 216 activeNodes[n.ID] = struct{}{} 217 } 218 } 219 return activeNodes, nil 220 } 221 222 func initializeUpdater(service swarm.Service, progressOut progress.Output) (progressUpdater, error) { 223 if service.Spec.Mode.Replicated != nil && service.Spec.Mode.Replicated.Replicas != nil { 224 return &replicatedProgressUpdater{ 225 progressOut: progressOut, 226 }, nil 227 } 228 if service.Spec.Mode.Global != nil { 229 return &globalProgressUpdater{ 230 progressOut: progressOut, 231 }, nil 232 } 233 return nil, errors.New("unrecognized service mode") 234 } 235 236 func writeOverallProgress(progressOut progress.Output, numerator, denominator int, rollback bool) { 237 if rollback { 238 progressOut.WriteProgress(progress.Progress{ 239 ID: "overall progress", 240 Action: fmt.Sprintf("rolling back update: %d out of %d tasks", numerator, denominator), 241 }) 242 return 243 } 244 progressOut.WriteProgress(progress.Progress{ 245 ID: "overall progress", 246 Action: fmt.Sprintf("%d out of %d tasks", numerator, denominator), 247 }) 248 } 249 250 func truncError(errMsg string) string { 251 // Remove newlines from the error, which corrupt the output. 252 errMsg = strings.Replace(errMsg, "\n", " ", -1) 253 254 // Limit the length to 75 characters, so that even on narrow terminals 255 // this will not overflow to the next line. 256 if len(errMsg) > 75 { 257 errMsg = errMsg[:74] + "…" 258 } 259 return errMsg 260 } 261 262 type replicatedProgressUpdater struct { 263 progressOut progress.Output 264 265 // used for mapping slots to a contiguous space 266 // this also causes progress bars to appear in order 267 slotMap map[int]int 268 269 initialized bool 270 done bool 271 } 272 273 func (u *replicatedProgressUpdater) update(service swarm.Service, tasks []swarm.Task, activeNodes map[string]struct{}, rollback bool) (bool, error) { 274 if service.Spec.Mode.Replicated == nil || service.Spec.Mode.Replicated.Replicas == nil { 275 return false, errors.New("no replica count") 276 } 277 replicas := *service.Spec.Mode.Replicated.Replicas 278 279 if !u.initialized { 280 u.slotMap = make(map[int]int) 281 282 // Draw progress bars in order 283 writeOverallProgress(u.progressOut, 0, int(replicas), rollback) 284 285 if replicas <= maxProgressBars { 286 for i := uint64(1); i <= replicas; i++ { 287 progress.Update(u.progressOut, fmt.Sprintf("%d/%d", i, replicas), " ") 288 } 289 } 290 u.initialized = true 291 } 292 293 tasksBySlot := u.tasksBySlot(tasks, activeNodes) 294 295 // If we had reached a converged state, check if we are still converged. 296 if u.done { 297 for _, task := range tasksBySlot { 298 if task.Status.State != swarm.TaskStateRunning { 299 u.done = false 300 break 301 } 302 } 303 } 304 305 running := uint64(0) 306 307 for _, task := range tasksBySlot { 308 mappedSlot := u.slotMap[task.Slot] 309 if mappedSlot == 0 { 310 mappedSlot = len(u.slotMap) + 1 311 u.slotMap[task.Slot] = mappedSlot 312 } 313 314 if !terminalState(task.DesiredState) && task.Status.State == swarm.TaskStateRunning { 315 running++ 316 } 317 318 u.writeTaskProgress(task, mappedSlot, replicas, rollback) 319 } 320 321 if !u.done { 322 writeOverallProgress(u.progressOut, int(running), int(replicas), rollback) 323 324 if running == replicas { 325 u.done = true 326 } 327 } 328 329 return running == replicas, nil 330 } 331 332 func (u *replicatedProgressUpdater) tasksBySlot(tasks []swarm.Task, activeNodes map[string]struct{}) map[int]swarm.Task { 333 // If there are multiple tasks with the same slot number, favor the one 334 // with the *lowest* desired state. This can happen in restart 335 // scenarios. 336 tasksBySlot := make(map[int]swarm.Task) 337 for _, task := range tasks { 338 if numberedStates[task.DesiredState] == 0 || numberedStates[task.Status.State] == 0 { 339 continue 340 } 341 if existingTask, ok := tasksBySlot[task.Slot]; ok { 342 if numberedStates[existingTask.DesiredState] < numberedStates[task.DesiredState] { 343 continue 344 } 345 // If the desired states match, observed state breaks 346 // ties. This can happen with the "start first" service 347 // update mode. 348 if numberedStates[existingTask.DesiredState] == numberedStates[task.DesiredState] && 349 numberedStates[existingTask.Status.State] <= numberedStates[task.Status.State] { 350 continue 351 } 352 } 353 if task.NodeID != "" { 354 if _, nodeActive := activeNodes[task.NodeID]; !nodeActive { 355 continue 356 } 357 } 358 tasksBySlot[task.Slot] = task 359 } 360 361 return tasksBySlot 362 } 363 364 func (u *replicatedProgressUpdater) writeTaskProgress(task swarm.Task, mappedSlot int, replicas uint64, rollback bool) { 365 if u.done || replicas > maxProgressBars || uint64(mappedSlot) > replicas { 366 return 367 } 368 369 if task.Status.Err != "" { 370 u.progressOut.WriteProgress(progress.Progress{ 371 ID: fmt.Sprintf("%d/%d", mappedSlot, replicas), 372 Action: truncError(task.Status.Err), 373 }) 374 return 375 } 376 377 if !terminalState(task.DesiredState) && !terminalState(task.Status.State) { 378 u.progressOut.WriteProgress(progress.Progress{ 379 ID: fmt.Sprintf("%d/%d", mappedSlot, replicas), 380 Action: fmt.Sprintf("%-[1]*s", longestState, task.Status.State), 381 Current: stateToProgress(task.Status.State, rollback), 382 Total: maxProgress, 383 HideCounts: true, 384 }) 385 } 386 } 387 388 type globalProgressUpdater struct { 389 progressOut progress.Output 390 391 initialized bool 392 done bool 393 } 394 395 func (u *globalProgressUpdater) update(service swarm.Service, tasks []swarm.Task, activeNodes map[string]struct{}, rollback bool) (bool, error) { 396 tasksByNode := u.tasksByNode(tasks) 397 398 // We don't have perfect knowledge of how many nodes meet the 399 // constraints for this service. But the orchestrator creates tasks 400 // for all eligible nodes at the same time, so we should see all those 401 // nodes represented among the up-to-date tasks. 402 nodeCount := len(tasksByNode) 403 404 if !u.initialized { 405 if nodeCount == 0 { 406 // Two possibilities: either the orchestrator hasn't created 407 // the tasks yet, or the service doesn't meet constraints for 408 // any node. Either way, we wait. 409 u.progressOut.WriteProgress(progress.Progress{ 410 ID: "overall progress", 411 Action: "waiting for new tasks", 412 }) 413 return false, nil 414 } 415 416 writeOverallProgress(u.progressOut, 0, nodeCount, rollback) 417 u.initialized = true 418 } 419 420 // If we had reached a converged state, check if we are still converged. 421 if u.done { 422 for _, task := range tasksByNode { 423 if task.Status.State != swarm.TaskStateRunning { 424 u.done = false 425 break 426 } 427 } 428 } 429 430 running := 0 431 432 for _, task := range tasksByNode { 433 if _, nodeActive := activeNodes[task.NodeID]; nodeActive { 434 if !terminalState(task.DesiredState) && task.Status.State == swarm.TaskStateRunning { 435 running++ 436 } 437 438 u.writeTaskProgress(task, nodeCount, rollback) 439 } 440 } 441 442 if !u.done { 443 writeOverallProgress(u.progressOut, running, nodeCount, rollback) 444 445 if running == nodeCount { 446 u.done = true 447 } 448 } 449 450 return running == nodeCount, nil 451 } 452 453 func (u *globalProgressUpdater) tasksByNode(tasks []swarm.Task) map[string]swarm.Task { 454 // If there are multiple tasks with the same node ID, favor the one 455 // with the *lowest* desired state. This can happen in restart 456 // scenarios. 457 tasksByNode := make(map[string]swarm.Task) 458 for _, task := range tasks { 459 if numberedStates[task.DesiredState] == 0 || numberedStates[task.Status.State] == 0 { 460 continue 461 } 462 if existingTask, ok := tasksByNode[task.NodeID]; ok { 463 if numberedStates[existingTask.DesiredState] < numberedStates[task.DesiredState] { 464 continue 465 } 466 467 // If the desired states match, observed state breaks 468 // ties. This can happen with the "start first" service 469 // update mode. 470 if numberedStates[existingTask.DesiredState] == numberedStates[task.DesiredState] && 471 numberedStates[existingTask.Status.State] <= numberedStates[task.Status.State] { 472 continue 473 } 474 475 } 476 tasksByNode[task.NodeID] = task 477 } 478 479 return tasksByNode 480 } 481 482 func (u *globalProgressUpdater) writeTaskProgress(task swarm.Task, nodeCount int, rollback bool) { 483 if u.done || nodeCount > maxProgressBars { 484 return 485 } 486 487 if task.Status.Err != "" { 488 u.progressOut.WriteProgress(progress.Progress{ 489 ID: stringid.TruncateID(task.NodeID), 490 Action: truncError(task.Status.Err), 491 }) 492 return 493 } 494 495 if !terminalState(task.DesiredState) && !terminalState(task.Status.State) { 496 u.progressOut.WriteProgress(progress.Progress{ 497 ID: stringid.TruncateID(task.NodeID), 498 Action: fmt.Sprintf("%-[1]*s", longestState, task.Status.State), 499 Current: stateToProgress(task.Status.State, rollback), 500 Total: maxProgress, 501 HideCounts: true, 502 }) 503 } 504 }