github.com/rumpl/bof@v23.0.0-rc.2+incompatible/integration/internal/swarm/states.go (about) 1 package swarm 2 3 import ( 4 "context" 5 "fmt" 6 7 "github.com/docker/docker/api/types" 8 "github.com/docker/docker/api/types/filters" 9 swarmtypes "github.com/docker/docker/api/types/swarm" 10 "github.com/docker/docker/client" 11 "gotest.tools/v3/poll" 12 ) 13 14 // NoTasksForService verifies that there are no more tasks for the given service 15 func NoTasksForService(ctx context.Context, client client.ServiceAPIClient, serviceID string) func(log poll.LogT) poll.Result { 16 return func(log poll.LogT) poll.Result { 17 tasks, err := client.TaskList(ctx, types.TaskListOptions{ 18 Filters: filters.NewArgs( 19 filters.Arg("service", serviceID), 20 ), 21 }) 22 if err == nil { 23 if len(tasks) == 0 { 24 return poll.Success() 25 } 26 if len(tasks) > 0 { 27 return poll.Continue("task count for service %s at %d waiting for 0", serviceID, len(tasks)) 28 } 29 return poll.Continue("waiting for tasks for service %s to be deleted", serviceID) 30 } 31 // TODO we should not use an error as indication that the tasks are gone. There may be other reasons for an error to occur. 32 return poll.Success() 33 } 34 } 35 36 // NoTasks verifies that all tasks are gone 37 func NoTasks(ctx context.Context, client client.ServiceAPIClient) func(log poll.LogT) poll.Result { 38 return func(log poll.LogT) poll.Result { 39 tasks, err := client.TaskList(ctx, types.TaskListOptions{}) 40 switch { 41 case err != nil: 42 return poll.Error(err) 43 case len(tasks) == 0: 44 return poll.Success() 45 default: 46 return poll.Continue("waiting for all tasks to be removed: task count at %d", len(tasks)) 47 } 48 } 49 } 50 51 // RunningTasksCount verifies there are `instances` tasks running for `serviceID` 52 func RunningTasksCount(client client.ServiceAPIClient, serviceID string, instances uint64) func(log poll.LogT) poll.Result { 53 return func(log poll.LogT) poll.Result { 54 filter := filters.NewArgs() 55 filter.Add("service", serviceID) 56 tasks, err := client.TaskList(context.Background(), types.TaskListOptions{ 57 Filters: filter, 58 }) 59 var running int 60 var taskError string 61 for _, task := range tasks { 62 switch task.Status.State { 63 case swarmtypes.TaskStateRunning: 64 running++ 65 case swarmtypes.TaskStateFailed: 66 if task.Status.Err != "" { 67 taskError = task.Status.Err 68 } 69 } 70 } 71 72 switch { 73 case err != nil: 74 return poll.Error(err) 75 case running > int(instances): 76 return poll.Continue("waiting for tasks to terminate") 77 case running < int(instances) && taskError != "": 78 return poll.Continue("waiting for tasks to enter run state. task failed with error: %s", taskError) 79 case running == int(instances): 80 return poll.Success() 81 default: 82 return poll.Continue("running task count at %d waiting for %d (total tasks: %d)", running, instances, len(tasks)) 83 } 84 } 85 } 86 87 // JobComplete is a poll function for determining that a ReplicatedJob is 88 // completed additionally, while polling, it verifies that the job never 89 // exceeds MaxConcurrent running tasks 90 func JobComplete(client client.CommonAPIClient, service swarmtypes.Service) func(log poll.LogT) poll.Result { 91 filter := filters.NewArgs() 92 filter.Add("service", service.ID) 93 94 var jobIteration swarmtypes.Version 95 if service.JobStatus != nil { 96 jobIteration = service.JobStatus.JobIteration 97 } 98 99 maxRaw := service.Spec.Mode.ReplicatedJob.MaxConcurrent 100 totalRaw := service.Spec.Mode.ReplicatedJob.TotalCompletions 101 102 max := int(*maxRaw) 103 total := int(*totalRaw) 104 105 previousResult := "" 106 107 return func(log poll.LogT) poll.Result { 108 tasks, err := client.TaskList(context.Background(), types.TaskListOptions{ 109 Filters: filter, 110 }) 111 112 if err != nil { 113 poll.Error(err) 114 } 115 116 var running int 117 var completed int 118 119 var runningSlot []int 120 var runningID []string 121 122 for _, task := range tasks { 123 // make sure the task has the same job iteration 124 if task.JobIteration == nil || task.JobIteration.Index != jobIteration.Index { 125 continue 126 } 127 switch task.Status.State { 128 case swarmtypes.TaskStateRunning: 129 running++ 130 runningSlot = append(runningSlot, task.Slot) 131 runningID = append(runningID, task.ID) 132 case swarmtypes.TaskStateComplete: 133 completed++ 134 } 135 } 136 137 switch { 138 case running > max: 139 return poll.Error(fmt.Errorf( 140 "number of running tasks (%v) exceeds max (%v)", running, max, 141 )) 142 case (completed + running) > total: 143 return poll.Error(fmt.Errorf( 144 "number of tasks exceeds total (%v), %v running and %v completed", 145 total, running, completed, 146 )) 147 case completed == total && running == 0: 148 return poll.Success() 149 default: 150 newRes := fmt.Sprintf( 151 "Completed: %2d Running: %v\n\t%v", 152 completed, runningSlot, runningID, 153 ) 154 if newRes == previousResult { 155 } else { 156 previousResult = newRes 157 } 158 159 return poll.Continue( 160 "Job not yet finished, %v completed and %v running out of %v total", 161 completed, running, total, 162 ) 163 } 164 } 165 }