github.com/kaisenlinux/docker.io@v0.0.0-20230510090727-ea55db55fac7/swarmkit/manager/orchestrator/replicated/update_test.go (about) 1 package replicated 2 3 import ( 4 "context" 5 "sync" 6 "testing" 7 "time" 8 9 "github.com/docker/go-events" 10 "github.com/docker/swarmkit/api" 11 "github.com/docker/swarmkit/manager/orchestrator/testutils" 12 "github.com/docker/swarmkit/manager/state" 13 "github.com/docker/swarmkit/manager/state/store" 14 gogotypes "github.com/gogo/protobuf/types" 15 "github.com/stretchr/testify/assert" 16 "github.com/stretchr/testify/require" 17 ) 18 19 func TestUpdaterRollback(t *testing.T) { 20 t.Run("pause/monitor_set/spec_version_unset", func(t *testing.T) { testUpdaterRollback(t, api.UpdateConfig_PAUSE, true, false) }) 21 t.Run("pause/monitor_set/spec_version_set", func(t *testing.T) { testUpdaterRollback(t, api.UpdateConfig_PAUSE, true, true) }) 22 // skipped, see #2137 23 // t.Run("pause/monitor_unset/spec_version_unset", func(t *testing.T) { testUpdaterRollback(t, api.UpdateConfig_PAUSE, false, false) }) 24 // t.Run("pause/monitor_unset/spec_version_set", func(t *testing.T) { testUpdaterRollback(t, api.UpdateConfig_PAUSE, false, true) }) 25 t.Run("continue/spec_version_unset", func(t *testing.T) { testUpdaterRollback(t, api.UpdateConfig_CONTINUE, true, false) }) 26 t.Run("continue/spec_version_set", func(t *testing.T) { testUpdaterRollback(t, api.UpdateConfig_CONTINUE, true, true) }) 27 } 28 29 func testUpdaterRollback(t *testing.T, rollbackFailureAction api.UpdateConfig_FailureAction, setMonitor bool, useSpecVersion bool) { 30 // this test should complete within 30 seconds. if not, bail out 31 ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) 32 defer cancel() 33 34 s := store.NewMemoryStore(nil) 35 assert.NotNil(t, s) 36 defer s.Close() 37 38 orchestrator := NewReplicatedOrchestrator(s) 39 40 // These variables will be used to signal that The Fail Loop should start 41 // failing these tasks. Once they're closed, The Failing Can Begin. 42 var ( 43 failMu sync.Mutex 44 failImage1 bool 45 ) 46 47 // create a watch for task creates, which we will use to verify that the 48 // updater works correctly. 49 watchCreate, cancelCreate := state.Watch(s.WatchQueue(), api.EventCreateTask{}) 50 defer cancelCreate() 51 52 watchServiceUpdate, cancelServiceUpdate := state.Watch(s.WatchQueue(), api.EventUpdateService{}) 53 defer cancelServiceUpdate() 54 55 // Fail new tasks the updater tries to run 56 watchUpdate, cancelUpdate := state.Watch(s.WatchQueue(), api.EventUpdateTask{}) 57 defer cancelUpdate() 58 59 // We're gonna call this big chunk here "The Fail Loop". its job is to put 60 // tasks into a Failed state in certain conditions. 61 testutils.EnsureRuns(func() { 62 failedLast := false 63 // typical go pattern: infinite for loop in a goroutine, exits on 64 // ctx.Done 65 for { 66 var e events.Event 67 select { 68 case e = <-watchUpdate: 69 case <-ctx.Done(): 70 return 71 } 72 task := e.(api.EventUpdateTask).Task 73 if task.DesiredState == task.Status.State { 74 continue 75 } 76 // This used to have a 3rd clause, 77 // "&& task.Status.State != api.TaskStateRunning" 78 // however, this is unneeded. If DesiredState is Running, then 79 // actual state cannot be Running, because that would get caught 80 // in the condition about (DesiredState == State) 81 if task.DesiredState == api.TaskStateRunning && task.Status.State != api.TaskStateFailed { 82 err := s.Update(func(tx store.Tx) error { 83 task = store.GetTask(tx, task.ID) 84 // lock mutex governing access to failImage1. 85 failMu.Lock() 86 defer failMu.Unlock() 87 // we should start failing tasks with image1 only after1 88 if task.Spec.GetContainer().Image == "image1" && failImage1 { 89 // only fail the task if we can read from failImage1 90 // (which will only be true if it's closed) 91 task.Status.State = api.TaskStateFailed 92 failedLast = true 93 } else if task.Spec.GetContainer().Image == "image2" && !failedLast { 94 // Never fail two image2 tasks in a row, so there's a mix of 95 // failed and successful tasks for the rollback. 96 task.Status.State = api.TaskStateFailed 97 failedLast = true 98 } else { 99 task.Status.State = task.DesiredState 100 failedLast = false 101 } 102 return store.UpdateTask(tx, task) 103 }) 104 assert.NoError(t, err) 105 } else if task.DesiredState > api.TaskStateRunning { 106 err := s.Update(func(tx store.Tx) error { 107 task = store.GetTask(tx, task.ID) 108 task.Status.State = task.DesiredState 109 return store.UpdateTask(tx, task) 110 }) 111 assert.NoError(t, err) 112 } 113 } 114 }) 115 116 // Create a service with four replicas specified before the orchestrator 117 // is started. This should result in two tasks when the orchestrator 118 // starts up. 119 err := s.Update(func(tx store.Tx) error { 120 s1 := &api.Service{ 121 ID: "id1", 122 Spec: api.ServiceSpec{ 123 Annotations: api.Annotations{ 124 Name: "name1", 125 }, 126 Task: api.TaskSpec{ 127 Runtime: &api.TaskSpec_Container{ 128 Container: &api.ContainerSpec{ 129 Image: "image1", 130 }, 131 }, 132 Restart: &api.RestartPolicy{ 133 Condition: api.RestartOnNone, 134 }, 135 }, 136 Mode: &api.ServiceSpec_Replicated{ 137 Replicated: &api.ReplicatedService{ 138 Replicas: 4, 139 }, 140 }, 141 Update: &api.UpdateConfig{ 142 FailureAction: api.UpdateConfig_ROLLBACK, 143 Parallelism: 1, 144 Delay: 10 * time.Millisecond, 145 MaxFailureRatio: 0.4, 146 }, 147 Rollback: &api.UpdateConfig{ 148 FailureAction: rollbackFailureAction, 149 Parallelism: 1, 150 Delay: 10 * time.Millisecond, 151 MaxFailureRatio: 0.4, 152 }, 153 }, 154 } 155 156 if setMonitor { 157 s1.Spec.Update.Monitor = gogotypes.DurationProto(500 * time.Millisecond) 158 s1.Spec.Rollback.Monitor = gogotypes.DurationProto(500 * time.Millisecond) 159 } 160 if useSpecVersion { 161 s1.SpecVersion = &api.Version{ 162 Index: 1, 163 } 164 } 165 166 assert.NoError(t, store.CreateService(tx, s1)) 167 return nil 168 }) 169 assert.NoError(t, err) 170 171 // Start the orchestrator. 172 var orchestratorError error 173 orchestratorDone := testutils.EnsureRuns(func() { 174 orchestratorError = orchestrator.Run(ctx) 175 }) 176 177 defer func() { 178 orchestrator.Stop() 179 select { 180 case <-ctx.Done(): 181 case <-orchestratorDone: 182 assert.NoError(t, orchestratorError) 183 } 184 }() 185 186 observedTask := testutils.WatchTaskCreate(t, watchCreate) 187 assert.Equal(t, observedTask.Status.State, api.TaskStateNew) 188 assert.Equal(t, observedTask.Spec.GetContainer().Image, "image1") 189 190 observedTask = testutils.WatchTaskCreate(t, watchCreate) 191 assert.Equal(t, observedTask.Status.State, api.TaskStateNew) 192 assert.Equal(t, observedTask.Spec.GetContainer().Image, "image1") 193 194 observedTask = testutils.WatchTaskCreate(t, watchCreate) 195 assert.Equal(t, observedTask.Status.State, api.TaskStateNew) 196 assert.Equal(t, observedTask.Spec.GetContainer().Image, "image1") 197 198 observedTask = testutils.WatchTaskCreate(t, watchCreate) 199 assert.Equal(t, observedTask.Status.State, api.TaskStateNew) 200 assert.Equal(t, observedTask.Spec.GetContainer().Image, "image1") 201 202 // Start a rolling update 203 err = s.Update(func(tx store.Tx) error { 204 s1 := store.GetService(tx, "id1") 205 require.NotNil(t, s1) 206 s1.PreviousSpec = s1.Spec.Copy() 207 s1.PreviousSpecVersion = s1.SpecVersion.Copy() 208 s1.UpdateStatus = nil 209 s1.Spec.Task.GetContainer().Image = "image2" 210 if s1.SpecVersion != nil { 211 s1.SpecVersion.Index = 2 212 } 213 assert.NoError(t, store.UpdateService(tx, s1)) 214 return nil 215 }) 216 assert.NoError(t, err) 217 218 // Should see three tasks started, then a rollback 219 220 observedTask = testutils.WatchTaskCreate(t, watchCreate) 221 assert.Equal(t, observedTask.Status.State, api.TaskStateNew) 222 assert.Equal(t, observedTask.Spec.GetContainer().Image, "image2") 223 224 observedTask = testutils.WatchTaskCreate(t, watchCreate) 225 assert.Equal(t, observedTask.Status.State, api.TaskStateNew) 226 assert.Equal(t, observedTask.Spec.GetContainer().Image, "image2") 227 228 observedTask = testutils.WatchTaskCreate(t, watchCreate) 229 assert.Equal(t, observedTask.Status.State, api.TaskStateNew) 230 assert.Equal(t, observedTask.Spec.GetContainer().Image, "image2") 231 232 // Should get to the ROLLBACK_STARTED state 233 for { 234 var e events.Event 235 select { 236 case e = <-watchServiceUpdate: 237 case <-ctx.Done(): 238 t.Error("test timed out before watchServiceUpdate provided an event") 239 return 240 } 241 if e.(api.EventUpdateService).Service.UpdateStatus == nil { 242 continue 243 } 244 if e.(api.EventUpdateService).Service.UpdateStatus.State == api.UpdateStatus_ROLLBACK_STARTED { 245 break 246 } 247 } 248 249 observedTask = testutils.WatchTaskCreate(t, watchCreate) 250 assert.Equal(t, observedTask.Status.State, api.TaskStateNew) 251 assert.Equal(t, observedTask.Spec.GetContainer().Image, "image1") 252 253 observedTask = testutils.WatchTaskCreate(t, watchCreate) 254 assert.Equal(t, observedTask.Status.State, api.TaskStateNew) 255 assert.Equal(t, observedTask.Spec.GetContainer().Image, "image1") 256 257 observedTask = testutils.WatchTaskCreate(t, watchCreate) 258 assert.Equal(t, observedTask.Status.State, api.TaskStateNew) 259 assert.Equal(t, observedTask.Spec.GetContainer().Image, "image1") 260 261 if !setMonitor { 262 // Exit early in this case, since it would take a long time for 263 // the service to reach the "*_COMPLETED" states. 264 return 265 } 266 267 // Should end up in ROLLBACK_COMPLETED state 268 for { 269 var e events.Event 270 select { 271 case e = <-watchServiceUpdate: 272 t.Log("service was updated") 273 case <-ctx.Done(): 274 t.Error("test timed out before watchServiceUpdate provided an event") 275 return 276 } 277 278 if e.(api.EventUpdateService).Service.UpdateStatus.State == api.UpdateStatus_ROLLBACK_COMPLETED { 279 break 280 } 281 } 282 283 // Repeat the rolling update but this time fail the tasks that the 284 // rollback creates. 285 failMu.Lock() 286 failImage1 = true 287 failMu.Unlock() 288 289 err = s.Update(func(tx store.Tx) error { 290 s1 := store.GetService(tx, "id1") 291 require.NotNil(t, s1) 292 s1.PreviousSpec = s1.Spec.Copy() 293 s1.PreviousSpecVersion = s1.SpecVersion.Copy() 294 s1.UpdateStatus = nil 295 s1.Spec.Task.GetContainer().Image = "image2" 296 if s1.SpecVersion != nil { 297 s1.SpecVersion.Index = 2 298 } 299 assert.NoError(t, store.UpdateService(tx, s1)) 300 return nil 301 }) 302 assert.NoError(t, err) 303 304 // Should see three tasks started, then a rollback 305 306 observedTask = testutils.WatchTaskCreate(t, watchCreate) 307 assert.Equal(t, observedTask.Status.State, api.TaskStateNew) 308 assert.Equal(t, observedTask.Spec.GetContainer().Image, "image2") 309 310 observedTask = testutils.WatchTaskCreate(t, watchCreate) 311 assert.Equal(t, observedTask.Status.State, api.TaskStateNew) 312 assert.Equal(t, observedTask.Spec.GetContainer().Image, "image2") 313 314 observedTask = testutils.WatchTaskCreate(t, watchCreate) 315 assert.Equal(t, observedTask.Status.State, api.TaskStateNew) 316 assert.Equal(t, observedTask.Spec.GetContainer().Image, "image2") 317 318 // Should get to the ROLLBACK_STARTED state 319 for { 320 var e events.Event 321 select { 322 case e = <-watchServiceUpdate: 323 case <-ctx.Done(): 324 t.Error("test timed out before watchServiceUpdate provided an event") 325 return 326 } 327 if e.(api.EventUpdateService).Service.UpdateStatus == nil { 328 continue 329 } 330 if e.(api.EventUpdateService).Service.UpdateStatus.State == api.UpdateStatus_ROLLBACK_STARTED { 331 break 332 } 333 } 334 335 observedTask = testutils.WatchTaskCreate(t, watchCreate) 336 assert.Equal(t, observedTask.Status.State, api.TaskStateNew) 337 assert.Equal(t, observedTask.Spec.GetContainer().Image, "image1") 338 339 observedTask = testutils.WatchTaskCreate(t, watchCreate) 340 assert.Equal(t, observedTask.Status.State, api.TaskStateNew) 341 assert.Equal(t, observedTask.Spec.GetContainer().Image, "image1") 342 343 observedTask = testutils.WatchTaskCreate(t, watchCreate) 344 assert.Equal(t, observedTask.Status.State, api.TaskStateNew) 345 assert.Equal(t, observedTask.Spec.GetContainer().Image, "image1") 346 347 switch rollbackFailureAction { 348 case api.UpdateConfig_PAUSE: 349 // Should end up in ROLLBACK_PAUSED state 350 for { 351 var e events.Event 352 select { 353 case e = <-watchServiceUpdate: 354 case <-ctx.Done(): 355 t.Error("test timed out before watchServiceUpdate provided an event") 356 return 357 } 358 if e.(api.EventUpdateService).Service.UpdateStatus.State == api.UpdateStatus_ROLLBACK_PAUSED { 359 return 360 } 361 } 362 case api.UpdateConfig_CONTINUE: 363 // Should end up in ROLLBACK_COMPLETE state 364 for { 365 var e events.Event 366 select { 367 case e = <-watchServiceUpdate: 368 case <-ctx.Done(): 369 t.Error("test timed out before watchServiceUpdate provided an event") 370 return 371 } 372 if e.(api.EventUpdateService).Service.UpdateStatus.State == api.UpdateStatus_ROLLBACK_COMPLETED { 373 return 374 } 375 } 376 } 377 }