github.com/kaisenlinux/docker.io@v0.0.0-20230510090727-ea55db55fac7/swarmkit/manager/orchestrator/jobs/replicated/reconciler_test.go

package replicated

import (
	. "github.com/onsi/ginkgo"
	. "github.com/onsi/gomega"
	. "github.com/onsi/gomega/types"

	"context"
	"fmt"

	"github.com/docker/swarmkit/api"
	"github.com/docker/swarmkit/manager/orchestrator"
	"github.com/docker/swarmkit/manager/state/store"
)

type fakeRestartSupervisor struct {
	tasks []string
}

func (f *fakeRestartSupervisor) Restart(_ context.Context, _ store.Tx, _ *api.Cluster, _ *api.Service, task api.Task) error {
	f.tasks = append(f.tasks, task.ID)
	return nil
}

// uniqueSlotsMatcher is used to verify that a set of tasks all have unique,
// non-overlapping slot numbers
type uniqueSlotsMatcher struct {
	duplicatedSlot uint64
}

// Match records the first duplicated slot it finds so that FailureMessage can
// report it.
func (u *uniqueSlotsMatcher) Match(actual interface{}) (bool, error) {
	tasks, ok := actual.([]*api.Task)
	if !ok {
		return false, fmt.Errorf("actual is not []*api.Task")
	}

	slots := map[uint64]bool{}
	for _, task := range tasks {
		if filled, ok := slots[task.Slot]; ok || filled {
			u.duplicatedSlot = task.Slot
			return false, nil
		}
		slots[task.Slot] = true
	}
	return true, nil
}

func (u *uniqueSlotsMatcher) FailureMessage(_ interface{}) string {
	return fmt.Sprintf("expected tasks to have unique slots, but %v is duplicated", u.duplicatedSlot)
}

func (u *uniqueSlotsMatcher) NegatedFailureMessage(_ interface{}) string {
	return "expected tasks to have duplicate slots"
}

func HaveUniqueSlots() GomegaMatcher {
	return &uniqueSlotsMatcher{}
}

func AllTasks(s *store.MemoryStore) []*api.Task {
	var tasks []*api.Task
	s.View(func(tx store.ReadTx) {
		tasks, _ = store.FindTasks(tx, store.All)
	})
	return tasks
}

var _ = Describe("Replicated Job reconciler", func() {
	var (
		r       *Reconciler
		s       *store.MemoryStore
		cluster *api.Cluster
		f       *fakeRestartSupervisor
	)

	BeforeEach(func() {
		s = store.NewMemoryStore(nil)
		Expect(s).ToNot(BeNil())
		f = &fakeRestartSupervisor{}

		r = &Reconciler{
			store:   s,
			restart: f,
		}
	})

	AfterEach(func() {
		s.Close()
	})

	Describe("ReconcileService", func() {
		var (
			serviceID        string
			service          *api.Service
			maxConcurrent    uint64
			totalCompletions uint64

			reconcileErr error
		)

		BeforeEach(func() {
			serviceID = "someService"
			maxConcurrent = 10
			totalCompletions = 30
			service = &api.Service{
				ID: serviceID,
				Spec: api.ServiceSpec{
					Mode: &api.ServiceSpec_ReplicatedJob{
						ReplicatedJob: &api.ReplicatedJob{
							MaxConcurrent:    maxConcurrent,
							TotalCompletions: totalCompletions,
						},
					},
				},
				JobStatus: &api.JobStatus{
					JobIteration: api.Version{Index: 0},
				},
			}

			cluster = &api.Cluster{
				ID: "someCluster",
				Spec: api.ClusterSpec{
					Annotations: api.Annotations{
						Name: "someCluster",
					},
					TaskDefaults: api.TaskDefaults{
						LogDriver: &api.Driver{
							Name: "someDriver",
						},
					},
				},
			}
		})

		When("a job has been updated", func() {
			var (
				tasks []*api.Task
			)
			// Before anything, create the job, reconcile the job, and let
			// tasks be created
			BeforeEach(func() {
				err := s.Update(func(tx store.Tx) error {
					if service != nil {
						if err := store.CreateService(tx, service); err != nil {
							return err
						}
					}

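					// also create the cluster, if one is set, so that
					// cluster-level defaults (like the log driver) are
					// available to the reconciler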
					if cluster != nil {
						return store.CreateCluster(tx, cluster)
					}
					return nil
				})
				Expect(err).ToNot(HaveOccurred())

				err = r.ReconcileService(serviceID)
				Expect(err).ToNot(HaveOccurred())

				// verify there are maxConcurrent tasks
				var tasks []*api.Task
				s.View(func(tx store.ReadTx) {
					tasks, err = store.FindTasks(tx, store.ByServiceID(serviceID))
				})
				Expect(err).ToNot(HaveOccurred())
				Expect(tasks).To(HaveLen(int(maxConcurrent)))
			})

			JustBeforeEach(func() {
				err := s.Update(func(tx store.Tx) error {
					// get the service, and bump ForceUpdate and the job
					// iteration
					service := store.GetService(tx, serviceID)
					service.Spec.Task.ForceUpdate++
					service.JobStatus.JobIteration.Index++
					// we don't actually look at LastExecution in the
					// replicated reconciler so we don't bother to set it here.
					return store.UpdateService(tx, service)
				})
				Expect(err).ToNot(HaveOccurred())
				err = r.ReconcileService(serviceID)
				Expect(err).ToNot(HaveOccurred())

				// fetch the tasks before we get to the test case itself,
				// because we do this in all cases.
				s.View(func(tx store.ReadTx) {
					tasks, err = store.FindTasks(tx, store.ByServiceID(serviceID))
				})
				Expect(err).ToNot(HaveOccurred())
			})

			It("should remove all tasks belonging to the previous service iteration", func() {
				count := 0
				for _, task := range tasks {
					Expect(task.JobIteration).ToNot(BeNil())
					// first iteration of the job should have index 0
					if task.JobIteration.Index == 0 {
						Expect(task.DesiredState).To(Equal(api.TaskStateRemove))
						count++
					}
				}

				Expect(count).To(Equal(int(maxConcurrent)))
			})

			It("should create new tasks with the new JobIteration", func() {
				count := 0
				for _, task := range tasks {
					Expect(task.JobIteration).ToNot(BeNil())
					if task.JobIteration.Index == 1 {
						Expect(task.DesiredState).To(Equal(api.TaskStateCompleted))
						count++
					}
				}

				Expect(count).To(Equal(int(maxConcurrent)))
			})
		})

		When("reconciling a service", func() {
			JustBeforeEach(func() {
				err := s.Update(func(tx store.Tx) error {
					if service != nil {
						if err := store.CreateService(tx, service); err != nil {
							return err
						}
					}
					if cluster != nil {
						return store.CreateCluster(tx, cluster)
					}
					return nil
				})
				Expect(err).ToNot(HaveOccurred())

				reconcileErr = r.ReconcileService(serviceID)
			})

			When("the job has no tasks yet created", func() {
				It("should create MaxConcurrent number of tasks", func() {
					tasks := AllTasks(s)
					// casting maxConcurrent to an int, which we know is safe
					// because we set its value ourselves.
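					// (maxConcurrent is 10 in this suite, well within the
					// range of int.)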
					Expect(tasks).To(HaveLen(int(maxConcurrent)))

					for _, task := range tasks {
						Expect(task.ServiceID).To(Equal(service.ID))
						Expect(task.JobIteration).ToNot(BeNil())
						Expect(task.JobIteration.Index).To(Equal(uint64(0)))
					}
				})

				It("should assign each task to a unique slot", func() {
					tasks := AllTasks(s)

					Expect(tasks).To(HaveUniqueSlots())
				})

				It("should return no error", func() {
					Expect(reconcileErr).ToNot(HaveOccurred())
				})

				It("should set the desired state of each task to COMPLETE", func() {
					tasks := AllTasks(s)
					for _, task := range tasks {
						Expect(task.DesiredState).To(Equal(api.TaskStateCompleted))
					}
				})

				It("should use the cluster to set the default log driver", func() {
					tasks := AllTasks(s)
					Expect(len(tasks) >= 1).To(BeTrue())

					Expect(tasks[0].LogDriver).To(Equal(cluster.Spec.TaskDefaults.LogDriver))
				})
			})

			When("the job has some tasks already in progress", func() {
				BeforeEach(func() {
					s.Update(func(tx store.Tx) error {
						// create 6 tasks before we reconcile the service.
						// also, to fully exercise the slot picking code, we'll
						// assign these tasks to every other slot
						for i := uint64(0); i < 12; i += 2 {
							task := orchestrator.NewTask(cluster, service, i, "")
							task.JobIteration = &api.Version{}
							task.DesiredState = api.TaskStateCompleted

							if err := store.CreateTask(tx, task); err != nil {
								return err
							}
						}

						return nil
					})
				})

				It("should create only the number of tasks needed to reach MaxConcurrent", func() {
					tasks := AllTasks(s)

					Expect(tasks).To(HaveLen(int(maxConcurrent)))
				})

				It("should assign each new task to a unique slot", func() {
					tasks := AllTasks(s)
					Expect(tasks).To(HaveUniqueSlots())
				})
			})

			When("some running tasks are desired to be shutdown", func() {
				BeforeEach(func() {
					err := s.Update(func(tx store.Tx) error {
						for i := uint64(0); i < maxConcurrent; i++ {
							task := orchestrator.NewTask(cluster, service, i, "")
							task.JobIteration = &api.Version{}
							task.DesiredState = api.TaskStateShutdown

							if err := store.CreateTask(tx, task); err != nil {
								return err
							}
						}
						return nil
					})
					Expect(err).ToNot(HaveOccurred())
				})

				It("should ignore tasks shutting down when creating new ones", func() {
					tasks := AllTasks(s)
					Expect(tasks).To(HaveLen(int(maxConcurrent) * 2))
				})

				It("should reuse slot numbers", func() {
					tasks := AllTasks(s)
					Expect(tasks).ToNot(HaveUniqueSlots())
				})
			})

			When("a job has some failing and some completed tasks", func() {
				var (
					desiredNewTasks uint64
					failingTasks    []string
				)

				BeforeEach(func() {
					failingTasks = []string{}
					err := s.Update(func(tx store.Tx) error {
						// first, create a set of tasks with slots
						// [0, maxConcurrent-1] that have all succeeded
						for i := uint64(0); i < maxConcurrent; i++ {
							task := orchestrator.NewTask(cluster, service, i, "")
							task.JobIteration = &api.Version{}
							task.DesiredState = api.TaskStateCompleted
							task.Status.State = api.TaskStateCompleted
							if err := store.CreateTask(tx, task); err != nil {
								return err
							}
						}

						// next, create half of maxConcurrent tasks, all
						// failing.
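						// these occupy slots [maxConcurrent, maxConcurrent + maxConcurrent/2),
						// so they do not collide with the completed tasks above.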
						startSlot := maxConcurrent
						endSlot := startSlot + (maxConcurrent / 2)
						for i := startSlot; i < endSlot; i++ {
							task := orchestrator.NewTask(cluster, service, i, "")
							task.JobIteration = &api.Version{}
							task.DesiredState = api.TaskStateCompleted
							task.Status.State = api.TaskStateFailed
							failingTasks = append(failingTasks, task.ID)
							if err := store.CreateTask(tx, task); err != nil {
								return err
							}
						}

						// it might seem dumb to do this instead of just using
						// maxConcurrent / 2, but this avoids any issues with
						// the parity of maxConcurrent that might otherwise
						// arise from integer division. we want enough tasks to
						// get us up to maxConcurrent, including the ones
						// already extant and failing.
						desiredNewTasks = maxConcurrent - (maxConcurrent / 2)
						return nil
					})
					Expect(err).ToNot(HaveOccurred())
				})

				It("should not reuse slot numbers", func() {
					tasks := AllTasks(s)
					Expect(tasks).To(HaveUniqueSlots())
				})

				It("should not replace the failing tasks", func() {
					s.View(func(tx store.ReadTx) {
						// Get all tasks that are in desired state Completed
						tasks, err := store.FindTasks(tx, store.ByDesiredState(api.TaskStateCompleted))
						Expect(err).ToNot(HaveOccurred())

						// count the tasks that are currently active. use type
						// uint64 to make comparison with maxConcurrent easier.
						activeTasks := uint64(0)
						for _, task := range tasks {
							if task.Status.State != api.TaskStateCompleted {
								activeTasks++
							}
						}

						// Assert that there are maxConcurrent of these tasks
						Expect(activeTasks).To(Equal(maxConcurrent))

						// Now, assert that there are 1/2 maxConcurrent New
						// tasks. This shouldn't be a problem, but while we're
						// here we might as well do this sanity check
						var newTasks uint64
						for _, task := range tasks {
							if task.Status.State == api.TaskStateNew {
								newTasks++
							}
						}
						Expect(newTasks).To(Equal(desiredNewTasks))
					})
				})

				It("should call Restart for each failing task", func() {
					Expect(f.tasks).To(ConsistOf(failingTasks))
				})
			})

			When("a job is almost complete, and doesn't need MaxConcurrent tasks running", func() {
				BeforeEach(func() {
					// we need to create a rather large number of tasks, all in
					// COMPLETE state.
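					// specifically, totalCompletions-10 of them, which leaves
					// room for only 10 more tasks before the job is done.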
					err := s.Update(func(tx store.Tx) error {
						for i := uint64(0); i < totalCompletions-10; i++ {
							// each task will get a unique slot
							task := orchestrator.NewTask(nil, service, i, "")
							task.JobIteration = &api.Version{}
							task.Status.State = api.TaskStateCompleted
							task.DesiredState = api.TaskStateCompleted

							if err := store.CreateTask(tx, task); err != nil {
								return err
							}
						}
						return nil
					})

					Expect(err).ToNot(HaveOccurred())
				})

				It("should create no more than the tasks needed to reach TotalCompletions", func() {
					var newTasks []*api.Task
					s.View(func(tx store.ReadTx) {
						newTasks, _ = store.FindTasks(tx, store.ByTaskState(api.TaskStateNew))
					})

					Expect(newTasks).To(HaveLen(10))
				})

				It("should give each new task a unique slot", func() {
					tasks := AllTasks(s)

					Expect(tasks).To(HaveUniqueSlots())
				})
			})

			When("the service does not exist", func() {
				BeforeEach(func() {
					service = nil
				})

				It("should return no error", func() {
					Expect(reconcileErr).ToNot(HaveOccurred())
				})

				It("should create no tasks", func() {
					s.View(func(tx store.ReadTx) {
						tasks, err := store.FindTasks(tx, store.All)
						Expect(err).ToNot(HaveOccurred())
						Expect(tasks).To(BeEmpty())
					})
				})
			})
		})

		It("should return an underflow error if there are more running tasks than TotalCompletions", func() {
			// this is an error condition which should not happen in real life,
			// but I want to make sure that we can't accidentally start
			// creating nearly the maximum 64-bit unsigned int number of tasks.
			maxConcurrent := uint64(10)
			totalCompletions := uint64(20)
			err := s.Update(func(tx store.Tx) error {
				service := &api.Service{
					ID: "someService",
					Spec: api.ServiceSpec{
						Mode: &api.ServiceSpec_ReplicatedJob{
							ReplicatedJob: &api.ReplicatedJob{
								MaxConcurrent:    maxConcurrent,
								TotalCompletions: totalCompletions,
							},
						},
					},
				}
				if err := store.CreateService(tx, service); err != nil {
					return err
				}

				for i := uint64(0); i < totalCompletions+10; i++ {
					task := orchestrator.NewTask(nil, service, 0, "")
					task.JobIteration = &api.Version{}
					task.DesiredState = api.TaskStateCompleted

					if err := store.CreateTask(tx, task); err != nil {
						return err
					}
				}
				return nil
			})
			Expect(err).ToNot(HaveOccurred())

			reconcileErr := r.ReconcileService("someService")
			Expect(reconcileErr).To(HaveOccurred())
			Expect(reconcileErr.Error()).To(ContainSubstring("underflow"))
		})
	})
})