github.com/kaisenlinux/docker.io@v0.0.0-20230510090727-ea55db55fac7/swarmkit/manager/orchestrator/taskreaper/task_reaper_test.go

package taskreaper

import (
	"context"
	"fmt"
	"testing"
	"time"

	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"

	"github.com/docker/swarmkit/api"
	"github.com/docker/swarmkit/identity"
	"github.com/docker/swarmkit/manager/orchestrator"
	"github.com/docker/swarmkit/manager/orchestrator/replicated"
	"github.com/docker/swarmkit/manager/orchestrator/testutils"
	"github.com/docker/swarmkit/manager/state"
	"github.com/docker/swarmkit/manager/state/store"
	gogotypes "github.com/gogo/protobuf/types"
)

// TestTaskReaperInit tests that the task reaper correctly cleans up tasks
// when it is initialized. This will happen every time cluster leadership
// changes.
func TestTaskReaperInit(t *testing.T) {
	// start up the memory store
	ctx := context.Background()
	s := store.NewMemoryStore(nil)
	require.NotNil(t, s)
	defer s.Close()

	// Create the basic cluster with precooked tasks we need for the taskreaper
	cluster := &api.Cluster{
		Spec: api.ClusterSpec{
			Annotations: api.Annotations{
				Name: store.DefaultClusterName,
			},
			Orchestration: api.OrchestrationConfig{
				TaskHistoryRetentionLimit: 2,
			},
		},
	}

	// this service is alive and active, and has no tasks to clean up
	service := &api.Service{
		ID: "cleanservice",
		Spec: api.ServiceSpec{
			Annotations: api.Annotations{
				Name: "cleanservice",
			},
			Task: api.TaskSpec{
				// the runtime spec isn't looked at and doesn't really need
				// to be filled in
				Runtime: &api.TaskSpec_Container{
					Container: &api.ContainerSpec{},
				},
			},
			Mode: &api.ServiceSpec_Replicated{
				Replicated: &api.ReplicatedService{
					Replicas: 2,
				},
			},
		},
	}

	// Two clean tasks; these should not be removed
	cleantask1 := &api.Task{
		ID:           "cleantask1",
		Slot:         1,
		DesiredState: api.TaskStateRunning,
		Status: api.TaskStatus{
			State: api.TaskStateRunning,
		},
		ServiceID: "cleanservice",
	}

	cleantask2 := &api.Task{
		ID:           "cleantask2",
		Slot:         2,
		DesiredState: api.TaskStateRunning,
		Status: api.TaskStatus{
			State: api.TaskStateRunning,
		},
		ServiceID: "cleanservice",
	}

	// this is an old task from when an earlier task failed. It should not
	// be removed, because it's retained history
	retainedtask := &api.Task{
		ID:           "retainedtask",
		Slot:         1,
		DesiredState: api.TaskStateShutdown,
		Status: api.TaskStatus{
			State: api.TaskStateFailed,
		},
		ServiceID: "cleanservice",
	}

	// This is a removed task left over after cleanservice was scaled down
	removedtask := &api.Task{
		ID:           "removedtask",
		Slot:         3,
		DesiredState: api.TaskStateRemove,
		Status: api.TaskStatus{
			State: api.TaskStateShutdown,
		},
		ServiceID: "cleanservice",
	}

	// some tasks belonging to a service that does not exist.
	// this first one is still running and should not be cleaned up
	terminaltask1 := &api.Task{
		ID:           "terminaltask1",
		Slot:         1,
		DesiredState: api.TaskStateRemove,
		Status: api.TaskStatus{
			State: api.TaskStateRunning,
		},
		ServiceID: "goneservice",
	}

	// this second task is shut down, and can be cleaned up
	terminaltask2 := &api.Task{
		ID:           "terminaltask2",
		Slot:         2,
		DesiredState: api.TaskStateRemove,
		Status: api.TaskStatus{
			// use COMPLETED because it's the earliest terminal state
			State: api.TaskStateCompleted,
		},
		ServiceID: "goneservice",
	}

	// this third task was never assigned, and should be removed
	earlytask1 := &api.Task{
		ID:           "earlytask1",
		Slot:         3,
		DesiredState: api.TaskStateRemove,
		Status: api.TaskStatus{
			State: api.TaskStatePending,
		},
		ServiceID: "goneservice",
	}

	// this fourth task was never assigned either, and should also be removed
	earlytask2 := &api.Task{
		ID:           "earlytask2",
		Slot:         4,
		DesiredState: api.TaskStateRemove,
		Status: api.TaskStatus{
			State: api.TaskStateNew,
		},
		ServiceID: "goneservice",
	}

	err := s.Update(func(tx store.Tx) error {
		require.NoError(t, store.CreateCluster(tx, cluster))
		require.NoError(t, store.CreateService(tx, service))
		require.NoError(t, store.CreateTask(tx, cleantask1))
		require.NoError(t, store.CreateTask(tx, cleantask2))
		require.NoError(t, store.CreateTask(tx, retainedtask))
		require.NoError(t, store.CreateTask(tx, removedtask))
		require.NoError(t, store.CreateTask(tx, terminaltask1))
		require.NoError(t, store.CreateTask(tx, terminaltask2))
		require.NoError(t, store.CreateTask(tx, earlytask1))
		require.NoError(t, store.CreateTask(tx, earlytask2))
		return nil
	})
	require.NoError(t, err, "Error setting up test fixtures")

	// set up the task reaper we'll use for this test
	reaper := New(s)

	// Now, start the reaper
	testutils.EnsureRuns(func() { reaper.Run(ctx) })

	// And then stop the reaper. This will cause the reaper to run through
	// its whole init phase and then immediately enter the loop body, get
	// the stop signal, and exit. Additionally, it will block until that
	// loop body has been reached and the reaper is stopped.
	reaper.Stop()

	// Now check that all of the tasks are in the state we expect
	s.View(func(tx store.ReadTx) {
		// the first two clean tasks should exist
		assert.NotNil(t, store.GetTask(tx, "cleantask1"))
		assert.NotNil(t, store.GetTask(tx, "cleantask2"))
		// the retained task should still exist
		assert.NotNil(t, store.GetTask(tx, "retainedtask"))
		// the removed task should be gone
		assert.Nil(t, store.GetTask(tx, "removedtask"))
		// the first terminal task, which has not yet shut down, should exist
		assert.NotNil(t, store.GetTask(tx, "terminaltask1"))
		// the second terminal task should have been removed
		assert.Nil(t, store.GetTask(tx, "terminaltask2"))
		// the first early task, which was never assigned, should be removed
		assert.Nil(t, store.GetTask(tx, "earlytask1"))
		// the second early task, which was never assigned, should be removed
		assert.Nil(t, store.GetTask(tx, "earlytask2"))
	})
}

func TestTaskHistory(t *testing.T) {
	ctx := context.Background()
	s := store.NewMemoryStore(nil)
	assert.NotNil(t, s)
	defer s.Close()

	assert.NoError(t, s.Update(func(tx store.Tx) error {
		store.CreateCluster(tx, &api.Cluster{
			ID: identity.NewID(),
			Spec: api.ClusterSpec{
				Annotations: api.Annotations{
					Name: store.DefaultClusterName,
				},
				Orchestration: api.OrchestrationConfig{
					TaskHistoryRetentionLimit: 2,
				},
			},
		})
		return nil
	}))

	taskReaper := New(s)
	defer taskReaper.Stop()
	orchestrator := replicated.NewReplicatedOrchestrator(s)
	defer orchestrator.Stop()

	watch, cancel := state.Watch(s.WatchQueue() /*api.EventCreateTask{}, api.EventUpdateTask{}*/)
	defer cancel()

	// Create a service with two instances specified before the orchestrator
	// is started. This should result in two tasks when the orchestrator
	// starts up.
	err := s.Update(func(tx store.Tx) error {
		j1 := &api.Service{
			ID: "id1",
			Spec: api.ServiceSpec{
				Annotations: api.Annotations{
					Name: "name1",
				},
				Mode: &api.ServiceSpec_Replicated{
					Replicated: &api.ReplicatedService{
						Replicas: 2,
					},
				},
				Task: api.TaskSpec{
					Restart: &api.RestartPolicy{
						Condition: api.RestartOnAny,
						Delay:     gogotypes.DurationProto(0),
					},
				},
			},
		}
		assert.NoError(t, store.CreateService(tx, j1))
		return nil
	})
	assert.NoError(t, err)

	// Start the orchestrator.
	testutils.EnsureRuns(func() {
		assert.NoError(t, orchestrator.Run(ctx))
	})
	testutils.EnsureRuns(func() { taskReaper.Run(ctx) })

	observedTask1 := testutils.WatchTaskCreate(t, watch)
	assert.Equal(t, observedTask1.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask1.ServiceAnnotations.Name, "name1")

	observedTask2 := testutils.WatchTaskCreate(t, watch)
	assert.Equal(t, observedTask2.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask2.ServiceAnnotations.Name, "name1")

	// Fail both tasks. They should both get restarted.
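	// (With Condition: api.RestartOnAny and a zero restart delay in the
	// spec above, the orchestrator is expected to create replacement tasks
	// as soon as the failures are committed.)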
	updatedTask1 := observedTask1.Copy()
	updatedTask1.Status.State = api.TaskStateFailed
	updatedTask1.ServiceAnnotations = api.Annotations{Name: "original"}
	updatedTask2 := observedTask2.Copy()
	updatedTask2.Status.State = api.TaskStateFailed
	updatedTask2.ServiceAnnotations = api.Annotations{Name: "original"}
	err = s.Update(func(tx store.Tx) error {
		assert.NoError(t, store.UpdateTask(tx, updatedTask1))
		assert.NoError(t, store.UpdateTask(tx, updatedTask2))
		return nil
	})
	assert.NoError(t, err)

	testutils.Expect(t, watch, state.EventCommit{})
	testutils.Expect(t, watch, api.EventUpdateTask{})
	testutils.Expect(t, watch, api.EventUpdateTask{})
	testutils.Expect(t, watch, state.EventCommit{})

	testutils.Expect(t, watch, api.EventUpdateTask{})
	observedTask3 := testutils.WatchTaskCreate(t, watch)
	assert.Equal(t, observedTask3.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask3.ServiceAnnotations.Name, "name1")

	testutils.Expect(t, watch, api.EventUpdateTask{})
	observedTask4 := testutils.WatchTaskCreate(t, watch)
	assert.Equal(t, observedTask4.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask4.ServiceAnnotations.Name, "name1")

	// Fail these replacement tasks. Since TaskHistoryRetentionLimit is set
	// to 2, this should cause the oldest task for each instance to get
	// deleted.
	updatedTask3 := observedTask3.Copy()
	updatedTask3.Status.State = api.TaskStateFailed
	updatedTask4 := observedTask4.Copy()
	updatedTask4.Status.State = api.TaskStateFailed
	err = s.Update(func(tx store.Tx) error {
		assert.NoError(t, store.UpdateTask(tx, updatedTask3))
		assert.NoError(t, store.UpdateTask(tx, updatedTask4))
		return nil
	})
	assert.NoError(t, err)

	deletedTask1 := testutils.WatchTaskDelete(t, watch)
	deletedTask2 := testutils.WatchTaskDelete(t, watch)

	assert.Equal(t, api.TaskStateFailed, deletedTask1.Status.State)
	assert.Equal(t, "original", deletedTask1.ServiceAnnotations.Name)
	assert.Equal(t, api.TaskStateFailed, deletedTask2.Status.State)
	assert.Equal(t, "original", deletedTask2.ServiceAnnotations.Name)

	var foundTasks []*api.Task
	s.View(func(tx store.ReadTx) {
		foundTasks, err = store.FindTasks(tx, store.All)
	})
	assert.NoError(t, err)
	assert.Len(t, foundTasks, 4)
}

// TestTaskStateRemoveOnScaledown tests that on service scale-down, task
// desired states are set to REMOVE. Then, when the agent shuts the task
// down (simulated by setting the task state to SHUTDOWN), the task reaper
// actually deletes the tasks from the store.
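//
// Roughly, the sequence exercised here (a paraphrase of the steps asserted
// in the test body, not documentation of the reaper itself) is:
//
//	scale 2 -> 1:   the orchestrator sets DesiredState = REMOVE on one task
//	agent shutdown: the test sets Status.State = SHUTDOWN by hand
//	reaper tick:    the task is deleted from the store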
func TestTaskStateRemoveOnScaledown(t *testing.T) {
	ctx := context.Background()
	s := store.NewMemoryStore(nil)
	assert.NotNil(t, s)
	defer s.Close()

	assert.NoError(t, s.Update(func(tx store.Tx) error {
		store.CreateCluster(tx, &api.Cluster{
			ID: identity.NewID(),
			Spec: api.ClusterSpec{
				Annotations: api.Annotations{
					Name: store.DefaultClusterName,
				},
				Orchestration: api.OrchestrationConfig{
					// set TaskHistoryRetentionLimit to a negative value, so
					// that it is not considered in this test
					TaskHistoryRetentionLimit: -1,
				},
			},
		})
		return nil
	}))

	taskReaper := New(s)
	defer taskReaper.Stop()
	orchestrator := replicated.NewReplicatedOrchestrator(s)
	defer orchestrator.Stop()

	// watch all incoming events
	watch, cancel := state.Watch(s.WatchQueue())
	defer cancel()

	service1 := &api.Service{
		ID: "id1",
		Spec: api.ServiceSpec{
			Annotations: api.Annotations{
				Name: "name1",
			},
			Mode: &api.ServiceSpec_Replicated{
				Replicated: &api.ReplicatedService{
					Replicas: 2,
				},
			},
			Task: api.TaskSpec{
				Restart: &api.RestartPolicy{
					Condition: api.RestartOnAny,
					Delay:     gogotypes.DurationProto(0),
				},
			},
		},
	}

	// Create a service with two instances specified before the orchestrator
	// is started. This should result in two tasks when the orchestrator
	// starts up.
	err := s.Update(func(tx store.Tx) error {
		assert.NoError(t, store.CreateService(tx, service1))
		return nil
	})
	assert.NoError(t, err)

	// Start the orchestrator.
	testutils.EnsureRuns(func() { assert.NoError(t, orchestrator.Run(ctx)) })
	testutils.EnsureRuns(func() { taskReaper.Run(ctx) })

	observedTask1 := testutils.WatchTaskCreate(t, watch)
	assert.Equal(t, observedTask1.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask1.ServiceAnnotations.Name, "name1")

	observedTask2 := testutils.WatchTaskCreate(t, watch)
	assert.Equal(t, observedTask2.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask2.ServiceAnnotations.Name, "name1")

	// Set both tasks to RUNNING, so the service is successfully running
	updatedTask1 := observedTask1.Copy()
	updatedTask1.Status.State = api.TaskStateRunning
	updatedTask1.ServiceAnnotations = api.Annotations{Name: "original"}
	updatedTask2 := observedTask2.Copy()
	updatedTask2.Status.State = api.TaskStateRunning
	updatedTask2.ServiceAnnotations = api.Annotations{Name: "original"}
	err = s.Update(func(tx store.Tx) error {
		assert.NoError(t, store.UpdateTask(tx, updatedTask1))
		assert.NoError(t, store.UpdateTask(tx, updatedTask2))
		return nil
	})
	assert.NoError(t, err)

	testutils.Expect(t, watch, state.EventCommit{})
	testutils.Expect(t, watch, api.EventUpdateTask{})
	testutils.Expect(t, watch, api.EventUpdateTask{})
	testutils.Expect(t, watch, state.EventCommit{})

	// Scale the service down to one instance. This should trigger one of
	// the tasks' desired states to be set to REMOVE.
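	// (Which of the two slots gets marked for removal is the
	// orchestrator's choice; the assertions below only depend on exactly
	// one task update carrying DesiredState == REMOVE.)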
	service1.Spec.GetReplicated().Replicas = 1
	err = s.Update(func(tx store.Tx) error {
		assert.NoError(t, store.UpdateService(tx, service1))
		return nil
	})
	assert.NoError(t, err)

	observedTask3 := testutils.WatchTaskUpdate(t, watch)
	assert.Equal(t, observedTask3.DesiredState, api.TaskStateRemove)
	assert.Equal(t, observedTask3.ServiceAnnotations.Name, "original")

	testutils.Expect(t, watch, state.EventCommit{})

	// Now the task whose desired state was set to REMOVE must be deleted by
	// the task reaper. Shut this task down first (simulates shutdown by the
	// agent).
	updatedTask3 := observedTask3.Copy()
	updatedTask3.Status.State = api.TaskStateShutdown
	err = s.Update(func(tx store.Tx) error {
		assert.NoError(t, store.UpdateTask(tx, updatedTask3))
		return nil
	})
	assert.NoError(t, err)

	deletedTask1 := testutils.WatchTaskDelete(t, watch)

	assert.Equal(t, api.TaskStateShutdown, deletedTask1.Status.State)
	assert.Equal(t, "original", deletedTask1.ServiceAnnotations.Name)

	var foundTasks []*api.Task
	s.View(func(tx store.ReadTx) {
		foundTasks, err = store.FindTasks(tx, store.All)
	})
	assert.NoError(t, err)
	assert.Len(t, foundTasks, 1)
}

// TestTaskStateRemoveOnServiceRemoval tests that on service removal, task
// desired states are set to REMOVE. Then, when the agent shuts the tasks
// down (simulated by setting the task states to SHUTDOWN), the task reaper
// actually deletes the tasks from the store.
func TestTaskStateRemoveOnServiceRemoval(t *testing.T) {
	ctx := context.Background()
	s := store.NewMemoryStore(nil)
	assert.NotNil(t, s)
	defer s.Close()

	assert.NoError(t, s.Update(func(tx store.Tx) error {
		store.CreateCluster(tx, &api.Cluster{
			ID: identity.NewID(),
			Spec: api.ClusterSpec{
				Annotations: api.Annotations{
					Name: store.DefaultClusterName,
				},
				Orchestration: api.OrchestrationConfig{
					// set TaskHistoryRetentionLimit to a negative value, so
					// that it is not considered in this test
					TaskHistoryRetentionLimit: -1,
				},
			},
		})
		return nil
	}))

	taskReaper := New(s)
	defer taskReaper.Stop()
	orchestrator := replicated.NewReplicatedOrchestrator(s)
	defer orchestrator.Stop()

	watch, cancel := state.Watch(s.WatchQueue() /*api.EventCreateTask{}, api.EventUpdateTask{}*/)
	defer cancel()

	service1 := &api.Service{
		ID: "id1",
		Spec: api.ServiceSpec{
			Annotations: api.Annotations{
				Name: "name1",
			},
			Mode: &api.ServiceSpec_Replicated{
				Replicated: &api.ReplicatedService{
					Replicas: 2,
				},
			},
			Task: api.TaskSpec{
				Restart: &api.RestartPolicy{
					Condition: api.RestartOnAny,
					Delay:     gogotypes.DurationProto(0),
				},
			},
		},
	}

	// Create a service with two instances specified before the orchestrator
	// is started. This should result in two tasks when the orchestrator
	// starts up.
	err := s.Update(func(tx store.Tx) error {
		assert.NoError(t, store.CreateService(tx, service1))
		return nil
	})
	assert.NoError(t, err)

	// Start the orchestrator.
	testutils.EnsureRuns(func() {
		assert.NoError(t, orchestrator.Run(ctx))
	})
	testutils.EnsureRuns(func() { taskReaper.Run(ctx) })

	observedTask1 := testutils.WatchTaskCreate(t, watch)
	assert.Equal(t, observedTask1.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask1.ServiceAnnotations.Name, "name1")

	observedTask2 := testutils.WatchTaskCreate(t, watch)
	assert.Equal(t, observedTask2.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask2.ServiceAnnotations.Name, "name1")

	// Set both tasks to RUNNING, so the service is successfully running
	updatedTask1 := observedTask1.Copy()
	updatedTask1.Status.State = api.TaskStateRunning
	updatedTask1.ServiceAnnotations = api.Annotations{Name: "original"}
	updatedTask2 := observedTask2.Copy()
	updatedTask2.Status.State = api.TaskStateRunning
	updatedTask2.ServiceAnnotations = api.Annotations{Name: "original"}
	err = s.Update(func(tx store.Tx) error {
		assert.NoError(t, store.UpdateTask(tx, updatedTask1))
		assert.NoError(t, store.UpdateTask(tx, updatedTask2))
		return nil
	})
	assert.NoError(t, err)

	testutils.Expect(t, watch, state.EventCommit{})
	testutils.Expect(t, watch, api.EventUpdateTask{})
	testutils.Expect(t, watch, api.EventUpdateTask{})
	testutils.Expect(t, watch, state.EventCommit{})

	// Delete the service. This should trigger both tasks' desired states to
	// be set to REMOVE.
	err = s.Update(func(tx store.Tx) error {
		assert.NoError(t, store.DeleteService(tx, service1.ID))
		return nil
	})
	assert.NoError(t, err)

	observedTask3 := testutils.WatchTaskUpdate(t, watch)
	assert.Equal(t, observedTask3.DesiredState, api.TaskStateRemove)
	assert.Equal(t, observedTask3.ServiceAnnotations.Name, "original")
	observedTask4 := testutils.WatchTaskUpdate(t, watch)
	assert.Equal(t, observedTask4.DesiredState, api.TaskStateRemove)
	assert.Equal(t, observedTask4.ServiceAnnotations.Name, "original")

	testutils.Expect(t, watch, state.EventCommit{})

	// Now the tasks must be deleted by the task reaper. Shut them down
	// first (simulates shutdown by the agent).
	updatedTask3 := observedTask3.Copy()
	updatedTask3.Status.State = api.TaskStateShutdown
	updatedTask4 := observedTask4.Copy()
	updatedTask4.Status.State = api.TaskStateShutdown
	err = s.Update(func(tx store.Tx) error {
		assert.NoError(t, store.UpdateTask(tx, updatedTask3))
		assert.NoError(t, store.UpdateTask(tx, updatedTask4))
		return nil
	})
	assert.NoError(t, err)

	deletedTask1 := testutils.WatchTaskDelete(t, watch)
	assert.Equal(t, api.TaskStateShutdown, deletedTask1.Status.State)
	assert.Equal(t, "original", deletedTask1.ServiceAnnotations.Name)

	deletedTask2 := testutils.WatchTaskDelete(t, watch)
	assert.Equal(t, api.TaskStateShutdown, deletedTask2.Status.State)
	assert.Equal(t, "original", deletedTask2.ServiceAnnotations.Name)

	var foundTasks []*api.Task
	s.View(func(tx store.ReadTx) {
		foundTasks, err = store.FindTasks(tx, store.All)
	})
	assert.NoError(t, err)
	assert.Len(t, foundTasks, 0)
}

// TestServiceRemoveDeadTasks tests removal of dead tasks
// (old shutdown tasks) on service removal.
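// Unlike TestTaskStateRemoveOnServiceRemoval above, the tasks here are
// already in a terminal state (COMPLETED) by the time the service is
// deleted, so no simulated agent shutdown is needed before the reaper can
// delete them.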
func TestServiceRemoveDeadTasks(t *testing.T) {
	ctx := context.Background()
	s := store.NewMemoryStore(nil)
	assert.NotNil(t, s)
	defer s.Close()

	assert.NoError(t, s.Update(func(tx store.Tx) error {
		store.CreateCluster(tx, &api.Cluster{
			ID: identity.NewID(),
			Spec: api.ClusterSpec{
				Annotations: api.Annotations{
					Name: store.DefaultClusterName,
				},
				Orchestration: api.OrchestrationConfig{
					// set TaskHistoryRetentionLimit to a negative value, so
					// that it is not considered in this test
					TaskHistoryRetentionLimit: -1,
				},
			},
		})
		return nil
	}))

	taskReaper := New(s)
	defer taskReaper.Stop()
	orchestrator := replicated.NewReplicatedOrchestrator(s)
	defer orchestrator.Stop()

	watch, cancel := state.Watch(s.WatchQueue() /*api.EventCreateTask{}, api.EventUpdateTask{}*/)
	defer cancel()

	service1 := &api.Service{
		ID: "id1",
		Spec: api.ServiceSpec{
			Annotations: api.Annotations{
				Name: "name1",
			},
			Mode: &api.ServiceSpec_Replicated{
				Replicated: &api.ReplicatedService{
					Replicas: 2,
				},
			},
			Task: api.TaskSpec{
				Restart: &api.RestartPolicy{
					// Turn off restart to get an accurate count on tasks.
					Condition: api.RestartOnNone,
					Delay:     gogotypes.DurationProto(0),
				},
			},
		},
	}

	// Create a service with two instances specified before the orchestrator
	// is started. This should result in two tasks when the orchestrator
	// starts up.
	err := s.Update(func(tx store.Tx) error {
		assert.NoError(t, store.CreateService(tx, service1))
		return nil
	})
	assert.NoError(t, err)

	// Start the orchestrator and the reaper.
	testutils.EnsureRuns(func() {
		assert.NoError(t, orchestrator.Run(ctx))
	})
	testutils.EnsureRuns(func() { taskReaper.Run(ctx) })

	observedTask1 := testutils.WatchTaskCreate(t, watch)
	assert.Equal(t, api.TaskStateNew, observedTask1.Status.State)
	assert.Equal(t, observedTask1.ServiceAnnotations.Name, "name1")

	observedTask2 := testutils.WatchTaskCreate(t, watch)
	assert.Equal(t, api.TaskStateNew, observedTask2.Status.State)
	assert.Equal(t, observedTask2.ServiceAnnotations.Name, "name1")

	// Set both task states to RUNNING.
	updatedTask1 := observedTask1.Copy()
	updatedTask1.Status.State = api.TaskStateRunning
	updatedTask1.ServiceAnnotations = api.Annotations{Name: "original"}
	updatedTask2 := observedTask2.Copy()
	updatedTask2.Status.State = api.TaskStateRunning
	updatedTask2.ServiceAnnotations = api.Annotations{Name: "original"}
	err = s.Update(func(tx store.Tx) error {
		assert.NoError(t, store.UpdateTask(tx, updatedTask1))
		assert.NoError(t, store.UpdateTask(tx, updatedTask2))
		return nil
	})
	require.NoError(t, err)

	testutils.Expect(t, watch, state.EventCommit{})
	testutils.Expect(t, watch, api.EventUpdateTask{})
	testutils.Expect(t, watch, api.EventUpdateTask{})
	testutils.Expect(t, watch, state.EventCommit{})

	// Set both tasks to COMPLETED.
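	// (COMPLETED is a terminal state, but with the retention limit disabled
	// above, the completed tasks should stay in the store until the service
	// itself is deleted.)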
	updatedTask3 := observedTask1.Copy()
	updatedTask3.DesiredState = api.TaskStateCompleted
	updatedTask3.Status.State = api.TaskStateCompleted
	updatedTask3.ServiceAnnotations = api.Annotations{Name: "original"}
	updatedTask4 := observedTask2.Copy()
	updatedTask4.DesiredState = api.TaskStateCompleted
	updatedTask4.Status.State = api.TaskStateCompleted
	updatedTask4.ServiceAnnotations = api.Annotations{Name: "original"}
	err = s.Update(func(tx store.Tx) error {
		assert.NoError(t, store.UpdateTask(tx, updatedTask3))
		assert.NoError(t, store.UpdateTask(tx, updatedTask4))
		return nil
	})
	require.NoError(t, err)

	// Verify that the states are set to COMPLETED.
	observedTask3 := testutils.WatchTaskUpdate(t, watch)
	assert.Equal(t, api.TaskStateCompleted, observedTask3.Status.State)
	assert.Equal(t, "original", observedTask3.ServiceAnnotations.Name)
	observedTask4 := testutils.WatchTaskUpdate(t, watch)
	assert.Equal(t, api.TaskStateCompleted, observedTask4.Status.State)
	assert.Equal(t, "original", observedTask4.ServiceAnnotations.Name)

	// Delete the service.
	err = s.Update(func(tx store.Tx) error {
		assert.NoError(t, store.DeleteService(tx, service1.ID))
		return nil
	})
	require.NoError(t, err)

	// The service delete should trigger both tasks' desired states to be
	// set to REMOVE.
	observedTask3 = testutils.WatchTaskUpdate(t, watch)
	assert.Equal(t, api.TaskStateRemove, observedTask3.DesiredState)
	assert.Equal(t, "original", observedTask3.ServiceAnnotations.Name)
	observedTask4 = testutils.WatchTaskUpdate(t, watch)
	assert.Equal(t, api.TaskStateRemove, observedTask4.DesiredState)
	assert.Equal(t, "original", observedTask4.ServiceAnnotations.Name)

	testutils.Expect(t, watch, state.EventCommit{})

	// The task reaper should see the desired state updates to REMOVE, and
	// the tasks should be deleted by the reaper.
	deletedTask1 := testutils.WatchTaskDelete(t, watch)
	assert.Equal(t, api.TaskStateCompleted, deletedTask1.Status.State)
	assert.Equal(t, "original", deletedTask1.ServiceAnnotations.Name)
	deletedTask2 := testutils.WatchTaskDelete(t, watch)
	assert.Equal(t, api.TaskStateCompleted, deletedTask2.Status.State)
	assert.Equal(t, "original", deletedTask2.ServiceAnnotations.Name)

	var foundTasks []*api.Task
	s.View(func(tx store.ReadTx) {
		foundTasks, err = store.FindTasks(tx, store.All)
	})
	assert.NoError(t, err)
	assert.Len(t, foundTasks, 0)
}

// TestTaskReaperBatching tests that the batching logic for the task reaper
// runs correctly.
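//
// The behavior exercised below (a paraphrase of this test's assertions,
// not separate documentation of the reaper): slots of tasks marked with
// DesiredState = REMOVE accumulate in a cleanup map, and a tick processes
// that map either once reaperBatchingInterval has elapsed or, without
// waiting, as soon as more than maxDirty entries are pending. tickSignal
// is a buffered hook channel the test installs so it can observe each
// tick:
//
//	taskReaper.tickSignal = make(chan struct{}, 1)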
func TestTaskReaperBatching(t *testing.T) {
	// create a canned context and store to use with this task reaper
	ctx := context.Background()
	s := store.NewMemoryStore(nil)
	assert.NotNil(t, s)
	defer s.Close()

	var (
		task1, task2, task3 *api.Task
		tasks               []*api.Task
	)

	// set up all of the test fixtures
	assert.NoError(t, s.Update(func(tx store.Tx) error {
		// we need a cluster object, because we need to set the retention
		// limit to a low value
		assert.NoError(t, store.CreateCluster(tx, &api.Cluster{
			ID: identity.NewID(),
			Spec: api.ClusterSpec{
				Annotations: api.Annotations{
					Name: store.DefaultClusterName,
				},
				Orchestration: api.OrchestrationConfig{
					TaskHistoryRetentionLimit: 1,
				},
			},
		}))

		task1 = &api.Task{
			ID:           "foo",
			ServiceID:    "bar",
			Slot:         0,
			DesiredState: api.TaskStateShutdown,
			Status: api.TaskStatus{
				State: api.TaskStateShutdown,
			},
		}
		// we need to create all of the tasks used in this test, because
		// we'll be using task update events to trigger reaper behavior.
		assert.NoError(t, store.CreateTask(tx, task1))

		task2 = &api.Task{
			ID:           "foo2",
			ServiceID:    "bar",
			Slot:         1,
			DesiredState: api.TaskStateShutdown,
			Status: api.TaskStatus{
				State: api.TaskStateShutdown,
			},
		}
		assert.NoError(t, store.CreateTask(tx, task2))

		tasks = make([]*api.Task, maxDirty+1)
		for i := 0; i < maxDirty+1; i++ {
			tasks[i] = &api.Task{
				ID:        fmt.Sprintf("baz%v", i),
				ServiceID: "bar",
				// every task in a different slot, so they don't get cleaned
				// up based on exceeding the retention limit
				Slot:         uint64(i),
				DesiredState: api.TaskStateShutdown,
				Status: api.TaskStatus{
					State: api.TaskStateShutdown,
				},
			}
			if err := store.CreateTask(tx, tasks[i]); err != nil {
				return err
			}
		}

		task3 = &api.Task{
			ID:           "foo3",
			ServiceID:    "bar",
			Slot:         2,
			DesiredState: api.TaskStateShutdown,
			Status: api.TaskStatus{
				State: api.TaskStateShutdown,
			},
		}
		assert.NoError(t, store.CreateTask(tx, task3))
		return nil
	}))

	// now create the task reaper
	taskReaper := New(s)
	taskReaper.tickSignal = make(chan struct{}, 1)
	defer taskReaper.Stop()
	testutils.EnsureRuns(func() { taskReaper.Run(ctx) })

	// None of the tasks we've created are eligible for deletion. We should
	// see no task delete events. Wait for a tick signal, or 500ms to pass,
	// to verify that no tick will occur.
	select {
	case <-taskReaper.tickSignal:
		t.Fatalf("the taskreaper ticked when it should not have")
	case <-time.After(reaperBatchingInterval * 2):
		// ok, looks good, moving on
	}

	// update task1 to die
	assert.NoError(t, s.Update(func(tx store.Tx) error {
		task1.DesiredState = api.TaskStateRemove
		return store.UpdateTask(tx, task1)
	}))

	// the task should be added to the cleanup map and a tick should occur
	// shortly. give it an extra 50ms for overhead
	select {
	case <-taskReaper.tickSignal:
	case <-time.After(reaperBatchingInterval + (50 * time.Millisecond)):
		t.Fatalf("the taskreaper should have ticked but did not")
	}

	// now wait and make sure the task reaper does not tick again
	select {
	case <-taskReaper.tickSignal:
		t.Fatalf("the taskreaper should not have ticked but did")
	case <-time.After(reaperBatchingInterval * 2):
	}

	// now make sure we'll tick again if we update another task to die
	assert.NoError(t, s.Update(func(tx store.Tx) error {
		task2.DesiredState = api.TaskStateRemove
		return store.UpdateTask(tx, task2)
	}))

	select {
	case <-taskReaper.tickSignal:
	case <-time.After(reaperBatchingInterval + (50 * time.Millisecond)):
		t.Fatalf("the taskreaper should have ticked by now but did not")
	}

	// again, now wait and make sure the task reaper does not tick again
	select {
	case <-taskReaper.tickSignal:
		t.Fatalf("the taskreaper should not have ticked but did")
	case <-time.After(reaperBatchingInterval * 2):
	}

	// now create a large batch of tasks. this should cause a tick
	// immediately, with no waiting: we should easily be able to process all
	// of these events within the batching interval, and should expect
	// exactly one tick immediately afterward and no more
	assert.NoError(t, s.Update(func(tx store.Tx) error {
		for _, task := range tasks {
			task.DesiredState = api.TaskStateRemove
			assert.NoError(t, store.UpdateTask(tx, task))
		}
		return nil
	}))

	select {
	case <-taskReaper.tickSignal:
	case <-time.After(reaperBatchingInterval):
		// tight bound on how long it should take to tick: we should tick
		// before the reaper batching interval elapses. this should only
		// POSSIBLY fail on a really slow system, where processing the 1000+
		// incoming events takes longer than the reaperBatchingInterval. if
		// this test flakes here, that's probably why.
		t.Fatalf("we should have ticked immediately, but did not")
	}

	// again again, wait and make sure the task reaper does not tick again
	select {
	case <-taskReaper.tickSignal:
		t.Fatalf("the taskreaper should not have ticked but did")
	case <-time.After(reaperBatchingInterval * 2):
	}

	// now before we wrap up, make sure the task reaper still works off the
	// timer
	assert.NoError(t, s.Update(func(tx store.Tx) error {
		task3.DesiredState = api.TaskStateRemove
		return store.UpdateTask(tx, task3)
	}))

	select {
	case <-taskReaper.tickSignal:
	case <-time.After(reaperBatchingInterval + (50 * time.Millisecond)):
		t.Fatalf("the taskreaper should have ticked by now but did not")
	}

	// again, now wait and make sure the task reaper does not tick again
	select {
	case <-taskReaper.tickSignal:
		t.Fatalf("the taskreaper should not have ticked but did")
	case <-time.After(reaperBatchingInterval * 2):
	}
}

// TestServiceRemoveUnassignedTasks tests removal of
// tasks in state < TaskStateAssigned.
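// Tasks that have not reached ASSIGNED were never handed to an agent, so
// (as with the "earlytask" fixtures in TestTaskReaperInit) there is no
// container to shut down, and the reaper should be able to delete them
// without waiting for a SHUTDOWN status.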
func TestServiceRemoveUnassignedTasks(t *testing.T) {
	ctx := context.Background()
	s := store.NewMemoryStore(nil)
	assert.NotNil(t, s)
	defer s.Close()

	assert.NoError(t, s.Update(func(tx store.Tx) error {
		store.CreateCluster(tx, &api.Cluster{
			ID: identity.NewID(),
			Spec: api.ClusterSpec{
				Annotations: api.Annotations{
					Name: store.DefaultClusterName,
				},
				Orchestration: api.OrchestrationConfig{
					// set TaskHistoryRetentionLimit to 1 so that the old
					// task in each slot is cleaned up right away.
					TaskHistoryRetentionLimit: 1,
				},
			},
		})
		return nil
	}))

	taskReaper := New(s)
	defer taskReaper.Stop()
	orchestrator := replicated.NewReplicatedOrchestrator(s)
	defer orchestrator.Stop()

	watch, cancel := state.Watch(s.WatchQueue() /*api.EventCreateTask{}, api.EventUpdateTask{}*/)
	defer cancel()

	service1 := &api.Service{
		ID: "id1",
		Spec: api.ServiceSpec{
			Annotations: api.Annotations{
				Name: "name1",
			},
			Mode: &api.ServiceSpec_Replicated{
				Replicated: &api.ReplicatedService{
					Replicas: 1,
				},
			},
			Task: api.TaskSpec{
				Restart: &api.RestartPolicy{
					// Turn off restart to get an accurate count on tasks.
					Condition: api.RestartOnNone,
					Delay:     gogotypes.DurationProto(0),
				},
			},
		},
	}

	// Create a service with one replica specified before the orchestrator
	// is started. This should result in one task when the orchestrator
	// starts up.
	err := s.Update(func(tx store.Tx) error {
		assert.NoError(t, store.CreateService(tx, service1))
		return nil
	})
	assert.NoError(t, err)

	// Start the orchestrator.
	testutils.EnsureRuns(func() {
		assert.NoError(t, orchestrator.Run(ctx))
	})
	testutils.EnsureRuns(func() { taskReaper.Run(ctx) })

	observedTask1 := testutils.WatchTaskCreate(t, watch)
	assert.Equal(t, api.TaskStateNew, observedTask1.Status.State)
	assert.Equal(t, observedTask1.ServiceAnnotations.Name, "name1")

	// Set the task state to PENDING to simulate allocation.
	updatedTask1 := observedTask1.Copy()
	updatedTask1.Status.State = api.TaskStatePending
	updatedTask1.ServiceAnnotations = api.Annotations{Name: "original"}
	err = s.Update(func(tx store.Tx) error {
		assert.NoError(t, store.UpdateTask(tx, updatedTask1))
		return nil
	})
	require.NoError(t, err)

	testutils.Expect(t, watch, state.EventCommit{})
	testutils.Expect(t, watch, api.EventUpdateTask{})
	testutils.Expect(t, watch, state.EventCommit{})

	service1.Spec.Task.ForceUpdate++
	// Updating the service spec should shut down the previous task and
	// create a new one.
	err = s.Update(func(tx store.Tx) error {
		assert.NoError(t, store.UpdateService(tx, service1))
		return nil
	})
	assert.NoError(t, err)
	testutils.Expect(t, watch, api.EventUpdateService{})
	testutils.Expect(t, watch, state.EventCommit{})

	// A new task should be created, and the old task marked for SHUTDOWN.
	observedTask1 = testutils.WatchTaskCreate(t, watch)
	assert.Equal(t, api.TaskStateNew, observedTask1.Status.State)
	assert.Equal(t, observedTask1.ServiceAnnotations.Name, "name1")

	observedTask3 := testutils.WatchTaskUpdate(t, watch)
	assert.Equal(t, api.TaskStateShutdown, observedTask3.DesiredState)
	assert.Equal(t, "original", observedTask3.ServiceAnnotations.Name)

	testutils.Expect(t, watch, state.EventCommit{})

	// The task reaper should delete the task previously marked for SHUTDOWN.
	deletedTask1 := testutils.WatchTaskDelete(t, watch)
	assert.Equal(t, api.TaskStatePending, deletedTask1.Status.State)
	assert.Equal(t, "original", deletedTask1.ServiceAnnotations.Name)

	testutils.Expect(t, watch, state.EventCommit{})

	var foundTasks []*api.Task
	s.View(func(tx store.ReadTx) {
		foundTasks, err = store.FindTasks(tx, store.All)
	})
	assert.NoError(t, err)
	assert.Len(t, foundTasks, 1)
}

// setupTaskReaperDirty adds slots to the task reaper's dirty set for testing.
func setupTaskReaperDirty(tr *TaskReaper) {
	tr.dirty[orchestrator.SlotTuple{
		Slot:      1,
		ServiceID: "id1",
		NodeID:    "node1",
	}] = struct{}{}
	tr.dirty[orchestrator.SlotTuple{
		Slot:      1,
		ServiceID: "id2",
		NodeID:    "node1",
	}] = struct{}{}
}

// TestTick unit-tests the task reaper's tick function.
// 1. Test that the dirty set is cleaned up when the service can't be found.
// 2. Test that the dirty set is cleaned up when the number of total tasks
// is smaller than the retention limit.
// 3. Test that the dirty set and excess tasks in the store are cleaned up
// when the number of total tasks is greater than the retention limit.
func TestTick(t *testing.T) {
	s := store.NewMemoryStore(nil)
	assert.NotNil(t, s)
	defer s.Close()

	// create the task reaper.
	taskReaper := New(s)

	// Test #1
	// Set up the dirty set with entries to verify that the dirty set is
	// cleaned up when the service is not found.
	setupTaskReaperDirty(taskReaper)
	// call tick directly and verify that the dirty set was cleaned up.
	taskReaper.tick()
	assert.Zero(t, len(taskReaper.dirty))

	// Test #2
	// Verify that the dirty set is cleaned up when the history limit is
	// set to zero.

	// Create a service in the store for the following test cases.
	service1 := &api.Service{
		ID: "id1",
		Spec: api.ServiceSpec{
			Annotations: api.Annotations{
				Name: "name1",
			},
			Mode: &api.ServiceSpec_Replicated{
				Replicated: &api.ReplicatedService{
					Replicas: 1,
				},
			},
			Task: api.TaskSpec{
				Restart: &api.RestartPolicy{
					// Turn off restart to get an accurate count on tasks.
					Condition: api.RestartOnNone,
					Delay:     gogotypes.DurationProto(0),
				},
			},
		},
	}

	// Create another service in the store for the following test cases.
	service2 := &api.Service{
		ID: "id2",
		Spec: api.ServiceSpec{
			Annotations: api.Annotations{
				Name: "name2",
			},
			Mode: &api.ServiceSpec_Replicated{
				Replicated: &api.ReplicatedService{
					Replicas: 1,
				},
			},
			Task: api.TaskSpec{
				Restart: &api.RestartPolicy{
					// Turn off restart to get an accurate count on tasks.
					Condition: api.RestartOnNone,
					Delay:     gogotypes.DurationProto(0),
				},
			},
		},
	}

	// Create both services.
	err := s.Update(func(tx store.Tx) error {
		assert.NoError(t, store.CreateService(tx, service1))
		assert.NoError(t, store.CreateService(tx, service2))
		return nil
	})
	assert.NoError(t, err)

	// Set up the dirty set with entries to verify that the dirty set is
	// cleaned up when the history limit is set to zero.
	setupTaskReaperDirty(taskReaper)
	taskReaper.taskHistory = 0
	// call tick directly and verify that the dirty set was cleaned up.
	taskReaper.tick()
	assert.Zero(t, len(taskReaper.dirty))

	// Test #3
	// Test that the tasks are cleaned up when the total number of tasks is
	// greater than the retention limit.

	// Create tasks for both services in the store.
	task1 := &api.Task{
		ID:           "id1task1",
		Slot:         1,
		DesiredState: api.TaskStateShutdown,
		Status: api.TaskStatus{
			State: api.TaskStateShutdown,
		},
		ServiceID: "id1",
		ServiceAnnotations: api.Annotations{
			Name: "name1",
		},
	}

	task2 := &api.Task{
		ID:           "id2task1",
		Slot:         1,
		DesiredState: api.TaskStateShutdown,
		Status: api.TaskStatus{
			State: api.TaskStateShutdown,
		},
		ServiceID: "id2",
		ServiceAnnotations: api.Annotations{
			Name: "name2",
		},
	}

	// Create the tasks.
	err = s.Update(func(tx store.Tx) error {
		assert.NoError(t, store.CreateTask(tx, task1))
		assert.NoError(t, store.CreateTask(tx, task2))
		return nil
	})
	assert.NoError(t, err)

	// Set history to 1 to ensure that the tasks are not cleaned up yet. At
	// the same time, we should be able to test that the dirty set was
	// cleaned up at the end of tick().
	taskReaper.taskHistory = 1
	setupTaskReaperDirty(taskReaper)
	// call tick directly and verify that the dirty set was cleaned up.
	taskReaper.tick()
	assert.Zero(t, len(taskReaper.dirty))

	// Now test that the tick() function cleans up the old tasks from the
	// store.

	// Create new tasks in the store for the same slots to simulate a
	// service update.
	task1.Status.State = api.TaskStateNew
	task1.DesiredState = api.TaskStateRunning
	task1.ID = "id1task2"
	task2.Status.State = api.TaskStateNew
	task2.DesiredState = api.TaskStateRunning
	task2.ID = "id2task2"
	err = s.Update(func(tx store.Tx) error {
		assert.NoError(t, store.CreateTask(tx, task1))
		assert.NoError(t, store.CreateTask(tx, task2))
		return nil
	})
	assert.NoError(t, err)

	watch, cancel := state.Watch(s.WatchQueue() /*api.EventCreateTask{}, api.EventUpdateTask{}*/)
	defer cancel()

	// Set up the task reaper dirty set.
	setupTaskReaperDirty(taskReaper)
	// Call tick directly and verify that the dirty set was cleaned up.
	taskReaper.tick()
	assert.Zero(t, len(taskReaper.dirty))
	// The task reaper should delete the tasks previously marked for SHUTDOWN.
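	// (The two deletions can arrive in either order, which is presumably
	// why the assertions below only check that each deleted task belongs
	// to one of the two services.)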
	deletedTask1 := testutils.WatchTaskDelete(t, watch)
	assert.Equal(t, api.TaskStateShutdown, deletedTask1.Status.State)
	assert.Equal(t, api.TaskStateShutdown, deletedTask1.DesiredState)
	assert.True(t, deletedTask1.ServiceAnnotations.Name == "name1" ||
		deletedTask1.ServiceAnnotations.Name == "name2")

	deletedTask2 := testutils.WatchTaskDelete(t, watch)
	assert.Equal(t, api.TaskStateShutdown, deletedTask2.Status.State)
	assert.Equal(t, api.TaskStateShutdown, deletedTask2.DesiredState)
	assert.True(t, deletedTask2.ServiceAnnotations.Name == "name1" ||
		deletedTask2.ServiceAnnotations.Name == "name2")
}

// TestTickHistoryCleanup tests the conditions under which the task reaper
// deletes historic tasks:
// 1. the task is in a terminal state, i.e. actual state > RUNNING, or
// 2. actual state < ASSIGNED and desired state > RUNNING.
func TestTickHistoryCleanup(t *testing.T) {
	s := store.NewMemoryStore(nil)
	assert.NotNil(t, s)
	defer s.Close()
	// Create a service.
	service1 := &api.Service{
		ID: "id1",
		Spec: api.ServiceSpec{
			Annotations: api.Annotations{
				Name: "name1",
			},
			Mode: &api.ServiceSpec_Replicated{
				Replicated: &api.ReplicatedService{
					Replicas: 1,
				},
			},
			Task: api.TaskSpec{
				Restart: &api.RestartPolicy{
					// Turn off restart to get an accurate count on tasks.
					Condition: api.RestartOnNone,
					Delay:     gogotypes.DurationProto(0),
				},
			},
		},
	}

	s.Update(func(tx store.Tx) error {
		assert.NoError(t, store.CreateService(tx, service1))
		return nil
	})

	watch, cancel := state.Watch(s.WatchQueue() /*api.EventDeleteTask{}*/)
	defer cancel()
	taskReaper := New(s)
	taskReaper.taskHistory = 0

	// The test function creates a task with the given desired and actual
	// states, sets up the task reaper dirty set, and calls tick for testing.
	testfunc := func(desiredState api.TaskState, actualState api.TaskState) {
		var task *api.Task
		s.View(func(tx store.ReadTx) {
			task = store.GetTask(tx, "id1task3")
		})

		if task == nil {
			// create task3
			task3 := &api.Task{
				ID:           "id1task3",
				Slot:         1,
				DesiredState: desiredState,
				Status: api.TaskStatus{
					State: actualState,
				},
				ServiceID: "id1",
				ServiceAnnotations: api.Annotations{
					Name: "name1",
				},
			}
			s.Update(func(tx store.Tx) error {
				assert.NoError(t, store.CreateTask(tx, task3))
				return nil
			})
		} else {
			task.DesiredState = desiredState
			task.Status.State = actualState
			s.Update(func(tx store.Tx) error {
				assert.NoError(t, store.UpdateTask(tx, task))
				return nil
			})
		}

		setupTaskReaperDirty(taskReaper)
		taskReaper.tick()
	}

	// Function to verify that the task was deleted.
	waitForTaskDelete := func(desiredState api.TaskState, actualState api.TaskState) {
		deletedTask1 := testutils.WatchTaskDelete(t, watch)
		assert.Equal(t, actualState, deletedTask1.Status.State)
		assert.Equal(t, desiredState, deletedTask1.DesiredState)
		assert.Equal(t, "name1", deletedTask1.ServiceAnnotations.Name)
		assert.Equal(t, "id1task3", deletedTask1.ID)
	}

	for _, testcase := range []struct {
		// Desired and actual states to test.
		desired, actual api.TaskState

		// Flag to indicate whether the task should have been deleted by
		// tick().
		cleanedUp bool
	}{
		{desired: api.TaskStateRunning, actual: api.TaskStateNew, cleanedUp: false},
		{desired: api.TaskStateRunning, actual: api.TaskStatePending, cleanedUp: false},
		{desired: api.TaskStateRunning, actual: api.TaskStateAssigned, cleanedUp: false},
		{desired: api.TaskStateRunning, actual: api.TaskStateAccepted, cleanedUp: false},
		{desired: api.TaskStateRunning, actual: api.TaskStatePreparing, cleanedUp: false},
		{desired: api.TaskStateRunning, actual: api.TaskStateReady, cleanedUp: false},
		{desired: api.TaskStateRunning, actual: api.TaskStateStarting, cleanedUp: false},
		{desired: api.TaskStateRunning, actual: api.TaskStateRunning, cleanedUp: false},
		{desired: api.TaskStateRunning, actual: api.TaskStateCompleted, cleanedUp: true},
		{desired: api.TaskStateRunning, actual: api.TaskStateFailed, cleanedUp: true},
		{desired: api.TaskStateRunning, actual: api.TaskStateRejected, cleanedUp: true},
		{desired: api.TaskStateRunning, actual: api.TaskStateRemove, cleanedUp: true},
		{desired: api.TaskStateRunning, actual: api.TaskStateOrphaned, cleanedUp: true},

		{desired: api.TaskStateShutdown, actual: api.TaskStateNew, cleanedUp: true},
		{desired: api.TaskStateShutdown, actual: api.TaskStatePending, cleanedUp: true},
		{desired: api.TaskStateShutdown, actual: api.TaskStateAssigned, cleanedUp: false},
		{desired: api.TaskStateShutdown, actual: api.TaskStateAccepted, cleanedUp: false},
		{desired: api.TaskStateShutdown, actual: api.TaskStatePreparing, cleanedUp: false},
		{desired: api.TaskStateShutdown, actual: api.TaskStateReady, cleanedUp: false},
		{desired: api.TaskStateShutdown, actual: api.TaskStateStarting, cleanedUp: false},
		{desired: api.TaskStateShutdown, actual: api.TaskStateRunning, cleanedUp: false},
		{desired: api.TaskStateShutdown, actual: api.TaskStateCompleted, cleanedUp: true},
		{desired: api.TaskStateShutdown, actual: api.TaskStateFailed, cleanedUp: true},
		{desired: api.TaskStateShutdown, actual: api.TaskStateRejected, cleanedUp: true},
		{desired: api.TaskStateShutdown, actual: api.TaskStateRemove, cleanedUp: true},
		{desired: api.TaskStateShutdown, actual: api.TaskStateOrphaned, cleanedUp: true},
	} {
		testfunc(testcase.desired, testcase.actual)
		assert.Zero(t, len(taskReaper.dirty))
		if testcase.cleanedUp {
			waitForTaskDelete(testcase.desired, testcase.actual)
		}
		s.View(func(tx store.ReadTx) {
			task := store.GetTask(tx, "id1task3")
			if testcase.cleanedUp {
				assert.Nil(t, task)
			} else {
				assert.NotNil(t, task)
			}
		})
	}
}