github.com/kaisenlinux/docker.io@v0.0.0-20230510090727-ea55db55fac7/swarmkit/manager/orchestrator/global/global_test.go (about) 1 package global 2 3 import ( 4 "context" 5 "testing" 6 "time" 7 8 "github.com/docker/go-events" 9 "github.com/docker/swarmkit/api" 10 "github.com/docker/swarmkit/manager/orchestrator/testutils" 11 "github.com/docker/swarmkit/manager/state" 12 "github.com/docker/swarmkit/manager/state/store" 13 "github.com/docker/swarmkit/protobuf/ptypes" 14 gogotypes "github.com/gogo/protobuf/types" 15 "github.com/stretchr/testify/assert" 16 "github.com/stretchr/testify/require" 17 ) 18 19 var ( 20 node1 = &api.Node{ 21 ID: "nodeid1", 22 Spec: api.NodeSpec{ 23 Annotations: api.Annotations{ 24 Name: "name1", 25 }, 26 Availability: api.NodeAvailabilityActive, 27 }, 28 Status: api.NodeStatus{ 29 State: api.NodeStatus_READY, 30 }, 31 Description: &api.NodeDescription{ 32 Hostname: "name1", 33 }, 34 Role: api.NodeRoleWorker, 35 } 36 node2 = &api.Node{ 37 ID: "nodeid2", 38 Spec: api.NodeSpec{ 39 Annotations: api.Annotations{ 40 Name: "name2", 41 }, 42 Availability: api.NodeAvailabilityActive, 43 }, 44 Status: api.NodeStatus{ 45 State: api.NodeStatus_READY, 46 }, 47 Description: &api.NodeDescription{ 48 Hostname: "name2", 49 }, 50 Role: api.NodeRoleWorker, 51 } 52 53 restartDelay = 50 * time.Millisecond 54 55 service1 = &api.Service{ 56 ID: "serviceid1", 57 Spec: api.ServiceSpec{ 58 Annotations: api.Annotations{ 59 Name: "name1", 60 }, 61 Task: api.TaskSpec{ 62 Runtime: &api.TaskSpec_Container{ 63 Container: &api.ContainerSpec{}, 64 }, 65 Restart: &api.RestartPolicy{ 66 Condition: api.RestartOnAny, 67 Delay: gogotypes.DurationProto(restartDelay), 68 }, 69 }, 70 Mode: &api.ServiceSpec_Global{ 71 Global: &api.GlobalService{}, 72 }, 73 }, 74 } 75 76 service2 = &api.Service{ 77 ID: "serviceid2", 78 Spec: api.ServiceSpec{ 79 Annotations: api.Annotations{ 80 Name: "name2", 81 }, 82 Task: api.TaskSpec{ 83 Runtime: &api.TaskSpec_Container{ 84 Container: &api.ContainerSpec{}, 85 }, 86 }, 87 Mode: &api.ServiceSpec_Global{ 88 Global: &api.GlobalService{}, 89 }, 90 }, 91 } 92 93 serviceNoRestart = &api.Service{ 94 ID: "serviceid3", 95 Spec: api.ServiceSpec{ 96 Annotations: api.Annotations{ 97 Name: "norestart", 98 }, 99 Task: api.TaskSpec{ 100 Runtime: &api.TaskSpec_Container{ 101 Container: &api.ContainerSpec{}, 102 }, 103 Restart: &api.RestartPolicy{ 104 Condition: api.RestartOnNone, 105 }, 106 }, 107 Mode: &api.ServiceSpec_Global{ 108 Global: &api.GlobalService{}, 109 }, 110 }, 111 } 112 ) 113 114 func setup(t *testing.T, store *store.MemoryStore, watch chan events.Event) *Orchestrator { 115 ctx := context.Background() 116 // Start the global orchestrator. 117 global := NewGlobalOrchestrator(store) 118 testutils.EnsureRuns(func() { 119 assert.NoError(t, global.Run(ctx)) 120 }) 121 122 addService(t, store, service1) 123 testutils.Expect(t, watch, api.EventCreateService{}) 124 testutils.Expect(t, watch, state.EventCommit{}) 125 126 addNode(t, store, node1) 127 testutils.Expect(t, watch, api.EventCreateNode{}) 128 testutils.Expect(t, watch, state.EventCommit{}) 129 130 return global 131 } 132 133 func TestSetup(t *testing.T) { 134 store := store.NewMemoryStore(nil) 135 assert.NotNil(t, store) 136 defer store.Close() 137 138 watch, cancel := state.Watch(store.WatchQueue() /*state.EventCreateTask{}, state.EventUpdateTask{}*/) 139 defer cancel() 140 141 orchestrator := setup(t, store, watch) 142 defer orchestrator.Stop() 143 144 observedTask1 := testutils.WatchTaskCreate(t, watch) 145 146 assert.Equal(t, observedTask1.Status.State, api.TaskStateNew) 147 assert.Equal(t, observedTask1.ServiceAnnotations.Name, "name1") 148 assert.Equal(t, observedTask1.NodeID, "nodeid1") 149 } 150 151 func TestAddNode(t *testing.T) { 152 store := store.NewMemoryStore(nil) 153 assert.NotNil(t, store) 154 defer store.Close() 155 156 watch, cancel := state.Watch(store.WatchQueue()) 157 defer cancel() 158 159 orchestrator := setup(t, store, watch) 160 defer orchestrator.Stop() 161 162 testutils.WatchTaskCreate(t, watch) 163 164 addNode(t, store, node2) 165 observedTask2 := testutils.WatchTaskCreate(t, watch) 166 assert.Equal(t, observedTask2.Status.State, api.TaskStateNew) 167 assert.Equal(t, observedTask2.ServiceAnnotations.Name, "name1") 168 assert.Equal(t, observedTask2.NodeID, "nodeid2") 169 } 170 171 func TestDeleteNode(t *testing.T) { 172 store := store.NewMemoryStore(nil) 173 assert.NotNil(t, store) 174 defer store.Close() 175 176 watch, cancel := state.Watch(store.WatchQueue()) 177 defer cancel() 178 179 orchestrator := setup(t, store, watch) 180 defer orchestrator.Stop() 181 182 testutils.WatchTaskCreate(t, watch) 183 184 deleteNode(t, store, node1) 185 // task should be set to dead 186 observedTask := testutils.WatchTaskDelete(t, watch) 187 assert.Equal(t, observedTask.ServiceAnnotations.Name, "name1") 188 assert.Equal(t, observedTask.NodeID, "nodeid1") 189 } 190 191 func TestNodeAvailability(t *testing.T) { 192 t.Parallel() 193 194 store := store.NewMemoryStore(nil) 195 assert.NotNil(t, store) 196 defer store.Close() 197 198 watch, cancel := state.Watch(store.WatchQueue()) 199 defer cancel() 200 201 orchestrator := setup(t, store, watch) 202 defer orchestrator.Stop() 203 204 testutils.WatchTaskCreate(t, watch) 205 206 // set node1 to drain 207 updateNodeAvailability(t, store, node1, api.NodeAvailabilityDrain) 208 209 // task should be set to dead 210 observedTask1 := testutils.WatchShutdownTask(t, watch) 211 assert.Equal(t, observedTask1.ServiceAnnotations.Name, "name1") 212 assert.Equal(t, observedTask1.NodeID, "nodeid1") 213 testutils.Expect(t, watch, state.EventCommit{}) 214 215 // updating the service shouldn't restart the task 216 updateService(t, store, service1, true) 217 testutils.Expect(t, watch, api.EventUpdateService{}) 218 testutils.Expect(t, watch, state.EventCommit{}) 219 select { 220 case event := <-watch: 221 t.Fatalf("got unexpected event %T: %+v", event, event) 222 case <-time.After(100 * time.Millisecond): 223 } 224 225 // set node1 to active 226 updateNodeAvailability(t, store, node1, api.NodeAvailabilityActive) 227 // task should be added back 228 observedTask2 := testutils.WatchTaskCreate(t, watch) 229 assert.Equal(t, observedTask2.Status.State, api.TaskStateNew) 230 assert.Equal(t, observedTask2.ServiceAnnotations.Name, "name1") 231 assert.Equal(t, observedTask2.NodeID, "nodeid1") 232 testutils.Expect(t, watch, state.EventCommit{}) 233 234 // set node1 to pause 235 updateNodeAvailability(t, store, node1, api.NodeAvailabilityPause) 236 237 failTask(t, store, observedTask2) 238 observedTask3 := testutils.WatchShutdownTask(t, watch) 239 assert.Equal(t, observedTask3.ServiceAnnotations.Name, "name1") 240 assert.Equal(t, observedTask3.NodeID, "nodeid1") 241 testutils.Expect(t, watch, state.EventCommit{}) 242 243 // updating the service shouldn't restart the task 244 updateService(t, store, service1, true) 245 testutils.Expect(t, watch, api.EventUpdateService{}) 246 testutils.Expect(t, watch, state.EventCommit{}) 247 select { 248 case event := <-watch: 249 t.Fatalf("got unexpected event %T: %+v", event, event) 250 case <-time.After(100 * time.Millisecond): 251 } 252 253 } 254 255 func TestNodeState(t *testing.T) { 256 t.Parallel() 257 258 store := store.NewMemoryStore(nil) 259 assert.NotNil(t, store) 260 defer store.Close() 261 262 watch, cancel := state.Watch(store.WatchQueue()) 263 defer cancel() 264 265 orchestrator := setup(t, store, watch) 266 defer orchestrator.Stop() 267 268 testutils.WatchTaskCreate(t, watch) 269 270 // set node1 to down 271 updateNodeState(t, store, node1, api.NodeStatus_DOWN) 272 273 // task should be set to dead 274 observedTask1 := testutils.WatchShutdownTask(t, watch) 275 assert.Equal(t, observedTask1.ServiceAnnotations.Name, "name1") 276 assert.Equal(t, observedTask1.NodeID, "nodeid1") 277 testutils.Expect(t, watch, state.EventCommit{}) 278 279 // updating the service shouldn't restart the task 280 updateService(t, store, service1, true) 281 testutils.Expect(t, watch, api.EventUpdateService{}) 282 testutils.Expect(t, watch, state.EventCommit{}) 283 select { 284 case event := <-watch: 285 t.Fatalf("got unexpected event %T: %+v", event, event) 286 case <-time.After(100 * time.Millisecond): 287 } 288 289 // set node1 to ready 290 updateNodeState(t, store, node1, api.NodeStatus_READY) 291 // task should be added back 292 observedTask2 := testutils.WatchTaskCreate(t, watch) 293 assert.Equal(t, observedTask2.Status.State, api.TaskStateNew) 294 assert.Equal(t, observedTask2.ServiceAnnotations.Name, "name1") 295 assert.Equal(t, observedTask2.NodeID, "nodeid1") 296 } 297 298 func TestAddService(t *testing.T) { 299 store := store.NewMemoryStore(nil) 300 assert.NotNil(t, store) 301 defer store.Close() 302 303 watch, cancel := state.Watch(store.WatchQueue()) 304 defer cancel() 305 306 orchestrator := setup(t, store, watch) 307 defer orchestrator.Stop() 308 309 testutils.WatchTaskCreate(t, watch) 310 311 addService(t, store, service2) 312 observedTask := testutils.WatchTaskCreate(t, watch) 313 assert.Equal(t, observedTask.Status.State, api.TaskStateNew) 314 assert.Equal(t, observedTask.ServiceAnnotations.Name, "name2") 315 assert.True(t, observedTask.NodeID == "nodeid1") 316 } 317 318 func TestDeleteService(t *testing.T) { 319 store := store.NewMemoryStore(nil) 320 assert.NotNil(t, store) 321 defer store.Close() 322 323 watch, cancel := state.Watch(store.WatchQueue()) 324 defer cancel() 325 326 orchestrator := setup(t, store, watch) 327 defer orchestrator.Stop() 328 329 testutils.WatchTaskCreate(t, watch) 330 331 deleteService(t, store, service1) 332 // task should be deleted 333 observedTask := testutils.WatchTaskUpdate(t, watch) 334 assert.Equal(t, observedTask.ServiceAnnotations.Name, "name1") 335 assert.Equal(t, observedTask.NodeID, "nodeid1") 336 } 337 338 func TestRemoveTask(t *testing.T) { 339 t.Parallel() 340 341 store := store.NewMemoryStore(nil) 342 assert.NotNil(t, store) 343 defer store.Close() 344 345 watch, cancel := state.Watch(store.WatchQueue() /*api.EventCreateTask{}, api.EventUpdateTask{}*/) 346 defer cancel() 347 348 orchestrator := setup(t, store, watch) 349 defer orchestrator.Stop() 350 351 observedTask1 := testutils.WatchTaskCreate(t, watch) 352 testutils.Expect(t, watch, state.EventCommit{}) 353 354 assert.Equal(t, observedTask1.Status.State, api.TaskStateNew) 355 assert.Equal(t, observedTask1.ServiceAnnotations.Name, "name1") 356 assert.Equal(t, observedTask1.NodeID, "nodeid1") 357 358 deleteTask(t, store, observedTask1) 359 testutils.Expect(t, watch, api.EventDeleteTask{}) 360 testutils.Expect(t, watch, state.EventCommit{}) 361 362 // the task should not be recreated 363 select { 364 case event := <-watch: 365 t.Fatalf("got unexpected event %T: %+v", event, event) 366 case <-time.After(100 * time.Millisecond): 367 } 368 } 369 370 func TestTaskFailure(t *testing.T) { 371 t.Parallel() 372 373 store := store.NewMemoryStore(nil) 374 assert.NotNil(t, store) 375 defer store.Close() 376 377 watch, cancel := state.Watch(store.WatchQueue() /*api.EventCreateTask{}, api.EventUpdateTask{}*/) 378 defer cancel() 379 380 // first, try a "restart on any" policy 381 orchestrator := setup(t, store, watch) 382 defer orchestrator.Stop() 383 384 observedTask1 := testutils.WatchTaskCreate(t, watch) 385 386 assert.Equal(t, observedTask1.Status.State, api.TaskStateNew) 387 assert.Equal(t, observedTask1.ServiceAnnotations.Name, "name1") 388 assert.Equal(t, observedTask1.NodeID, "nodeid1") 389 390 failTask(t, store, observedTask1) 391 392 testutils.WatchShutdownTask(t, watch) 393 394 // the task should be recreated 395 observedTask2 := testutils.WatchTaskCreate(t, watch) 396 assert.Equal(t, observedTask2.Status.State, api.TaskStateNew) 397 assert.Equal(t, observedTask2.ServiceAnnotations.Name, "name1") 398 assert.Equal(t, observedTask2.NodeID, "nodeid1") 399 testutils.Expect(t, watch, state.EventCommit{}) 400 testutils.Expect(t, watch, api.EventUpdateTask{}) // ready->running 401 testutils.Expect(t, watch, state.EventCommit{}) 402 403 // repeat with service set up not to restart 404 addService(t, store, serviceNoRestart) 405 testutils.Expect(t, watch, api.EventCreateService{}) 406 testutils.Expect(t, watch, state.EventCommit{}) 407 408 observedTask3 := testutils.WatchTaskCreate(t, watch) 409 assert.Equal(t, observedTask3.Status.State, api.TaskStateNew) 410 assert.Equal(t, observedTask3.ServiceAnnotations.Name, "norestart") 411 assert.Equal(t, observedTask3.NodeID, "nodeid1") 412 testutils.Expect(t, watch, state.EventCommit{}) 413 414 failTask(t, store, observedTask3) 415 testutils.Expect(t, watch, api.EventUpdateTask{}) 416 testutils.Expect(t, watch, state.EventCommit{}) 417 observedTask4 := testutils.WatchTaskUpdate(t, watch) 418 assert.Equal(t, observedTask4.DesiredState, api.TaskStateShutdown) 419 testutils.Expect(t, watch, state.EventCommit{}) 420 421 // the task should not be recreated 422 select { 423 case event := <-watch: 424 t.Fatalf("got unexpected event %T: %+v", event, event) 425 case <-time.After(100 * time.Millisecond): 426 } 427 428 // update the service with no spec changes, to trigger a 429 // reconciliation. the task should still not be updated. 430 updateService(t, store, serviceNoRestart, false) 431 testutils.Expect(t, watch, api.EventUpdateService{}) 432 testutils.Expect(t, watch, state.EventCommit{}) 433 434 select { 435 case event := <-watch: 436 t.Fatalf("got unexpected event %T: %+v", event, event) 437 case <-time.After(100 * time.Millisecond): 438 } 439 440 // update the service with spec changes. now the task should be recreated. 441 updateService(t, store, serviceNoRestart, true) 442 testutils.Expect(t, watch, api.EventUpdateService{}) 443 testutils.Expect(t, watch, state.EventCommit{}) 444 445 observedTask5 := testutils.WatchTaskCreate(t, watch) 446 assert.Equal(t, observedTask5.Status.State, api.TaskStateNew) 447 assert.Equal(t, observedTask5.ServiceAnnotations.Name, "norestart") 448 assert.Equal(t, observedTask5.NodeID, "nodeid1") 449 testutils.Expect(t, watch, state.EventCommit{}) 450 } 451 452 func addService(t *testing.T, s *store.MemoryStore, service *api.Service) { 453 s.Update(func(tx store.Tx) error { 454 assert.NoError(t, store.CreateService(tx, service.Copy())) 455 return nil 456 }) 457 } 458 459 func updateService(t *testing.T, s *store.MemoryStore, service *api.Service, force bool) { 460 s.Update(func(tx store.Tx) error { 461 service := store.GetService(tx, service.ID) 462 require.NotNil(t, service) 463 if force { 464 service.Spec.Task.ForceUpdate++ 465 } 466 assert.NoError(t, store.UpdateService(tx, service)) 467 return nil 468 }) 469 } 470 471 func deleteService(t *testing.T, s *store.MemoryStore, service *api.Service) { 472 s.Update(func(tx store.Tx) error { 473 assert.NoError(t, store.DeleteService(tx, service.ID)) 474 return nil 475 }) 476 } 477 478 func addNode(t *testing.T, s *store.MemoryStore, node *api.Node) { 479 s.Update(func(tx store.Tx) error { 480 assert.NoError(t, store.CreateNode(tx, node.Copy())) 481 return nil 482 }) 483 } 484 485 func updateNodeAvailability(t *testing.T, s *store.MemoryStore, node *api.Node, avail api.NodeSpec_Availability) { 486 s.Update(func(tx store.Tx) error { 487 node := store.GetNode(tx, node.ID) 488 require.NotNil(t, node) 489 node.Spec.Availability = avail 490 assert.NoError(t, store.UpdateNode(tx, node)) 491 return nil 492 }) 493 } 494 495 func updateNodeState(t *testing.T, s *store.MemoryStore, node *api.Node, state api.NodeStatus_State) { 496 s.Update(func(tx store.Tx) error { 497 node := store.GetNode(tx, node.ID) 498 require.NotNil(t, node) 499 node.Status.State = state 500 assert.NoError(t, store.UpdateNode(tx, node)) 501 return nil 502 }) 503 } 504 505 func deleteNode(t *testing.T, s *store.MemoryStore, node *api.Node) { 506 s.Update(func(tx store.Tx) error { 507 assert.NoError(t, store.DeleteNode(tx, node.ID)) 508 return nil 509 }) 510 } 511 512 func addTask(t *testing.T, s *store.MemoryStore, task *api.Task) { 513 s.Update(func(tx store.Tx) error { 514 assert.NoError(t, store.CreateTask(tx, task)) 515 return nil 516 }) 517 } 518 519 func deleteTask(t *testing.T, s *store.MemoryStore, task *api.Task) { 520 s.Update(func(tx store.Tx) error { 521 assert.NoError(t, store.DeleteTask(tx, task.ID)) 522 return nil 523 }) 524 } 525 526 func failTask(t *testing.T, s *store.MemoryStore, task *api.Task) { 527 s.Update(func(tx store.Tx) error { 528 task := store.GetTask(tx, task.ID) 529 require.NotNil(t, task) 530 task.Status.State = api.TaskStateFailed 531 assert.NoError(t, store.UpdateTask(tx, task)) 532 return nil 533 }) 534 } 535 536 func TestInitializationRejectedTasks(t *testing.T) { 537 ctx := context.Background() 538 s := store.NewMemoryStore(nil) 539 assert.NotNil(t, s) 540 defer s.Close() 541 542 // create nodes, services and tasks in store directly 543 // where orchestrator runs, it should fix tasks to declarative state 544 addNode(t, s, node1) 545 addService(t, s, service1) 546 tasks := []*api.Task{ 547 // nodeid1 has a rejected task for serviceid1 548 { 549 ID: "task1", 550 Slot: 0, 551 DesiredState: api.TaskStateReady, 552 Status: api.TaskStatus{ 553 State: api.TaskStateRejected, 554 }, 555 Spec: api.TaskSpec{ 556 Runtime: &api.TaskSpec_Container{ 557 Container: &api.ContainerSpec{}, 558 }, 559 Restart: &api.RestartPolicy{ 560 Condition: api.RestartOnAny, 561 Delay: gogotypes.DurationProto(restartDelay), 562 }, 563 }, 564 ServiceAnnotations: api.Annotations{ 565 Name: "task1", 566 }, 567 ServiceID: "serviceid1", 568 NodeID: "nodeid1", 569 }, 570 } 571 for _, task := range tasks { 572 addTask(t, s, task) 573 } 574 575 // watch orchestration events 576 watch, cancel := state.Watch(s.WatchQueue(), api.EventCreateTask{}, api.EventUpdateTask{}, api.EventDeleteTask{}) 577 defer cancel() 578 579 orchestrator := NewGlobalOrchestrator(s) 580 defer orchestrator.Stop() 581 582 testutils.EnsureRuns(func() { 583 assert.NoError(t, orchestrator.Run(ctx)) 584 }) 585 586 observedTask1 := testutils.WatchTaskUpdate(t, watch) 587 assert.Equal(t, observedTask1.ID, "task1") 588 assert.Equal(t, observedTask1.Status.State, api.TaskStateRejected) 589 assert.Equal(t, observedTask1.DesiredState, api.TaskStateShutdown) 590 591 observedTask2 := testutils.WatchTaskCreate(t, watch) 592 assert.Equal(t, observedTask2.NodeID, "nodeid1") 593 assert.Equal(t, observedTask2.Status.State, api.TaskStateNew) 594 assert.Equal(t, observedTask2.DesiredState, api.TaskStateReady) 595 } 596 597 func TestInitializationFailedTasks(t *testing.T) { 598 ctx := context.Background() 599 s := store.NewMemoryStore(nil) 600 assert.NotNil(t, s) 601 defer s.Close() 602 603 // create nodes, services and tasks in store directly 604 // where orchestrator runs, it should fix tasks to declarative state 605 addNode(t, s, node1) 606 addService(t, s, service1) 607 before := time.Now() 608 tasks := []*api.Task{ 609 // nodeid1 has a failed task for serviceid1 610 { 611 ID: "task1", 612 Slot: 0, 613 DesiredState: api.TaskStateRunning, 614 Status: api.TaskStatus{ 615 State: api.TaskStateFailed, 616 Timestamp: ptypes.MustTimestampProto(before), 617 }, 618 Spec: api.TaskSpec{ 619 Runtime: &api.TaskSpec_Container{ 620 Container: &api.ContainerSpec{}, 621 }, 622 Restart: &api.RestartPolicy{ 623 Condition: api.RestartOnAny, 624 Delay: gogotypes.DurationProto(restartDelay), 625 }, 626 }, 627 ServiceAnnotations: api.Annotations{ 628 Name: "task1", 629 }, 630 ServiceID: "serviceid1", 631 NodeID: "nodeid1", 632 }, 633 } 634 for _, task := range tasks { 635 addTask(t, s, task) 636 } 637 638 // watch orchestration events 639 watch, cancel := state.Watch(s.WatchQueue(), api.EventCreateTask{}, api.EventUpdateTask{}, api.EventDeleteTask{}) 640 defer cancel() 641 642 orchestrator := NewGlobalOrchestrator(s) 643 defer orchestrator.Stop() 644 645 testutils.EnsureRuns(func() { 646 assert.NoError(t, orchestrator.Run(ctx)) 647 }) 648 649 observedTask1 := testutils.WatchTaskUpdate(t, watch) 650 assert.Equal(t, observedTask1.ID, "task1") 651 assert.Equal(t, observedTask1.Status.State, api.TaskStateFailed) 652 assert.Equal(t, observedTask1.DesiredState, api.TaskStateShutdown) 653 654 observedTask2 := testutils.WatchTaskCreate(t, watch) 655 assert.Equal(t, observedTask2.NodeID, "nodeid1") 656 assert.Equal(t, observedTask2.Status.State, api.TaskStateNew) 657 assert.Equal(t, observedTask2.DesiredState, api.TaskStateReady) 658 659 observedTask3 := testutils.WatchTaskUpdate(t, watch) 660 after := time.Now() 661 assert.Equal(t, observedTask3.NodeID, "nodeid1") 662 assert.Equal(t, observedTask3.DesiredState, api.TaskStateRunning) 663 664 if after.Sub(before) < restartDelay { 665 t.Fatalf("restart delay should have elapsed. Got: %v", after.Sub(before)) 666 } 667 } 668 669 func TestInitializationExtraTask(t *testing.T) { 670 ctx := context.Background() 671 s := store.NewMemoryStore(nil) 672 assert.NotNil(t, s) 673 defer s.Close() 674 675 // create nodes, services and tasks in store directly 676 // where orchestrator runs, it should fix tasks to declarative state 677 addNode(t, s, node1) 678 addService(t, s, service1) 679 tasks := []*api.Task{ 680 // nodeid1 has 2 tasks for serviceid1 681 { 682 ID: "task1", 683 Slot: 0, 684 DesiredState: api.TaskStateRunning, 685 Status: api.TaskStatus{ 686 State: api.TaskStateRunning, 687 }, 688 Spec: api.TaskSpec{ 689 Runtime: &api.TaskSpec_Container{ 690 Container: &api.ContainerSpec{}, 691 }, 692 Restart: &api.RestartPolicy{ 693 Condition: api.RestartOnAny, 694 Delay: gogotypes.DurationProto(restartDelay), 695 }, 696 }, 697 ServiceAnnotations: api.Annotations{ 698 Name: "task1", 699 }, 700 ServiceID: "serviceid1", 701 NodeID: "nodeid1", 702 }, 703 { 704 ID: "task2", 705 Slot: 0, 706 DesiredState: api.TaskStateRunning, 707 Status: api.TaskStatus{ 708 State: api.TaskStateRunning, 709 }, 710 Spec: api.TaskSpec{ 711 Runtime: &api.TaskSpec_Container{ 712 Container: &api.ContainerSpec{}, 713 }, 714 Restart: &api.RestartPolicy{ 715 Condition: api.RestartOnAny, 716 Delay: gogotypes.DurationProto(restartDelay), 717 }, 718 }, 719 ServiceAnnotations: api.Annotations{ 720 Name: "task2", 721 }, 722 ServiceID: "serviceid1", 723 NodeID: "nodeid1", 724 }, 725 } 726 for _, task := range tasks { 727 addTask(t, s, task) 728 } 729 730 // watch orchestration events 731 watch, cancel := state.Watch(s.WatchQueue(), api.EventCreateTask{}, api.EventUpdateTask{}, api.EventDeleteTask{}) 732 defer cancel() 733 734 orchestrator := NewGlobalOrchestrator(s) 735 defer orchestrator.Stop() 736 737 testutils.EnsureRuns(func() { 738 assert.NoError(t, orchestrator.Run(ctx)) 739 }) 740 741 observedTask1 := testutils.WatchTaskUpdate(t, watch) 742 assert.True(t, observedTask1.ID == "task1" || observedTask1.ID == "task2") 743 assert.Equal(t, observedTask1.Status.State, api.TaskStateRunning) 744 assert.Equal(t, observedTask1.DesiredState, api.TaskStateShutdown) 745 746 var deadCnt, liveCnt int 747 var err error 748 s.View(func(readTx store.ReadTx) { 749 var tasks []*api.Task 750 tasks, err = store.FindTasks(readTx, store.ByServiceID("serviceid1")) 751 for _, task := range tasks { 752 if task.DesiredState == api.TaskStateShutdown { 753 assert.Equal(t, task.ID, observedTask1.ID) 754 deadCnt++ 755 } else { 756 assert.Equal(t, task.DesiredState, api.TaskStateRunning) 757 liveCnt++ 758 } 759 } 760 }) 761 assert.NoError(t, err) 762 assert.Equal(t, deadCnt, 1) 763 assert.Equal(t, liveCnt, 1) 764 } 765 766 func TestInitializationMultipleServices(t *testing.T) { 767 ctx := context.Background() 768 s := store.NewMemoryStore(nil) 769 assert.NotNil(t, s) 770 defer s.Close() 771 772 // create nodes, services and tasks in store directly 773 // where orchestrator runs, it should fix tasks to declarative state 774 addNode(t, s, node1) 775 addService(t, s, service1) 776 addService(t, s, service2) 777 tasks := []*api.Task{ 778 // nodeid1 has 1 task for serviceid1 and 1 task for serviceid2 779 { 780 ID: "task1", 781 DesiredState: api.TaskStateRunning, 782 Status: api.TaskStatus{ 783 State: api.TaskStateRunning, 784 }, 785 Spec: service1.Spec.Task, 786 ServiceAnnotations: api.Annotations{ 787 Name: "task1", 788 }, 789 ServiceID: "serviceid1", 790 NodeID: "nodeid1", 791 }, 792 { 793 ID: "task2", 794 DesiredState: api.TaskStateRunning, 795 Status: api.TaskStatus{ 796 State: api.TaskStateRunning, 797 }, 798 Spec: service2.Spec.Task, 799 ServiceAnnotations: api.Annotations{ 800 Name: "task2", 801 }, 802 ServiceID: "serviceid2", 803 NodeID: "nodeid1", 804 }, 805 } 806 for _, task := range tasks { 807 addTask(t, s, task) 808 } 809 810 // watch orchestration events 811 watch, cancel := state.Watch(s.WatchQueue(), api.EventCreateTask{}, api.EventUpdateTask{}, api.EventDeleteTask{}) 812 defer cancel() 813 814 orchestrator := NewGlobalOrchestrator(s) 815 defer orchestrator.Stop() 816 817 testutils.EnsureRuns(func() { 818 assert.NoError(t, orchestrator.Run(ctx)) 819 }) 820 821 // Nothing should happen because both tasks are up to date. 822 select { 823 case e := <-watch: 824 t.Fatalf("Received unexpected event (type: %T) %+v", e, e) 825 case <-time.After(100 * time.Millisecond): 826 } 827 828 // Update service 1. Make sure only service 1's task is restarted. 829 830 s.Update(func(tx store.Tx) error { 831 s1 := store.GetService(tx, "serviceid1") 832 require.NotNil(t, s1) 833 834 s1.Spec.Task.Restart.Delay = gogotypes.DurationProto(70 * time.Millisecond) 835 836 assert.NoError(t, store.UpdateService(tx, s1)) 837 return nil 838 }) 839 840 observedCreation1 := testutils.WatchTaskCreate(t, watch) 841 assert.Equal(t, "serviceid1", observedCreation1.ServiceID) 842 assert.Equal(t, "nodeid1", observedCreation1.NodeID) 843 assert.Equal(t, api.TaskStateReady, observedCreation1.DesiredState) 844 845 observedUpdate1 := testutils.WatchTaskUpdate(t, watch) 846 assert.Equal(t, "serviceid1", observedUpdate1.ServiceID) 847 assert.Equal(t, "nodeid1", observedUpdate1.NodeID) 848 assert.Equal(t, api.TaskStateShutdown, observedUpdate1.DesiredState) 849 850 // Nothing else should happen 851 select { 852 case e := <-watch: 853 t.Fatalf("Received unexpected event (type: %T) %+v", e, e) 854 case <-time.After(100 * time.Millisecond): 855 } 856 857 // Fail a task from service 2. Make sure only service 2's task is restarted. 858 859 s.Update(func(tx store.Tx) error { 860 t2 := store.GetTask(tx, "task2") 861 require.NotNil(t, t2) 862 863 t2.Status.State = api.TaskStateFailed 864 865 assert.NoError(t, store.UpdateTask(tx, t2)) 866 return nil 867 }) 868 869 // Consume our own task update event 870 <-watch 871 872 observedUpdate2 := testutils.WatchTaskUpdate(t, watch) 873 assert.Equal(t, "serviceid2", observedUpdate2.ServiceID) 874 assert.Equal(t, "nodeid1", observedUpdate2.NodeID) 875 assert.Equal(t, api.TaskStateShutdown, observedUpdate2.DesiredState) 876 877 observedCreation2 := testutils.WatchTaskCreate(t, watch) 878 assert.Equal(t, "serviceid2", observedCreation2.ServiceID) 879 assert.Equal(t, "nodeid1", observedCreation2.NodeID) 880 assert.Equal(t, api.TaskStateReady, observedCreation2.DesiredState) 881 882 // Nothing else should happen 883 select { 884 case e := <-watch: 885 t.Fatalf("Received unexpected event (type: %T) %+v", e, e) 886 case <-time.After(100 * time.Millisecond): 887 } 888 } 889 890 func TestInitializationTaskWithoutService(t *testing.T) { 891 ctx := context.Background() 892 s := store.NewMemoryStore(nil) 893 assert.NotNil(t, s) 894 defer s.Close() 895 896 // create nodes, services and tasks in store directly 897 // where orchestrator runs, it should fix tasks to declarative state 898 addNode(t, s, node1) 899 addService(t, s, service1) 900 tasks := []*api.Task{ 901 // nodeid1 has 1 task for serviceid1 and 1 task for serviceid2 902 { 903 ID: "task1", 904 Slot: 0, 905 DesiredState: api.TaskStateRunning, 906 Status: api.TaskStatus{ 907 State: api.TaskStateRunning, 908 }, 909 Spec: api.TaskSpec{ 910 Runtime: &api.TaskSpec_Container{ 911 Container: &api.ContainerSpec{}, 912 }, 913 Restart: &api.RestartPolicy{ 914 Condition: api.RestartOnAny, 915 Delay: gogotypes.DurationProto(restartDelay), 916 }, 917 }, 918 ServiceAnnotations: api.Annotations{ 919 Name: "task1", 920 }, 921 ServiceID: "serviceid1", 922 NodeID: "nodeid1", 923 }, 924 { 925 ID: "task2", 926 Slot: 0, 927 DesiredState: api.TaskStateRunning, 928 Status: api.TaskStatus{ 929 State: api.TaskStateRunning, 930 }, 931 Spec: api.TaskSpec{ 932 Runtime: &api.TaskSpec_Container{ 933 Container: &api.ContainerSpec{}, 934 }, 935 Restart: &api.RestartPolicy{ 936 Condition: api.RestartOnAny, 937 Delay: gogotypes.DurationProto(restartDelay), 938 }, 939 }, 940 ServiceAnnotations: api.Annotations{ 941 Name: "task2", 942 }, 943 ServiceID: "serviceid2", 944 NodeID: "nodeid1", 945 }, 946 } 947 for _, task := range tasks { 948 addTask(t, s, task) 949 } 950 951 // watch orchestration events 952 watch, cancel := state.Watch(s.WatchQueue(), api.EventCreateTask{}, api.EventUpdateTask{}, api.EventDeleteTask{}) 953 defer cancel() 954 955 orchestrator := NewGlobalOrchestrator(s) 956 defer orchestrator.Stop() 957 958 testutils.EnsureRuns(func() { 959 assert.NoError(t, orchestrator.Run(ctx)) 960 }) 961 962 observedTask1 := testutils.WatchTaskDelete(t, watch) 963 assert.Equal(t, observedTask1.ID, "task2") 964 assert.Equal(t, observedTask1.Status.State, api.TaskStateRunning) 965 assert.Equal(t, observedTask1.DesiredState, api.TaskStateRunning) 966 } 967 968 func TestInitializationTaskOnDrainedNode(t *testing.T) { 969 ctx := context.Background() 970 s := store.NewMemoryStore(nil) 971 assert.NotNil(t, s) 972 defer s.Close() 973 974 // create nodes, services and tasks in store directly 975 // where orchestrator runs, it should fix tasks to declarative state 976 n1 := *node1 977 n1.Spec.Availability = api.NodeAvailabilityDrain 978 addNode(t, s, &n1) 979 addService(t, s, service1) 980 tasks := []*api.Task{ 981 // nodeid1 has 1 task for serviceid1 982 { 983 ID: "task1", 984 Slot: 0, 985 DesiredState: api.TaskStateRunning, 986 Status: api.TaskStatus{ 987 State: api.TaskStateRunning, 988 }, 989 Spec: api.TaskSpec{ 990 Runtime: &api.TaskSpec_Container{ 991 Container: &api.ContainerSpec{}, 992 }, 993 Restart: &api.RestartPolicy{ 994 Condition: api.RestartOnAny, 995 Delay: gogotypes.DurationProto(restartDelay), 996 }, 997 }, 998 ServiceAnnotations: api.Annotations{ 999 Name: "task1", 1000 }, 1001 ServiceID: "serviceid1", 1002 NodeID: "nodeid1", 1003 }, 1004 } 1005 for _, task := range tasks { 1006 addTask(t, s, task) 1007 } 1008 1009 // watch orchestration events 1010 watch, cancel := state.Watch(s.WatchQueue(), api.EventCreateTask{}, api.EventUpdateTask{}, api.EventDeleteTask{}) 1011 defer cancel() 1012 1013 orchestrator := NewGlobalOrchestrator(s) 1014 defer orchestrator.Stop() 1015 1016 testutils.EnsureRuns(func() { 1017 assert.NoError(t, orchestrator.Run(ctx)) 1018 }) 1019 1020 observedTask1 := testutils.WatchTaskUpdate(t, watch) 1021 assert.Equal(t, observedTask1.ID, "task1") 1022 assert.Equal(t, observedTask1.Status.State, api.TaskStateRunning) 1023 assert.Equal(t, observedTask1.DesiredState, api.TaskStateShutdown) 1024 1025 var deadCnt, liveCnt int 1026 var err error 1027 s.View(func(readTx store.ReadTx) { 1028 var tasks []*api.Task 1029 tasks, err = store.FindTasks(readTx, store.ByServiceID("serviceid1")) 1030 for _, task := range tasks { 1031 if task.DesiredState == api.TaskStateShutdown { 1032 deadCnt++ 1033 } else { 1034 liveCnt++ 1035 } 1036 } 1037 }) 1038 assert.NoError(t, err) 1039 assert.Equal(t, deadCnt, 1) 1040 assert.Equal(t, liveCnt, 0) 1041 } 1042 1043 func TestInitializationTaskOnNonexistentNode(t *testing.T) { 1044 ctx := context.Background() 1045 s := store.NewMemoryStore(nil) 1046 assert.NotNil(t, s) 1047 defer s.Close() 1048 1049 // create nodes, services and tasks in store directly 1050 // where orchestrator runs, it should fix tasks to declarative state 1051 addService(t, s, service1) 1052 tasks := []*api.Task{ 1053 // 1 task for serviceid1 on nonexistent nodeid1 1054 { 1055 ID: "task1", 1056 Slot: 0, 1057 DesiredState: api.TaskStateRunning, 1058 Status: api.TaskStatus{ 1059 State: api.TaskStateRunning, 1060 }, 1061 Spec: api.TaskSpec{ 1062 Runtime: &api.TaskSpec_Container{ 1063 Container: &api.ContainerSpec{}, 1064 }, 1065 Restart: &api.RestartPolicy{ 1066 Condition: api.RestartOnAny, 1067 Delay: gogotypes.DurationProto(restartDelay), 1068 }, 1069 }, 1070 ServiceAnnotations: api.Annotations{ 1071 Name: "task1", 1072 }, 1073 ServiceID: "serviceid1", 1074 NodeID: "nodeid1", 1075 }, 1076 } 1077 for _, task := range tasks { 1078 addTask(t, s, task) 1079 } 1080 1081 // watch orchestration events 1082 watch, cancel := state.Watch(s.WatchQueue(), api.EventCreateTask{}, api.EventUpdateTask{}, api.EventDeleteTask{}) 1083 defer cancel() 1084 1085 orchestrator := NewGlobalOrchestrator(s) 1086 defer orchestrator.Stop() 1087 1088 testutils.EnsureRuns(func() { 1089 assert.NoError(t, orchestrator.Run(ctx)) 1090 }) 1091 1092 observedTask1 := testutils.WatchTaskUpdate(t, watch) 1093 assert.Equal(t, observedTask1.ID, "task1") 1094 assert.Equal(t, observedTask1.Status.State, api.TaskStateRunning) 1095 assert.Equal(t, observedTask1.DesiredState, api.TaskStateShutdown) 1096 1097 var deadCnt, liveCnt int 1098 var err error 1099 s.View(func(readTx store.ReadTx) { 1100 var tasks []*api.Task 1101 tasks, err = store.FindTasks(readTx, store.ByServiceID("serviceid1")) 1102 for _, task := range tasks { 1103 if task.DesiredState == api.TaskStateShutdown { 1104 deadCnt++ 1105 } else { 1106 liveCnt++ 1107 } 1108 } 1109 }) 1110 assert.NoError(t, err) 1111 assert.Equal(t, deadCnt, 1) 1112 assert.Equal(t, liveCnt, 0) 1113 } 1114 1115 func TestInitializationRestartHistory(t *testing.T) { 1116 ctx := context.Background() 1117 s := store.NewMemoryStore(nil) 1118 assert.NotNil(t, s) 1119 defer s.Close() 1120 1121 // create nodes, services and tasks in store directly 1122 addNode(t, s, node1) 1123 1124 service := &api.Service{ 1125 ID: "serviceid1", 1126 SpecVersion: &api.Version{ 1127 Index: 2, 1128 }, 1129 Spec: api.ServiceSpec{ 1130 Annotations: api.Annotations{ 1131 Name: "name1", 1132 }, 1133 Task: api.TaskSpec{ 1134 Runtime: &api.TaskSpec_Container{ 1135 Container: &api.ContainerSpec{}, 1136 }, 1137 Restart: &api.RestartPolicy{ 1138 Condition: api.RestartOnAny, 1139 Delay: gogotypes.DurationProto(restartDelay), 1140 MaxAttempts: 3, 1141 Window: gogotypes.DurationProto(10 * time.Minute), 1142 }, 1143 }, 1144 Mode: &api.ServiceSpec_Global{ 1145 Global: &api.GlobalService{}, 1146 }, 1147 }, 1148 } 1149 addService(t, s, service) 1150 1151 now := time.Now() 1152 1153 tasks := []*api.Task{ 1154 // old spec versions should be ignored for restart tracking 1155 { 1156 ID: "oldspec", 1157 Meta: api.Meta{ 1158 CreatedAt: ptypes.MustTimestampProto(now.Add(-5 * time.Minute)), 1159 }, 1160 DesiredState: api.TaskStateShutdown, 1161 SpecVersion: &api.Version{ 1162 Index: 1, 1163 }, 1164 Status: api.TaskStatus{ 1165 State: api.TaskStateShutdown, 1166 Timestamp: ptypes.MustTimestampProto(now.Add(-5 * time.Minute)), 1167 }, 1168 Spec: service.Spec.Task, 1169 ServiceID: "serviceid1", 1170 NodeID: "nodeid1", 1171 }, 1172 // this is the first task with the current spec version 1173 { 1174 ID: "firstcurrent", 1175 Meta: api.Meta{ 1176 CreatedAt: ptypes.MustTimestampProto(now.Add(-12 * time.Minute)), 1177 }, 1178 DesiredState: api.TaskStateShutdown, 1179 SpecVersion: &api.Version{ 1180 Index: 2, 1181 }, 1182 Status: api.TaskStatus{ 1183 State: api.TaskStateFailed, 1184 Timestamp: ptypes.MustTimestampProto(now.Add(-12 * time.Minute)), 1185 }, 1186 Spec: service.Spec.Task, 1187 ServiceID: "serviceid1", 1188 NodeID: "nodeid1", 1189 }, 1190 1191 // this task falls outside the restart window 1192 { 1193 ID: "outsidewindow", 1194 Meta: api.Meta{ 1195 CreatedAt: ptypes.MustTimestampProto(now.Add(-11 * time.Minute)), 1196 }, 1197 DesiredState: api.TaskStateShutdown, 1198 SpecVersion: &api.Version{ 1199 Index: 2, 1200 }, 1201 Status: api.TaskStatus{ 1202 State: api.TaskStateFailed, 1203 Timestamp: ptypes.MustTimestampProto(now.Add(-11 * time.Minute)), 1204 }, 1205 Spec: service.Spec.Task, 1206 ServiceID: "serviceid1", 1207 NodeID: "nodeid1", 1208 }, 1209 // first task inside restart window 1210 { 1211 ID: "firstinside", 1212 Meta: api.Meta{ 1213 CreatedAt: ptypes.MustTimestampProto(now.Add(-9 * time.Minute)), 1214 }, 1215 DesiredState: api.TaskStateShutdown, 1216 SpecVersion: &api.Version{ 1217 Index: 2, 1218 }, 1219 Status: api.TaskStatus{ 1220 State: api.TaskStateFailed, 1221 Timestamp: ptypes.MustTimestampProto(now.Add(-9 * time.Minute)), 1222 }, 1223 Spec: service.Spec.Task, 1224 ServiceID: "serviceid1", 1225 NodeID: "nodeid1", 1226 }, 1227 // second task inside restart window, currently running 1228 { 1229 ID: "secondinside", 1230 Meta: api.Meta{ 1231 CreatedAt: ptypes.MustTimestampProto(now.Add(-8 * time.Minute)), 1232 }, 1233 DesiredState: api.TaskStateRunning, 1234 SpecVersion: &api.Version{ 1235 Index: 2, 1236 }, 1237 Status: api.TaskStatus{ 1238 State: api.TaskStateRunning, 1239 Timestamp: ptypes.MustTimestampProto(now.Add(-8 * time.Minute)), 1240 }, 1241 Spec: service.Spec.Task, 1242 ServiceID: "serviceid1", 1243 NodeID: "nodeid1", 1244 }, 1245 } 1246 for _, task := range tasks { 1247 addTask(t, s, task) 1248 } 1249 1250 // watch orchestration events 1251 watch, cancel := state.Watch(s.WatchQueue(), api.EventCreateTask{}, api.EventUpdateTask{}, api.EventDeleteTask{}) 1252 defer cancel() 1253 1254 orchestrator := NewGlobalOrchestrator(s) 1255 defer orchestrator.Stop() 1256 1257 testutils.EnsureRuns(func() { 1258 assert.NoError(t, orchestrator.Run(ctx)) 1259 }) 1260 1261 // Fail the running task 1262 s.Update(func(tx store.Tx) error { 1263 task := store.GetTask(tx, "secondinside") 1264 require.NotNil(t, task) 1265 task.Status.State = api.TaskStateFailed 1266 task.Status.Timestamp = ptypes.MustTimestampProto(time.Now()) 1267 assert.NoError(t, store.UpdateTask(tx, task)) 1268 return nil 1269 }) 1270 testutils.Expect(t, watch, api.EventUpdateTask{}) 1271 1272 // It should restart, because this will only be the third restart 1273 // attempt within the time window. 1274 observedTask1 := testutils.WatchTaskUpdate(t, watch) 1275 assert.Equal(t, "secondinside", observedTask1.ID) 1276 assert.Equal(t, api.TaskStateFailed, observedTask1.Status.State) 1277 1278 observedTask2 := testutils.WatchTaskCreate(t, watch) 1279 assert.Equal(t, observedTask2.NodeID, "nodeid1") 1280 assert.Equal(t, api.TaskStateNew, observedTask2.Status.State) 1281 assert.Equal(t, api.TaskStateReady, observedTask2.DesiredState) 1282 1283 observedTask3 := testutils.WatchTaskUpdate(t, watch) 1284 assert.Equal(t, observedTask2.ID, observedTask3.ID) 1285 assert.Equal(t, api.TaskStateRunning, observedTask3.DesiredState) 1286 1287 // Reject the new task 1288 s.Update(func(tx store.Tx) error { 1289 task := store.GetTask(tx, observedTask2.ID) 1290 require.NotNil(t, task) 1291 task.Status.State = api.TaskStateRejected 1292 task.Status.Timestamp = ptypes.MustTimestampProto(time.Now()) 1293 assert.NoError(t, store.UpdateTask(tx, task)) 1294 return nil 1295 }) 1296 testutils.Expect(t, watch, api.EventUpdateTask{}) // our update 1297 testutils.Expect(t, watch, api.EventUpdateTask{}) // orchestrator changes desired state 1298 1299 // It shouldn't restart - that would exceed MaxAttempts 1300 select { 1301 case event := <-watch: 1302 t.Fatalf("got unexpected event %T: %+v", event, event) 1303 case <-time.After(100 * time.Millisecond): 1304 } 1305 }