github.com/kaisenlinux/docker.io@v0.0.0-20230510090727-ea55db55fac7/swarmkit/manager/orchestrator/replicated/restart_test.go (about) 1 package replicated 2 3 import ( 4 "context" 5 "testing" 6 "time" 7 8 "github.com/docker/swarmkit/api" 9 "github.com/docker/swarmkit/manager/orchestrator/testutils" 10 "github.com/docker/swarmkit/manager/state" 11 "github.com/docker/swarmkit/manager/state/store" 12 "github.com/docker/swarmkit/protobuf/ptypes" 13 gogotypes "github.com/gogo/protobuf/types" 14 "github.com/stretchr/testify/assert" 15 "github.com/stretchr/testify/require" 16 ) 17 18 func TestOrchestratorRestartOnAny(t *testing.T) { 19 ctx := context.Background() 20 s := store.NewMemoryStore(nil) 21 assert.NotNil(t, s) 22 defer s.Close() 23 24 orchestrator := NewReplicatedOrchestrator(s) 25 defer orchestrator.Stop() 26 27 watch, cancel := state.Watch(s.WatchQueue() /*api.EventCreateTask{}, api.EventUpdateTask{}*/) 28 defer cancel() 29 30 // Create a service with two instances specified before the orchestrator is 31 // started. This should result in two tasks when the orchestrator 32 // starts up. 33 err := s.Update(func(tx store.Tx) error { 34 j1 := &api.Service{ 35 ID: "id1", 36 Spec: api.ServiceSpec{ 37 Annotations: api.Annotations{ 38 Name: "name1", 39 }, 40 Task: api.TaskSpec{ 41 Runtime: &api.TaskSpec_Container{ 42 Container: &api.ContainerSpec{}, 43 }, 44 Restart: &api.RestartPolicy{ 45 Condition: api.RestartOnAny, 46 Delay: gogotypes.DurationProto(0), 47 }, 48 }, 49 Mode: &api.ServiceSpec_Replicated{ 50 Replicated: &api.ReplicatedService{ 51 Replicas: 2, 52 }, 53 }, 54 }, 55 } 56 assert.NoError(t, store.CreateService(tx, j1)) 57 return nil 58 }) 59 assert.NoError(t, err) 60 61 // Start the orchestrator. 62 go func() { 63 assert.NoError(t, orchestrator.Run(ctx)) 64 }() 65 66 observedTask1 := testutils.WatchTaskCreate(t, watch) 67 assert.Equal(t, observedTask1.Status.State, api.TaskStateNew) 68 assert.Equal(t, observedTask1.ServiceAnnotations.Name, "name1") 69 70 observedTask2 := testutils.WatchTaskCreate(t, watch) 71 assert.Equal(t, observedTask2.Status.State, api.TaskStateNew) 72 assert.Equal(t, observedTask2.ServiceAnnotations.Name, "name1") 73 74 // Fail the first task. Confirm that it gets restarted. 75 updatedTask1 := observedTask1.Copy() 76 updatedTask1.Status = api.TaskStatus{State: api.TaskStateFailed, Timestamp: ptypes.MustTimestampProto(time.Now())} 77 err = s.Update(func(tx store.Tx) error { 78 assert.NoError(t, store.UpdateTask(tx, updatedTask1)) 79 return nil 80 }) 81 assert.NoError(t, err) 82 testutils.Expect(t, watch, state.EventCommit{}) 83 testutils.Expect(t, watch, api.EventUpdateTask{}) 84 testutils.Expect(t, watch, state.EventCommit{}) 85 testutils.Expect(t, watch, api.EventUpdateTask{}) 86 87 observedTask3 := testutils.WatchTaskCreate(t, watch) 88 assert.Equal(t, observedTask3.Status.State, api.TaskStateNew) 89 assert.Equal(t, observedTask3.ServiceAnnotations.Name, "name1") 90 91 testutils.Expect(t, watch, state.EventCommit{}) 92 93 observedTask4 := testutils.WatchTaskUpdate(t, watch) 94 assert.Equal(t, observedTask4.DesiredState, api.TaskStateRunning) 95 assert.Equal(t, observedTask4.ServiceAnnotations.Name, "name1") 96 97 // Mark the second task as completed. Confirm that it gets restarted. 98 updatedTask2 := observedTask2.Copy() 99 updatedTask2.Status = api.TaskStatus{State: api.TaskStateCompleted, Timestamp: ptypes.MustTimestampProto(time.Now())} 100 err = s.Update(func(tx store.Tx) error { 101 assert.NoError(t, store.UpdateTask(tx, updatedTask2)) 102 return nil 103 }) 104 assert.NoError(t, err) 105 testutils.Expect(t, watch, state.EventCommit{}) 106 testutils.Expect(t, watch, api.EventUpdateTask{}) 107 testutils.Expect(t, watch, state.EventCommit{}) 108 testutils.Expect(t, watch, api.EventUpdateTask{}) 109 110 observedTask5 := testutils.WatchTaskCreate(t, watch) 111 assert.Equal(t, observedTask5.Status.State, api.TaskStateNew) 112 assert.Equal(t, observedTask5.ServiceAnnotations.Name, "name1") 113 114 testutils.Expect(t, watch, state.EventCommit{}) 115 116 observedTask6 := testutils.WatchTaskUpdate(t, watch) 117 assert.Equal(t, observedTask6.DesiredState, api.TaskStateRunning) 118 assert.Equal(t, observedTask6.ServiceAnnotations.Name, "name1") 119 } 120 121 func TestOrchestratorRestartOnFailure(t *testing.T) { 122 t.Parallel() 123 124 ctx := context.Background() 125 s := store.NewMemoryStore(nil) 126 assert.NotNil(t, s) 127 defer s.Close() 128 129 orchestrator := NewReplicatedOrchestrator(s) 130 defer orchestrator.Stop() 131 132 watch, cancel := state.Watch(s.WatchQueue(), api.EventCreateTask{}, api.EventUpdateTask{}) 133 defer cancel() 134 135 // Create a service with two instances specified before the orchestrator is 136 // started. This should result in two tasks when the orchestrator 137 // starts up. 138 err := s.Update(func(tx store.Tx) error { 139 j1 := &api.Service{ 140 ID: "id1", 141 Spec: api.ServiceSpec{ 142 Annotations: api.Annotations{ 143 Name: "name1", 144 }, 145 Task: api.TaskSpec{ 146 Runtime: &api.TaskSpec_Container{ 147 Container: &api.ContainerSpec{}, 148 }, 149 Restart: &api.RestartPolicy{ 150 Condition: api.RestartOnFailure, 151 Delay: gogotypes.DurationProto(0), 152 }, 153 }, 154 Mode: &api.ServiceSpec_Replicated{ 155 Replicated: &api.ReplicatedService{ 156 Replicas: 2, 157 }, 158 }, 159 }, 160 } 161 assert.NoError(t, store.CreateService(tx, j1)) 162 return nil 163 }) 164 assert.NoError(t, err) 165 166 // Start the orchestrator. 167 go func() { 168 assert.NoError(t, orchestrator.Run(ctx)) 169 }() 170 171 observedTask1 := testutils.WatchTaskCreate(t, watch) 172 assert.Equal(t, observedTask1.Status.State, api.TaskStateNew) 173 assert.Equal(t, observedTask1.ServiceAnnotations.Name, "name1") 174 175 observedTask2 := testutils.WatchTaskCreate(t, watch) 176 assert.Equal(t, observedTask2.Status.State, api.TaskStateNew) 177 assert.Equal(t, observedTask2.ServiceAnnotations.Name, "name1") 178 179 // Fail the first task. Confirm that it gets restarted. 180 updatedTask1 := observedTask1.Copy() 181 updatedTask1.Status = api.TaskStatus{State: api.TaskStateFailed, Timestamp: ptypes.MustTimestampProto(time.Now())} 182 err = s.Update(func(tx store.Tx) error { 183 assert.NoError(t, store.UpdateTask(tx, updatedTask1)) 184 return nil 185 }) 186 assert.NoError(t, err) 187 testutils.Expect(t, watch, api.EventUpdateTask{}) 188 testutils.Expect(t, watch, api.EventUpdateTask{}) 189 190 observedTask3 := testutils.WatchTaskCreate(t, watch) 191 assert.Equal(t, observedTask3.Status.State, api.TaskStateNew) 192 assert.Equal(t, observedTask3.DesiredState, api.TaskStateReady) 193 assert.Equal(t, observedTask3.ServiceAnnotations.Name, "name1") 194 195 observedTask4 := testutils.WatchTaskUpdate(t, watch) 196 assert.Equal(t, observedTask4.DesiredState, api.TaskStateRunning) 197 assert.Equal(t, observedTask4.ServiceAnnotations.Name, "name1") 198 199 // Mark the second task as completed. Confirm that it does not get restarted. 200 updatedTask2 := observedTask2.Copy() 201 updatedTask2.Status = api.TaskStatus{State: api.TaskStateCompleted, Timestamp: ptypes.MustTimestampProto(time.Now())} 202 err = s.Update(func(tx store.Tx) error { 203 assert.NoError(t, store.UpdateTask(tx, updatedTask2)) 204 return nil 205 }) 206 assert.NoError(t, err) 207 testutils.Expect(t, watch, api.EventUpdateTask{}) 208 testutils.Expect(t, watch, api.EventUpdateTask{}) 209 210 select { 211 case <-watch: 212 t.Fatal("got unexpected event") 213 case <-time.After(100 * time.Millisecond): 214 } 215 216 // Update the service, but don't change anything in the spec. The 217 // second instance instance should not be restarted. 218 err = s.Update(func(tx store.Tx) error { 219 service := store.GetService(tx, "id1") 220 require.NotNil(t, service) 221 assert.NoError(t, store.UpdateService(tx, service)) 222 return nil 223 }) 224 assert.NoError(t, err) 225 226 select { 227 case <-watch: 228 t.Fatal("got unexpected event") 229 case <-time.After(100 * time.Millisecond): 230 } 231 232 // Update the service, and change the TaskSpec. Now the second instance 233 // should be restarted. 234 err = s.Update(func(tx store.Tx) error { 235 service := store.GetService(tx, "id1") 236 require.NotNil(t, service) 237 service.Spec.Task.ForceUpdate++ 238 assert.NoError(t, store.UpdateService(tx, service)) 239 return nil 240 }) 241 assert.NoError(t, err) 242 testutils.Expect(t, watch, api.EventCreateTask{}) 243 } 244 245 func TestOrchestratorRestartOnNone(t *testing.T) { 246 t.Parallel() 247 248 ctx := context.Background() 249 s := store.NewMemoryStore(nil) 250 assert.NotNil(t, s) 251 defer s.Close() 252 253 orchestrator := NewReplicatedOrchestrator(s) 254 defer orchestrator.Stop() 255 256 watch, cancel := state.Watch(s.WatchQueue(), api.EventCreateTask{}, api.EventUpdateTask{}) 257 defer cancel() 258 259 // Create a service with two instances specified before the orchestrator is 260 // started. This should result in two tasks when the orchestrator 261 // starts up. 262 err := s.Update(func(tx store.Tx) error { 263 j1 := &api.Service{ 264 ID: "id1", 265 Spec: api.ServiceSpec{ 266 Annotations: api.Annotations{ 267 Name: "name1", 268 }, 269 Task: api.TaskSpec{ 270 Runtime: &api.TaskSpec_Container{ 271 Container: &api.ContainerSpec{}, 272 }, 273 Restart: &api.RestartPolicy{ 274 Condition: api.RestartOnNone, 275 }, 276 }, 277 Mode: &api.ServiceSpec_Replicated{ 278 Replicated: &api.ReplicatedService{ 279 Replicas: 2, 280 }, 281 }, 282 }, 283 } 284 assert.NoError(t, store.CreateService(tx, j1)) 285 return nil 286 }) 287 assert.NoError(t, err) 288 289 // Start the orchestrator. 290 go func() { 291 assert.NoError(t, orchestrator.Run(ctx)) 292 }() 293 294 observedTask1 := testutils.WatchTaskCreate(t, watch) 295 assert.Equal(t, observedTask1.Status.State, api.TaskStateNew) 296 assert.Equal(t, observedTask1.ServiceAnnotations.Name, "name1") 297 298 observedTask2 := testutils.WatchTaskCreate(t, watch) 299 assert.Equal(t, observedTask2.Status.State, api.TaskStateNew) 300 assert.Equal(t, observedTask2.ServiceAnnotations.Name, "name1") 301 302 // Fail the first task. Confirm that it does not get restarted. 303 updatedTask1 := observedTask1.Copy() 304 updatedTask1.Status.State = api.TaskStateFailed 305 err = s.Update(func(tx store.Tx) error { 306 assert.NoError(t, store.UpdateTask(tx, updatedTask1)) 307 return nil 308 }) 309 assert.NoError(t, err) 310 testutils.Expect(t, watch, api.EventUpdateTask{}) 311 testutils.Expect(t, watch, api.EventUpdateTask{}) 312 313 select { 314 case <-watch: 315 t.Fatal("got unexpected event") 316 case <-time.After(100 * time.Millisecond): 317 } 318 319 // Mark the second task as completed. Confirm that it does not get restarted. 320 updatedTask2 := observedTask2.Copy() 321 updatedTask2.Status = api.TaskStatus{State: api.TaskStateCompleted, Timestamp: ptypes.MustTimestampProto(time.Now())} 322 err = s.Update(func(tx store.Tx) error { 323 assert.NoError(t, store.UpdateTask(tx, updatedTask2)) 324 return nil 325 }) 326 assert.NoError(t, err) 327 testutils.Expect(t, watch, api.EventUpdateTask{}) 328 testutils.Expect(t, watch, api.EventUpdateTask{}) 329 330 select { 331 case <-watch: 332 t.Fatal("got unexpected event") 333 case <-time.After(100 * time.Millisecond): 334 } 335 336 // Update the service, but don't change anything in the spec. Neither 337 // instance should be restarted. 338 err = s.Update(func(tx store.Tx) error { 339 service := store.GetService(tx, "id1") 340 require.NotNil(t, service) 341 assert.NoError(t, store.UpdateService(tx, service)) 342 return nil 343 }) 344 assert.NoError(t, err) 345 346 select { 347 case <-watch: 348 t.Fatal("got unexpected event") 349 case <-time.After(100 * time.Millisecond): 350 } 351 352 // Update the service, and change the TaskSpec. Both instances should 353 // be restarted. 354 err = s.Update(func(tx store.Tx) error { 355 service := store.GetService(tx, "id1") 356 require.NotNil(t, service) 357 service.Spec.Task.ForceUpdate++ 358 assert.NoError(t, store.UpdateService(tx, service)) 359 return nil 360 }) 361 assert.NoError(t, err) 362 testutils.Expect(t, watch, api.EventCreateTask{}) 363 newTask := testutils.WatchTaskUpdate(t, watch) 364 assert.Equal(t, api.TaskStateRunning, newTask.DesiredState) 365 err = s.Update(func(tx store.Tx) error { 366 newTask := store.GetTask(tx, newTask.ID) 367 require.NotNil(t, newTask) 368 newTask.Status.State = api.TaskStateRunning 369 assert.NoError(t, store.UpdateTask(tx, newTask)) 370 return nil 371 }) 372 assert.NoError(t, err) 373 testutils.Expect(t, watch, api.EventUpdateTask{}) 374 375 testutils.Expect(t, watch, api.EventCreateTask{}) 376 } 377 378 func TestOrchestratorRestartDelay(t *testing.T) { 379 t.Parallel() 380 381 ctx := context.Background() 382 s := store.NewMemoryStore(nil) 383 assert.NotNil(t, s) 384 defer s.Close() 385 386 orchestrator := NewReplicatedOrchestrator(s) 387 defer orchestrator.Stop() 388 389 watch, cancel := state.Watch(s.WatchQueue() /*api.EventCreateTask{}, api.EventUpdateTask{}*/) 390 defer cancel() 391 392 // Create a service with two instances specified before the orchestrator is 393 // started. This should result in two tasks when the orchestrator 394 // starts up. 395 err := s.Update(func(tx store.Tx) error { 396 j1 := &api.Service{ 397 ID: "id1", 398 Spec: api.ServiceSpec{ 399 Annotations: api.Annotations{ 400 Name: "name1", 401 }, 402 Task: api.TaskSpec{ 403 Runtime: &api.TaskSpec_Container{ 404 Container: &api.ContainerSpec{}, 405 }, 406 Restart: &api.RestartPolicy{ 407 Condition: api.RestartOnAny, 408 Delay: gogotypes.DurationProto(100 * time.Millisecond), 409 }, 410 }, 411 Mode: &api.ServiceSpec_Replicated{ 412 Replicated: &api.ReplicatedService{ 413 Replicas: 2, 414 }, 415 }, 416 }, 417 } 418 assert.NoError(t, store.CreateService(tx, j1)) 419 return nil 420 }) 421 assert.NoError(t, err) 422 423 // Start the orchestrator. 424 go func() { 425 assert.NoError(t, orchestrator.Run(ctx)) 426 }() 427 428 observedTask1 := testutils.WatchTaskCreate(t, watch) 429 assert.Equal(t, observedTask1.Status.State, api.TaskStateNew) 430 assert.Equal(t, observedTask1.ServiceAnnotations.Name, "name1") 431 432 observedTask2 := testutils.WatchTaskCreate(t, watch) 433 assert.Equal(t, observedTask2.Status.State, api.TaskStateNew) 434 assert.Equal(t, observedTask2.ServiceAnnotations.Name, "name1") 435 436 // Fail the first task. Confirm that it gets restarted. 437 updatedTask1 := observedTask1.Copy() 438 updatedTask1.Status = api.TaskStatus{State: api.TaskStateFailed, Timestamp: ptypes.MustTimestampProto(time.Now())} 439 before := time.Now() 440 err = s.Update(func(tx store.Tx) error { 441 assert.NoError(t, store.UpdateTask(tx, updatedTask1)) 442 return nil 443 }) 444 assert.NoError(t, err) 445 testutils.Expect(t, watch, state.EventCommit{}) 446 testutils.Expect(t, watch, api.EventUpdateTask{}) 447 testutils.Expect(t, watch, state.EventCommit{}) 448 testutils.Expect(t, watch, api.EventUpdateTask{}) 449 450 observedTask3 := testutils.WatchTaskCreate(t, watch) 451 testutils.Expect(t, watch, state.EventCommit{}) 452 assert.Equal(t, observedTask3.Status.State, api.TaskStateNew) 453 assert.Equal(t, observedTask3.DesiredState, api.TaskStateReady) 454 assert.Equal(t, observedTask3.ServiceAnnotations.Name, "name1") 455 456 observedTask4 := testutils.WatchTaskUpdate(t, watch) 457 after := time.Now() 458 459 // At least 100 ms should have elapsed. Only check the lower bound, 460 // because the system may be slow and it could have taken longer. 461 if after.Sub(before) < 100*time.Millisecond { 462 t.Fatalf("restart delay should have elapsed. Got: %v", after.Sub(before)) 463 } 464 465 assert.Equal(t, observedTask4.Status.State, api.TaskStateNew) 466 assert.Equal(t, observedTask4.DesiredState, api.TaskStateRunning) 467 assert.Equal(t, observedTask4.ServiceAnnotations.Name, "name1") 468 } 469 470 func TestOrchestratorRestartMaxAttempts(t *testing.T) { 471 t.Parallel() 472 473 ctx := context.Background() 474 s := store.NewMemoryStore(nil) 475 assert.NotNil(t, s) 476 defer s.Close() 477 478 orchestrator := NewReplicatedOrchestrator(s) 479 defer orchestrator.Stop() 480 481 watch, cancel := state.Watch(s.WatchQueue(), api.EventCreateTask{}, api.EventUpdateTask{}) 482 defer cancel() 483 484 // Create a service with two instances specified before the orchestrator is 485 // started. This should result in two tasks when the orchestrator 486 // starts up. 487 err := s.Update(func(tx store.Tx) error { 488 j1 := &api.Service{ 489 ID: "id1", 490 Spec: api.ServiceSpec{ 491 Annotations: api.Annotations{ 492 Name: "name1", 493 }, 494 Mode: &api.ServiceSpec_Replicated{ 495 Replicated: &api.ReplicatedService{ 496 Replicas: 2, 497 }, 498 }, 499 Task: api.TaskSpec{ 500 Runtime: &api.TaskSpec_Container{ 501 Container: &api.ContainerSpec{}, 502 }, 503 Restart: &api.RestartPolicy{ 504 Condition: api.RestartOnAny, 505 Delay: gogotypes.DurationProto(100 * time.Millisecond), 506 MaxAttempts: 1, 507 }, 508 }, 509 }, 510 SpecVersion: &api.Version{ 511 Index: 1, 512 }, 513 } 514 assert.NoError(t, store.CreateService(tx, j1)) 515 return nil 516 }) 517 assert.NoError(t, err) 518 519 // Start the orchestrator. 520 go func() { 521 assert.NoError(t, orchestrator.Run(ctx)) 522 }() 523 524 failTask := func(task *api.Task, expectRestart bool) { 525 task = task.Copy() 526 task.Status = api.TaskStatus{State: api.TaskStateFailed, Timestamp: ptypes.MustTimestampProto(time.Now())} 527 err = s.Update(func(tx store.Tx) error { 528 assert.NoError(t, store.UpdateTask(tx, task)) 529 return nil 530 }) 531 assert.NoError(t, err) 532 testutils.Expect(t, watch, api.EventUpdateTask{}) 533 task = testutils.WatchShutdownTask(t, watch) 534 if expectRestart { 535 createdTask := testutils.WatchTaskCreate(t, watch) 536 assert.Equal(t, createdTask.Status.State, api.TaskStateNew) 537 assert.Equal(t, createdTask.DesiredState, api.TaskStateReady) 538 assert.Equal(t, createdTask.ServiceAnnotations.Name, "name1") 539 } 540 err = s.Update(func(tx store.Tx) error { 541 task := task.Copy() 542 task.Status.State = api.TaskStateShutdown 543 assert.NoError(t, store.UpdateTask(tx, task)) 544 return nil 545 }) 546 assert.NoError(t, err) 547 testutils.Expect(t, watch, api.EventUpdateTask{}) 548 } 549 550 testRestart := func(serviceUpdated bool) { 551 observedTask1 := testutils.WatchTaskCreate(t, watch) 552 assert.Equal(t, observedTask1.Status.State, api.TaskStateNew) 553 assert.Equal(t, observedTask1.ServiceAnnotations.Name, "name1") 554 555 if serviceUpdated { 556 runnableTask := testutils.WatchTaskUpdate(t, watch) 557 assert.Equal(t, observedTask1.ID, runnableTask.ID) 558 assert.Equal(t, api.TaskStateRunning, runnableTask.DesiredState) 559 err = s.Update(func(tx store.Tx) error { 560 task := runnableTask.Copy() 561 task.Status.State = api.TaskStateRunning 562 assert.NoError(t, store.UpdateTask(tx, task)) 563 return nil 564 }) 565 assert.NoError(t, err) 566 567 testutils.Expect(t, watch, api.EventUpdateTask{}) 568 } 569 570 observedTask2 := testutils.WatchTaskCreate(t, watch) 571 assert.Equal(t, observedTask2.Status.State, api.TaskStateNew) 572 assert.Equal(t, observedTask2.ServiceAnnotations.Name, "name1") 573 574 if serviceUpdated { 575 testutils.Expect(t, watch, api.EventUpdateTask{}) 576 } 577 578 // Fail the first task. Confirm that it gets restarted. 579 before := time.Now() 580 failTask(observedTask1, true) 581 582 observedTask4 := testutils.WatchTaskUpdate(t, watch) 583 after := time.Now() 584 585 // At least 100 ms should have elapsed. Only check the lower bound, 586 // because the system may be slow and it could have taken longer. 587 if after.Sub(before) < 100*time.Millisecond { 588 t.Fatal("restart delay should have elapsed") 589 } 590 591 assert.Equal(t, observedTask4.Status.State, api.TaskStateNew) 592 assert.Equal(t, observedTask4.DesiredState, api.TaskStateRunning) 593 assert.Equal(t, observedTask4.ServiceAnnotations.Name, "name1") 594 595 // Fail the second task. Confirm that it gets restarted. 596 failTask(observedTask2, true) 597 598 observedTask6 := testutils.WatchTaskUpdate(t, watch) // task gets started after a delay 599 assert.Equal(t, observedTask6.Status.State, api.TaskStateNew) 600 assert.Equal(t, observedTask6.DesiredState, api.TaskStateRunning) 601 assert.Equal(t, observedTask6.ServiceAnnotations.Name, "name1") 602 603 // Fail the first instance again. It should not be restarted. 604 failTask(observedTask4, false) 605 606 select { 607 case <-watch: 608 t.Fatal("got unexpected event") 609 case <-time.After(200 * time.Millisecond): 610 } 611 612 // Fail the second instance again. It should not be restarted. 613 failTask(observedTask6, false) 614 615 select { 616 case <-watch: 617 t.Fatal("got unexpected event") 618 case <-time.After(200 * time.Millisecond): 619 } 620 } 621 622 testRestart(false) 623 624 // Update the service spec 625 err = s.Update(func(tx store.Tx) error { 626 s := store.GetService(tx, "id1") 627 require.NotNil(t, s) 628 s.Spec.Task.GetContainer().Image = "newimage" 629 s.SpecVersion.Index = 2 630 assert.NoError(t, store.UpdateService(tx, s)) 631 return nil 632 }) 633 assert.NoError(t, err) 634 635 testRestart(true) 636 } 637 638 func TestOrchestratorRestartWindow(t *testing.T) { 639 t.Parallel() 640 641 ctx := context.Background() 642 s := store.NewMemoryStore(nil) 643 assert.NotNil(t, s) 644 defer s.Close() 645 646 orchestrator := NewReplicatedOrchestrator(s) 647 defer orchestrator.Stop() 648 649 watch, cancel := state.Watch(s.WatchQueue() /*api.EventCreateTask{}, api.EventUpdateTask{}*/) 650 defer cancel() 651 652 // Create a service with two instances specified before the orchestrator is 653 // started. This should result in two tasks when the orchestrator 654 // starts up. 655 err := s.Update(func(tx store.Tx) error { 656 j1 := &api.Service{ 657 ID: "id1", 658 Spec: api.ServiceSpec{ 659 Annotations: api.Annotations{ 660 Name: "name1", 661 }, 662 Mode: &api.ServiceSpec_Replicated{ 663 Replicated: &api.ReplicatedService{ 664 Replicas: 2, 665 }, 666 }, 667 Task: api.TaskSpec{ 668 Restart: &api.RestartPolicy{ 669 Condition: api.RestartOnAny, 670 Delay: gogotypes.DurationProto(100 * time.Millisecond), 671 MaxAttempts: 1, 672 Window: gogotypes.DurationProto(500 * time.Millisecond), 673 }, 674 }, 675 }, 676 } 677 assert.NoError(t, store.CreateService(tx, j1)) 678 return nil 679 }) 680 assert.NoError(t, err) 681 682 // Start the orchestrator. 683 go func() { 684 assert.NoError(t, orchestrator.Run(ctx)) 685 }() 686 687 observedTask1 := testutils.WatchTaskCreate(t, watch) 688 assert.Equal(t, observedTask1.Status.State, api.TaskStateNew) 689 assert.Equal(t, observedTask1.ServiceAnnotations.Name, "name1") 690 691 observedTask2 := testutils.WatchTaskCreate(t, watch) 692 assert.Equal(t, observedTask2.Status.State, api.TaskStateNew) 693 assert.Equal(t, observedTask2.ServiceAnnotations.Name, "name1") 694 695 // Fail the first task. Confirm that it gets restarted. 696 updatedTask1 := observedTask1.Copy() 697 updatedTask1.Status = api.TaskStatus{State: api.TaskStateFailed, Timestamp: ptypes.MustTimestampProto(time.Now())} 698 before := time.Now() 699 err = s.Update(func(tx store.Tx) error { 700 assert.NoError(t, store.UpdateTask(tx, updatedTask1)) 701 return nil 702 }) 703 assert.NoError(t, err) 704 testutils.Expect(t, watch, state.EventCommit{}) 705 testutils.Expect(t, watch, api.EventUpdateTask{}) 706 testutils.Expect(t, watch, state.EventCommit{}) 707 testutils.Expect(t, watch, api.EventUpdateTask{}) 708 709 observedTask3 := testutils.WatchTaskCreate(t, watch) 710 testutils.Expect(t, watch, state.EventCommit{}) 711 assert.Equal(t, observedTask3.Status.State, api.TaskStateNew) 712 assert.Equal(t, observedTask3.DesiredState, api.TaskStateReady) 713 assert.Equal(t, observedTask3.ServiceAnnotations.Name, "name1") 714 715 observedTask4 := testutils.WatchTaskUpdate(t, watch) 716 after := time.Now() 717 718 // At least 100 ms should have elapsed. Only check the lower bound, 719 // because the system may be slow and it could have taken longer. 720 if after.Sub(before) < 100*time.Millisecond { 721 t.Fatal("restart delay should have elapsed") 722 } 723 724 assert.Equal(t, observedTask4.Status.State, api.TaskStateNew) 725 assert.Equal(t, observedTask4.DesiredState, api.TaskStateRunning) 726 assert.Equal(t, observedTask4.ServiceAnnotations.Name, "name1") 727 728 // Fail the second task. Confirm that it gets restarted. 729 updatedTask2 := observedTask2.Copy() 730 updatedTask2.Status = api.TaskStatus{State: api.TaskStateFailed, Timestamp: ptypes.MustTimestampProto(time.Now())} 731 err = s.Update(func(tx store.Tx) error { 732 assert.NoError(t, store.UpdateTask(tx, updatedTask2)) 733 return nil 734 }) 735 assert.NoError(t, err) 736 testutils.Expect(t, watch, state.EventCommit{}) 737 testutils.Expect(t, watch, api.EventUpdateTask{}) 738 testutils.Expect(t, watch, state.EventCommit{}) 739 testutils.Expect(t, watch, api.EventUpdateTask{}) 740 741 observedTask5 := testutils.WatchTaskCreate(t, watch) 742 testutils.Expect(t, watch, state.EventCommit{}) 743 assert.Equal(t, observedTask5.Status.State, api.TaskStateNew) 744 assert.Equal(t, observedTask5.DesiredState, api.TaskStateReady) 745 assert.Equal(t, observedTask5.ServiceAnnotations.Name, "name1") 746 747 observedTask6 := testutils.WatchTaskUpdate(t, watch) // task gets started after a delay 748 testutils.Expect(t, watch, state.EventCommit{}) 749 assert.Equal(t, observedTask6.Status.State, api.TaskStateNew) 750 assert.Equal(t, observedTask6.DesiredState, api.TaskStateRunning) 751 assert.Equal(t, observedTask6.ServiceAnnotations.Name, "name1") 752 753 // Fail the first instance again. It should not be restarted. 754 updatedTask1 = observedTask3.Copy() 755 updatedTask1.Status = api.TaskStatus{State: api.TaskStateFailed, Timestamp: ptypes.MustTimestampProto(time.Now())} 756 err = s.Update(func(tx store.Tx) error { 757 assert.NoError(t, store.UpdateTask(tx, updatedTask1)) 758 return nil 759 }) 760 assert.NoError(t, err) 761 testutils.Expect(t, watch, api.EventUpdateTask{}) 762 testutils.Expect(t, watch, state.EventCommit{}) 763 testutils.Expect(t, watch, api.EventUpdateTask{}) 764 testutils.Expect(t, watch, state.EventCommit{}) 765 766 select { 767 case <-watch: 768 t.Fatal("got unexpected event") 769 case <-time.After(200 * time.Millisecond): 770 } 771 772 time.Sleep(time.Second) 773 774 // Fail the second instance again. It should get restarted because 775 // enough time has elapsed since the last restarts. 776 updatedTask2 = observedTask5.Copy() 777 updatedTask2.Status = api.TaskStatus{State: api.TaskStateFailed, Timestamp: ptypes.MustTimestampProto(time.Now())} 778 before = time.Now() 779 err = s.Update(func(tx store.Tx) error { 780 assert.NoError(t, store.UpdateTask(tx, updatedTask2)) 781 return nil 782 }) 783 assert.NoError(t, err) 784 testutils.Expect(t, watch, api.EventUpdateTask{}) 785 testutils.Expect(t, watch, state.EventCommit{}) 786 testutils.Expect(t, watch, api.EventUpdateTask{}) 787 788 observedTask7 := testutils.WatchTaskCreate(t, watch) 789 testutils.Expect(t, watch, state.EventCommit{}) 790 assert.Equal(t, observedTask7.Status.State, api.TaskStateNew) 791 assert.Equal(t, observedTask7.DesiredState, api.TaskStateReady) 792 793 observedTask8 := testutils.WatchTaskUpdate(t, watch) 794 after = time.Now() 795 796 // At least 100 ms should have elapsed. Only check the lower bound, 797 // because the system may be slow and it could have taken longer. 798 if after.Sub(before) < 100*time.Millisecond { 799 t.Fatal("restart delay should have elapsed") 800 } 801 802 assert.Equal(t, observedTask8.Status.State, api.TaskStateNew) 803 assert.Equal(t, observedTask8.DesiredState, api.TaskStateRunning) 804 assert.Equal(t, observedTask8.ServiceAnnotations.Name, "name1") 805 }