github.com/kaisenlinux/docker.io@v0.0.0-20230510090727-ea55db55fac7/swarmkit/manager/orchestrator/replicated/restart_test.go (about)

     1  package replicated
     2  
     3  import (
     4  	"context"
     5  	"testing"
     6  	"time"
     7  
     8  	"github.com/docker/swarmkit/api"
     9  	"github.com/docker/swarmkit/manager/orchestrator/testutils"
    10  	"github.com/docker/swarmkit/manager/state"
    11  	"github.com/docker/swarmkit/manager/state/store"
    12  	"github.com/docker/swarmkit/protobuf/ptypes"
    13  	gogotypes "github.com/gogo/protobuf/types"
    14  	"github.com/stretchr/testify/assert"
    15  	"github.com/stretchr/testify/require"
    16  )
    17  
    18  func TestOrchestratorRestartOnAny(t *testing.T) {
    19  	ctx := context.Background()
    20  	s := store.NewMemoryStore(nil)
    21  	assert.NotNil(t, s)
    22  	defer s.Close()
    23  
    24  	orchestrator := NewReplicatedOrchestrator(s)
    25  	defer orchestrator.Stop()
    26  
    27  	watch, cancel := state.Watch(s.WatchQueue() /*api.EventCreateTask{}, api.EventUpdateTask{}*/)
    28  	defer cancel()
    29  
    30  	// Create a service with two instances specified before the orchestrator is
    31  	// started. This should result in two tasks when the orchestrator
    32  	// starts up.
    33  	err := s.Update(func(tx store.Tx) error {
    34  		j1 := &api.Service{
    35  			ID: "id1",
    36  			Spec: api.ServiceSpec{
    37  				Annotations: api.Annotations{
    38  					Name: "name1",
    39  				},
    40  				Task: api.TaskSpec{
    41  					Runtime: &api.TaskSpec_Container{
    42  						Container: &api.ContainerSpec{},
    43  					},
    44  					Restart: &api.RestartPolicy{
    45  						Condition: api.RestartOnAny,
    46  						Delay:     gogotypes.DurationProto(0),
    47  					},
    48  				},
    49  				Mode: &api.ServiceSpec_Replicated{
    50  					Replicated: &api.ReplicatedService{
    51  						Replicas: 2,
    52  					},
    53  				},
    54  			},
    55  		}
    56  		assert.NoError(t, store.CreateService(tx, j1))
    57  		return nil
    58  	})
    59  	assert.NoError(t, err)
    60  
    61  	// Start the orchestrator.
    62  	go func() {
    63  		assert.NoError(t, orchestrator.Run(ctx))
    64  	}()
    65  
    66  	observedTask1 := testutils.WatchTaskCreate(t, watch)
    67  	assert.Equal(t, observedTask1.Status.State, api.TaskStateNew)
    68  	assert.Equal(t, observedTask1.ServiceAnnotations.Name, "name1")
    69  
    70  	observedTask2 := testutils.WatchTaskCreate(t, watch)
    71  	assert.Equal(t, observedTask2.Status.State, api.TaskStateNew)
    72  	assert.Equal(t, observedTask2.ServiceAnnotations.Name, "name1")
    73  
    74  	// Fail the first task. Confirm that it gets restarted.
    75  	updatedTask1 := observedTask1.Copy()
    76  	updatedTask1.Status = api.TaskStatus{State: api.TaskStateFailed, Timestamp: ptypes.MustTimestampProto(time.Now())}
    77  	err = s.Update(func(tx store.Tx) error {
    78  		assert.NoError(t, store.UpdateTask(tx, updatedTask1))
    79  		return nil
    80  	})
    81  	assert.NoError(t, err)
    82  	testutils.Expect(t, watch, state.EventCommit{})
    83  	testutils.Expect(t, watch, api.EventUpdateTask{})
    84  	testutils.Expect(t, watch, state.EventCommit{})
    85  	testutils.Expect(t, watch, api.EventUpdateTask{})
    86  
    87  	observedTask3 := testutils.WatchTaskCreate(t, watch)
    88  	assert.Equal(t, observedTask3.Status.State, api.TaskStateNew)
    89  	assert.Equal(t, observedTask3.ServiceAnnotations.Name, "name1")
    90  
    91  	testutils.Expect(t, watch, state.EventCommit{})
    92  
    93  	observedTask4 := testutils.WatchTaskUpdate(t, watch)
    94  	assert.Equal(t, observedTask4.DesiredState, api.TaskStateRunning)
    95  	assert.Equal(t, observedTask4.ServiceAnnotations.Name, "name1")
    96  
    97  	// Mark the second task as completed. Confirm that it gets restarted.
    98  	updatedTask2 := observedTask2.Copy()
    99  	updatedTask2.Status = api.TaskStatus{State: api.TaskStateCompleted, Timestamp: ptypes.MustTimestampProto(time.Now())}
   100  	err = s.Update(func(tx store.Tx) error {
   101  		assert.NoError(t, store.UpdateTask(tx, updatedTask2))
   102  		return nil
   103  	})
   104  	assert.NoError(t, err)
   105  	testutils.Expect(t, watch, state.EventCommit{})
   106  	testutils.Expect(t, watch, api.EventUpdateTask{})
   107  	testutils.Expect(t, watch, state.EventCommit{})
   108  	testutils.Expect(t, watch, api.EventUpdateTask{})
   109  
   110  	observedTask5 := testutils.WatchTaskCreate(t, watch)
   111  	assert.Equal(t, observedTask5.Status.State, api.TaskStateNew)
   112  	assert.Equal(t, observedTask5.ServiceAnnotations.Name, "name1")
   113  
   114  	testutils.Expect(t, watch, state.EventCommit{})
   115  
   116  	observedTask6 := testutils.WatchTaskUpdate(t, watch)
   117  	assert.Equal(t, observedTask6.DesiredState, api.TaskStateRunning)
   118  	assert.Equal(t, observedTask6.ServiceAnnotations.Name, "name1")
   119  }
   120  
   121  func TestOrchestratorRestartOnFailure(t *testing.T) {
   122  	t.Parallel()
   123  
   124  	ctx := context.Background()
   125  	s := store.NewMemoryStore(nil)
   126  	assert.NotNil(t, s)
   127  	defer s.Close()
   128  
   129  	orchestrator := NewReplicatedOrchestrator(s)
   130  	defer orchestrator.Stop()
   131  
   132  	watch, cancel := state.Watch(s.WatchQueue(), api.EventCreateTask{}, api.EventUpdateTask{})
   133  	defer cancel()
   134  
   135  	// Create a service with two instances specified before the orchestrator is
   136  	// started. This should result in two tasks when the orchestrator
   137  	// starts up.
   138  	err := s.Update(func(tx store.Tx) error {
   139  		j1 := &api.Service{
   140  			ID: "id1",
   141  			Spec: api.ServiceSpec{
   142  				Annotations: api.Annotations{
   143  					Name: "name1",
   144  				},
   145  				Task: api.TaskSpec{
   146  					Runtime: &api.TaskSpec_Container{
   147  						Container: &api.ContainerSpec{},
   148  					},
   149  					Restart: &api.RestartPolicy{
   150  						Condition: api.RestartOnFailure,
   151  						Delay:     gogotypes.DurationProto(0),
   152  					},
   153  				},
   154  				Mode: &api.ServiceSpec_Replicated{
   155  					Replicated: &api.ReplicatedService{
   156  						Replicas: 2,
   157  					},
   158  				},
   159  			},
   160  		}
   161  		assert.NoError(t, store.CreateService(tx, j1))
   162  		return nil
   163  	})
   164  	assert.NoError(t, err)
   165  
   166  	// Start the orchestrator.
   167  	go func() {
   168  		assert.NoError(t, orchestrator.Run(ctx))
   169  	}()
   170  
   171  	observedTask1 := testutils.WatchTaskCreate(t, watch)
   172  	assert.Equal(t, observedTask1.Status.State, api.TaskStateNew)
   173  	assert.Equal(t, observedTask1.ServiceAnnotations.Name, "name1")
   174  
   175  	observedTask2 := testutils.WatchTaskCreate(t, watch)
   176  	assert.Equal(t, observedTask2.Status.State, api.TaskStateNew)
   177  	assert.Equal(t, observedTask2.ServiceAnnotations.Name, "name1")
   178  
   179  	// Fail the first task. Confirm that it gets restarted.
   180  	updatedTask1 := observedTask1.Copy()
   181  	updatedTask1.Status = api.TaskStatus{State: api.TaskStateFailed, Timestamp: ptypes.MustTimestampProto(time.Now())}
   182  	err = s.Update(func(tx store.Tx) error {
   183  		assert.NoError(t, store.UpdateTask(tx, updatedTask1))
   184  		return nil
   185  	})
   186  	assert.NoError(t, err)
   187  	testutils.Expect(t, watch, api.EventUpdateTask{})
   188  	testutils.Expect(t, watch, api.EventUpdateTask{})
   189  
   190  	observedTask3 := testutils.WatchTaskCreate(t, watch)
   191  	assert.Equal(t, observedTask3.Status.State, api.TaskStateNew)
   192  	assert.Equal(t, observedTask3.DesiredState, api.TaskStateReady)
   193  	assert.Equal(t, observedTask3.ServiceAnnotations.Name, "name1")
   194  
   195  	observedTask4 := testutils.WatchTaskUpdate(t, watch)
   196  	assert.Equal(t, observedTask4.DesiredState, api.TaskStateRunning)
   197  	assert.Equal(t, observedTask4.ServiceAnnotations.Name, "name1")
   198  
   199  	// Mark the second task as completed. Confirm that it does not get restarted.
   200  	updatedTask2 := observedTask2.Copy()
   201  	updatedTask2.Status = api.TaskStatus{State: api.TaskStateCompleted, Timestamp: ptypes.MustTimestampProto(time.Now())}
   202  	err = s.Update(func(tx store.Tx) error {
   203  		assert.NoError(t, store.UpdateTask(tx, updatedTask2))
   204  		return nil
   205  	})
   206  	assert.NoError(t, err)
   207  	testutils.Expect(t, watch, api.EventUpdateTask{})
   208  	testutils.Expect(t, watch, api.EventUpdateTask{})
   209  
   210  	select {
   211  	case <-watch:
   212  		t.Fatal("got unexpected event")
   213  	case <-time.After(100 * time.Millisecond):
   214  	}
   215  
   216  	// Update the service, but don't change anything in the spec. The
   217  	// second instance instance should not be restarted.
   218  	err = s.Update(func(tx store.Tx) error {
   219  		service := store.GetService(tx, "id1")
   220  		require.NotNil(t, service)
   221  		assert.NoError(t, store.UpdateService(tx, service))
   222  		return nil
   223  	})
   224  	assert.NoError(t, err)
   225  
   226  	select {
   227  	case <-watch:
   228  		t.Fatal("got unexpected event")
   229  	case <-time.After(100 * time.Millisecond):
   230  	}
   231  
   232  	// Update the service, and change the TaskSpec. Now the second instance
   233  	// should be restarted.
   234  	err = s.Update(func(tx store.Tx) error {
   235  		service := store.GetService(tx, "id1")
   236  		require.NotNil(t, service)
   237  		service.Spec.Task.ForceUpdate++
   238  		assert.NoError(t, store.UpdateService(tx, service))
   239  		return nil
   240  	})
   241  	assert.NoError(t, err)
   242  	testutils.Expect(t, watch, api.EventCreateTask{})
   243  }
   244  
   245  func TestOrchestratorRestartOnNone(t *testing.T) {
   246  	t.Parallel()
   247  
   248  	ctx := context.Background()
   249  	s := store.NewMemoryStore(nil)
   250  	assert.NotNil(t, s)
   251  	defer s.Close()
   252  
   253  	orchestrator := NewReplicatedOrchestrator(s)
   254  	defer orchestrator.Stop()
   255  
   256  	watch, cancel := state.Watch(s.WatchQueue(), api.EventCreateTask{}, api.EventUpdateTask{})
   257  	defer cancel()
   258  
   259  	// Create a service with two instances specified before the orchestrator is
   260  	// started. This should result in two tasks when the orchestrator
   261  	// starts up.
   262  	err := s.Update(func(tx store.Tx) error {
   263  		j1 := &api.Service{
   264  			ID: "id1",
   265  			Spec: api.ServiceSpec{
   266  				Annotations: api.Annotations{
   267  					Name: "name1",
   268  				},
   269  				Task: api.TaskSpec{
   270  					Runtime: &api.TaskSpec_Container{
   271  						Container: &api.ContainerSpec{},
   272  					},
   273  					Restart: &api.RestartPolicy{
   274  						Condition: api.RestartOnNone,
   275  					},
   276  				},
   277  				Mode: &api.ServiceSpec_Replicated{
   278  					Replicated: &api.ReplicatedService{
   279  						Replicas: 2,
   280  					},
   281  				},
   282  			},
   283  		}
   284  		assert.NoError(t, store.CreateService(tx, j1))
   285  		return nil
   286  	})
   287  	assert.NoError(t, err)
   288  
   289  	// Start the orchestrator.
   290  	go func() {
   291  		assert.NoError(t, orchestrator.Run(ctx))
   292  	}()
   293  
   294  	observedTask1 := testutils.WatchTaskCreate(t, watch)
   295  	assert.Equal(t, observedTask1.Status.State, api.TaskStateNew)
   296  	assert.Equal(t, observedTask1.ServiceAnnotations.Name, "name1")
   297  
   298  	observedTask2 := testutils.WatchTaskCreate(t, watch)
   299  	assert.Equal(t, observedTask2.Status.State, api.TaskStateNew)
   300  	assert.Equal(t, observedTask2.ServiceAnnotations.Name, "name1")
   301  
   302  	// Fail the first task. Confirm that it does not get restarted.
   303  	updatedTask1 := observedTask1.Copy()
   304  	updatedTask1.Status.State = api.TaskStateFailed
   305  	err = s.Update(func(tx store.Tx) error {
   306  		assert.NoError(t, store.UpdateTask(tx, updatedTask1))
   307  		return nil
   308  	})
   309  	assert.NoError(t, err)
   310  	testutils.Expect(t, watch, api.EventUpdateTask{})
   311  	testutils.Expect(t, watch, api.EventUpdateTask{})
   312  
   313  	select {
   314  	case <-watch:
   315  		t.Fatal("got unexpected event")
   316  	case <-time.After(100 * time.Millisecond):
   317  	}
   318  
   319  	// Mark the second task as completed. Confirm that it does not get restarted.
   320  	updatedTask2 := observedTask2.Copy()
   321  	updatedTask2.Status = api.TaskStatus{State: api.TaskStateCompleted, Timestamp: ptypes.MustTimestampProto(time.Now())}
   322  	err = s.Update(func(tx store.Tx) error {
   323  		assert.NoError(t, store.UpdateTask(tx, updatedTask2))
   324  		return nil
   325  	})
   326  	assert.NoError(t, err)
   327  	testutils.Expect(t, watch, api.EventUpdateTask{})
   328  	testutils.Expect(t, watch, api.EventUpdateTask{})
   329  
   330  	select {
   331  	case <-watch:
   332  		t.Fatal("got unexpected event")
   333  	case <-time.After(100 * time.Millisecond):
   334  	}
   335  
   336  	// Update the service, but don't change anything in the spec. Neither
   337  	// instance should be restarted.
   338  	err = s.Update(func(tx store.Tx) error {
   339  		service := store.GetService(tx, "id1")
   340  		require.NotNil(t, service)
   341  		assert.NoError(t, store.UpdateService(tx, service))
   342  		return nil
   343  	})
   344  	assert.NoError(t, err)
   345  
   346  	select {
   347  	case <-watch:
   348  		t.Fatal("got unexpected event")
   349  	case <-time.After(100 * time.Millisecond):
   350  	}
   351  
   352  	// Update the service, and change the TaskSpec. Both instances should
   353  	// be restarted.
   354  	err = s.Update(func(tx store.Tx) error {
   355  		service := store.GetService(tx, "id1")
   356  		require.NotNil(t, service)
   357  		service.Spec.Task.ForceUpdate++
   358  		assert.NoError(t, store.UpdateService(tx, service))
   359  		return nil
   360  	})
   361  	assert.NoError(t, err)
   362  	testutils.Expect(t, watch, api.EventCreateTask{})
   363  	newTask := testutils.WatchTaskUpdate(t, watch)
   364  	assert.Equal(t, api.TaskStateRunning, newTask.DesiredState)
   365  	err = s.Update(func(tx store.Tx) error {
   366  		newTask := store.GetTask(tx, newTask.ID)
   367  		require.NotNil(t, newTask)
   368  		newTask.Status.State = api.TaskStateRunning
   369  		assert.NoError(t, store.UpdateTask(tx, newTask))
   370  		return nil
   371  	})
   372  	assert.NoError(t, err)
   373  	testutils.Expect(t, watch, api.EventUpdateTask{})
   374  
   375  	testutils.Expect(t, watch, api.EventCreateTask{})
   376  }
   377  
   378  func TestOrchestratorRestartDelay(t *testing.T) {
   379  	t.Parallel()
   380  
   381  	ctx := context.Background()
   382  	s := store.NewMemoryStore(nil)
   383  	assert.NotNil(t, s)
   384  	defer s.Close()
   385  
   386  	orchestrator := NewReplicatedOrchestrator(s)
   387  	defer orchestrator.Stop()
   388  
   389  	watch, cancel := state.Watch(s.WatchQueue() /*api.EventCreateTask{}, api.EventUpdateTask{}*/)
   390  	defer cancel()
   391  
   392  	// Create a service with two instances specified before the orchestrator is
   393  	// started. This should result in two tasks when the orchestrator
   394  	// starts up.
   395  	err := s.Update(func(tx store.Tx) error {
   396  		j1 := &api.Service{
   397  			ID: "id1",
   398  			Spec: api.ServiceSpec{
   399  				Annotations: api.Annotations{
   400  					Name: "name1",
   401  				},
   402  				Task: api.TaskSpec{
   403  					Runtime: &api.TaskSpec_Container{
   404  						Container: &api.ContainerSpec{},
   405  					},
   406  					Restart: &api.RestartPolicy{
   407  						Condition: api.RestartOnAny,
   408  						Delay:     gogotypes.DurationProto(100 * time.Millisecond),
   409  					},
   410  				},
   411  				Mode: &api.ServiceSpec_Replicated{
   412  					Replicated: &api.ReplicatedService{
   413  						Replicas: 2,
   414  					},
   415  				},
   416  			},
   417  		}
   418  		assert.NoError(t, store.CreateService(tx, j1))
   419  		return nil
   420  	})
   421  	assert.NoError(t, err)
   422  
   423  	// Start the orchestrator.
   424  	go func() {
   425  		assert.NoError(t, orchestrator.Run(ctx))
   426  	}()
   427  
   428  	observedTask1 := testutils.WatchTaskCreate(t, watch)
   429  	assert.Equal(t, observedTask1.Status.State, api.TaskStateNew)
   430  	assert.Equal(t, observedTask1.ServiceAnnotations.Name, "name1")
   431  
   432  	observedTask2 := testutils.WatchTaskCreate(t, watch)
   433  	assert.Equal(t, observedTask2.Status.State, api.TaskStateNew)
   434  	assert.Equal(t, observedTask2.ServiceAnnotations.Name, "name1")
   435  
   436  	// Fail the first task. Confirm that it gets restarted.
   437  	updatedTask1 := observedTask1.Copy()
   438  	updatedTask1.Status = api.TaskStatus{State: api.TaskStateFailed, Timestamp: ptypes.MustTimestampProto(time.Now())}
   439  	before := time.Now()
   440  	err = s.Update(func(tx store.Tx) error {
   441  		assert.NoError(t, store.UpdateTask(tx, updatedTask1))
   442  		return nil
   443  	})
   444  	assert.NoError(t, err)
   445  	testutils.Expect(t, watch, state.EventCommit{})
   446  	testutils.Expect(t, watch, api.EventUpdateTask{})
   447  	testutils.Expect(t, watch, state.EventCommit{})
   448  	testutils.Expect(t, watch, api.EventUpdateTask{})
   449  
   450  	observedTask3 := testutils.WatchTaskCreate(t, watch)
   451  	testutils.Expect(t, watch, state.EventCommit{})
   452  	assert.Equal(t, observedTask3.Status.State, api.TaskStateNew)
   453  	assert.Equal(t, observedTask3.DesiredState, api.TaskStateReady)
   454  	assert.Equal(t, observedTask3.ServiceAnnotations.Name, "name1")
   455  
   456  	observedTask4 := testutils.WatchTaskUpdate(t, watch)
   457  	after := time.Now()
   458  
   459  	// At least 100 ms should have elapsed. Only check the lower bound,
   460  	// because the system may be slow and it could have taken longer.
   461  	if after.Sub(before) < 100*time.Millisecond {
   462  		t.Fatalf("restart delay should have elapsed. Got: %v", after.Sub(before))
   463  	}
   464  
   465  	assert.Equal(t, observedTask4.Status.State, api.TaskStateNew)
   466  	assert.Equal(t, observedTask4.DesiredState, api.TaskStateRunning)
   467  	assert.Equal(t, observedTask4.ServiceAnnotations.Name, "name1")
   468  }
   469  
   470  func TestOrchestratorRestartMaxAttempts(t *testing.T) {
   471  	t.Parallel()
   472  
   473  	ctx := context.Background()
   474  	s := store.NewMemoryStore(nil)
   475  	assert.NotNil(t, s)
   476  	defer s.Close()
   477  
   478  	orchestrator := NewReplicatedOrchestrator(s)
   479  	defer orchestrator.Stop()
   480  
   481  	watch, cancel := state.Watch(s.WatchQueue(), api.EventCreateTask{}, api.EventUpdateTask{})
   482  	defer cancel()
   483  
   484  	// Create a service with two instances specified before the orchestrator is
   485  	// started. This should result in two tasks when the orchestrator
   486  	// starts up.
   487  	err := s.Update(func(tx store.Tx) error {
   488  		j1 := &api.Service{
   489  			ID: "id1",
   490  			Spec: api.ServiceSpec{
   491  				Annotations: api.Annotations{
   492  					Name: "name1",
   493  				},
   494  				Mode: &api.ServiceSpec_Replicated{
   495  					Replicated: &api.ReplicatedService{
   496  						Replicas: 2,
   497  					},
   498  				},
   499  				Task: api.TaskSpec{
   500  					Runtime: &api.TaskSpec_Container{
   501  						Container: &api.ContainerSpec{},
   502  					},
   503  					Restart: &api.RestartPolicy{
   504  						Condition:   api.RestartOnAny,
   505  						Delay:       gogotypes.DurationProto(100 * time.Millisecond),
   506  						MaxAttempts: 1,
   507  					},
   508  				},
   509  			},
   510  			SpecVersion: &api.Version{
   511  				Index: 1,
   512  			},
   513  		}
   514  		assert.NoError(t, store.CreateService(tx, j1))
   515  		return nil
   516  	})
   517  	assert.NoError(t, err)
   518  
   519  	// Start the orchestrator.
   520  	go func() {
   521  		assert.NoError(t, orchestrator.Run(ctx))
   522  	}()
   523  
   524  	failTask := func(task *api.Task, expectRestart bool) {
   525  		task = task.Copy()
   526  		task.Status = api.TaskStatus{State: api.TaskStateFailed, Timestamp: ptypes.MustTimestampProto(time.Now())}
   527  		err = s.Update(func(tx store.Tx) error {
   528  			assert.NoError(t, store.UpdateTask(tx, task))
   529  			return nil
   530  		})
   531  		assert.NoError(t, err)
   532  		testutils.Expect(t, watch, api.EventUpdateTask{})
   533  		task = testutils.WatchShutdownTask(t, watch)
   534  		if expectRestart {
   535  			createdTask := testutils.WatchTaskCreate(t, watch)
   536  			assert.Equal(t, createdTask.Status.State, api.TaskStateNew)
   537  			assert.Equal(t, createdTask.DesiredState, api.TaskStateReady)
   538  			assert.Equal(t, createdTask.ServiceAnnotations.Name, "name1")
   539  		}
   540  		err = s.Update(func(tx store.Tx) error {
   541  			task := task.Copy()
   542  			task.Status.State = api.TaskStateShutdown
   543  			assert.NoError(t, store.UpdateTask(tx, task))
   544  			return nil
   545  		})
   546  		assert.NoError(t, err)
   547  		testutils.Expect(t, watch, api.EventUpdateTask{})
   548  	}
   549  
   550  	testRestart := func(serviceUpdated bool) {
   551  		observedTask1 := testutils.WatchTaskCreate(t, watch)
   552  		assert.Equal(t, observedTask1.Status.State, api.TaskStateNew)
   553  		assert.Equal(t, observedTask1.ServiceAnnotations.Name, "name1")
   554  
   555  		if serviceUpdated {
   556  			runnableTask := testutils.WatchTaskUpdate(t, watch)
   557  			assert.Equal(t, observedTask1.ID, runnableTask.ID)
   558  			assert.Equal(t, api.TaskStateRunning, runnableTask.DesiredState)
   559  			err = s.Update(func(tx store.Tx) error {
   560  				task := runnableTask.Copy()
   561  				task.Status.State = api.TaskStateRunning
   562  				assert.NoError(t, store.UpdateTask(tx, task))
   563  				return nil
   564  			})
   565  			assert.NoError(t, err)
   566  
   567  			testutils.Expect(t, watch, api.EventUpdateTask{})
   568  		}
   569  
   570  		observedTask2 := testutils.WatchTaskCreate(t, watch)
   571  		assert.Equal(t, observedTask2.Status.State, api.TaskStateNew)
   572  		assert.Equal(t, observedTask2.ServiceAnnotations.Name, "name1")
   573  
   574  		if serviceUpdated {
   575  			testutils.Expect(t, watch, api.EventUpdateTask{})
   576  		}
   577  
   578  		// Fail the first task. Confirm that it gets restarted.
   579  		before := time.Now()
   580  		failTask(observedTask1, true)
   581  
   582  		observedTask4 := testutils.WatchTaskUpdate(t, watch)
   583  		after := time.Now()
   584  
   585  		// At least 100 ms should have elapsed. Only check the lower bound,
   586  		// because the system may be slow and it could have taken longer.
   587  		if after.Sub(before) < 100*time.Millisecond {
   588  			t.Fatal("restart delay should have elapsed")
   589  		}
   590  
   591  		assert.Equal(t, observedTask4.Status.State, api.TaskStateNew)
   592  		assert.Equal(t, observedTask4.DesiredState, api.TaskStateRunning)
   593  		assert.Equal(t, observedTask4.ServiceAnnotations.Name, "name1")
   594  
   595  		// Fail the second task. Confirm that it gets restarted.
   596  		failTask(observedTask2, true)
   597  
   598  		observedTask6 := testutils.WatchTaskUpdate(t, watch) // task gets started after a delay
   599  		assert.Equal(t, observedTask6.Status.State, api.TaskStateNew)
   600  		assert.Equal(t, observedTask6.DesiredState, api.TaskStateRunning)
   601  		assert.Equal(t, observedTask6.ServiceAnnotations.Name, "name1")
   602  
   603  		// Fail the first instance again. It should not be restarted.
   604  		failTask(observedTask4, false)
   605  
   606  		select {
   607  		case <-watch:
   608  			t.Fatal("got unexpected event")
   609  		case <-time.After(200 * time.Millisecond):
   610  		}
   611  
   612  		// Fail the second instance again. It should not be restarted.
   613  		failTask(observedTask6, false)
   614  
   615  		select {
   616  		case <-watch:
   617  			t.Fatal("got unexpected event")
   618  		case <-time.After(200 * time.Millisecond):
   619  		}
   620  	}
   621  
   622  	testRestart(false)
   623  
   624  	// Update the service spec
   625  	err = s.Update(func(tx store.Tx) error {
   626  		s := store.GetService(tx, "id1")
   627  		require.NotNil(t, s)
   628  		s.Spec.Task.GetContainer().Image = "newimage"
   629  		s.SpecVersion.Index = 2
   630  		assert.NoError(t, store.UpdateService(tx, s))
   631  		return nil
   632  	})
   633  	assert.NoError(t, err)
   634  
   635  	testRestart(true)
   636  }
   637  
   638  func TestOrchestratorRestartWindow(t *testing.T) {
   639  	t.Parallel()
   640  
   641  	ctx := context.Background()
   642  	s := store.NewMemoryStore(nil)
   643  	assert.NotNil(t, s)
   644  	defer s.Close()
   645  
   646  	orchestrator := NewReplicatedOrchestrator(s)
   647  	defer orchestrator.Stop()
   648  
   649  	watch, cancel := state.Watch(s.WatchQueue() /*api.EventCreateTask{}, api.EventUpdateTask{}*/)
   650  	defer cancel()
   651  
   652  	// Create a service with two instances specified before the orchestrator is
   653  	// started. This should result in two tasks when the orchestrator
   654  	// starts up.
   655  	err := s.Update(func(tx store.Tx) error {
   656  		j1 := &api.Service{
   657  			ID: "id1",
   658  			Spec: api.ServiceSpec{
   659  				Annotations: api.Annotations{
   660  					Name: "name1",
   661  				},
   662  				Mode: &api.ServiceSpec_Replicated{
   663  					Replicated: &api.ReplicatedService{
   664  						Replicas: 2,
   665  					},
   666  				},
   667  				Task: api.TaskSpec{
   668  					Restart: &api.RestartPolicy{
   669  						Condition:   api.RestartOnAny,
   670  						Delay:       gogotypes.DurationProto(100 * time.Millisecond),
   671  						MaxAttempts: 1,
   672  						Window:      gogotypes.DurationProto(500 * time.Millisecond),
   673  					},
   674  				},
   675  			},
   676  		}
   677  		assert.NoError(t, store.CreateService(tx, j1))
   678  		return nil
   679  	})
   680  	assert.NoError(t, err)
   681  
   682  	// Start the orchestrator.
   683  	go func() {
   684  		assert.NoError(t, orchestrator.Run(ctx))
   685  	}()
   686  
   687  	observedTask1 := testutils.WatchTaskCreate(t, watch)
   688  	assert.Equal(t, observedTask1.Status.State, api.TaskStateNew)
   689  	assert.Equal(t, observedTask1.ServiceAnnotations.Name, "name1")
   690  
   691  	observedTask2 := testutils.WatchTaskCreate(t, watch)
   692  	assert.Equal(t, observedTask2.Status.State, api.TaskStateNew)
   693  	assert.Equal(t, observedTask2.ServiceAnnotations.Name, "name1")
   694  
   695  	// Fail the first task. Confirm that it gets restarted.
   696  	updatedTask1 := observedTask1.Copy()
   697  	updatedTask1.Status = api.TaskStatus{State: api.TaskStateFailed, Timestamp: ptypes.MustTimestampProto(time.Now())}
   698  	before := time.Now()
   699  	err = s.Update(func(tx store.Tx) error {
   700  		assert.NoError(t, store.UpdateTask(tx, updatedTask1))
   701  		return nil
   702  	})
   703  	assert.NoError(t, err)
   704  	testutils.Expect(t, watch, state.EventCommit{})
   705  	testutils.Expect(t, watch, api.EventUpdateTask{})
   706  	testutils.Expect(t, watch, state.EventCommit{})
   707  	testutils.Expect(t, watch, api.EventUpdateTask{})
   708  
   709  	observedTask3 := testutils.WatchTaskCreate(t, watch)
   710  	testutils.Expect(t, watch, state.EventCommit{})
   711  	assert.Equal(t, observedTask3.Status.State, api.TaskStateNew)
   712  	assert.Equal(t, observedTask3.DesiredState, api.TaskStateReady)
   713  	assert.Equal(t, observedTask3.ServiceAnnotations.Name, "name1")
   714  
   715  	observedTask4 := testutils.WatchTaskUpdate(t, watch)
   716  	after := time.Now()
   717  
   718  	// At least 100 ms should have elapsed. Only check the lower bound,
   719  	// because the system may be slow and it could have taken longer.
   720  	if after.Sub(before) < 100*time.Millisecond {
   721  		t.Fatal("restart delay should have elapsed")
   722  	}
   723  
   724  	assert.Equal(t, observedTask4.Status.State, api.TaskStateNew)
   725  	assert.Equal(t, observedTask4.DesiredState, api.TaskStateRunning)
   726  	assert.Equal(t, observedTask4.ServiceAnnotations.Name, "name1")
   727  
   728  	// Fail the second task. Confirm that it gets restarted.
   729  	updatedTask2 := observedTask2.Copy()
   730  	updatedTask2.Status = api.TaskStatus{State: api.TaskStateFailed, Timestamp: ptypes.MustTimestampProto(time.Now())}
   731  	err = s.Update(func(tx store.Tx) error {
   732  		assert.NoError(t, store.UpdateTask(tx, updatedTask2))
   733  		return nil
   734  	})
   735  	assert.NoError(t, err)
   736  	testutils.Expect(t, watch, state.EventCommit{})
   737  	testutils.Expect(t, watch, api.EventUpdateTask{})
   738  	testutils.Expect(t, watch, state.EventCommit{})
   739  	testutils.Expect(t, watch, api.EventUpdateTask{})
   740  
   741  	observedTask5 := testutils.WatchTaskCreate(t, watch)
   742  	testutils.Expect(t, watch, state.EventCommit{})
   743  	assert.Equal(t, observedTask5.Status.State, api.TaskStateNew)
   744  	assert.Equal(t, observedTask5.DesiredState, api.TaskStateReady)
   745  	assert.Equal(t, observedTask5.ServiceAnnotations.Name, "name1")
   746  
   747  	observedTask6 := testutils.WatchTaskUpdate(t, watch) // task gets started after a delay
   748  	testutils.Expect(t, watch, state.EventCommit{})
   749  	assert.Equal(t, observedTask6.Status.State, api.TaskStateNew)
   750  	assert.Equal(t, observedTask6.DesiredState, api.TaskStateRunning)
   751  	assert.Equal(t, observedTask6.ServiceAnnotations.Name, "name1")
   752  
   753  	// Fail the first instance again. It should not be restarted.
   754  	updatedTask1 = observedTask3.Copy()
   755  	updatedTask1.Status = api.TaskStatus{State: api.TaskStateFailed, Timestamp: ptypes.MustTimestampProto(time.Now())}
   756  	err = s.Update(func(tx store.Tx) error {
   757  		assert.NoError(t, store.UpdateTask(tx, updatedTask1))
   758  		return nil
   759  	})
   760  	assert.NoError(t, err)
   761  	testutils.Expect(t, watch, api.EventUpdateTask{})
   762  	testutils.Expect(t, watch, state.EventCommit{})
   763  	testutils.Expect(t, watch, api.EventUpdateTask{})
   764  	testutils.Expect(t, watch, state.EventCommit{})
   765  
   766  	select {
   767  	case <-watch:
   768  		t.Fatal("got unexpected event")
   769  	case <-time.After(200 * time.Millisecond):
   770  	}
   771  
   772  	time.Sleep(time.Second)
   773  
   774  	// Fail the second instance again. It should get restarted because
   775  	// enough time has elapsed since the last restarts.
   776  	updatedTask2 = observedTask5.Copy()
   777  	updatedTask2.Status = api.TaskStatus{State: api.TaskStateFailed, Timestamp: ptypes.MustTimestampProto(time.Now())}
   778  	before = time.Now()
   779  	err = s.Update(func(tx store.Tx) error {
   780  		assert.NoError(t, store.UpdateTask(tx, updatedTask2))
   781  		return nil
   782  	})
   783  	assert.NoError(t, err)
   784  	testutils.Expect(t, watch, api.EventUpdateTask{})
   785  	testutils.Expect(t, watch, state.EventCommit{})
   786  	testutils.Expect(t, watch, api.EventUpdateTask{})
   787  
   788  	observedTask7 := testutils.WatchTaskCreate(t, watch)
   789  	testutils.Expect(t, watch, state.EventCommit{})
   790  	assert.Equal(t, observedTask7.Status.State, api.TaskStateNew)
   791  	assert.Equal(t, observedTask7.DesiredState, api.TaskStateReady)
   792  
   793  	observedTask8 := testutils.WatchTaskUpdate(t, watch)
   794  	after = time.Now()
   795  
   796  	// At least 100 ms should have elapsed. Only check the lower bound,
   797  	// because the system may be slow and it could have taken longer.
   798  	if after.Sub(before) < 100*time.Millisecond {
   799  		t.Fatal("restart delay should have elapsed")
   800  	}
   801  
   802  	assert.Equal(t, observedTask8.Status.State, api.TaskStateNew)
   803  	assert.Equal(t, observedTask8.DesiredState, api.TaskStateRunning)
   804  	assert.Equal(t, observedTask8.ServiceAnnotations.Name, "name1")
   805  }