github.com/kaisenlinux/docker.io@v0.0.0-20230510090727-ea55db55fac7/swarmkit/manager/orchestrator/replicated/update_test.go (about)

     1  package replicated
     2  
     3  import (
     4  	"context"
     5  	"sync"
     6  	"testing"
     7  	"time"
     8  
     9  	"github.com/docker/go-events"
    10  	"github.com/docker/swarmkit/api"
    11  	"github.com/docker/swarmkit/manager/orchestrator/testutils"
    12  	"github.com/docker/swarmkit/manager/state"
    13  	"github.com/docker/swarmkit/manager/state/store"
    14  	gogotypes "github.com/gogo/protobuf/types"
    15  	"github.com/stretchr/testify/assert"
    16  	"github.com/stretchr/testify/require"
    17  )
    18  
    19  func TestUpdaterRollback(t *testing.T) {
    20  	t.Run("pause/monitor_set/spec_version_unset", func(t *testing.T) { testUpdaterRollback(t, api.UpdateConfig_PAUSE, true, false) })
    21  	t.Run("pause/monitor_set/spec_version_set", func(t *testing.T) { testUpdaterRollback(t, api.UpdateConfig_PAUSE, true, true) })
    22  	// skipped, see #2137
    23  	// t.Run("pause/monitor_unset/spec_version_unset", func(t *testing.T) { testUpdaterRollback(t, api.UpdateConfig_PAUSE, false, false) })
    24  	// t.Run("pause/monitor_unset/spec_version_set", func(t *testing.T) { testUpdaterRollback(t, api.UpdateConfig_PAUSE, false, true) })
    25  	t.Run("continue/spec_version_unset", func(t *testing.T) { testUpdaterRollback(t, api.UpdateConfig_CONTINUE, true, false) })
    26  	t.Run("continue/spec_version_set", func(t *testing.T) { testUpdaterRollback(t, api.UpdateConfig_CONTINUE, true, true) })
    27  }
    28  
    29  func testUpdaterRollback(t *testing.T, rollbackFailureAction api.UpdateConfig_FailureAction, setMonitor bool, useSpecVersion bool) {
    30  	// this test should complete within 30 seconds. if not, bail out
    31  	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
    32  	defer cancel()
    33  
    34  	s := store.NewMemoryStore(nil)
    35  	assert.NotNil(t, s)
    36  	defer s.Close()
    37  
    38  	orchestrator := NewReplicatedOrchestrator(s)
    39  
    40  	// These variables will be used to signal that The Fail Loop should start
    41  	// failing these tasks. Once they're closed, The Failing Can Begin.
    42  	var (
    43  		failMu     sync.Mutex
    44  		failImage1 bool
    45  	)
    46  
    47  	// create a watch for task creates, which we will use to verify that the
    48  	// updater works correctly.
    49  	watchCreate, cancelCreate := state.Watch(s.WatchQueue(), api.EventCreateTask{})
    50  	defer cancelCreate()
    51  
    52  	watchServiceUpdate, cancelServiceUpdate := state.Watch(s.WatchQueue(), api.EventUpdateService{})
    53  	defer cancelServiceUpdate()
    54  
    55  	// Fail new tasks the updater tries to run
    56  	watchUpdate, cancelUpdate := state.Watch(s.WatchQueue(), api.EventUpdateTask{})
    57  	defer cancelUpdate()
    58  
    59  	// We're gonna call this big chunk here "The Fail Loop". its job is to put
    60  	// tasks into a Failed state in certain conditions.
    61  	testutils.EnsureRuns(func() {
    62  		failedLast := false
    63  		// typical go pattern: infinite for loop in a goroutine, exits on
    64  		// ctx.Done
    65  		for {
    66  			var e events.Event
    67  			select {
    68  			case e = <-watchUpdate:
    69  			case <-ctx.Done():
    70  				return
    71  			}
    72  			task := e.(api.EventUpdateTask).Task
    73  			if task.DesiredState == task.Status.State {
    74  				continue
    75  			}
    76  			// This used to have a 3rd clause,
    77  			// "&& task.Status.State != api.TaskStateRunning"
    78  			// however, this is unneeded. If DesiredState is Running, then
    79  			// actual state cannot be Running, because that would get caught
    80  			// in the condition about (DesiredState == State)
    81  			if task.DesiredState == api.TaskStateRunning && task.Status.State != api.TaskStateFailed {
    82  				err := s.Update(func(tx store.Tx) error {
    83  					task = store.GetTask(tx, task.ID)
    84  					// lock mutex governing access to failImage1.
    85  					failMu.Lock()
    86  					defer failMu.Unlock()
    87  					// we should start failing tasks with image1 only after1
    88  					if task.Spec.GetContainer().Image == "image1" && failImage1 {
    89  						// only fail the task if we can read from failImage1
    90  						// (which will only be true if it's closed)
    91  						task.Status.State = api.TaskStateFailed
    92  						failedLast = true
    93  					} else if task.Spec.GetContainer().Image == "image2" && !failedLast {
    94  						// Never fail two image2 tasks in a row, so there's a mix of
    95  						// failed and successful tasks for the rollback.
    96  						task.Status.State = api.TaskStateFailed
    97  						failedLast = true
    98  					} else {
    99  						task.Status.State = task.DesiredState
   100  						failedLast = false
   101  					}
   102  					return store.UpdateTask(tx, task)
   103  				})
   104  				assert.NoError(t, err)
   105  			} else if task.DesiredState > api.TaskStateRunning {
   106  				err := s.Update(func(tx store.Tx) error {
   107  					task = store.GetTask(tx, task.ID)
   108  					task.Status.State = task.DesiredState
   109  					return store.UpdateTask(tx, task)
   110  				})
   111  				assert.NoError(t, err)
   112  			}
   113  		}
   114  	})
   115  
   116  	// Create a service with four replicas specified before the orchestrator
   117  	// is started. This should result in two tasks when the orchestrator
   118  	// starts up.
   119  	err := s.Update(func(tx store.Tx) error {
   120  		s1 := &api.Service{
   121  			ID: "id1",
   122  			Spec: api.ServiceSpec{
   123  				Annotations: api.Annotations{
   124  					Name: "name1",
   125  				},
   126  				Task: api.TaskSpec{
   127  					Runtime: &api.TaskSpec_Container{
   128  						Container: &api.ContainerSpec{
   129  							Image: "image1",
   130  						},
   131  					},
   132  					Restart: &api.RestartPolicy{
   133  						Condition: api.RestartOnNone,
   134  					},
   135  				},
   136  				Mode: &api.ServiceSpec_Replicated{
   137  					Replicated: &api.ReplicatedService{
   138  						Replicas: 4,
   139  					},
   140  				},
   141  				Update: &api.UpdateConfig{
   142  					FailureAction:   api.UpdateConfig_ROLLBACK,
   143  					Parallelism:     1,
   144  					Delay:           10 * time.Millisecond,
   145  					MaxFailureRatio: 0.4,
   146  				},
   147  				Rollback: &api.UpdateConfig{
   148  					FailureAction:   rollbackFailureAction,
   149  					Parallelism:     1,
   150  					Delay:           10 * time.Millisecond,
   151  					MaxFailureRatio: 0.4,
   152  				},
   153  			},
   154  		}
   155  
   156  		if setMonitor {
   157  			s1.Spec.Update.Monitor = gogotypes.DurationProto(500 * time.Millisecond)
   158  			s1.Spec.Rollback.Monitor = gogotypes.DurationProto(500 * time.Millisecond)
   159  		}
   160  		if useSpecVersion {
   161  			s1.SpecVersion = &api.Version{
   162  				Index: 1,
   163  			}
   164  		}
   165  
   166  		assert.NoError(t, store.CreateService(tx, s1))
   167  		return nil
   168  	})
   169  	assert.NoError(t, err)
   170  
   171  	// Start the orchestrator.
   172  	var orchestratorError error
   173  	orchestratorDone := testutils.EnsureRuns(func() {
   174  		orchestratorError = orchestrator.Run(ctx)
   175  	})
   176  
   177  	defer func() {
   178  		orchestrator.Stop()
   179  		select {
   180  		case <-ctx.Done():
   181  		case <-orchestratorDone:
   182  			assert.NoError(t, orchestratorError)
   183  		}
   184  	}()
   185  
   186  	observedTask := testutils.WatchTaskCreate(t, watchCreate)
   187  	assert.Equal(t, observedTask.Status.State, api.TaskStateNew)
   188  	assert.Equal(t, observedTask.Spec.GetContainer().Image, "image1")
   189  
   190  	observedTask = testutils.WatchTaskCreate(t, watchCreate)
   191  	assert.Equal(t, observedTask.Status.State, api.TaskStateNew)
   192  	assert.Equal(t, observedTask.Spec.GetContainer().Image, "image1")
   193  
   194  	observedTask = testutils.WatchTaskCreate(t, watchCreate)
   195  	assert.Equal(t, observedTask.Status.State, api.TaskStateNew)
   196  	assert.Equal(t, observedTask.Spec.GetContainer().Image, "image1")
   197  
   198  	observedTask = testutils.WatchTaskCreate(t, watchCreate)
   199  	assert.Equal(t, observedTask.Status.State, api.TaskStateNew)
   200  	assert.Equal(t, observedTask.Spec.GetContainer().Image, "image1")
   201  
   202  	// Start a rolling update
   203  	err = s.Update(func(tx store.Tx) error {
   204  		s1 := store.GetService(tx, "id1")
   205  		require.NotNil(t, s1)
   206  		s1.PreviousSpec = s1.Spec.Copy()
   207  		s1.PreviousSpecVersion = s1.SpecVersion.Copy()
   208  		s1.UpdateStatus = nil
   209  		s1.Spec.Task.GetContainer().Image = "image2"
   210  		if s1.SpecVersion != nil {
   211  			s1.SpecVersion.Index = 2
   212  		}
   213  		assert.NoError(t, store.UpdateService(tx, s1))
   214  		return nil
   215  	})
   216  	assert.NoError(t, err)
   217  
   218  	// Should see three tasks started, then a rollback
   219  
   220  	observedTask = testutils.WatchTaskCreate(t, watchCreate)
   221  	assert.Equal(t, observedTask.Status.State, api.TaskStateNew)
   222  	assert.Equal(t, observedTask.Spec.GetContainer().Image, "image2")
   223  
   224  	observedTask = testutils.WatchTaskCreate(t, watchCreate)
   225  	assert.Equal(t, observedTask.Status.State, api.TaskStateNew)
   226  	assert.Equal(t, observedTask.Spec.GetContainer().Image, "image2")
   227  
   228  	observedTask = testutils.WatchTaskCreate(t, watchCreate)
   229  	assert.Equal(t, observedTask.Status.State, api.TaskStateNew)
   230  	assert.Equal(t, observedTask.Spec.GetContainer().Image, "image2")
   231  
   232  	// Should get to the ROLLBACK_STARTED state
   233  	for {
   234  		var e events.Event
   235  		select {
   236  		case e = <-watchServiceUpdate:
   237  		case <-ctx.Done():
   238  			t.Error("test timed out before watchServiceUpdate provided an event")
   239  			return
   240  		}
   241  		if e.(api.EventUpdateService).Service.UpdateStatus == nil {
   242  			continue
   243  		}
   244  		if e.(api.EventUpdateService).Service.UpdateStatus.State == api.UpdateStatus_ROLLBACK_STARTED {
   245  			break
   246  		}
   247  	}
   248  
   249  	observedTask = testutils.WatchTaskCreate(t, watchCreate)
   250  	assert.Equal(t, observedTask.Status.State, api.TaskStateNew)
   251  	assert.Equal(t, observedTask.Spec.GetContainer().Image, "image1")
   252  
   253  	observedTask = testutils.WatchTaskCreate(t, watchCreate)
   254  	assert.Equal(t, observedTask.Status.State, api.TaskStateNew)
   255  	assert.Equal(t, observedTask.Spec.GetContainer().Image, "image1")
   256  
   257  	observedTask = testutils.WatchTaskCreate(t, watchCreate)
   258  	assert.Equal(t, observedTask.Status.State, api.TaskStateNew)
   259  	assert.Equal(t, observedTask.Spec.GetContainer().Image, "image1")
   260  
   261  	if !setMonitor {
   262  		// Exit early in this case, since it would take a long time for
   263  		// the service to reach the "*_COMPLETED" states.
   264  		return
   265  	}
   266  
   267  	// Should end up in ROLLBACK_COMPLETED state
   268  	for {
   269  		var e events.Event
   270  		select {
   271  		case e = <-watchServiceUpdate:
   272  			t.Log("service was updated")
   273  		case <-ctx.Done():
   274  			t.Error("test timed out before watchServiceUpdate provided an event")
   275  			return
   276  		}
   277  
   278  		if e.(api.EventUpdateService).Service.UpdateStatus.State == api.UpdateStatus_ROLLBACK_COMPLETED {
   279  			break
   280  		}
   281  	}
   282  
   283  	// Repeat the rolling update but this time fail the tasks that the
   284  	// rollback creates.
   285  	failMu.Lock()
   286  	failImage1 = true
   287  	failMu.Unlock()
   288  
   289  	err = s.Update(func(tx store.Tx) error {
   290  		s1 := store.GetService(tx, "id1")
   291  		require.NotNil(t, s1)
   292  		s1.PreviousSpec = s1.Spec.Copy()
   293  		s1.PreviousSpecVersion = s1.SpecVersion.Copy()
   294  		s1.UpdateStatus = nil
   295  		s1.Spec.Task.GetContainer().Image = "image2"
   296  		if s1.SpecVersion != nil {
   297  			s1.SpecVersion.Index = 2
   298  		}
   299  		assert.NoError(t, store.UpdateService(tx, s1))
   300  		return nil
   301  	})
   302  	assert.NoError(t, err)
   303  
   304  	// Should see three tasks started, then a rollback
   305  
   306  	observedTask = testutils.WatchTaskCreate(t, watchCreate)
   307  	assert.Equal(t, observedTask.Status.State, api.TaskStateNew)
   308  	assert.Equal(t, observedTask.Spec.GetContainer().Image, "image2")
   309  
   310  	observedTask = testutils.WatchTaskCreate(t, watchCreate)
   311  	assert.Equal(t, observedTask.Status.State, api.TaskStateNew)
   312  	assert.Equal(t, observedTask.Spec.GetContainer().Image, "image2")
   313  
   314  	observedTask = testutils.WatchTaskCreate(t, watchCreate)
   315  	assert.Equal(t, observedTask.Status.State, api.TaskStateNew)
   316  	assert.Equal(t, observedTask.Spec.GetContainer().Image, "image2")
   317  
   318  	// Should get to the ROLLBACK_STARTED state
   319  	for {
   320  		var e events.Event
   321  		select {
   322  		case e = <-watchServiceUpdate:
   323  		case <-ctx.Done():
   324  			t.Error("test timed out before watchServiceUpdate provided an event")
   325  			return
   326  		}
   327  		if e.(api.EventUpdateService).Service.UpdateStatus == nil {
   328  			continue
   329  		}
   330  		if e.(api.EventUpdateService).Service.UpdateStatus.State == api.UpdateStatus_ROLLBACK_STARTED {
   331  			break
   332  		}
   333  	}
   334  
   335  	observedTask = testutils.WatchTaskCreate(t, watchCreate)
   336  	assert.Equal(t, observedTask.Status.State, api.TaskStateNew)
   337  	assert.Equal(t, observedTask.Spec.GetContainer().Image, "image1")
   338  
   339  	observedTask = testutils.WatchTaskCreate(t, watchCreate)
   340  	assert.Equal(t, observedTask.Status.State, api.TaskStateNew)
   341  	assert.Equal(t, observedTask.Spec.GetContainer().Image, "image1")
   342  
   343  	observedTask = testutils.WatchTaskCreate(t, watchCreate)
   344  	assert.Equal(t, observedTask.Status.State, api.TaskStateNew)
   345  	assert.Equal(t, observedTask.Spec.GetContainer().Image, "image1")
   346  
   347  	switch rollbackFailureAction {
   348  	case api.UpdateConfig_PAUSE:
   349  		// Should end up in ROLLBACK_PAUSED state
   350  		for {
   351  			var e events.Event
   352  			select {
   353  			case e = <-watchServiceUpdate:
   354  			case <-ctx.Done():
   355  				t.Error("test timed out before watchServiceUpdate provided an event")
   356  				return
   357  			}
   358  			if e.(api.EventUpdateService).Service.UpdateStatus.State == api.UpdateStatus_ROLLBACK_PAUSED {
   359  				return
   360  			}
   361  		}
   362  	case api.UpdateConfig_CONTINUE:
   363  		// Should end up in ROLLBACK_COMPLETE state
   364  		for {
   365  			var e events.Event
   366  			select {
   367  			case e = <-watchServiceUpdate:
   368  			case <-ctx.Done():
   369  				t.Error("test timed out before watchServiceUpdate provided an event")
   370  				return
   371  			}
   372  			if e.(api.EventUpdateService).Service.UpdateStatus.State == api.UpdateStatus_ROLLBACK_COMPLETED {
   373  				return
   374  			}
   375  		}
   376  	}
   377  }