github.com/kaisenlinux/docker.io@v0.0.0-20230510090727-ea55db55fac7/swarmkit/manager/orchestrator/jobs/replicated/reconciler_test.go

package replicated

import (
	. "github.com/onsi/ginkgo"
	. "github.com/onsi/gomega"
	. "github.com/onsi/gomega/types"

	"context"
	"fmt"

	"github.com/docker/swarmkit/api"
	"github.com/docker/swarmkit/manager/orchestrator"
	"github.com/docker/swarmkit/manager/state/store"
)

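// fakeRestartSupervisor is a minimal stand-in for the reconciler's restart
// supervisor. It records the ID of every task passed to Restart so tests can
// assert which tasks the reconciler asked to have restarted.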
type fakeRestartSupervisor struct {
	tasks []string
}

func (f *fakeRestartSupervisor) Restart(_ context.Context, _ store.Tx, _ *api.Cluster, _ *api.Service, task api.Task) error {
	f.tasks = append(f.tasks, task.ID)
	return nil
}

// uniqueSlotsMatcher is used to verify that a set of tasks all have unique,
// non-overlapping slot numbers.
type uniqueSlotsMatcher struct {
	duplicatedSlot uint64
}

// Match uses a pointer receiver so that the duplicated slot it records is
// still available when FailureMessage is called on the same matcher.
func (u *uniqueSlotsMatcher) Match(actual interface{}) (bool, error) {
	tasks, ok := actual.([]*api.Task)
	if !ok {
		return false, fmt.Errorf("actual is not []*api.Task")
	}

	slots := map[uint64]bool{}
	for _, task := range tasks {
		if _, ok := slots[task.Slot]; ok {
			u.duplicatedSlot = task.Slot
			return false, nil
		}
		slots[task.Slot] = true
	}
	return true, nil
}

func (u *uniqueSlotsMatcher) FailureMessage(_ interface{}) string {
	return fmt.Sprintf("expected tasks to have unique slots, but slot %v is duplicated", u.duplicatedSlot)
}

func (u *uniqueSlotsMatcher) NegatedFailureMessage(_ interface{}) string {
	return "expected tasks to have duplicate slots"
}

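// HaveUniqueSlots returns a GomegaMatcher that succeeds when every task in a
// []*api.Task has a distinct slot number.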
func HaveUniqueSlots() GomegaMatcher {
	return &uniqueSlotsMatcher{}
}

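// AllTasks returns every task currently in the store. Any error from
// FindTasks is ignored; callers only need the resulting slice.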
func AllTasks(s *store.MemoryStore) []*api.Task {
	var tasks []*api.Task
	s.View(func(tx store.ReadTx) {
		tasks, _ = store.FindTasks(tx, store.All)
	})
	return tasks
}

var _ = Describe("Replicated Job reconciler", func() {
	var (
		r       *Reconciler
		s       *store.MemoryStore
		cluster *api.Cluster
		f       *fakeRestartSupervisor
	)

	BeforeEach(func() {
		s = store.NewMemoryStore(nil)
		Expect(s).ToNot(BeNil())
		f = &fakeRestartSupervisor{}

		r = &Reconciler{
			store:   s,
			restart: f,
		}
	})

	AfterEach(func() {
		s.Close()
	})

	Describe("ReconcileService", func() {
		var (
			serviceID        string
			service          *api.Service
			maxConcurrent    uint64
			totalCompletions uint64

			reconcileErr error
		)

		BeforeEach(func() {
			serviceID = "someService"
			maxConcurrent = 10
			totalCompletions = 30
			service = &api.Service{
				ID: serviceID,
				Spec: api.ServiceSpec{
					Mode: &api.ServiceSpec_ReplicatedJob{
						ReplicatedJob: &api.ReplicatedJob{
							MaxConcurrent:    maxConcurrent,
							TotalCompletions: totalCompletions,
						},
					},
				},
				JobStatus: &api.JobStatus{
					JobIteration: api.Version{Index: 0},
				},
			}

			cluster = &api.Cluster{
				ID: "someCluster",
				Spec: api.ClusterSpec{
					Annotations: api.Annotations{
						Name: "someCluster",
					},
					TaskDefaults: api.TaskDefaults{
						LogDriver: &api.Driver{
							Name: "someDriver",
						},
					},
				},
			}
		})

		When("a job has been updated", func() {
			var (
				tasks []*api.Task
			)
			// Before anything, create the job, reconcile the job, and let
			// tasks be created
			BeforeEach(func() {
				err := s.Update(func(tx store.Tx) error {
					if service != nil {
						if err := store.CreateService(tx, service); err != nil {
							return err
						}
					}

					if cluster != nil {
						return store.CreateCluster(tx, cluster)
					}
					return nil
				})
				Expect(err).ToNot(HaveOccurred())

				err = r.ReconcileService(serviceID)
				Expect(err).ToNot(HaveOccurred())

				// verify there are maxConcurrent tasks
				var tasks []*api.Task
				s.View(func(tx store.ReadTx) {
					tasks, err = store.FindTasks(tx, store.ByServiceID(serviceID))
				})
				Expect(err).ToNot(HaveOccurred())
				Expect(tasks).To(HaveLen(int(maxConcurrent)))
			})

			JustBeforeEach(func() {
				err := s.Update(func(tx store.Tx) error {
					// get the service, and bump ForceUpdate and the job
					// iteration
					service := store.GetService(tx, serviceID)
					service.Spec.Task.ForceUpdate++
					service.JobStatus.JobIteration.Index++
					// we don't actually look at LastExecution in the
					// replicated reconciler so we don't bother to set it here.
					return store.UpdateService(tx, service)
				})
				Expect(err).ToNot(HaveOccurred())
				err = r.ReconcileService(serviceID)
				Expect(err).ToNot(HaveOccurred())

				// fetch the tasks before we get to the test case itself,
				// because we do this in all cases.
				s.View(func(tx store.ReadTx) {
					tasks, err = store.FindTasks(tx, store.ByServiceID(serviceID))
				})
				Expect(err).ToNot(HaveOccurred())
			})

			It("should remove all tasks belonging to the previous service iteration", func() {
				count := 0
				for _, task := range tasks {
					Expect(task.JobIteration).ToNot(BeNil())
					// first iteration of the job should have index 0
					if task.JobIteration.Index == 0 {
						Expect(task.DesiredState).To(Equal(api.TaskStateRemove))
						count++
					}
				}

				Expect(count).To(Equal(int(maxConcurrent)))
			})

			It("should create new tasks with the new JobIteration", func() {
				count := 0
				for _, task := range tasks {
					Expect(task.JobIteration).ToNot(BeNil())
					if task.JobIteration.Index == 1 {
						Expect(task.DesiredState).To(Equal(api.TaskStateCompleted))
						count++
					}
				}

				Expect(count).To(Equal(int(maxConcurrent)))
			})
		})

		When("reconciling a service", func() {
			JustBeforeEach(func() {
				err := s.Update(func(tx store.Tx) error {
					if service != nil {
						if err := store.CreateService(tx, service); err != nil {
							return err
						}
					}
					if cluster != nil {
						return store.CreateCluster(tx, cluster)
					}
					return nil
				})
				Expect(err).ToNot(HaveOccurred())

				reconcileErr = r.ReconcileService(serviceID)
			})

			When("the job has no tasks yet created", func() {
				It("should create MaxConcurrent number of tasks", func() {
					tasks := AllTasks(s)
					// casting maxConcurrent to an int, which we know is safe
					// because we set its value ourselves.
					Expect(tasks).To(HaveLen(int(maxConcurrent)))

					for _, task := range tasks {
						Expect(task.ServiceID).To(Equal(service.ID))
						Expect(task.JobIteration).ToNot(BeNil())
						Expect(task.JobIteration.Index).To(Equal(uint64(0)))
					}
				})

				It("should assign each task to a unique slot", func() {
					tasks := AllTasks(s)

					Expect(tasks).To(HaveUniqueSlots())
				})

				It("should return no error", func() {
					Expect(reconcileErr).ToNot(HaveOccurred())
				})

				It("should set the desired state of each task to COMPLETE", func() {
					tasks := AllTasks(s)
					for _, task := range tasks {
						Expect(task.DesiredState).To(Equal(api.TaskStateCompleted))
					}
				})

				It("should use the cluster to set the default log driver", func() {
					tasks := AllTasks(s)
					Expect(tasks).ToNot(BeEmpty())

					Expect(tasks[0].LogDriver).To(Equal(cluster.Spec.TaskDefaults.LogDriver))
				})
			})

			When("the job has some tasks already in progress", func() {
				BeforeEach(func() {
					err := s.Update(func(tx store.Tx) error {
						// create 6 tasks before we reconcile the service.
						// also, to fully exercise the slot picking code, we'll
						// assign these tasks to every other slot
						for i := uint64(0); i < 12; i += 2 {
							task := orchestrator.NewTask(cluster, service, i, "")
							task.JobIteration = &api.Version{}
							task.DesiredState = api.TaskStateCompleted

							if err := store.CreateTask(tx, task); err != nil {
								return err
							}
						}

						return nil
					})
					Expect(err).ToNot(HaveOccurred())
				})

				It("should create only the number of tasks needed to reach MaxConcurrent", func() {
					tasks := AllTasks(s)

					Expect(tasks).To(HaveLen(int(maxConcurrent)))
				})

				It("should assign each new task to a unique slot", func() {
					tasks := AllTasks(s)
					Expect(tasks).To(HaveUniqueSlots())
				})
			})

			When("some running tasks are desired to be shutdown", func() {
				BeforeEach(func() {
					err := s.Update(func(tx store.Tx) error {
						for i := uint64(0); i < maxConcurrent; i++ {
							task := orchestrator.NewTask(cluster, service, i, "")
							task.JobIteration = &api.Version{}
							task.DesiredState = api.TaskStateShutdown

							if err := store.CreateTask(tx, task); err != nil {
								return err
							}
						}
						return nil
					})
					Expect(err).ToNot(HaveOccurred())
				})

				It("should ignore tasks shutting down when creating new ones", func() {
					tasks := AllTasks(s)
					Expect(tasks).To(HaveLen(int(maxConcurrent) * 2))
				})

				It("should reuse slot numbers", func() {
					tasks := AllTasks(s)
					Expect(tasks).ToNot(HaveUniqueSlots())
				})
			})

			When("a job has some failing and some completed tasks", func() {
				var (
					desiredNewTasks uint64
					failingTasks    []string
				)

				BeforeEach(func() {
					failingTasks = []string{}
					err := s.Update(func(tx store.Tx) error {
						// first, create a set of tasks with slots
						// [0, maxConcurrent-1] that have all succeeded
						for i := uint64(0); i < maxConcurrent; i++ {
							task := orchestrator.NewTask(cluster, service, i, "")
							task.JobIteration = &api.Version{}
							task.DesiredState = api.TaskStateCompleted
							task.Status.State = api.TaskStateCompleted
							if err := store.CreateTask(tx, task); err != nil {
								return err
							}
						}

						// next, create half of maxConcurrent tasks, all
						// failing.
						startSlot := maxConcurrent
						endSlot := startSlot + (maxConcurrent / 2)
						for i := startSlot; i < endSlot; i++ {
							task := orchestrator.NewTask(cluster, service, i, "")
							task.JobIteration = &api.Version{}
							task.DesiredState = api.TaskStateCompleted
							task.Status.State = api.TaskStateFailed
							failingTasks = append(failingTasks, task.ID)
							if err := store.CreateTask(tx, task); err != nil {
								return err
							}
						}

						// it might seem simpler to just use maxConcurrent / 2
						// here, but computing the count this way avoids any
						// issues with the parity of maxConcurrent that might
						// otherwise arise from integer division. we want
						// enough tasks to get us up to maxConcurrent,
						// including the ones already extant and failing.
						desiredNewTasks = maxConcurrent - (maxConcurrent / 2)
						return nil
					})
					Expect(err).ToNot(HaveOccurred())
				})

				It("should not reuse slot numbers", func() {
					tasks := AllTasks(s)
					Expect(tasks).To(HaveUniqueSlots())
				})

				It("should not replace the failing tasks", func() {
					s.View(func(tx store.ReadTx) {
						// Get all tasks that are in desired state Completed
						tasks, err := store.FindTasks(tx, store.ByDesiredState(api.TaskStateCompleted))
						Expect(err).ToNot(HaveOccurred())

						// count the tasks that are currently active. use type
						// uint64 to make comparison with maxConcurrent easier.
						activeTasks := uint64(0)
						for _, task := range tasks {
							if task.Status.State != api.TaskStateCompleted {
								activeTasks++
							}
						}

						// Assert that there are maxConcurrent of these tasks
						Expect(activeTasks).To(Equal(maxConcurrent))

						// Now, assert that there are 1/2 maxConcurrent New
						// tasks. This shouldn't be a problem, but while we're
						// here we might as well do this sanity check
						var newTasks uint64
						for _, task := range tasks {
							if task.Status.State == api.TaskStateNew {
								newTasks++
							}
						}
						Expect(newTasks).To(Equal(desiredNewTasks))
					})
				})

				It("should call Restart for each failing task", func() {
					Expect(f.tasks).To(ConsistOf(failingTasks))
				})
			})

			When("a job is almost complete, and doesn't need MaxConcurrent tasks running", func() {
				BeforeEach(func() {
					// we need to create a rather large number of tasks, all in
					// COMPLETE state.
					err := s.Update(func(tx store.Tx) error {
						for i := uint64(0); i < totalCompletions-10; i++ {
							// each task will get a unique slot
							task := orchestrator.NewTask(nil, service, i, "")
							task.JobIteration = &api.Version{}
							task.Status.State = api.TaskStateCompleted
							task.DesiredState = api.TaskStateCompleted

							if err := store.CreateTask(tx, task); err != nil {
								return err
							}
						}
						return nil
					})

					Expect(err).ToNot(HaveOccurred())
				})

				It("should create no more than the tasks needed to reach TotalCompletions", func() {
					var newTasks []*api.Task
					s.View(func(tx store.ReadTx) {
						newTasks, _ = store.FindTasks(tx, store.ByTaskState(api.TaskStateNew))
					})

					Expect(newTasks).To(HaveLen(10))
				})

				It("should give each new task a unique slot", func() {
					tasks := AllTasks(s)

					Expect(tasks).To(HaveUniqueSlots())
				})
			})

			When("the service does not exist", func() {
				BeforeEach(func() {
					service = nil
				})

				It("should return no error", func() {
					Expect(reconcileErr).ToNot(HaveOccurred())
				})

				It("should create no tasks", func() {
					s.View(func(tx store.ReadTx) {
						tasks, err := store.FindTasks(tx, store.All)
						Expect(err).ToNot(HaveOccurred())
						Expect(tasks).To(BeEmpty())
					})
				})
			})
		})

		It("should return an underflow error if there are more running tasks than TotalCompletions", func() {
			// this is an error condition which should not happen in real life,
			// but I want to make sure that we can't accidentally start
			// creating nearly the maximum 64-bit unsigned int number of tasks.
			maxConcurrent := uint64(10)
			totalCompletions := uint64(20)
			err := s.Update(func(tx store.Tx) error {
				service := &api.Service{
					ID: "someService",
					Spec: api.ServiceSpec{
						Mode: &api.ServiceSpec_ReplicatedJob{
							ReplicatedJob: &api.ReplicatedJob{
								MaxConcurrent:    maxConcurrent,
								TotalCompletions: totalCompletions,
							},
						},
					},
				}
				if err := store.CreateService(tx, service); err != nil {
					return err
				}

				for i := uint64(0); i < totalCompletions+10; i++ {
					task := orchestrator.NewTask(nil, service, 0, "")
					task.JobIteration = &api.Version{}
					task.DesiredState = api.TaskStateCompleted

					if err := store.CreateTask(tx, task); err != nil {
						return err
					}
				}
				return nil
			})
			Expect(err).ToNot(HaveOccurred())

			reconcileErr := r.ReconcileService("someService")
			Expect(reconcileErr).To(HaveOccurred())
			Expect(reconcileErr.Error()).To(ContainSubstring("underflow"))
		})
	})
})