github.com/kaisenlinux/docker.io@v0.0.0-20230510090727-ea55db55fac7/swarmkit/manager/scheduler/scheduler_test.go (about)

     1  package scheduler
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"runtime"
     7  	"strconv"
     8  	"strings"
     9  	"testing"
    10  	"time"
    11  
    12  	"github.com/docker/go-events"
    13  	"github.com/docker/swarmkit/api"
    14  	"github.com/docker/swarmkit/api/genericresource"
    15  	"github.com/docker/swarmkit/identity"
    16  	"github.com/docker/swarmkit/manager/state"
    17  	"github.com/docker/swarmkit/manager/state/store"
    18  	"github.com/stretchr/testify/assert"
    19  	"github.com/stretchr/testify/require"
    20  )
    21  
    22  func TestScheduler(t *testing.T) {
    23  	ctx := context.Background()
    24  	initialNodeSet := []*api.Node{
    25  		{
    26  			ID: "id1",
    27  			Spec: api.NodeSpec{
    28  				Annotations: api.Annotations{
    29  					Name: "name1",
    30  				},
    31  			},
    32  			Status: api.NodeStatus{
    33  				State: api.NodeStatus_READY,
    34  			},
    35  		},
    36  		{
    37  			ID: "id2",
    38  			Spec: api.NodeSpec{
    39  				Annotations: api.Annotations{
    40  					Name: "name2",
    41  				},
    42  			},
    43  			Status: api.NodeStatus{
    44  				State: api.NodeStatus_READY,
    45  			},
    46  		},
    47  		{
    48  			ID: "id3",
    49  			Spec: api.NodeSpec{
    50  				Annotations: api.Annotations{
    51  					Name: "name2",
    52  				},
    53  			},
    54  			Status: api.NodeStatus{
    55  				State: api.NodeStatus_READY,
    56  			},
    57  		},
    58  	}
    59  
    60  	initialTaskSet := []*api.Task{
    61  		{
    62  			ID:           "id1",
    63  			DesiredState: api.TaskStateRunning,
    64  			ServiceAnnotations: api.Annotations{
    65  				Name: "name1",
    66  			},
    67  
    68  			Status: api.TaskStatus{
    69  				State: api.TaskStateAssigned,
    70  			},
    71  			NodeID: initialNodeSet[0].ID,
    72  		},
    73  		{
    74  			ID:           "id2",
    75  			DesiredState: api.TaskStateRunning,
    76  			ServiceAnnotations: api.Annotations{
    77  				Name: "name2",
    78  			},
    79  			Status: api.TaskStatus{
    80  				State: api.TaskStatePending,
    81  			},
    82  		},
    83  		{
    84  			ID:           "id3",
    85  			DesiredState: api.TaskStateRunning,
    86  			ServiceAnnotations: api.Annotations{
    87  				Name: "name2",
    88  			},
    89  			Status: api.TaskStatus{
    90  				State: api.TaskStatePending,
    91  			},
    92  		},
    93  	}
    94  
    95  	s := store.NewMemoryStore(nil)
    96  	assert.NotNil(t, s)
    97  	defer s.Close()
    98  
    99  	err := s.Update(func(tx store.Tx) error {
   100  		// Prepopulate nodes
   101  		for _, n := range initialNodeSet {
   102  			assert.NoError(t, store.CreateNode(tx, n))
   103  		}
   104  
   105  		// Prepopulate tasks
   106  		for _, task := range initialTaskSet {
   107  			assert.NoError(t, store.CreateTask(tx, task))
   108  		}
   109  		return nil
   110  	})
   111  	assert.NoError(t, err)
   112  
   113  	scheduler := New(s)
   114  
   115  	watch, cancel := state.Watch(s.WatchQueue(), api.EventUpdateTask{})
   116  	defer cancel()
   117  
   118  	go func() {
   119  		assert.NoError(t, scheduler.Run(ctx))
   120  	}()
   121  	defer scheduler.Stop()
   122  
   123  	assignment1 := watchAssignment(t, watch)
   124  	// must assign to id2 or id3 since id1 already has a task
   125  	assert.Regexp(t, assignment1.NodeID, "(id2|id3)")
   126  
   127  	assignment2 := watchAssignment(t, watch)
   128  	// must assign to id2 or id3 since id1 already has a task
   129  	if assignment1.NodeID == "id2" {
   130  		assert.Equal(t, "id3", assignment2.NodeID)
   131  	} else {
   132  		assert.Equal(t, "id2", assignment2.NodeID)
   133  	}
   134  
   135  	err = s.Update(func(tx store.Tx) error {
   136  		// Update each node to make sure this doesn't mess up the
   137  		// scheduler's state.
   138  		for _, n := range initialNodeSet {
   139  			assert.NoError(t, store.UpdateNode(tx, n))
   140  		}
   141  		return nil
   142  	})
   143  	assert.NoError(t, err)
   144  
   145  	err = s.Update(func(tx store.Tx) error {
   146  		// Delete the task associated with node 1 so it's now the most lightly
   147  		// loaded node.
   148  		assert.NoError(t, store.DeleteTask(tx, "id1"))
   149  
   150  		// Create a new task. It should get assigned to id1.
   151  		t4 := &api.Task{
   152  			ID:           "id4",
   153  			DesiredState: api.TaskStateRunning,
   154  			ServiceAnnotations: api.Annotations{
   155  				Name: "name4",
   156  			},
   157  			Status: api.TaskStatus{
   158  				State: api.TaskStatePending,
   159  			},
   160  		}
   161  		assert.NoError(t, store.CreateTask(tx, t4))
   162  		return nil
   163  	})
   164  	assert.NoError(t, err)
   165  
   166  	assignment3 := watchAssignment(t, watch)
   167  	assert.Equal(t, "id1", assignment3.NodeID)
   168  
   169  	// Update a task to make it unassigned. It should get assigned by the
   170  	// scheduler.
   171  	err = s.Update(func(tx store.Tx) error {
   172  		// Remove assignment from task id4. It should get assigned
   173  		// to node id1.
   174  		t4 := &api.Task{
   175  			ID:           "id4",
   176  			DesiredState: api.TaskStateRunning,
   177  			ServiceAnnotations: api.Annotations{
   178  				Name: "name4",
   179  			},
   180  			Status: api.TaskStatus{
   181  				State: api.TaskStatePending,
   182  			},
   183  		}
   184  		assert.NoError(t, store.UpdateTask(tx, t4))
   185  		return nil
   186  	})
   187  	assert.NoError(t, err)
   188  
   189  	assignment4 := watchAssignment(t, watch)
   190  	assert.Equal(t, "id1", assignment4.NodeID)
   191  
   192  	err = s.Update(func(tx store.Tx) error {
   193  		// Create a ready node, then remove it. No tasks should ever
   194  		// be assigned to it.
   195  		node := &api.Node{
   196  			ID: "removednode",
   197  			Spec: api.NodeSpec{
   198  				Annotations: api.Annotations{
   199  					Name: "removednode",
   200  				},
   201  			},
   202  			Status: api.NodeStatus{
   203  				State: api.NodeStatus_DOWN,
   204  			},
   205  		}
   206  		assert.NoError(t, store.CreateNode(tx, node))
   207  		assert.NoError(t, store.DeleteNode(tx, node.ID))
   208  
   209  		// Create an unassigned task.
   210  		task := &api.Task{
   211  			ID:           "removednode",
   212  			DesiredState: api.TaskStateRunning,
   213  			ServiceAnnotations: api.Annotations{
   214  				Name: "removednode",
   215  			},
   216  			Status: api.TaskStatus{
   217  				State: api.TaskStatePending,
   218  			},
   219  		}
   220  		assert.NoError(t, store.CreateTask(tx, task))
   221  		return nil
   222  	})
   223  	assert.NoError(t, err)
   224  
   225  	assignmentRemovedNode := watchAssignment(t, watch)
   226  	assert.NotEqual(t, "removednode", assignmentRemovedNode.NodeID)
   227  
   228  	err = s.Update(func(tx store.Tx) error {
   229  		// Create a ready node. It should be used for the next
   230  		// assignment.
   231  		n4 := &api.Node{
   232  			ID: "id4",
   233  			Spec: api.NodeSpec{
   234  				Annotations: api.Annotations{
   235  					Name: "name4",
   236  				},
   237  			},
   238  			Status: api.NodeStatus{
   239  				State: api.NodeStatus_READY,
   240  			},
   241  		}
   242  		assert.NoError(t, store.CreateNode(tx, n4))
   243  
   244  		// Create an unassigned task.
   245  		t5 := &api.Task{
   246  			ID:           "id5",
   247  			DesiredState: api.TaskStateRunning,
   248  			ServiceAnnotations: api.Annotations{
   249  				Name: "name5",
   250  			},
   251  			Status: api.TaskStatus{
   252  				State: api.TaskStatePending,
   253  			},
   254  		}
   255  		assert.NoError(t, store.CreateTask(tx, t5))
   256  		return nil
   257  	})
   258  	assert.NoError(t, err)
   259  
   260  	assignment5 := watchAssignment(t, watch)
   261  	assert.Equal(t, "id4", assignment5.NodeID)
   262  
   263  	err = s.Update(func(tx store.Tx) error {
   264  		// Create a non-ready node. It should NOT be used for the next
   265  		// assignment.
   266  		n5 := &api.Node{
   267  			ID: "id5",
   268  			Spec: api.NodeSpec{
   269  				Annotations: api.Annotations{
   270  					Name: "name5",
   271  				},
   272  			},
   273  			Status: api.NodeStatus{
   274  				State: api.NodeStatus_DOWN,
   275  			},
   276  		}
   277  		assert.NoError(t, store.CreateNode(tx, n5))
   278  
   279  		// Create an unassigned task.
   280  		t6 := &api.Task{
   281  			ID:           "id6",
   282  			DesiredState: api.TaskStateRunning,
   283  			ServiceAnnotations: api.Annotations{
   284  				Name: "name6",
   285  			},
   286  			Status: api.TaskStatus{
   287  				State: api.TaskStatePending,
   288  			},
   289  		}
   290  		assert.NoError(t, store.CreateTask(tx, t6))
   291  		return nil
   292  	})
   293  	assert.NoError(t, err)
   294  
   295  	assignment6 := watchAssignment(t, watch)
   296  	assert.NotEqual(t, "id5", assignment6.NodeID)
   297  
   298  	err = s.Update(func(tx store.Tx) error {
   299  		// Update node id5 to put it in the READY state.
   300  		n5 := &api.Node{
   301  			ID: "id5",
   302  			Spec: api.NodeSpec{
   303  				Annotations: api.Annotations{
   304  					Name: "name5",
   305  				},
   306  			},
   307  			Status: api.NodeStatus{
   308  				State: api.NodeStatus_READY,
   309  			},
   310  		}
   311  		assert.NoError(t, store.UpdateNode(tx, n5))
   312  
   313  		// Create an unassigned task. Should be assigned to the
   314  		// now-ready node.
   315  		t7 := &api.Task{
   316  			ID:           "id7",
   317  			DesiredState: api.TaskStateRunning,
   318  			ServiceAnnotations: api.Annotations{
   319  				Name: "name7",
   320  			},
   321  			Status: api.TaskStatus{
   322  				State: api.TaskStatePending,
   323  			},
   324  		}
   325  		assert.NoError(t, store.CreateTask(tx, t7))
   326  		return nil
   327  	})
   328  	assert.NoError(t, err)
   329  
   330  	assignment7 := watchAssignment(t, watch)
   331  	assert.Equal(t, "id5", assignment7.NodeID)
   332  
   333  	err = s.Update(func(tx store.Tx) error {
   334  		// Create a ready node, then immediately take it down. The next
   335  		// unassigned task should NOT be assigned to it.
   336  		n6 := &api.Node{
   337  			ID: "id6",
   338  			Spec: api.NodeSpec{
   339  				Annotations: api.Annotations{
   340  					Name: "name6",
   341  				},
   342  			},
   343  			Status: api.NodeStatus{
   344  				State: api.NodeStatus_READY,
   345  			},
   346  		}
   347  		assert.NoError(t, store.CreateNode(tx, n6))
   348  		n6.Status.State = api.NodeStatus_DOWN
   349  		assert.NoError(t, store.UpdateNode(tx, n6))
   350  
   351  		// Create an unassigned task.
   352  		t8 := &api.Task{
   353  			ID:           "id8",
   354  			DesiredState: api.TaskStateRunning,
   355  			ServiceAnnotations: api.Annotations{
   356  				Name: "name8",
   357  			},
   358  			Status: api.TaskStatus{
   359  				State: api.TaskStatePending,
   360  			},
   361  		}
   362  		assert.NoError(t, store.CreateTask(tx, t8))
   363  		return nil
   364  	})
   365  	assert.NoError(t, err)
   366  
   367  	assignment8 := watchAssignment(t, watch)
   368  	assert.NotEqual(t, "id6", assignment8.NodeID)
   369  }
   370  
   371  func testHA(t *testing.T, useSpecVersion bool) {
   372  	ctx := context.Background()
   373  	initialNodeSet := []*api.Node{
   374  		{
   375  			ID: "id1",
   376  			Status: api.NodeStatus{
   377  				State: api.NodeStatus_READY,
   378  			},
   379  		},
   380  		{
   381  			ID: "id2",
   382  			Status: api.NodeStatus{
   383  				State: api.NodeStatus_READY,
   384  			},
   385  		},
   386  		{
   387  			ID: "id3",
   388  			Status: api.NodeStatus{
   389  				State: api.NodeStatus_READY,
   390  			},
   391  		},
   392  		{
   393  			ID: "id4",
   394  			Status: api.NodeStatus{
   395  				State: api.NodeStatus_READY,
   396  			},
   397  		},
   398  		{
   399  			ID: "id5",
   400  			Status: api.NodeStatus{
   401  				State: api.NodeStatus_READY,
   402  			},
   403  		},
   404  	}
   405  
   406  	taskTemplate1 := &api.Task{
   407  		DesiredState: api.TaskStateRunning,
   408  		ServiceID:    "service1",
   409  		Spec: api.TaskSpec{
   410  			Runtime: &api.TaskSpec_Container{
   411  				Container: &api.ContainerSpec{
   412  					Image: "v:1",
   413  				},
   414  			},
   415  		},
   416  		Status: api.TaskStatus{
   417  			State: api.TaskStatePending,
   418  		},
   419  	}
   420  
   421  	taskTemplate2 := &api.Task{
   422  		DesiredState: api.TaskStateRunning,
   423  		ServiceID:    "service2",
   424  		Spec: api.TaskSpec{
   425  			Runtime: &api.TaskSpec_Container{
   426  				Container: &api.ContainerSpec{
   427  					Image: "v:2",
   428  				},
   429  			},
   430  		},
   431  		Status: api.TaskStatus{
   432  			State: api.TaskStatePending,
   433  		},
   434  	}
   435  
   436  	if useSpecVersion {
   437  		taskTemplate1.SpecVersion = &api.Version{Index: 1}
   438  		taskTemplate2.SpecVersion = &api.Version{Index: 1}
   439  	}
   440  
   441  	s := store.NewMemoryStore(nil)
   442  	assert.NotNil(t, s)
   443  	defer s.Close()
   444  
   445  	t1Instances := 18
   446  
   447  	err := s.Update(func(tx store.Tx) error {
   448  		// Prepopulate nodes
   449  		for _, n := range initialNodeSet {
   450  			assert.NoError(t, store.CreateNode(tx, n))
   451  		}
   452  
   453  		// Prepopulate tasks from template 1
   454  		for i := 0; i != t1Instances; i++ {
   455  			taskTemplate1.ID = fmt.Sprintf("t1id%d", i)
   456  			assert.NoError(t, store.CreateTask(tx, taskTemplate1))
   457  		}
   458  		return nil
   459  	})
   460  	assert.NoError(t, err)
   461  
   462  	scheduler := New(s)
   463  
   464  	watch, cancel := state.Watch(s.WatchQueue(), api.EventUpdateTask{})
   465  	defer cancel()
   466  
   467  	go func() {
   468  		assert.NoError(t, scheduler.Run(ctx))
   469  	}()
   470  	defer scheduler.Stop()
   471  
   472  	t1Assignments := make(map[string]int)
   473  	for i := 0; i != t1Instances; i++ {
   474  		assignment := watchAssignment(t, watch)
   475  		if !strings.HasPrefix(assignment.ID, "t1") {
   476  			t.Fatal("got assignment for different kind of task")
   477  		}
   478  		t1Assignments[assignment.NodeID]++
   479  	}
   480  
   481  	assert.Len(t, t1Assignments, 5)
   482  
   483  	nodesWith3T1Tasks := 0
   484  	nodesWith4T1Tasks := 0
   485  	for nodeID, taskCount := range t1Assignments {
   486  		if taskCount == 3 {
   487  			nodesWith3T1Tasks++
   488  		} else if taskCount == 4 {
   489  			nodesWith4T1Tasks++
   490  		} else {
   491  			t.Fatalf("unexpected number of tasks %d on node %s", taskCount, nodeID)
   492  		}
   493  	}
   494  
   495  	assert.Equal(t, 3, nodesWith4T1Tasks)
   496  	assert.Equal(t, 2, nodesWith3T1Tasks)
   497  
   498  	t2Instances := 2
   499  
   500  	// Add a new service with two instances. They should fill the nodes
   501  	// that only have two tasks.
   502  	err = s.Update(func(tx store.Tx) error {
   503  		for i := 0; i != t2Instances; i++ {
   504  			taskTemplate2.ID = fmt.Sprintf("t2id%d", i)
   505  			assert.NoError(t, store.CreateTask(tx, taskTemplate2))
   506  		}
   507  		return nil
   508  	})
   509  	assert.NoError(t, err)
   510  
   511  	t2Assignments := make(map[string]int)
   512  	for i := 0; i != t2Instances; i++ {
   513  		assignment := watchAssignment(t, watch)
   514  		if !strings.HasPrefix(assignment.ID, "t2") {
   515  			t.Fatal("got assignment for different kind of task")
   516  		}
   517  		t2Assignments[assignment.NodeID]++
   518  	}
   519  
   520  	assert.Len(t, t2Assignments, 2)
   521  
   522  	for nodeID := range t2Assignments {
   523  		assert.Equal(t, 3, t1Assignments[nodeID])
   524  	}
   525  
   526  	// Scale up service 1 to 21 tasks. It should cover the two nodes that
   527  	// service 2 was assigned to, and also one other node.
   528  	err = s.Update(func(tx store.Tx) error {
   529  		for i := t1Instances; i != t1Instances+3; i++ {
   530  			taskTemplate1.ID = fmt.Sprintf("t1id%d", i)
   531  			assert.NoError(t, store.CreateTask(tx, taskTemplate1))
   532  		}
   533  		return nil
   534  	})
   535  	assert.NoError(t, err)
   536  
   537  	var sharedNodes [2]string
   538  
   539  	for i := 0; i != 3; i++ {
   540  		assignment := watchAssignment(t, watch)
   541  		if !strings.HasPrefix(assignment.ID, "t1") {
   542  			t.Fatal("got assignment for different kind of task")
   543  		}
   544  		if t1Assignments[assignment.NodeID] == 5 {
   545  			t.Fatal("more than one new task assigned to the same node")
   546  		}
   547  		t1Assignments[assignment.NodeID]++
   548  
   549  		if t2Assignments[assignment.NodeID] != 0 {
   550  			if sharedNodes[0] == "" {
   551  				sharedNodes[0] = assignment.NodeID
   552  			} else if sharedNodes[1] == "" {
   553  				sharedNodes[1] = assignment.NodeID
   554  			} else {
   555  				t.Fatal("all three assignments went to nodes with service2 tasks")
   556  			}
   557  		}
   558  	}
   559  
   560  	assert.NotEmpty(t, sharedNodes[0])
   561  	assert.NotEmpty(t, sharedNodes[1])
   562  	assert.NotEqual(t, sharedNodes[0], sharedNodes[1])
   563  
   564  	nodesWith4T1Tasks = 0
   565  	nodesWith5T1Tasks := 0
   566  	for nodeID, taskCount := range t1Assignments {
   567  		if taskCount == 4 {
   568  			nodesWith4T1Tasks++
   569  		} else if taskCount == 5 {
   570  			nodesWith5T1Tasks++
   571  		} else {
   572  			t.Fatalf("unexpected number of tasks %d on node %s", taskCount, nodeID)
   573  		}
   574  	}
   575  
   576  	assert.Equal(t, 4, nodesWith4T1Tasks)
   577  	assert.Equal(t, 1, nodesWith5T1Tasks)
   578  
   579  	// Add another task from service2. It must not land on the node that
   580  	// has 5 service1 tasks.
   581  	err = s.Update(func(tx store.Tx) error {
   582  		taskTemplate2.ID = "t2id4"
   583  		assert.NoError(t, store.CreateTask(tx, taskTemplate2))
   584  		return nil
   585  	})
   586  	assert.NoError(t, err)
   587  
   588  	assignment := watchAssignment(t, watch)
   589  	if assignment.ID != "t2id4" {
   590  		t.Fatal("got assignment for different task")
   591  	}
   592  
   593  	if t2Assignments[assignment.NodeID] != 0 {
   594  		t.Fatal("was scheduled on a node that already has a service2 task")
   595  	}
   596  	if t1Assignments[assignment.NodeID] == 5 {
   597  		t.Fatal("was scheduled on the node that has the most service1 tasks")
   598  	}
   599  	t2Assignments[assignment.NodeID]++
   600  
   601  	// Remove all tasks on node id1.
   602  	err = s.Update(func(tx store.Tx) error {
   603  		tasks, err := store.FindTasks(tx, store.ByNodeID("id1"))
   604  		assert.NoError(t, err)
   605  		for _, task := range tasks {
   606  			assert.NoError(t, store.DeleteTask(tx, task.ID))
   607  		}
   608  		return nil
   609  	})
   610  	assert.NoError(t, err)
   611  
   612  	t1Assignments["id1"] = 0
   613  	t2Assignments["id1"] = 0
   614  
   615  	// Add four instances of service1 and two instances of service2.
   616  	// All instances of service1 should land on node "id1", and one
   617  	// of the two service2 instances should as well.
   618  	// Put these in a map to randomize the order in which they are
   619  	// created.
   620  	err = s.Update(func(tx store.Tx) error {
   621  		tasksMap := make(map[string]*api.Task)
   622  		for i := 22; i <= 25; i++ {
   623  			taskTemplate1.ID = fmt.Sprintf("t1id%d", i)
   624  			tasksMap[taskTemplate1.ID] = taskTemplate1.Copy()
   625  		}
   626  		for i := 5; i <= 6; i++ {
   627  			taskTemplate2.ID = fmt.Sprintf("t2id%d", i)
   628  			tasksMap[taskTemplate2.ID] = taskTemplate2.Copy()
   629  		}
   630  		for _, task := range tasksMap {
   631  			assert.NoError(t, store.CreateTask(tx, task))
   632  		}
   633  		return nil
   634  	})
   635  	assert.NoError(t, err)
   636  
   637  	for i := 0; i != 4+2; i++ {
   638  		assignment := watchAssignment(t, watch)
   639  		if strings.HasPrefix(assignment.ID, "t1") {
   640  			t1Assignments[assignment.NodeID]++
   641  		} else if strings.HasPrefix(assignment.ID, "t2") {
   642  			t2Assignments[assignment.NodeID]++
   643  		}
   644  	}
   645  
   646  	assert.Equal(t, 4, t1Assignments["id1"])
   647  	assert.Equal(t, 1, t2Assignments["id1"])
   648  }
   649  
   650  func TestHA(t *testing.T) {
   651  	t.Run("useSpecVersion=false", func(t *testing.T) { testHA(t, false) })
   652  	t.Run("useSpecVersion=true", func(t *testing.T) { testHA(t, true) })
   653  }
   654  
   655  func testPreferences(t *testing.T, useSpecVersion bool) {
   656  	ctx := context.Background()
   657  	initialNodeSet := []*api.Node{
   658  		{
   659  			ID: "id1",
   660  			Status: api.NodeStatus{
   661  				State: api.NodeStatus_READY,
   662  			},
   663  			Spec: api.NodeSpec{
   664  				Annotations: api.Annotations{
   665  					Labels: map[string]string{
   666  						"az": "az1",
   667  					},
   668  				},
   669  			},
   670  		},
   671  		{
   672  			ID: "id2",
   673  			Status: api.NodeStatus{
   674  				State: api.NodeStatus_READY,
   675  			},
   676  			Spec: api.NodeSpec{
   677  				Annotations: api.Annotations{
   678  					Labels: map[string]string{
   679  						"az": "az2",
   680  					},
   681  				},
   682  			},
   683  		},
   684  		{
   685  			ID: "id3",
   686  			Status: api.NodeStatus{
   687  				State: api.NodeStatus_READY,
   688  			},
   689  			Spec: api.NodeSpec{
   690  				Annotations: api.Annotations{
   691  					Labels: map[string]string{
   692  						"az": "az2",
   693  					},
   694  				},
   695  			},
   696  		},
   697  		{
   698  			ID: "id4",
   699  			Status: api.NodeStatus{
   700  				State: api.NodeStatus_READY,
   701  			},
   702  			Spec: api.NodeSpec{
   703  				Annotations: api.Annotations{
   704  					Labels: map[string]string{
   705  						"az": "az2",
   706  					},
   707  				},
   708  			},
   709  		},
   710  		{
   711  			ID: "id5",
   712  			Status: api.NodeStatus{
   713  				State: api.NodeStatus_READY,
   714  			},
   715  			Spec: api.NodeSpec{
   716  				Annotations: api.Annotations{
   717  					Labels: map[string]string{
   718  						"az": "az2",
   719  					},
   720  				},
   721  			},
   722  		},
   723  	}
   724  
   725  	taskTemplate1 := &api.Task{
   726  		DesiredState: api.TaskStateRunning,
   727  		ServiceID:    "service1",
   728  		Spec: api.TaskSpec{
   729  			Runtime: &api.TaskSpec_Container{
   730  				Container: &api.ContainerSpec{
   731  					Image: "v:1",
   732  				},
   733  			},
   734  			Placement: &api.Placement{
   735  				Preferences: []*api.PlacementPreference{
   736  					{
   737  						Preference: &api.PlacementPreference_Spread{
   738  							Spread: &api.SpreadOver{
   739  								SpreadDescriptor: "node.labels.az",
   740  							},
   741  						},
   742  					},
   743  				},
   744  			},
   745  		},
   746  		Status: api.TaskStatus{
   747  			State: api.TaskStatePending,
   748  		},
   749  	}
   750  
   751  	if useSpecVersion {
   752  		taskTemplate1.SpecVersion = &api.Version{Index: 1}
   753  	}
   754  
   755  	s := store.NewMemoryStore(nil)
   756  	assert.NotNil(t, s)
   757  	defer s.Close()
   758  
   759  	t1Instances := 8
   760  
   761  	err := s.Update(func(tx store.Tx) error {
   762  		// Prepoulate nodes
   763  		for _, n := range initialNodeSet {
   764  			assert.NoError(t, store.CreateNode(tx, n))
   765  		}
   766  
   767  		// Prepopulate tasks from template 1
   768  		for i := 0; i != t1Instances; i++ {
   769  			taskTemplate1.ID = fmt.Sprintf("t1id%d", i)
   770  			assert.NoError(t, store.CreateTask(tx, taskTemplate1))
   771  		}
   772  		return nil
   773  	})
   774  	assert.NoError(t, err)
   775  
   776  	scheduler := New(s)
   777  
   778  	watch, cancel := state.Watch(s.WatchQueue(), api.EventUpdateTask{})
   779  	defer cancel()
   780  
   781  	go func() {
   782  		assert.NoError(t, scheduler.Run(ctx))
   783  	}()
   784  	defer scheduler.Stop()
   785  
   786  	t1Assignments := make(map[string]int)
   787  	for i := 0; i != t1Instances; i++ {
   788  		assignment := watchAssignment(t, watch)
   789  		if !strings.HasPrefix(assignment.ID, "t1") {
   790  			t.Fatal("got assignment for different kind of task")
   791  		}
   792  		t1Assignments[assignment.NodeID]++
   793  	}
   794  
   795  	assert.Len(t, t1Assignments, 5)
   796  	assert.Equal(t, 4, t1Assignments["id1"])
   797  	assert.Equal(t, 1, t1Assignments["id2"])
   798  	assert.Equal(t, 1, t1Assignments["id3"])
   799  	assert.Equal(t, 1, t1Assignments["id4"])
   800  	assert.Equal(t, 1, t1Assignments["id5"])
   801  }
   802  
   803  func TestPreferences(t *testing.T) {
   804  	t.Run("useSpecVersion=false", func(t *testing.T) { testPreferences(t, false) })
   805  	t.Run("useSpecVersion=true", func(t *testing.T) { testPreferences(t, true) })
   806  }
   807  
   808  func testMultiplePreferences(t *testing.T, useSpecVersion bool) {
   809  	ctx := context.Background()
   810  	initialNodeSet := []*api.Node{
   811  		{
   812  			ID: "id0",
   813  			Status: api.NodeStatus{
   814  				State: api.NodeStatus_READY,
   815  			},
   816  			Spec: api.NodeSpec{
   817  				Annotations: api.Annotations{
   818  					Labels: map[string]string{
   819  						"az":   "az1",
   820  						"rack": "rack1",
   821  					},
   822  				},
   823  			},
   824  			Description: &api.NodeDescription{
   825  				Resources: &api.Resources{
   826  					NanoCPUs:    1e9,
   827  					MemoryBytes: 1e8,
   828  					Generic: []*api.GenericResource{
   829  						genericresource.NewDiscrete("apple", 1),
   830  					},
   831  				},
   832  			},
   833  		},
   834  		{
   835  			ID: "id1",
   836  			Status: api.NodeStatus{
   837  				State: api.NodeStatus_READY,
   838  			},
   839  			Spec: api.NodeSpec{
   840  				Annotations: api.Annotations{
   841  					Labels: map[string]string{
   842  						"az":   "az1",
   843  						"rack": "rack1",
   844  					},
   845  				},
   846  			},
   847  			Description: &api.NodeDescription{
   848  				Resources: &api.Resources{
   849  					NanoCPUs:    1e9,
   850  					MemoryBytes: 1e9,
   851  					Generic: []*api.GenericResource{
   852  						genericresource.NewDiscrete("apple", 10),
   853  					},
   854  				},
   855  			},
   856  		},
   857  		{
   858  			ID: "id2",
   859  			Status: api.NodeStatus{
   860  				State: api.NodeStatus_READY,
   861  			},
   862  			Spec: api.NodeSpec{
   863  				Annotations: api.Annotations{
   864  					Labels: map[string]string{
   865  						"az":   "az2",
   866  						"rack": "rack1",
   867  					},
   868  				},
   869  			},
   870  			Description: &api.NodeDescription{
   871  				Resources: &api.Resources{
   872  					NanoCPUs:    1e9,
   873  					MemoryBytes: 1e9,
   874  					Generic: []*api.GenericResource{
   875  						genericresource.NewDiscrete("apple", 6),
   876  					},
   877  				},
   878  			},
   879  		},
   880  		{
   881  			ID: "id3",
   882  			Status: api.NodeStatus{
   883  				State: api.NodeStatus_READY,
   884  			},
   885  			Spec: api.NodeSpec{
   886  				Annotations: api.Annotations{
   887  					Labels: map[string]string{
   888  						"az":   "az2",
   889  						"rack": "rack1",
   890  					},
   891  				},
   892  			},
   893  			Description: &api.NodeDescription{
   894  				Resources: &api.Resources{
   895  					NanoCPUs:    1e9,
   896  					MemoryBytes: 1e9,
   897  					Generic: []*api.GenericResource{
   898  						genericresource.NewDiscrete("apple", 6),
   899  					},
   900  				},
   901  			},
   902  		},
   903  		{
   904  			ID: "id4",
   905  			Status: api.NodeStatus{
   906  				State: api.NodeStatus_READY,
   907  			},
   908  			Spec: api.NodeSpec{
   909  				Annotations: api.Annotations{
   910  					Labels: map[string]string{
   911  						"az":   "az2",
   912  						"rack": "rack1",
   913  					},
   914  				},
   915  			},
   916  			Description: &api.NodeDescription{
   917  				Resources: &api.Resources{
   918  					NanoCPUs:    1e9,
   919  					MemoryBytes: 1e9,
   920  					Generic: []*api.GenericResource{
   921  						genericresource.NewDiscrete("apple", 6),
   922  					},
   923  				},
   924  			},
   925  		},
   926  		{
   927  			ID: "id5",
   928  			Status: api.NodeStatus{
   929  				State: api.NodeStatus_READY,
   930  			},
   931  			Spec: api.NodeSpec{
   932  				Annotations: api.Annotations{
   933  					Labels: map[string]string{
   934  						"az":   "az2",
   935  						"rack": "rack2",
   936  					},
   937  				},
   938  			},
   939  			Description: &api.NodeDescription{
   940  				Resources: &api.Resources{
   941  					NanoCPUs:    1e9,
   942  					MemoryBytes: 1e9,
   943  					Generic: []*api.GenericResource{
   944  						genericresource.NewDiscrete("apple", 6),
   945  					},
   946  				},
   947  			},
   948  		},
   949  		{
   950  			ID: "id6",
   951  			Status: api.NodeStatus{
   952  				State: api.NodeStatus_READY,
   953  			},
   954  			Spec: api.NodeSpec{
   955  				Annotations: api.Annotations{
   956  					Labels: map[string]string{
   957  						"az":   "az2",
   958  						"rack": "rack2",
   959  					},
   960  				},
   961  			},
   962  			Description: &api.NodeDescription{
   963  				Resources: &api.Resources{
   964  					NanoCPUs:    1e9,
   965  					MemoryBytes: 1e9,
   966  					Generic: []*api.GenericResource{
   967  						genericresource.NewDiscrete("apple", 6),
   968  					},
   969  				},
   970  			},
   971  		},
   972  	}
   973  
   974  	taskTemplate1 := &api.Task{
   975  		DesiredState: api.TaskStateRunning,
   976  		ServiceID:    "service1",
   977  		Spec: api.TaskSpec{
   978  			Runtime: &api.TaskSpec_Container{
   979  				Container: &api.ContainerSpec{
   980  					Image: "v:1",
   981  				},
   982  			},
   983  			Placement: &api.Placement{
   984  				Preferences: []*api.PlacementPreference{
   985  					{
   986  						Preference: &api.PlacementPreference_Spread{
   987  							Spread: &api.SpreadOver{
   988  								SpreadDescriptor: "node.labels.az",
   989  							},
   990  						},
   991  					},
   992  					{
   993  						Preference: &api.PlacementPreference_Spread{
   994  							Spread: &api.SpreadOver{
   995  								SpreadDescriptor: "node.labels.rack",
   996  							},
   997  						},
   998  					},
   999  				},
  1000  			},
  1001  			Resources: &api.ResourceRequirements{
  1002  				Reservations: &api.Resources{
  1003  					MemoryBytes: 2e8,
  1004  					Generic: []*api.GenericResource{
  1005  						genericresource.NewDiscrete("apple", 2),
  1006  					},
  1007  				},
  1008  			},
  1009  		},
  1010  		Status: api.TaskStatus{
  1011  			State: api.TaskStatePending,
  1012  		},
  1013  	}
  1014  
  1015  	if useSpecVersion {
  1016  		taskTemplate1.SpecVersion = &api.Version{Index: 1}
  1017  	}
  1018  
  1019  	s := store.NewMemoryStore(nil)
  1020  	assert.NotNil(t, s)
  1021  	defer s.Close()
  1022  
  1023  	t1Instances := 12
  1024  
  1025  	err := s.Update(func(tx store.Tx) error {
  1026  		// Prepoulate nodes
  1027  		for _, n := range initialNodeSet {
  1028  			assert.NoError(t, store.CreateNode(tx, n))
  1029  		}
  1030  
  1031  		// Prepopulate tasks from template 1
  1032  		for i := 0; i != t1Instances; i++ {
  1033  			taskTemplate1.ID = fmt.Sprintf("t1id%d", i)
  1034  			assert.NoError(t, store.CreateTask(tx, taskTemplate1))
  1035  		}
  1036  		return nil
  1037  	})
  1038  	assert.NoError(t, err)
  1039  
  1040  	scheduler := New(s)
  1041  
  1042  	watch, cancel := state.Watch(s.WatchQueue(), api.EventUpdateTask{})
  1043  	defer cancel()
  1044  
  1045  	go func() {
  1046  		assert.NoError(t, scheduler.Run(ctx))
  1047  	}()
  1048  	defer scheduler.Stop()
  1049  
  1050  	t1Assignments := make(map[string]int)
  1051  	for i := 0; i != t1Instances; i++ {
  1052  		assignment := watchAssignment(t, watch)
  1053  		if !strings.HasPrefix(assignment.ID, "t1") {
  1054  			t.Fatal("got assignment for different kind of task")
  1055  		}
  1056  		t1Assignments[assignment.NodeID]++
  1057  	}
  1058  
  1059  	assert.Len(t, t1Assignments, 6)
  1060  
  1061  	// There should be no tasks assigned to id0 because it doesn't meet the
  1062  	// resource requirements.
  1063  	assert.Equal(t, 0, t1Assignments["id0"])
  1064  
  1065  	// There should be 5 tasks assigned to id1 because half of the 12 tasks
  1066  	// should ideally end up in az1, but id1 can only accommodate 5 due to
  1067  	// resource requirements.
  1068  	assert.Equal(t, 5, t1Assignments["id1"])
  1069  
  1070  	// The remaining 7 tasks should be spread across rack1 and rack2 of
  1071  	// az2.
  1072  
  1073  	if t1Assignments["id2"]+t1Assignments["id3"]+t1Assignments["id4"] == 4 {
  1074  		// If rack1 gets 4 and rack2 gets 3, then one of id[2-4] will have two
  1075  		// tasks and the others will have one.
  1076  		if t1Assignments["id2"] == 2 {
  1077  			assert.Equal(t, 1, t1Assignments["id3"])
  1078  			assert.Equal(t, 1, t1Assignments["id4"])
  1079  		} else if t1Assignments["id3"] == 2 {
  1080  			assert.Equal(t, 1, t1Assignments["id2"])
  1081  			assert.Equal(t, 1, t1Assignments["id4"])
  1082  		} else {
  1083  			assert.Equal(t, 1, t1Assignments["id2"])
  1084  			assert.Equal(t, 1, t1Assignments["id3"])
  1085  			assert.Equal(t, 2, t1Assignments["id4"])
  1086  		}
  1087  
  1088  		// either id5 or id6 should end up with 2 tasks
  1089  		if t1Assignments["id5"] == 1 {
  1090  			assert.Equal(t, 2, t1Assignments["id6"])
  1091  		} else {
  1092  			assert.Equal(t, 2, t1Assignments["id5"])
  1093  			assert.Equal(t, 1, t1Assignments["id6"])
  1094  		}
  1095  	} else if t1Assignments["id2"]+t1Assignments["id3"]+t1Assignments["id4"] == 3 {
  1096  		// If rack2 gets 4 and rack1 gets 3, then id[2-4] will each get
  1097  		// 1 task and id[5-6] will each get 2 tasks.
  1098  		assert.Equal(t, 1, t1Assignments["id2"])
  1099  		assert.Equal(t, 1, t1Assignments["id3"])
  1100  		assert.Equal(t, 1, t1Assignments["id4"])
  1101  		assert.Equal(t, 2, t1Assignments["id5"])
  1102  		assert.Equal(t, 2, t1Assignments["id6"])
  1103  	} else {
  1104  		t.Fatal("unexpected task layout")
  1105  	}
  1106  }
  1107  
  1108  func TestMultiplePreferences(t *testing.T) {
  1109  	t.Run("useSpecVersion=false", func(t *testing.T) { testMultiplePreferences(t, false) })
  1110  	t.Run("useSpecVersion=true", func(t *testing.T) { testMultiplePreferences(t, true) })
  1111  }
  1112  
  1113  func TestSchedulerNoReadyNodes(t *testing.T) {
  1114  	ctx := context.Background()
  1115  	initialTask := &api.Task{
  1116  		ID:           "id1",
  1117  		ServiceID:    "serviceID1",
  1118  		DesiredState: api.TaskStateRunning,
  1119  		ServiceAnnotations: api.Annotations{
  1120  			Name: "name1",
  1121  		},
  1122  		Status: api.TaskStatus{
  1123  			State: api.TaskStatePending,
  1124  		},
  1125  	}
  1126  
  1127  	s := store.NewMemoryStore(nil)
  1128  	assert.NotNil(t, s)
  1129  	defer s.Close()
  1130  
  1131  	err := s.Update(func(tx store.Tx) error {
  1132  		// Add initial service and task
  1133  		assert.NoError(t, store.CreateService(tx, &api.Service{ID: "serviceID1"}))
  1134  		assert.NoError(t, store.CreateTask(tx, initialTask))
  1135  		return nil
  1136  	})
  1137  	assert.NoError(t, err)
  1138  
  1139  	scheduler := New(s)
  1140  
  1141  	watch, cancel := state.Watch(s.WatchQueue(), api.EventUpdateTask{})
  1142  	defer cancel()
  1143  
  1144  	go func() {
  1145  		assert.NoError(t, scheduler.Run(ctx))
  1146  	}()
  1147  	defer scheduler.Stop()
  1148  
  1149  	failure := watchAssignmentFailure(t, watch)
  1150  	assert.Equal(t, "no suitable node", failure.Status.Err)
  1151  
  1152  	err = s.Update(func(tx store.Tx) error {
  1153  		// Create a ready node. The task should get assigned to this
  1154  		// node.
  1155  		node := &api.Node{
  1156  			ID: "newnode",
  1157  			Spec: api.NodeSpec{
  1158  				Annotations: api.Annotations{
  1159  					Name: "newnode",
  1160  				},
  1161  			},
  1162  			Status: api.NodeStatus{
  1163  				State: api.NodeStatus_READY,
  1164  			},
  1165  		}
  1166  		assert.NoError(t, store.CreateNode(tx, node))
  1167  		return nil
  1168  	})
  1169  	assert.NoError(t, err)
  1170  
  1171  	assignment := watchAssignment(t, watch)
  1172  	assert.Equal(t, "newnode", assignment.NodeID)
  1173  }
  1174  
  1175  func TestSchedulerFaultyNode(t *testing.T) {
  1176  	ctx := context.Background()
  1177  
  1178  	replicatedTaskTemplate := &api.Task{
  1179  		ServiceID:    "service1",
  1180  		DesiredState: api.TaskStateRunning,
  1181  		ServiceAnnotations: api.Annotations{
  1182  			Name: "name1",
  1183  		},
  1184  		Status: api.TaskStatus{
  1185  			State: api.TaskStatePending,
  1186  		},
  1187  	}
  1188  
  1189  	preassignedTaskTemplate := &api.Task{
  1190  		ServiceID:    "service2",
  1191  		NodeID:       "id1",
  1192  		DesiredState: api.TaskStateRunning,
  1193  		ServiceAnnotations: api.Annotations{
  1194  			Name: "name2",
  1195  		},
  1196  		Status: api.TaskStatus{
  1197  			State: api.TaskStatePending,
  1198  		},
  1199  	}
  1200  
  1201  	node1 := &api.Node{
  1202  		ID: "id1",
  1203  		Spec: api.NodeSpec{
  1204  			Annotations: api.Annotations{
  1205  				Name: "id1",
  1206  			},
  1207  		},
  1208  		Status: api.NodeStatus{
  1209  			State: api.NodeStatus_READY,
  1210  		},
  1211  	}
  1212  
  1213  	node2 := &api.Node{
  1214  		ID: "id2",
  1215  		Spec: api.NodeSpec{
  1216  			Annotations: api.Annotations{
  1217  				Name: "id2",
  1218  			},
  1219  		},
  1220  		Status: api.NodeStatus{
  1221  			State: api.NodeStatus_READY,
  1222  		},
  1223  	}
  1224  
  1225  	s := store.NewMemoryStore(nil)
  1226  	assert.NotNil(t, s)
  1227  	defer s.Close()
  1228  
  1229  	err := s.Update(func(tx store.Tx) error {
  1230  		// Add initial nodes, and one task of each type assigned to node id1
  1231  		assert.NoError(t, store.CreateNode(tx, node1))
  1232  		assert.NoError(t, store.CreateNode(tx, node2))
  1233  
  1234  		task1 := replicatedTaskTemplate.Copy()
  1235  		task1.ID = "id1"
  1236  		task1.NodeID = "id1"
  1237  		task1.Status.State = api.TaskStateRunning
  1238  		assert.NoError(t, store.CreateTask(tx, task1))
  1239  
  1240  		task2 := preassignedTaskTemplate.Copy()
  1241  		task2.ID = "id2"
  1242  		task2.NodeID = "id1"
  1243  		task2.Status.State = api.TaskStateRunning
  1244  		assert.NoError(t, store.CreateTask(tx, task2))
  1245  		return nil
  1246  	})
  1247  	assert.NoError(t, err)
  1248  
  1249  	scheduler := New(s)
  1250  
  1251  	watch, cancel := state.Watch(s.WatchQueue(), api.EventUpdateTask{})
  1252  	defer cancel()
  1253  
  1254  	go func() {
  1255  		assert.NoError(t, scheduler.Run(ctx))
  1256  	}()
  1257  	defer scheduler.Stop()
  1258  
  1259  	for i := 0; i != 8; i++ {
  1260  		// Simulate a task failure cycle
  1261  		newReplicatedTask := replicatedTaskTemplate.Copy()
  1262  		newReplicatedTask.ID = identity.NewID()
  1263  
  1264  		err = s.Update(func(tx store.Tx) error {
  1265  			assert.NoError(t, store.CreateTask(tx, newReplicatedTask))
  1266  			return nil
  1267  		})
  1268  		assert.NoError(t, err)
  1269  
  1270  		assignment := watchAssignment(t, watch)
  1271  		assert.Equal(t, newReplicatedTask.ID, assignment.ID)
  1272  
  1273  		if i < 5 {
  1274  			// The first 5 attempts should be assigned to node id2 because
  1275  			// it has no replicas of the service.
  1276  			assert.Equal(t, "id2", assignment.NodeID)
  1277  		} else {
  1278  			// The next ones should be assigned to id1, since we'll
  1279  			// flag id2 as potentially faulty.
  1280  			assert.Equal(t, "id1", assignment.NodeID)
  1281  		}
  1282  
  1283  		node2Info, err := scheduler.nodeSet.nodeInfo("id2")
  1284  		assert.NoError(t, err)
  1285  		expectedNode2Failures := i
  1286  		if i > 5 {
  1287  			expectedNode2Failures = 5
  1288  		}
  1289  		assert.Len(t, node2Info.recentFailures[versionedService{serviceID: "service1"}], expectedNode2Failures)
  1290  
  1291  		node1Info, err := scheduler.nodeSet.nodeInfo("id1")
  1292  		assert.NoError(t, err)
  1293  
  1294  		expectedNode1Failures := i - 5
  1295  		if i < 5 {
  1296  			expectedNode1Failures = 0
  1297  		}
  1298  		assert.Len(t, node1Info.recentFailures[versionedService{serviceID: "service1"}], expectedNode1Failures)
  1299  
  1300  		newPreassignedTask := preassignedTaskTemplate.Copy()
  1301  		newPreassignedTask.ID = identity.NewID()
  1302  
  1303  		err = s.Update(func(tx store.Tx) error {
  1304  			assert.NoError(t, store.CreateTask(tx, newPreassignedTask))
  1305  			return nil
  1306  		})
  1307  		assert.NoError(t, err)
  1308  
  1309  		assignment = watchAssignment(t, watch)
  1310  		assert.Equal(t, newPreassignedTask.ID, assignment.ID)
  1311  
  1312  		// The preassigned task is always assigned to node id1
  1313  		assert.Equal(t, "id1", assignment.NodeID)
  1314  
  1315  		// The service associated with the preassigned task will not be
  1316  		// marked as
  1317  		nodeInfo, err := scheduler.nodeSet.nodeInfo("id1")
  1318  		assert.NoError(t, err)
  1319  		assert.Len(t, nodeInfo.recentFailures[versionedService{serviceID: "service2"}], 0)
  1320  
  1321  		err = s.Update(func(tx store.Tx) error {
  1322  			newReplicatedTask := store.GetTask(tx, newReplicatedTask.ID)
  1323  			require.NotNil(t, newReplicatedTask)
  1324  			newReplicatedTask.Status.State = api.TaskStateFailed
  1325  			assert.NoError(t, store.UpdateTask(tx, newReplicatedTask))
  1326  
  1327  			newPreassignedTask := store.GetTask(tx, newPreassignedTask.ID)
  1328  			require.NotNil(t, newPreassignedTask)
  1329  			newPreassignedTask.Status.State = api.TaskStateFailed
  1330  			assert.NoError(t, store.UpdateTask(tx, newPreassignedTask))
  1331  
  1332  			return nil
  1333  		})
  1334  		assert.NoError(t, err)
  1335  	}
  1336  }
  1337  
  1338  func TestSchedulerFaultyNodeSpecVersion(t *testing.T) {
  1339  	ctx := context.Background()
  1340  
  1341  	taskTemplate := &api.Task{
  1342  		ServiceID:    "service1",
  1343  		SpecVersion:  &api.Version{Index: 1},
  1344  		DesiredState: api.TaskStateRunning,
  1345  		ServiceAnnotations: api.Annotations{
  1346  			Name: "name1",
  1347  		},
  1348  		Status: api.TaskStatus{
  1349  			State: api.TaskStatePending,
  1350  		},
  1351  	}
  1352  
  1353  	node1 := &api.Node{
  1354  		ID: "id1",
  1355  		Spec: api.NodeSpec{
  1356  			Annotations: api.Annotations{
  1357  				Name: "id1",
  1358  			},
  1359  		},
  1360  		Status: api.NodeStatus{
  1361  			State: api.NodeStatus_READY,
  1362  		},
  1363  	}
  1364  
  1365  	node2 := &api.Node{
  1366  		ID: "id2",
  1367  		Spec: api.NodeSpec{
  1368  			Annotations: api.Annotations{
  1369  				Name: "id2",
  1370  			},
  1371  		},
  1372  		Status: api.NodeStatus{
  1373  			State: api.NodeStatus_READY,
  1374  		},
  1375  	}
  1376  
  1377  	s := store.NewMemoryStore(nil)
  1378  	assert.NotNil(t, s)
  1379  	defer s.Close()
  1380  
  1381  	err := s.Update(func(tx store.Tx) error {
  1382  		// Add initial nodes, and one task assigned to node id1
  1383  		assert.NoError(t, store.CreateNode(tx, node1))
  1384  		assert.NoError(t, store.CreateNode(tx, node2))
  1385  
  1386  		task1 := taskTemplate.Copy()
  1387  		task1.ID = "id1"
  1388  		task1.NodeID = "id1"
  1389  		task1.Status.State = api.TaskStateRunning
  1390  		assert.NoError(t, store.CreateTask(tx, task1))
  1391  		return nil
  1392  	})
  1393  	assert.NoError(t, err)
  1394  
  1395  	scheduler := New(s)
  1396  
  1397  	watch, cancel := state.Watch(s.WatchQueue(), api.EventUpdateTask{})
  1398  	defer cancel()
  1399  
  1400  	go func() {
  1401  		assert.NoError(t, scheduler.Run(ctx))
  1402  	}()
  1403  	defer scheduler.Stop()
  1404  
  1405  	for i := 0; i != 15; i++ {
  1406  		// Simulate a task failure cycle
  1407  		newTask := taskTemplate.Copy()
  1408  		newTask.ID = identity.NewID()
  1409  
  1410  		// After the condition for node faultiness has been reached,
  1411  		// bump the spec version to simulate a service update.
  1412  		if i > 5 {
  1413  			newTask.SpecVersion.Index++
  1414  		}
  1415  
  1416  		err = s.Update(func(tx store.Tx) error {
  1417  			assert.NoError(t, store.CreateTask(tx, newTask))
  1418  			return nil
  1419  		})
  1420  		assert.NoError(t, err)
  1421  
  1422  		assignment := watchAssignment(t, watch)
  1423  		assert.Equal(t, newTask.ID, assignment.ID)
  1424  
  1425  		if i < 5 || (i > 5 && i < 11) {
  1426  			// The first 5 attempts should be assigned to node id2 because
  1427  			// it has no replicas of the service.
  1428  			// Same with i=6 to i=10 inclusive, which is repeating the
  1429  			// same behavior with a different SpecVersion.
  1430  			assert.Equal(t, "id2", assignment.NodeID)
  1431  		} else {
  1432  			// The next ones should be assigned to id1, since we'll
  1433  			// flag id2 as potentially faulty.
  1434  			assert.Equal(t, "id1", assignment.NodeID)
  1435  		}
  1436  
  1437  		node1Info, err := scheduler.nodeSet.nodeInfo("id1")
  1438  		assert.NoError(t, err)
  1439  		node2Info, err := scheduler.nodeSet.nodeInfo("id2")
  1440  		assert.NoError(t, err)
  1441  		expectedNode1Spec1Failures := 0
  1442  		expectedNode1Spec2Failures := 0
  1443  		expectedNode2Spec1Failures := i
  1444  		expectedNode2Spec2Failures := 0
  1445  		if i > 5 {
  1446  			expectedNode1Spec1Failures = 1
  1447  			expectedNode2Spec1Failures = 5
  1448  			expectedNode2Spec2Failures = i - 6
  1449  		}
  1450  		if i > 11 {
  1451  			expectedNode1Spec2Failures = i - 11
  1452  			expectedNode2Spec2Failures = 5
  1453  		}
  1454  		assert.Len(t, node1Info.recentFailures[versionedService{serviceID: "service1", specVersion: api.Version{Index: 1}}], expectedNode1Spec1Failures)
  1455  		assert.Len(t, node1Info.recentFailures[versionedService{serviceID: "service1", specVersion: api.Version{Index: 2}}], expectedNode1Spec2Failures)
  1456  		assert.Len(t, node2Info.recentFailures[versionedService{serviceID: "service1", specVersion: api.Version{Index: 1}}], expectedNode2Spec1Failures)
  1457  		assert.Len(t, node2Info.recentFailures[versionedService{serviceID: "service1", specVersion: api.Version{Index: 2}}], expectedNode2Spec2Failures)
  1458  
  1459  		err = s.Update(func(tx store.Tx) error {
  1460  			newTask := store.GetTask(tx, newTask.ID)
  1461  			require.NotNil(t, newTask)
  1462  			newTask.Status.State = api.TaskStateFailed
  1463  			assert.NoError(t, store.UpdateTask(tx, newTask))
  1464  			return nil
  1465  		})
  1466  		assert.NoError(t, err)
  1467  	}
  1468  }
  1469  
  1470  func TestSchedulerResourceConstraint(t *testing.T) {
  1471  	ctx := context.Background()
  1472  	// Create a ready node without enough memory to run the task.
  1473  	underprovisionedNode := &api.Node{
  1474  		ID: "underprovisioned",
  1475  		Spec: api.NodeSpec{
  1476  			Annotations: api.Annotations{
  1477  				Name: "underprovisioned",
  1478  			},
  1479  		},
  1480  		Status: api.NodeStatus{
  1481  			State: api.NodeStatus_READY,
  1482  		},
  1483  		Description: &api.NodeDescription{
  1484  			Resources: &api.Resources{
  1485  				NanoCPUs:    1e9,
  1486  				MemoryBytes: 1e9,
  1487  				Generic: append(
  1488  					genericresource.NewSet("orange", "blue"),
  1489  					genericresource.NewDiscrete("apple", 1),
  1490  				),
  1491  			},
  1492  		},
  1493  	}
  1494  
  1495  	// Non-ready nodes that satisfy the constraints but shouldn't be used
  1496  	nonready1 := &api.Node{
  1497  		ID: "nonready1",
  1498  		Spec: api.NodeSpec{
  1499  			Annotations: api.Annotations{
  1500  				Name: "nonready1",
  1501  			},
  1502  		},
  1503  		Status: api.NodeStatus{
  1504  			State: api.NodeStatus_UNKNOWN,
  1505  		},
  1506  		Description: &api.NodeDescription{
  1507  			Resources: &api.Resources{
  1508  				NanoCPUs:    2e9,
  1509  				MemoryBytes: 2e9,
  1510  				Generic: append(
  1511  					genericresource.NewSet("orange", "blue", "red"),
  1512  					genericresource.NewDiscrete("apple", 2),
  1513  				),
  1514  			},
  1515  		},
  1516  	}
  1517  	nonready2 := &api.Node{
  1518  		ID: "nonready2",
  1519  		Spec: api.NodeSpec{
  1520  			Annotations: api.Annotations{
  1521  				Name: "nonready2",
  1522  			},
  1523  		},
  1524  		Status: api.NodeStatus{
  1525  			State: api.NodeStatus_UNKNOWN,
  1526  		},
  1527  		Description: &api.NodeDescription{
  1528  			Resources: &api.Resources{
  1529  				NanoCPUs:    2e9,
  1530  				MemoryBytes: 2e9,
  1531  				Generic: append(
  1532  					genericresource.NewSet("orange", "blue", "red"),
  1533  					genericresource.NewDiscrete("apple", 2),
  1534  				),
  1535  			},
  1536  		},
  1537  	}
  1538  
  1539  	initialTask := &api.Task{
  1540  		ID:           "id1",
  1541  		ServiceID:    "serviceID1",
  1542  		DesiredState: api.TaskStateRunning,
  1543  		Spec: api.TaskSpec{
  1544  			Runtime: &api.TaskSpec_Container{
  1545  				Container: &api.ContainerSpec{},
  1546  			},
  1547  			Resources: &api.ResourceRequirements{
  1548  				Reservations: &api.Resources{
  1549  					MemoryBytes: 2e9,
  1550  					Generic: []*api.GenericResource{
  1551  						genericresource.NewDiscrete("orange", 2),
  1552  						genericresource.NewDiscrete("apple", 2),
  1553  					},
  1554  				},
  1555  			},
  1556  		},
  1557  		ServiceAnnotations: api.Annotations{
  1558  			Name: "name1",
  1559  		},
  1560  		Status: api.TaskStatus{
  1561  			State: api.TaskStatePending,
  1562  		},
  1563  	}
  1564  
  1565  	initialService := &api.Service{
  1566  		ID: "serviceID1",
  1567  	}
  1568  
  1569  	s := store.NewMemoryStore(nil)
  1570  	assert.NotNil(t, s)
  1571  	defer s.Close()
  1572  
  1573  	err := s.Update(func(tx store.Tx) error {
  1574  		// Add initial node, service and task
  1575  		assert.NoError(t, store.CreateService(tx, initialService))
  1576  		assert.NoError(t, store.CreateTask(tx, initialTask))
  1577  		assert.NoError(t, store.CreateNode(tx, underprovisionedNode))
  1578  		assert.NoError(t, store.CreateNode(tx, nonready1))
  1579  		assert.NoError(t, store.CreateNode(tx, nonready2))
  1580  		return nil
  1581  	})
  1582  	assert.NoError(t, err)
  1583  
  1584  	scheduler := New(s)
  1585  
  1586  	watch, cancel := state.Watch(s.WatchQueue(), api.EventUpdateTask{})
  1587  	defer cancel()
  1588  
  1589  	go func() {
  1590  		assert.NoError(t, scheduler.Run(ctx))
  1591  	}()
  1592  	defer scheduler.Stop()
  1593  
  1594  	failure := watchAssignmentFailure(t, watch)
  1595  	assert.Equal(t, "no suitable node (2 nodes not available for new tasks; insufficient resources on 1 node)", failure.Status.Err)
  1596  
  1597  	err = s.Update(func(tx store.Tx) error {
  1598  		// Create a node with enough memory. The task should get
  1599  		// assigned to this node.
  1600  		node := &api.Node{
  1601  			ID: "bignode",
  1602  			Spec: api.NodeSpec{
  1603  				Annotations: api.Annotations{
  1604  					Name: "bignode",
  1605  				},
  1606  			},
  1607  			Description: &api.NodeDescription{
  1608  				Resources: &api.Resources{
  1609  					NanoCPUs:    4e9,
  1610  					MemoryBytes: 8e9,
  1611  					Generic: append(
  1612  						genericresource.NewSet("orange", "blue", "red", "green"),
  1613  						genericresource.NewDiscrete("apple", 4),
  1614  					),
  1615  				},
  1616  			},
  1617  			Status: api.NodeStatus{
  1618  				State: api.NodeStatus_READY,
  1619  			},
  1620  		}
  1621  		assert.NoError(t, store.CreateNode(tx, node))
  1622  		return nil
  1623  	})
  1624  	assert.NoError(t, err)
  1625  
  1626  	assignment := watchAssignment(t, watch)
  1627  	assert.Equal(t, "bignode", assignment.NodeID)
  1628  }
  1629  
  1630  func TestSchedulerResourceConstraintHA(t *testing.T) {
  1631  	// node 1 starts with 1 task, node 2 starts with 3 tasks.
  1632  	// however, node 1 only has enough memory to schedule one more task.
  1633  
  1634  	ctx := context.Background()
  1635  	node1 := &api.Node{
  1636  		ID: "id1",
  1637  		Spec: api.NodeSpec{
  1638  			Annotations: api.Annotations{
  1639  				Name: "id1",
  1640  			},
  1641  		},
  1642  		Status: api.NodeStatus{
  1643  			State: api.NodeStatus_READY,
  1644  		},
  1645  		Description: &api.NodeDescription{
  1646  			Resources: &api.Resources{
  1647  				MemoryBytes: 1e9,
  1648  				Generic: []*api.GenericResource{
  1649  					genericresource.NewDiscrete("apple", 2),
  1650  				},
  1651  			},
  1652  		},
  1653  	}
  1654  	node2 := &api.Node{
  1655  		ID: "id2",
  1656  		Spec: api.NodeSpec{
  1657  			Annotations: api.Annotations{
  1658  				Name: "id2",
  1659  			},
  1660  		},
  1661  		Status: api.NodeStatus{
  1662  			State: api.NodeStatus_READY,
  1663  		},
  1664  		Description: &api.NodeDescription{
  1665  			Resources: &api.Resources{
  1666  				MemoryBytes: 1e11,
  1667  				Generic: []*api.GenericResource{
  1668  					genericresource.NewDiscrete("apple", 5),
  1669  				},
  1670  			},
  1671  		},
  1672  	}
  1673  
  1674  	taskTemplate := &api.Task{
  1675  		DesiredState: api.TaskStateRunning,
  1676  		Spec: api.TaskSpec{
  1677  			Runtime: &api.TaskSpec_Container{
  1678  				Container: &api.ContainerSpec{},
  1679  			},
  1680  			Resources: &api.ResourceRequirements{
  1681  				Reservations: &api.Resources{
  1682  					MemoryBytes: 5e8,
  1683  					Generic: []*api.GenericResource{
  1684  						genericresource.NewDiscrete("apple", 1),
  1685  					},
  1686  				},
  1687  			},
  1688  		},
  1689  		ServiceAnnotations: api.Annotations{
  1690  			Name: "name1",
  1691  		},
  1692  		Status: api.TaskStatus{
  1693  			State: api.TaskStatePending,
  1694  		},
  1695  	}
  1696  
  1697  	s := store.NewMemoryStore(nil)
  1698  	assert.NotNil(t, s)
  1699  	defer s.Close()
  1700  
  1701  	err := s.Update(func(tx store.Tx) error {
  1702  		// Add initial node and task
  1703  		assert.NoError(t, store.CreateNode(tx, node1))
  1704  		assert.NoError(t, store.CreateNode(tx, node2))
  1705  
  1706  		// preassigned tasks
  1707  		task1 := taskTemplate.Copy()
  1708  		task1.ID = "id1"
  1709  		task1.NodeID = "id1"
  1710  		task1.Status.State = api.TaskStateRunning
  1711  		assert.NoError(t, store.CreateTask(tx, task1))
  1712  
  1713  		task2 := taskTemplate.Copy()
  1714  		task2.ID = "id2"
  1715  		task2.NodeID = "id2"
  1716  		task2.Status.State = api.TaskStateRunning
  1717  		assert.NoError(t, store.CreateTask(tx, task2))
  1718  
  1719  		task3 := taskTemplate.Copy()
  1720  		task3.ID = "id3"
  1721  		task3.NodeID = "id2"
  1722  		task3.Status.State = api.TaskStateRunning
  1723  		assert.NoError(t, store.CreateTask(tx, task3))
  1724  
  1725  		task4 := taskTemplate.Copy()
  1726  		task4.ID = "id4"
  1727  		task4.NodeID = "id2"
  1728  		task4.Status.State = api.TaskStateRunning
  1729  		assert.NoError(t, store.CreateTask(tx, task4))
  1730  
  1731  		// tasks to assign
  1732  		task5 := taskTemplate.Copy()
  1733  		task5.ID = "id5"
  1734  		assert.NoError(t, store.CreateTask(tx, task5))
  1735  
  1736  		task6 := taskTemplate.Copy()
  1737  		task6.ID = "id6"
  1738  		assert.NoError(t, store.CreateTask(tx, task6))
  1739  
  1740  		return nil
  1741  	})
  1742  	assert.NoError(t, err)
  1743  
  1744  	scheduler := New(s)
  1745  
  1746  	watch, cancel := state.Watch(s.WatchQueue(), api.EventUpdateTask{})
  1747  	defer cancel()
  1748  
  1749  	go func() {
  1750  		assert.NoError(t, scheduler.Run(ctx))
  1751  	}()
  1752  	defer scheduler.Stop()
  1753  
  1754  	assignment1 := watchAssignment(t, watch)
  1755  	if assignment1.ID != "id5" && assignment1.ID != "id6" {
  1756  		t.Fatal("assignment for unexpected task")
  1757  	}
  1758  	assignment2 := watchAssignment(t, watch)
  1759  	if assignment1.ID == "id5" {
  1760  		assert.Equal(t, "id6", assignment2.ID)
  1761  	} else {
  1762  		assert.Equal(t, "id5", assignment2.ID)
  1763  	}
  1764  
  1765  	if assignment1.NodeID == "id1" {
  1766  		assert.Equal(t, "id2", assignment2.NodeID)
  1767  	} else {
  1768  		assert.Equal(t, "id1", assignment2.NodeID)
  1769  	}
  1770  }
  1771  
  1772  func TestSchedulerResourceConstraintDeadTask(t *testing.T) {
  1773  	ctx := context.Background()
  1774  	// Create a ready node without enough memory to run the task.
  1775  	node := &api.Node{
  1776  		ID: "id1",
  1777  		Spec: api.NodeSpec{
  1778  			Annotations: api.Annotations{
  1779  				Name: "node",
  1780  			},
  1781  		},
  1782  		Status: api.NodeStatus{
  1783  			State: api.NodeStatus_READY,
  1784  		},
  1785  		Description: &api.NodeDescription{
  1786  			Resources: &api.Resources{
  1787  				NanoCPUs:    1e9,
  1788  				MemoryBytes: 1e9,
  1789  				Generic: []*api.GenericResource{
  1790  					genericresource.NewDiscrete("apple", 4),
  1791  				},
  1792  			},
  1793  		},
  1794  	}
  1795  
  1796  	bigTask1 := &api.Task{
  1797  		DesiredState: api.TaskStateRunning,
  1798  		ID:           "id1",
  1799  		ServiceID:    "serviceID1",
  1800  		Spec: api.TaskSpec{
  1801  			Resources: &api.ResourceRequirements{
  1802  				Reservations: &api.Resources{
  1803  					MemoryBytes: 8e8,
  1804  					Generic: []*api.GenericResource{
  1805  						genericresource.NewDiscrete("apple", 3),
  1806  					},
  1807  				},
  1808  			},
  1809  		},
  1810  		ServiceAnnotations: api.Annotations{
  1811  			Name: "big",
  1812  		},
  1813  		Status: api.TaskStatus{
  1814  			State: api.TaskStatePending,
  1815  		},
  1816  	}
  1817  
  1818  	bigTask2 := bigTask1.Copy()
  1819  	bigTask2.ID = "id2"
  1820  
  1821  	bigService := &api.Service{
  1822  		ID: "serviceID1",
  1823  	}
  1824  
  1825  	s := store.NewMemoryStore(nil)
  1826  	assert.NotNil(t, s)
  1827  	defer s.Close()
  1828  
  1829  	err := s.Update(func(tx store.Tx) error {
  1830  		// Add initial node, service and task
  1831  		assert.NoError(t, store.CreateService(tx, bigService))
  1832  		assert.NoError(t, store.CreateNode(tx, node))
  1833  		assert.NoError(t, store.CreateTask(tx, bigTask1))
  1834  		return nil
  1835  	})
  1836  	assert.NoError(t, err)
  1837  
  1838  	scheduler := New(s)
  1839  
  1840  	watch, cancel := state.Watch(s.WatchQueue(), api.EventUpdateTask{})
  1841  	defer cancel()
  1842  
  1843  	go func() {
  1844  		assert.NoError(t, scheduler.Run(ctx))
  1845  	}()
  1846  	defer scheduler.Stop()
  1847  
  1848  	// The task fits, so it should get assigned
  1849  	assignment := watchAssignment(t, watch)
  1850  	assert.Equal(t, "id1", assignment.ID)
  1851  	assert.Equal(t, "id1", assignment.NodeID)
  1852  
  1853  	err = s.Update(func(tx store.Tx) error {
  1854  		// Add a second task. It shouldn't get assigned because of
  1855  		// resource constraints.
  1856  		return store.CreateTask(tx, bigTask2)
  1857  	})
  1858  	assert.NoError(t, err)
  1859  
  1860  	failure := watchAssignmentFailure(t, watch)
  1861  	assert.Equal(t, "no suitable node (insufficient resources on 1 node)", failure.Status.Err)
  1862  
  1863  	err = s.Update(func(tx store.Tx) error {
  1864  		// The task becomes dead
  1865  		updatedTask := store.GetTask(tx, bigTask1.ID)
  1866  		updatedTask.Status.State = api.TaskStateShutdown
  1867  		return store.UpdateTask(tx, updatedTask)
  1868  	})
  1869  	assert.NoError(t, err)
  1870  
  1871  	// With the first task no longer consuming resources, the second
  1872  	// one can be scheduled.
  1873  	assignment = watchAssignment(t, watch)
  1874  	assert.Equal(t, "id2", assignment.ID)
  1875  	assert.Equal(t, "id1", assignment.NodeID)
  1876  }
  1877  
  1878  func TestSchedulerPreexistingDeadTask(t *testing.T) {
  1879  	ctx := context.Background()
  1880  	// Create a ready node without enough memory to run two tasks at once.
  1881  	node := &api.Node{
  1882  		ID: "id1",
  1883  		Spec: api.NodeSpec{
  1884  			Annotations: api.Annotations{
  1885  				Name: "node",
  1886  			},
  1887  		},
  1888  		Status: api.NodeStatus{
  1889  			State: api.NodeStatus_READY,
  1890  		},
  1891  		Description: &api.NodeDescription{
  1892  			Resources: &api.Resources{
  1893  				NanoCPUs:    1e9,
  1894  				MemoryBytes: 1e9,
  1895  				Generic: []*api.GenericResource{
  1896  					genericresource.NewDiscrete("apple", 1),
  1897  				},
  1898  			},
  1899  		},
  1900  	}
  1901  
  1902  	deadTask := &api.Task{
  1903  		DesiredState: api.TaskStateRunning,
  1904  		ID:           "id1",
  1905  		NodeID:       "id1",
  1906  		Spec: api.TaskSpec{
  1907  			Resources: &api.ResourceRequirements{
  1908  				Reservations: &api.Resources{
  1909  					MemoryBytes: 8e8,
  1910  					Generic: []*api.GenericResource{
  1911  						genericresource.NewDiscrete("apple", 1),
  1912  					},
  1913  				},
  1914  			},
  1915  		},
  1916  		ServiceAnnotations: api.Annotations{
  1917  			Name: "big",
  1918  		},
  1919  		Status: api.TaskStatus{
  1920  			State: api.TaskStateShutdown,
  1921  		},
  1922  	}
  1923  
  1924  	bigTask2 := deadTask.Copy()
  1925  	bigTask2.ID = "id2"
  1926  	bigTask2.Status.State = api.TaskStatePending
  1927  
  1928  	s := store.NewMemoryStore(nil)
  1929  	assert.NotNil(t, s)
  1930  	defer s.Close()
  1931  
  1932  	err := s.Update(func(tx store.Tx) error {
  1933  		// Add initial node and task
  1934  		assert.NoError(t, store.CreateNode(tx, node))
  1935  		assert.NoError(t, store.CreateTask(tx, deadTask))
  1936  		return nil
  1937  	})
  1938  	assert.NoError(t, err)
  1939  
  1940  	scheduler := New(s)
  1941  
  1942  	watch, cancel := state.Watch(s.WatchQueue(), api.EventUpdateTask{})
  1943  	defer cancel()
  1944  
  1945  	go func() {
  1946  		assert.NoError(t, scheduler.Run(ctx))
  1947  	}()
  1948  	defer scheduler.Stop()
  1949  
  1950  	err = s.Update(func(tx store.Tx) error {
  1951  		// Add a second task. It should get assigned because the task
  1952  		// using the resources is past the running state.
  1953  		return store.CreateTask(tx, bigTask2)
  1954  	})
  1955  	assert.NoError(t, err)
  1956  
  1957  	assignment := watchAssignment(t, watch)
  1958  	assert.Equal(t, "id2", assignment.ID)
  1959  	assert.Equal(t, "id1", assignment.NodeID)
  1960  }
  1961  
  1962  func TestSchedulerCompatiblePlatform(t *testing.T) {
  1963  	ctx := context.Background()
  1964  	// create tasks
  1965  	// task1 - has a node it can run on
  1966  	task1 := &api.Task{
  1967  		ID:           "id1",
  1968  		ServiceID:    "serviceID1",
  1969  		DesiredState: api.TaskStateRunning,
  1970  		ServiceAnnotations: api.Annotations{
  1971  			Name: "name1",
  1972  		},
  1973  		Status: api.TaskStatus{
  1974  			State: api.TaskStatePending,
  1975  		},
  1976  		Spec: api.TaskSpec{
  1977  			Placement: &api.Placement{
  1978  				Platforms: []*api.Platform{
  1979  					{
  1980  						Architecture: "amd64",
  1981  						OS:           "linux",
  1982  					},
  1983  				},
  1984  			},
  1985  		},
  1986  	}
  1987  
  1988  	// task2 - has no node it can run on
  1989  	task2 := &api.Task{
  1990  		ID:           "id2",
  1991  		ServiceID:    "serviceID1",
  1992  		DesiredState: api.TaskStateRunning,
  1993  		ServiceAnnotations: api.Annotations{
  1994  			Name: "name2",
  1995  		},
  1996  		Status: api.TaskStatus{
  1997  			State: api.TaskStatePending,
  1998  		},
  1999  		Spec: api.TaskSpec{
  2000  			Placement: &api.Placement{
  2001  				Platforms: []*api.Platform{
  2002  					{
  2003  						Architecture: "arm",
  2004  						OS:           "linux",
  2005  					},
  2006  				},
  2007  			},
  2008  		},
  2009  	}
  2010  
  2011  	// task3 - no platform constraints, should run on any node
  2012  	task3 := &api.Task{
  2013  		ID:           "id3",
  2014  		ServiceID:    "serviceID1",
  2015  		DesiredState: api.TaskStateRunning,
  2016  		ServiceAnnotations: api.Annotations{
  2017  			Name: "name3",
  2018  		},
  2019  		Status: api.TaskStatus{
  2020  			State: api.TaskStatePending,
  2021  		},
  2022  	}
  2023  
  2024  	// task4 - only OS constraint, is runnable on any linux node
  2025  	task4 := &api.Task{
  2026  		ID:           "id4",
  2027  		ServiceID:    "serviceID1",
  2028  		DesiredState: api.TaskStateRunning,
  2029  		ServiceAnnotations: api.Annotations{
  2030  			Name: "name4",
  2031  		},
  2032  		Status: api.TaskStatus{
  2033  			State: api.TaskStatePending,
  2034  		},
  2035  		Spec: api.TaskSpec{
  2036  			Placement: &api.Placement{
  2037  				Platforms: []*api.Platform{
  2038  					{
  2039  						Architecture: "",
  2040  						OS:           "linux",
  2041  					},
  2042  				},
  2043  			},
  2044  		},
  2045  	}
  2046  
  2047  	// task5 - supported on multiple platforms
  2048  	task5 := &api.Task{
  2049  		ID:           "id5",
  2050  		ServiceID:    "serviceID1",
  2051  		DesiredState: api.TaskStateRunning,
  2052  		ServiceAnnotations: api.Annotations{
  2053  			Name: "name5",
  2054  		},
  2055  		Status: api.TaskStatus{
  2056  			State: api.TaskStatePending,
  2057  		},
  2058  		Spec: api.TaskSpec{
  2059  			Placement: &api.Placement{
  2060  				Platforms: []*api.Platform{
  2061  					{
  2062  						Architecture: "amd64",
  2063  						OS:           "linux",
  2064  					},
  2065  					{
  2066  						Architecture: "x86_64",
  2067  						OS:           "windows",
  2068  					},
  2069  				},
  2070  			},
  2071  		},
  2072  	}
  2073  
  2074  	node1 := &api.Node{
  2075  		ID: "node1",
  2076  		Spec: api.NodeSpec{
  2077  			Annotations: api.Annotations{
  2078  				Name: "node1",
  2079  			},
  2080  		},
  2081  		Status: api.NodeStatus{
  2082  			State: api.NodeStatus_READY,
  2083  		},
  2084  		Description: &api.NodeDescription{
  2085  			Platform: &api.Platform{
  2086  				Architecture: "x86_64",
  2087  				OS:           "linux",
  2088  			},
  2089  		},
  2090  	}
  2091  
  2092  	node2 := &api.Node{
  2093  		ID: "node2",
  2094  		Spec: api.NodeSpec{
  2095  			Annotations: api.Annotations{
  2096  				Name: "node2",
  2097  			},
  2098  		},
  2099  		Status: api.NodeStatus{
  2100  			State: api.NodeStatus_READY,
  2101  		},
  2102  		Description: &api.NodeDescription{
  2103  			Platform: &api.Platform{
  2104  				Architecture: "amd64",
  2105  				OS:           "windows",
  2106  			},
  2107  		},
  2108  	}
  2109  
  2110  	// node with nil platform description, cannot schedule anything
  2111  	// with a platform constraint
  2112  	node3 := &api.Node{
  2113  		ID: "node3",
  2114  		Spec: api.NodeSpec{
  2115  			Annotations: api.Annotations{
  2116  				Name: "node3",
  2117  			},
  2118  		},
  2119  		Status: api.NodeStatus{
  2120  			State: api.NodeStatus_READY,
  2121  		},
  2122  		Description: &api.NodeDescription{},
  2123  	}
  2124  
  2125  	service1 := &api.Service{
  2126  		ID: "serviceID1",
  2127  	}
  2128  	s := store.NewMemoryStore(nil)
  2129  	assert.NotNil(t, s)
  2130  	defer s.Close()
  2131  
  2132  	err := s.Update(func(tx store.Tx) error {
  2133  		// Add initial task, service and nodes to the store
  2134  		assert.NoError(t, store.CreateService(tx, service1))
  2135  		assert.NoError(t, store.CreateTask(tx, task1))
  2136  		assert.NoError(t, store.CreateNode(tx, node1))
  2137  		assert.NoError(t, store.CreateNode(tx, node2))
  2138  		assert.NoError(t, store.CreateNode(tx, node3))
  2139  		return nil
  2140  	})
  2141  	assert.NoError(t, err)
  2142  
  2143  	scheduler := New(s)
  2144  
  2145  	watch, cancel := state.Watch(s.WatchQueue(), api.EventUpdateTask{})
  2146  	defer cancel()
  2147  
  2148  	go func() {
  2149  		assert.NoError(t, scheduler.Run(ctx))
  2150  	}()
  2151  	defer scheduler.Stop()
  2152  
  2153  	// task1 should get assigned
  2154  	assignment1 := watchAssignment(t, watch)
  2155  	assert.Equal(t, "node1", assignment1.NodeID)
  2156  
  2157  	// add task2
  2158  	err = s.Update(func(tx store.Tx) error {
  2159  		assert.NoError(t, store.CreateTask(tx, task2))
  2160  		return nil
  2161  	})
  2162  	assert.NoError(t, err)
  2163  	failure := watchAssignmentFailure(t, watch)
  2164  	assert.Equal(t, "no suitable node (unsupported platform on 3 nodes)", failure.Status.Err)
  2165  
  2166  	// add task3
  2167  	err = s.Update(func(tx store.Tx) error {
  2168  		assert.NoError(t, store.CreateTask(tx, task3))
  2169  		return nil
  2170  	})
  2171  	assert.NoError(t, err)
  2172  	assignment2 := watchAssignment(t, watch)
  2173  	assert.Regexp(t, assignment2.NodeID, "(node2|node3)")
  2174  
  2175  	// add task4
  2176  	err = s.Update(func(tx store.Tx) error {
  2177  		assert.NoError(t, store.CreateTask(tx, task4))
  2178  		return nil
  2179  	})
  2180  	assert.NoError(t, err)
  2181  	assignment3 := watchAssignment(t, watch)
  2182  	assert.Equal(t, "node1", assignment3.NodeID)
  2183  
  2184  	// add task5
  2185  	err = s.Update(func(tx store.Tx) error {
  2186  		assert.NoError(t, store.CreateTask(tx, task5))
  2187  		return nil
  2188  	})
  2189  	assert.NoError(t, err)
  2190  	assignment4 := watchAssignment(t, watch)
  2191  	assert.Regexp(t, assignment4.NodeID, "(node1|node2)")
  2192  }
  2193  
  2194  // TestSchedulerUnassignedMap tests that unassigned tasks are deleted from unassignedTasks when the service is removed
  2195  func TestSchedulerUnassignedMap(t *testing.T) {
  2196  	ctx := context.Background()
  2197  	// create a service and a task with OS constraint that is not met
  2198  	task1 := &api.Task{
  2199  		ID:           "id1",
  2200  		ServiceID:    "serviceID1",
  2201  		DesiredState: api.TaskStateRunning,
  2202  		ServiceAnnotations: api.Annotations{
  2203  			Name: "name1",
  2204  		},
  2205  		Status: api.TaskStatus{
  2206  			State: api.TaskStatePending,
  2207  		},
  2208  		Spec: api.TaskSpec{
  2209  			Placement: &api.Placement{
  2210  				Platforms: []*api.Platform{
  2211  					{
  2212  						Architecture: "amd64",
  2213  						OS:           "windows",
  2214  					},
  2215  				},
  2216  			},
  2217  		},
  2218  	}
  2219  
  2220  	node1 := &api.Node{
  2221  		ID: "node1",
  2222  		Spec: api.NodeSpec{
  2223  			Annotations: api.Annotations{
  2224  				Name: "node1",
  2225  			},
  2226  		},
  2227  		Status: api.NodeStatus{
  2228  			State: api.NodeStatus_READY,
  2229  		},
  2230  		Description: &api.NodeDescription{
  2231  			Platform: &api.Platform{
  2232  				Architecture: "x86_64",
  2233  				OS:           "linux",
  2234  			},
  2235  		},
  2236  	}
  2237  
  2238  	service1 := &api.Service{
  2239  		ID: "serviceID1",
  2240  	}
  2241  
  2242  	s := store.NewMemoryStore(nil)
  2243  	assert.NotNil(t, s)
  2244  	defer s.Close()
  2245  
  2246  	err := s.Update(func(tx store.Tx) error {
  2247  		// Add initial task, service and nodes to the store
  2248  		assert.NoError(t, store.CreateService(tx, service1))
  2249  		assert.NoError(t, store.CreateTask(tx, task1))
  2250  		assert.NoError(t, store.CreateNode(tx, node1))
  2251  		return nil
  2252  	})
  2253  	assert.NoError(t, err)
  2254  
  2255  	scheduler := New(s)
  2256  	scheduler.unassignedTasks["id1"] = task1
  2257  
  2258  	scheduler.tick(ctx)
  2259  	// task1 is in the unassigned map
  2260  	assert.Contains(t, scheduler.unassignedTasks, task1.ID)
  2261  
  2262  	// delete the service of an unassigned task
  2263  	err = s.Update(func(tx store.Tx) error {
  2264  		assert.NoError(t, store.DeleteService(tx, service1.ID))
  2265  		return nil
  2266  	})
  2267  	assert.NoError(t, err)
  2268  
  2269  	scheduler.tick(ctx)
  2270  	// task1 is removed from the unassigned map
  2271  	assert.NotContains(t, scheduler.unassignedTasks, task1.ID)
  2272  }
  2273  
  2274  func TestPreassignedTasks(t *testing.T) {
  2275  	ctx := context.Background()
  2276  	initialNodeSet := []*api.Node{
  2277  		{
  2278  			ID: "node1",
  2279  			Spec: api.NodeSpec{
  2280  				Annotations: api.Annotations{
  2281  					Name: "name1",
  2282  				},
  2283  			},
  2284  			Status: api.NodeStatus{
  2285  				State: api.NodeStatus_READY,
  2286  			},
  2287  		},
  2288  		{
  2289  			ID: "node2",
  2290  			Spec: api.NodeSpec{
  2291  				Annotations: api.Annotations{
  2292  					Name: "name2",
  2293  				},
  2294  			},
  2295  			Status: api.NodeStatus{
  2296  				State: api.NodeStatus_READY,
  2297  			},
  2298  		},
  2299  	}
  2300  
  2301  	initialTaskSet := []*api.Task{
  2302  		{
  2303  			ID:           "task1",
  2304  			DesiredState: api.TaskStateRunning,
  2305  			ServiceAnnotations: api.Annotations{
  2306  				Name: "name1",
  2307  			},
  2308  
  2309  			Status: api.TaskStatus{
  2310  				State: api.TaskStatePending,
  2311  			},
  2312  		},
  2313  		{
  2314  			ID:           "task2",
  2315  			DesiredState: api.TaskStateRunning,
  2316  			ServiceAnnotations: api.Annotations{
  2317  				Name: "name2",
  2318  			},
  2319  			Status: api.TaskStatus{
  2320  				State: api.TaskStatePending,
  2321  			},
  2322  			NodeID: initialNodeSet[0].ID,
  2323  		},
  2324  		{
  2325  			ID:           "task3",
  2326  			DesiredState: api.TaskStateRunning,
  2327  			ServiceAnnotations: api.Annotations{
  2328  				Name: "name2",
  2329  			},
  2330  			Status: api.TaskStatus{
  2331  				State: api.TaskStatePending,
  2332  			},
  2333  			NodeID: initialNodeSet[0].ID,
  2334  		},
  2335  	}
  2336  
  2337  	s := store.NewMemoryStore(nil)
  2338  	assert.NotNil(t, s)
  2339  	defer s.Close()
  2340  
  2341  	err := s.Update(func(tx store.Tx) error {
  2342  		// Prepopulate nodes
  2343  		for _, n := range initialNodeSet {
  2344  			assert.NoError(t, store.CreateNode(tx, n))
  2345  		}
  2346  
  2347  		// Prepopulate tasks
  2348  		for _, task := range initialTaskSet {
  2349  			assert.NoError(t, store.CreateTask(tx, task))
  2350  		}
  2351  		return nil
  2352  	})
  2353  	assert.NoError(t, err)
  2354  
  2355  	scheduler := New(s)
  2356  
  2357  	watch, cancel := state.Watch(s.WatchQueue(), api.EventUpdateTask{})
  2358  	defer cancel()
  2359  
  2360  	go func() {
  2361  		assert.NoError(t, scheduler.Run(ctx))
  2362  	}()
  2363  
  2364  	//preassigned tasks would be processed first
  2365  	assignment1 := watchAssignment(t, watch)
  2366  	// task2 and task3 are preassigned to node1
  2367  	assert.Equal(t, assignment1.NodeID, "node1")
  2368  	assert.Regexp(t, assignment1.ID, "(task2|task3)")
  2369  
  2370  	assignment2 := watchAssignment(t, watch)
  2371  	if assignment1.ID == "task2" {
  2372  		assert.Equal(t, "task3", assignment2.ID)
  2373  	} else {
  2374  		assert.Equal(t, "task2", assignment2.ID)
  2375  	}
  2376  
  2377  	// task1 would be assigned to node2 because node1 has 2 tasks already
  2378  	assignment3 := watchAssignment(t, watch)
  2379  	assert.Equal(t, assignment3.ID, "task1")
  2380  	assert.Equal(t, assignment3.NodeID, "node2")
  2381  }
  2382  
  2383  func TestIgnoreTasks(t *testing.T) {
  2384  	ctx := context.Background()
  2385  	initialNodeSet := []*api.Node{
  2386  		{
  2387  			ID: "node1",
  2388  			Spec: api.NodeSpec{
  2389  				Annotations: api.Annotations{
  2390  					Name: "name1",
  2391  				},
  2392  			},
  2393  			Status: api.NodeStatus{
  2394  				State: api.NodeStatus_READY,
  2395  			},
  2396  		},
  2397  	}
  2398  
  2399  	// Tasks with desired state running, shutdown, remove.
  2400  	initialTaskSet := []*api.Task{
  2401  		{
  2402  			ID:           "task1",
  2403  			DesiredState: api.TaskStateRunning,
  2404  			ServiceAnnotations: api.Annotations{
  2405  				Name: "name1",
  2406  			},
  2407  
  2408  			Status: api.TaskStatus{
  2409  				State: api.TaskStatePending,
  2410  			},
  2411  		},
  2412  		{
  2413  			ID:           "task2",
  2414  			DesiredState: api.TaskStateShutdown,
  2415  			ServiceAnnotations: api.Annotations{
  2416  				Name: "name2",
  2417  			},
  2418  			Status: api.TaskStatus{
  2419  				State: api.TaskStatePending,
  2420  			},
  2421  			NodeID: initialNodeSet[0].ID,
  2422  		},
  2423  		{
  2424  			ID:           "task3",
  2425  			DesiredState: api.TaskStateRemove,
  2426  			ServiceAnnotations: api.Annotations{
  2427  				Name: "name2",
  2428  			},
  2429  			Status: api.TaskStatus{
  2430  				State: api.TaskStatePending,
  2431  			},
  2432  			NodeID: initialNodeSet[0].ID,
  2433  		},
  2434  	}
  2435  
  2436  	s := store.NewMemoryStore(nil)
  2437  	assert.NotNil(t, s)
  2438  	defer s.Close()
  2439  
  2440  	err := s.Update(func(tx store.Tx) error {
  2441  		// Prepopulate nodes
  2442  		for _, n := range initialNodeSet {
  2443  			assert.NoError(t, store.CreateNode(tx, n))
  2444  		}
  2445  
  2446  		// Prepopulate tasks
  2447  		for _, task := range initialTaskSet {
  2448  			assert.NoError(t, store.CreateTask(tx, task))
  2449  		}
  2450  		return nil
  2451  	})
  2452  	assert.NoError(t, err)
  2453  
  2454  	scheduler := New(s)
  2455  
  2456  	watch, cancel := state.Watch(s.WatchQueue(), api.EventUpdateTask{})
  2457  	defer cancel()
  2458  
  2459  	go func() {
  2460  		assert.NoError(t, scheduler.Run(ctx))
  2461  	}()
  2462  
  2463  	// task1 is the only task that gets assigned since other two tasks
  2464  	// are ignored by the scheduler.
  2465  	// Normally task2/task3 should get assigned first since its a preassigned task.
  2466  	assignment3 := watchAssignment(t, watch)
  2467  	assert.Equal(t, assignment3.ID, "task1")
  2468  	assert.Equal(t, assignment3.NodeID, "node1")
  2469  }
  2470  
  2471  // TestNoStuckTask tests that a task which is cannot be scheduled (because of
  2472  // MaxReplicas or otherwise) does not remain stuck in the Pending state forever
  2473  // if the service is updated.
  2474  //
  2475  // Before the change which introduced this test, if a task got stuck in
  2476  // Pending, it could stay there forever, because it could not progress through
  2477  // the scheduler, and could likewise not be shut down.
  2478  //
  2479  // After the change which introduced this test, if the desired state of a task
  2480  // is terminal, and the task is in pending, and there is no suitable node, then
  2481  // the task is shut down.
  2482  func TestUnscheduleableTask(t *testing.T) {
  2483  	ctx := context.Background()
  2484  	node := &api.Node{
  2485  		ID: "nodeid1",
  2486  		Spec: api.NodeSpec{
  2487  			Annotations: api.Annotations{
  2488  				Name: "node",
  2489  			},
  2490  		},
  2491  		Status: api.NodeStatus{
  2492  			State: api.NodeStatus_READY,
  2493  		},
  2494  		Description: &api.NodeDescription{},
  2495  	}
  2496  
  2497  	task1 := &api.Task{
  2498  		ID:           "taskid1",
  2499  		ServiceID:    "serviceid1",
  2500  		DesiredState: api.TaskStateRunning,
  2501  		SpecVersion: &api.Version{
  2502  			Index: 0,
  2503  		},
  2504  		Spec: api.TaskSpec{
  2505  			Runtime: &api.TaskSpec_Container{
  2506  				Container: &api.ContainerSpec{},
  2507  			},
  2508  			Placement: &api.Placement{
  2509  				MaxReplicas: 1,
  2510  			},
  2511  		},
  2512  		ServiceAnnotations: api.Annotations{
  2513  			Name: "servicename1",
  2514  		},
  2515  		Status: api.TaskStatus{
  2516  			State: api.TaskStatePending,
  2517  		},
  2518  	}
  2519  
  2520  	task2 := &api.Task{
  2521  		ID:           "taskid2",
  2522  		ServiceID:    "serviceid1",
  2523  		DesiredState: api.TaskStateRunning,
  2524  		SpecVersion: &api.Version{
  2525  			Index: 0,
  2526  		},
  2527  		Spec: api.TaskSpec{
  2528  			Runtime: &api.TaskSpec_Container{
  2529  				Container: &api.ContainerSpec{},
  2530  			},
  2531  			Placement: &api.Placement{
  2532  				MaxReplicas: 1,
  2533  			},
  2534  		},
  2535  		ServiceAnnotations: api.Annotations{
  2536  			Name: "servicename1",
  2537  		},
  2538  		Status: api.TaskStatus{
  2539  			State: api.TaskStatePending,
  2540  		},
  2541  	}
  2542  
  2543  	service1 := &api.Service{
  2544  		ID: "serviceid1",
  2545  		SpecVersion: &api.Version{
  2546  			Index: 0,
  2547  		},
  2548  	}
  2549  
  2550  	s := store.NewMemoryStore(nil)
  2551  	assert.NotNil(t, s)
  2552  	defer s.Close()
  2553  
  2554  	err := s.Update(func(tx store.Tx) error {
  2555  		assert.NoError(t, store.CreateService(tx, service1))
  2556  		assert.NoError(t, store.CreateTask(tx, task1))
  2557  		assert.NoError(t, store.CreateTask(tx, task2))
  2558  		assert.NoError(t, store.CreateNode(tx, node))
  2559  		return nil
  2560  	})
  2561  	assert.NoError(t, err)
  2562  
  2563  	scheduler := New(s)
  2564  
  2565  	watch, cancel := state.Watch(s.WatchQueue(), api.EventUpdateTask{})
  2566  	defer cancel()
  2567  
  2568  	go func() {
  2569  		assert.NoError(t, scheduler.Run(ctx))
  2570  	}()
  2571  	defer scheduler.Stop()
  2572  
  2573  	var assigned, failed *api.Task
  2574  watchAttempt:
  2575  	for {
  2576  		select {
  2577  		case event := <-watch:
  2578  			if task, ok := event.(api.EventUpdateTask); ok {
  2579  				if task.Task.Status.State < api.TaskStateAssigned {
  2580  					failed = task.Task.Copy()
  2581  				} else if task.Task.Status.State >= api.TaskStateAssigned &&
  2582  					task.Task.Status.State <= api.TaskStateRunning &&
  2583  					task.Task.NodeID != "" {
  2584  					assigned = task.Task.Copy()
  2585  				}
  2586  			}
  2587  		case <-time.After(time.Second):
  2588  			assignedID := "none"
  2589  			failedID := "none"
  2590  
  2591  			if assigned != nil {
  2592  				assignedID = assigned.ID
  2593  			}
  2594  			if failed != nil {
  2595  				failedID = failed.ID
  2596  			}
  2597  			t.Fatalf(
  2598  				"did not get assignment and failure. Assigned: %v, Failed: %v",
  2599  				assignedID, failedID,
  2600  			)
  2601  		}
  2602  		if assigned != nil && failed != nil {
  2603  			break watchAttempt
  2604  		}
  2605  	}
  2606  
  2607  	assert.Equal(t, "no suitable node (max replicas per node limit exceed)", failed.Status.Err)
  2608  
  2609  	// this is a case where the service is scaled down. in practice, scaling
  2610  	// down a service does not work like this, but for this test, it can.
  2611  	task1Update := &api.Task{
  2612  		ID:           "taskid1update",
  2613  		ServiceID:    "serviceid1",
  2614  		DesiredState: api.TaskStateRunning,
  2615  		SpecVersion: &api.Version{
  2616  			Index: 1,
  2617  		},
  2618  		Spec: api.TaskSpec{
  2619  			Runtime: &api.TaskSpec_Container{
  2620  				Container: &api.ContainerSpec{},
  2621  			},
  2622  			Placement: &api.Placement{
  2623  				MaxReplicas: 1,
  2624  			},
  2625  		},
  2626  		ServiceAnnotations: api.Annotations{
  2627  			Name: "servicename1",
  2628  		},
  2629  		Status: api.TaskStatus{
  2630  			State: api.TaskStatePending,
  2631  		},
  2632  	}
  2633  
  2634  	service1.SpecVersion.Index = 1
  2635  
  2636  	// now, update the tasks.
  2637  	err = s.Update(func(tx store.Tx) error {
  2638  		assigned.Status.State = api.TaskStateRunning
  2639  		// simulate Start First ordering, where we'll start the new task then
  2640  		// stop the old one. this is worst-case scenario, because it means that
  2641  		// the other task (the one that succeeded) cannot be freed yet.
  2642  		//
  2643  		// if we set the old task to a terminal state, it there will be a race
  2644  		// in the test where the old task might be marked freed, allowing the
  2645  		// failed task to progress. We want to handle the case where this does
  2646  		// not happen
  2647  		assert.NoError(t, store.UpdateTask(tx, assigned))
  2648  
  2649  		failed.DesiredState = api.TaskStateShutdown
  2650  		assert.NoError(t, store.UpdateTask(tx, failed))
  2651  
  2652  		assert.NoError(t, store.CreateTask(tx, task1Update))
  2653  
  2654  		assert.NoError(t, store.UpdateService(tx, service1))
  2655  
  2656  		return nil
  2657  	})
  2658  
  2659  	assert.NoError(t, err)
  2660  
  2661  	// because the failed task is still currently under the purview of the
  2662  	// scheduler, the scheduler should shut it down.
  2663  watchShutdown:
  2664  	for {
  2665  		select {
  2666  		case event := <-watch:
  2667  			if task, ok := event.(api.EventUpdateTask); ok {
  2668  				if task.Task.ID == failed.ID {
  2669  					if task.Task.Status.State >= api.TaskStateShutdown {
  2670  						break watchShutdown
  2671  					}
  2672  				}
  2673  				if task.Task.ID == task1Update.ID {
  2674  					if task.Task.Status.State == api.TaskStateAssigned {
  2675  						t.Logf("updated task assigned")
  2676  					}
  2677  				}
  2678  			}
  2679  		case <-time.After(time.Second):
  2680  			t.Fatalf("old task %s never shut down", failed.ID)
  2681  		}
  2682  	}
  2683  }
  2684  
  2685  func watchAssignmentFailure(t *testing.T, watch chan events.Event) *api.Task {
  2686  	for {
  2687  		select {
  2688  		case event := <-watch:
  2689  			if task, ok := event.(api.EventUpdateTask); ok {
  2690  				if task.Task.Status.State < api.TaskStateAssigned {
  2691  					return task.Task
  2692  				}
  2693  			}
  2694  		case <-time.After(time.Second):
  2695  			t.Fatal("no task assignment failure")
  2696  		}
  2697  	}
  2698  }
  2699  
  2700  func watchAssignment(t *testing.T, watch chan events.Event) *api.Task {
  2701  	for {
  2702  		select {
  2703  		case event := <-watch:
  2704  			if task, ok := event.(api.EventUpdateTask); ok {
  2705  				if task.Task.Status.State >= api.TaskStateAssigned &&
  2706  					task.Task.Status.State <= api.TaskStateRunning &&
  2707  					task.Task.NodeID != "" {
  2708  					return task.Task
  2709  				}
  2710  			}
  2711  		case <-time.After(time.Second):
  2712  			t.Fatal("no task assignment")
  2713  		}
  2714  	}
  2715  }
  2716  
  2717  func TestSchedulerPluginConstraint(t *testing.T) {
  2718  	ctx := context.Background()
  2719  
  2720  	// Node1: vol plugin1
  2721  	n1 := &api.Node{
  2722  		ID: "node1_ID",
  2723  		Spec: api.NodeSpec{
  2724  			Annotations: api.Annotations{
  2725  				Name: "node1",
  2726  			},
  2727  		},
  2728  		Description: &api.NodeDescription{
  2729  			Engine: &api.EngineDescription{
  2730  				Plugins: []api.PluginDescription{
  2731  					{
  2732  						Type: "Volume",
  2733  						Name: "plugin1",
  2734  					},
  2735  					{
  2736  						Type: "Log",
  2737  						Name: "default",
  2738  					},
  2739  				},
  2740  			},
  2741  		},
  2742  		Status: api.NodeStatus{
  2743  			State: api.NodeStatus_READY,
  2744  		},
  2745  	}
  2746  
  2747  	// Node2: vol plugin1, vol plugin2
  2748  	n2 := &api.Node{
  2749  		ID: "node2_ID",
  2750  		Spec: api.NodeSpec{
  2751  			Annotations: api.Annotations{
  2752  				Name: "node2",
  2753  			},
  2754  		},
  2755  		Description: &api.NodeDescription{
  2756  			Engine: &api.EngineDescription{
  2757  				Plugins: []api.PluginDescription{
  2758  					{
  2759  						Type: "Volume",
  2760  						Name: "plugin1",
  2761  					},
  2762  					{
  2763  						Type: "Volume",
  2764  						Name: "plugin2",
  2765  					},
  2766  					{
  2767  						Type: "Log",
  2768  						Name: "default",
  2769  					},
  2770  				},
  2771  			},
  2772  		},
  2773  		Status: api.NodeStatus{
  2774  			State: api.NodeStatus_READY,
  2775  		},
  2776  	}
  2777  
  2778  	// Node3: vol plugin1, network plugin1
  2779  	n3 := &api.Node{
  2780  		ID: "node3_ID",
  2781  		Spec: api.NodeSpec{
  2782  			Annotations: api.Annotations{
  2783  				Name: "node3",
  2784  			},
  2785  		},
  2786  		Description: &api.NodeDescription{
  2787  			Engine: &api.EngineDescription{
  2788  				Plugins: []api.PluginDescription{
  2789  					{
  2790  						Type: "Volume",
  2791  						Name: "plugin1",
  2792  					},
  2793  					{
  2794  						Type: "Network",
  2795  						Name: "plugin1",
  2796  					},
  2797  					{
  2798  						Type: "Log",
  2799  						Name: "default",
  2800  					},
  2801  				},
  2802  			},
  2803  		},
  2804  		Status: api.NodeStatus{
  2805  			State: api.NodeStatus_READY,
  2806  		},
  2807  	}
  2808  
  2809  	// Node4: log plugin1
  2810  	n4 := &api.Node{
  2811  		ID: "node4_ID",
  2812  		Spec: api.NodeSpec{
  2813  			Annotations: api.Annotations{
  2814  				Name: "node4",
  2815  			},
  2816  		},
  2817  		Description: &api.NodeDescription{
  2818  			Engine: &api.EngineDescription{
  2819  				Plugins: []api.PluginDescription{
  2820  					{
  2821  						Type: "Log",
  2822  						Name: "plugin1",
  2823  					},
  2824  				},
  2825  			},
  2826  		},
  2827  		Status: api.NodeStatus{
  2828  			State: api.NodeStatus_READY,
  2829  		},
  2830  	}
  2831  
  2832  	volumeOptionsDriver := func(driver string) *api.Mount_VolumeOptions {
  2833  		return &api.Mount_VolumeOptions{
  2834  			DriverConfig: &api.Driver{
  2835  				Name: driver,
  2836  			},
  2837  		}
  2838  	}
  2839  
  2840  	// Task0: bind mount
  2841  	t0 := &api.Task{
  2842  		ID:           "task0_ID",
  2843  		ServiceID:    "serviceID1",
  2844  		DesiredState: api.TaskStateRunning,
  2845  		Spec: api.TaskSpec{
  2846  			Runtime: &api.TaskSpec_Container{
  2847  				Container: &api.ContainerSpec{
  2848  					Mounts: []api.Mount{
  2849  						{
  2850  							Source: "/src",
  2851  							Target: "/foo",
  2852  							Type:   api.MountTypeBind,
  2853  						},
  2854  					},
  2855  				},
  2856  			},
  2857  		},
  2858  		ServiceAnnotations: api.Annotations{
  2859  			Name: "task0",
  2860  		},
  2861  		Status: api.TaskStatus{
  2862  			State: api.TaskStatePending,
  2863  		},
  2864  	}
  2865  
  2866  	// Task1: vol plugin1
  2867  	t1 := &api.Task{
  2868  		ID:           "task1_ID",
  2869  		ServiceID:    "serviceID1",
  2870  		DesiredState: api.TaskStateRunning,
  2871  		Spec: api.TaskSpec{
  2872  			Runtime: &api.TaskSpec_Container{
  2873  				Container: &api.ContainerSpec{
  2874  					Mounts: []api.Mount{
  2875  						{
  2876  							Source:        "testVol1",
  2877  							Target:        "/foo",
  2878  							Type:          api.MountTypeVolume,
  2879  							VolumeOptions: volumeOptionsDriver("plugin1"),
  2880  						},
  2881  					},
  2882  				},
  2883  			},
  2884  		},
  2885  		ServiceAnnotations: api.Annotations{
  2886  			Name: "task1",
  2887  		},
  2888  		Status: api.TaskStatus{
  2889  			State: api.TaskStatePending,
  2890  		},
  2891  	}
  2892  
  2893  	// Task2: vol plugin1, vol plugin2
  2894  	t2 := &api.Task{
  2895  		ID:           "task2_ID",
  2896  		ServiceID:    "serviceID1",
  2897  		DesiredState: api.TaskStateRunning,
  2898  		Spec: api.TaskSpec{
  2899  			Runtime: &api.TaskSpec_Container{
  2900  				Container: &api.ContainerSpec{
  2901  					Mounts: []api.Mount{
  2902  						{
  2903  							Source:        "testVol1",
  2904  							Target:        "/foo",
  2905  							Type:          api.MountTypeVolume,
  2906  							VolumeOptions: volumeOptionsDriver("plugin1"),
  2907  						},
  2908  						{
  2909  							Source:        "testVol2",
  2910  							Target:        "/foo",
  2911  							Type:          api.MountTypeVolume,
  2912  							VolumeOptions: volumeOptionsDriver("plugin2"),
  2913  						},
  2914  					},
  2915  				},
  2916  			},
  2917  		},
  2918  		ServiceAnnotations: api.Annotations{
  2919  			Name: "task2",
  2920  		},
  2921  		Status: api.TaskStatus{
  2922  			State: api.TaskStatePending,
  2923  		},
  2924  	}
  2925  
  2926  	// Task3: vol plugin1, network plugin1
  2927  	t3 := &api.Task{
  2928  		ID:           "task3_ID",
  2929  		ServiceID:    "serviceID1",
  2930  		DesiredState: api.TaskStateRunning,
  2931  		Networks: []*api.NetworkAttachment{
  2932  			{
  2933  				Network: &api.Network{
  2934  					ID: "testNwID1",
  2935  					Spec: api.NetworkSpec{
  2936  						Annotations: api.Annotations{
  2937  							Name: "testVol1",
  2938  						},
  2939  					},
  2940  					DriverState: &api.Driver{
  2941  						Name: "plugin1",
  2942  					},
  2943  				},
  2944  			},
  2945  		},
  2946  		Spec: api.TaskSpec{
  2947  			Runtime: &api.TaskSpec_Container{
  2948  				Container: &api.ContainerSpec{
  2949  					Mounts: []api.Mount{
  2950  						{
  2951  							Source:        "testVol1",
  2952  							Target:        "/foo",
  2953  							Type:          api.MountTypeVolume,
  2954  							VolumeOptions: volumeOptionsDriver("plugin1"),
  2955  						},
  2956  					},
  2957  				},
  2958  			},
  2959  		},
  2960  		ServiceAnnotations: api.Annotations{
  2961  			Name: "task2",
  2962  		},
  2963  		Status: api.TaskStatus{
  2964  			State: api.TaskStatePending,
  2965  		},
  2966  	}
  2967  	// Task4: log plugin1
  2968  	t4 := &api.Task{
  2969  		ID:           "task4_ID",
  2970  		ServiceID:    "serviceID1",
  2971  		DesiredState: api.TaskStateRunning,
  2972  		Spec: api.TaskSpec{
  2973  			Runtime: &api.TaskSpec_Container{
  2974  				Container: &api.ContainerSpec{},
  2975  			},
  2976  			LogDriver: &api.Driver{Name: "plugin1"},
  2977  		},
  2978  		ServiceAnnotations: api.Annotations{
  2979  			Name: "task4",
  2980  		},
  2981  		Status: api.TaskStatus{
  2982  			State: api.TaskStatePending,
  2983  		},
  2984  	}
  2985  	// Task5: log plugin1
  2986  	t5 := &api.Task{
  2987  		ID:           "task5_ID",
  2988  		ServiceID:    "serviceID1",
  2989  		DesiredState: api.TaskStateRunning,
  2990  		Spec: api.TaskSpec{
  2991  			Runtime: &api.TaskSpec_Container{
  2992  				Container: &api.ContainerSpec{},
  2993  			},
  2994  			LogDriver: &api.Driver{Name: "plugin1"},
  2995  		},
  2996  		ServiceAnnotations: api.Annotations{
  2997  			Name: "task5",
  2998  		},
  2999  		Status: api.TaskStatus{
  3000  			State: api.TaskStatePending,
  3001  		},
  3002  	}
  3003  
  3004  	// no logging
  3005  	t6 := &api.Task{
  3006  		ID:           "task6_ID",
  3007  		ServiceID:    "serviceID1",
  3008  		DesiredState: api.TaskStateRunning,
  3009  		Spec: api.TaskSpec{
  3010  			Runtime: &api.TaskSpec_Container{
  3011  				Container: &api.ContainerSpec{},
  3012  			},
  3013  			LogDriver: &api.Driver{Name: "none"},
  3014  		},
  3015  		ServiceAnnotations: api.Annotations{
  3016  			Name: "task6",
  3017  		},
  3018  		Status: api.TaskStatus{
  3019  			State: api.TaskStatePending,
  3020  		},
  3021  	}
  3022  
  3023  	// log driver with no name
  3024  	t7 := &api.Task{
  3025  		ID:           "task7_ID",
  3026  		ServiceID:    "serviceID1",
  3027  		DesiredState: api.TaskStateRunning,
  3028  		Spec: api.TaskSpec{
  3029  			Runtime: &api.TaskSpec_Container{
  3030  				Container: &api.ContainerSpec{},
  3031  			},
  3032  			LogDriver: &api.Driver{
  3033  				Options: map[string]string{
  3034  					"max-size": "50k",
  3035  				},
  3036  			},
  3037  		},
  3038  		ServiceAnnotations: api.Annotations{
  3039  			Name: "task7",
  3040  		},
  3041  		Status: api.TaskStatus{
  3042  			State: api.TaskStatePending,
  3043  		},
  3044  	}
  3045  
  3046  	s1 := &api.Service{
  3047  		ID: "serviceID1",
  3048  	}
  3049  	s := store.NewMemoryStore(nil)
  3050  	assert.NotNil(t, s)
  3051  	defer s.Close()
  3052  
  3053  	// Add initial node, service and task
  3054  	err := s.Update(func(tx store.Tx) error {
  3055  		assert.NoError(t, store.CreateService(tx, s1))
  3056  		assert.NoError(t, store.CreateTask(tx, t1))
  3057  		assert.NoError(t, store.CreateNode(tx, n1))
  3058  		return nil
  3059  	})
  3060  	assert.NoError(t, err)
  3061  
  3062  	scheduler := New(s)
  3063  
  3064  	watch, cancel := state.Watch(s.WatchQueue(), api.EventUpdateTask{})
  3065  	defer cancel()
  3066  
  3067  	go func() {
  3068  		assert.NoError(t, scheduler.Run(ctx))
  3069  	}()
  3070  	defer scheduler.Stop()
  3071  
  3072  	// t1 should get assigned
  3073  	assignment := watchAssignment(t, watch)
  3074  	assert.Equal(t, assignment.NodeID, "node1_ID")
  3075  
  3076  	// Create t0; it should get assigned because the plugin filter shouldn't
  3077  	// be enabled for tasks that have bind mounts
  3078  	err = s.Update(func(tx store.Tx) error {
  3079  		assert.NoError(t, store.CreateTask(tx, t0))
  3080  		return nil
  3081  	})
  3082  	assert.NoError(t, err)
  3083  
  3084  	assignment0 := watchAssignment(t, watch)
  3085  	assert.Equal(t, assignment0.ID, "task0_ID")
  3086  	assert.Equal(t, assignment0.NodeID, "node1_ID")
  3087  
  3088  	// Create t2; it should stay in the pending state because there is
  3089  	// no node that with volume plugin `plugin2`
  3090  	err = s.Update(func(tx store.Tx) error {
  3091  		assert.NoError(t, store.CreateTask(tx, t2))
  3092  		return nil
  3093  	})
  3094  	assert.NoError(t, err)
  3095  
  3096  	failure := watchAssignmentFailure(t, watch)
  3097  	assert.Equal(t, "no suitable node (missing plugin on 1 node)", failure.Status.Err)
  3098  
  3099  	// Now add the second node
  3100  	err = s.Update(func(tx store.Tx) error {
  3101  		assert.NoError(t, store.CreateNode(tx, n2))
  3102  		return nil
  3103  	})
  3104  	assert.NoError(t, err)
  3105  
  3106  	// Check that t2 has been assigned
  3107  	assignment1 := watchAssignment(t, watch)
  3108  	assert.Equal(t, assignment1.ID, "task2_ID")
  3109  	assert.Equal(t, assignment1.NodeID, "node2_ID")
  3110  
  3111  	// Create t3; it should stay in the pending state because there is
  3112  	// no node that with network plugin `plugin1`
  3113  	err = s.Update(func(tx store.Tx) error {
  3114  		assert.NoError(t, store.CreateTask(tx, t3))
  3115  		return nil
  3116  	})
  3117  	assert.NoError(t, err)
  3118  
  3119  	failure = watchAssignmentFailure(t, watch)
  3120  	assert.Equal(t, "no suitable node (missing plugin on 2 nodes)", failure.Status.Err)
  3121  
  3122  	// Now add the node3
  3123  	err = s.Update(func(tx store.Tx) error {
  3124  		assert.NoError(t, store.CreateNode(tx, n3))
  3125  		return nil
  3126  	})
  3127  	assert.NoError(t, err)
  3128  
  3129  	// Check that t3 has been assigned
  3130  	assignment2 := watchAssignment(t, watch)
  3131  	assert.Equal(t, assignment2.ID, "task3_ID")
  3132  	assert.Equal(t, assignment2.NodeID, "node3_ID")
  3133  
  3134  	// Create t4; it should stay in the pending state because there is
  3135  	// no node that with log plugin `plugin1`
  3136  	err = s.Update(func(tx store.Tx) error {
  3137  		assert.NoError(t, store.CreateTask(tx, t4))
  3138  		return nil
  3139  	})
  3140  	assert.NoError(t, err)
  3141  
  3142  	// check that t4 has been assigned
  3143  	failure2 := watchAssignmentFailure(t, watch)
  3144  	assert.Equal(t, "no suitable node (missing plugin on 3 nodes)", failure2.Status.Err)
  3145  
  3146  	err = s.Update(func(tx store.Tx) error {
  3147  		assert.NoError(t, store.CreateNode(tx, n4))
  3148  		return nil
  3149  	})
  3150  	assert.NoError(t, err)
  3151  
  3152  	// Check that t4 has been assigned
  3153  	assignment3 := watchAssignment(t, watch)
  3154  	assert.Equal(t, assignment3.ID, "task4_ID")
  3155  	assert.Equal(t, assignment3.NodeID, "node4_ID")
  3156  
  3157  	err = s.Update(func(tx store.Tx) error {
  3158  		assert.NoError(t, store.CreateTask(tx, t5))
  3159  		return nil
  3160  	})
  3161  	assert.NoError(t, err)
  3162  	assignment4 := watchAssignment(t, watch)
  3163  	assert.Equal(t, assignment4.ID, "task5_ID")
  3164  	assert.Equal(t, assignment4.NodeID, "node4_ID")
  3165  
  3166  	// check that t6 gets assigned to some node
  3167  	err = s.Update(func(tx store.Tx) error {
  3168  		assert.NoError(t, store.CreateTask(tx, t6))
  3169  		return nil
  3170  	})
  3171  	assert.NoError(t, err)
  3172  	assignment5 := watchAssignment(t, watch)
  3173  	assert.Equal(t, assignment5.ID, "task6_ID")
  3174  	assert.NotEqual(t, assignment5.NodeID, "")
  3175  
  3176  	// check that t7 gets assigned to some node
  3177  	err = s.Update(func(tx store.Tx) error {
  3178  		assert.NoError(t, store.CreateTask(tx, t7))
  3179  		return nil
  3180  	})
  3181  	assert.NoError(t, err)
  3182  	assignment6 := watchAssignment(t, watch)
  3183  	assert.Equal(t, assignment6.ID, "task7_ID")
  3184  	assert.NotEqual(t, assignment6.NodeID, "")
  3185  }
  3186  
  3187  func BenchmarkScheduler1kNodes1kTasks(b *testing.B) {
  3188  	benchScheduler(b, 1e3, 1e3, false)
  3189  }
  3190  
  3191  func BenchmarkScheduler1kNodes10kTasks(b *testing.B) {
  3192  	benchScheduler(b, 1e3, 1e4, false)
  3193  }
  3194  
  3195  func BenchmarkScheduler1kNodes100kTasks(b *testing.B) {
  3196  	benchScheduler(b, 1e3, 1e5, false)
  3197  }
  3198  
  3199  func BenchmarkScheduler100kNodes100kTasks(b *testing.B) {
  3200  	benchScheduler(b, 1e5, 1e5, false)
  3201  }
  3202  
  3203  func BenchmarkScheduler100kNodes1kTasks(b *testing.B) {
  3204  	benchScheduler(b, 1e5, 1e3, false)
  3205  }
  3206  
  3207  func BenchmarkScheduler100kNodes1MTasks(b *testing.B) {
  3208  	benchScheduler(b, 1e5, 1e6, false)
  3209  }
  3210  
  3211  func BenchmarkSchedulerConstraints1kNodes1kTasks(b *testing.B) {
  3212  	benchScheduler(b, 1e3, 1e3, true)
  3213  }
  3214  
  3215  func BenchmarkSchedulerConstraints1kNodes10kTasks(b *testing.B) {
  3216  	benchScheduler(b, 1e3, 1e4, true)
  3217  }
  3218  
  3219  func BenchmarkSchedulerConstraints1kNodes100kTasks(b *testing.B) {
  3220  	benchScheduler(b, 1e3, 1e5, true)
  3221  }
  3222  
  3223  func BenchmarkSchedulerConstraints5kNodes100kTasks(b *testing.B) {
  3224  	benchScheduler(b, 5e3, 1e5, true)
  3225  }
  3226  
  3227  func benchScheduler(b *testing.B, nodes, tasks int, networkConstraints bool) {
  3228  	ctx := context.Background()
  3229  
  3230  	for iters := 0; iters < b.N; iters++ {
  3231  		b.StopTimer()
  3232  		s := store.NewMemoryStore(nil)
  3233  		scheduler := New(s)
  3234  
  3235  		watch, cancel := state.Watch(s.WatchQueue(), api.EventUpdateTask{})
  3236  
  3237  		go func() {
  3238  			_ = scheduler.Run(ctx)
  3239  		}()
  3240  
  3241  		// Let the scheduler get started
  3242  		runtime.Gosched()
  3243  
  3244  		_ = s.Update(func(tx store.Tx) error {
  3245  			// Create initial nodes and tasks
  3246  			for i := 0; i < nodes; i++ {
  3247  				n := &api.Node{
  3248  					ID: identity.NewID(),
  3249  					Spec: api.NodeSpec{
  3250  						Annotations: api.Annotations{
  3251  							Name:   "name" + strconv.Itoa(i),
  3252  							Labels: make(map[string]string),
  3253  						},
  3254  					},
  3255  					Status: api.NodeStatus{
  3256  						State: api.NodeStatus_READY,
  3257  					},
  3258  					Description: &api.NodeDescription{
  3259  						Engine: &api.EngineDescription{},
  3260  					},
  3261  				}
  3262  				// Give every third node a special network
  3263  				if i%3 == 0 {
  3264  					n.Description.Engine.Plugins = []api.PluginDescription{
  3265  						{
  3266  							Name: "network",
  3267  							Type: "Network",
  3268  						},
  3269  					}
  3270  
  3271  				}
  3272  				err := store.CreateNode(tx, n)
  3273  				if err != nil {
  3274  					panic(err)
  3275  				}
  3276  			}
  3277  			for i := 0; i < tasks; i++ {
  3278  				id := "task" + strconv.Itoa(i)
  3279  				t := &api.Task{
  3280  					ID:           id,
  3281  					DesiredState: api.TaskStateRunning,
  3282  					ServiceAnnotations: api.Annotations{
  3283  						Name: id,
  3284  					},
  3285  					Status: api.TaskStatus{
  3286  						State: api.TaskStatePending,
  3287  					},
  3288  				}
  3289  				if networkConstraints {
  3290  					t.Networks = []*api.NetworkAttachment{
  3291  						{
  3292  							Network: &api.Network{
  3293  								DriverState: &api.Driver{
  3294  									Name: "network",
  3295  								},
  3296  							},
  3297  						},
  3298  					}
  3299  				}
  3300  				err := store.CreateTask(tx, t)
  3301  				if err != nil {
  3302  					panic(err)
  3303  				}
  3304  			}
  3305  			b.StartTimer()
  3306  			return nil
  3307  		})
  3308  
  3309  		for i := 0; i != tasks; i++ {
  3310  			<-watch
  3311  		}
  3312  
  3313  		scheduler.Stop()
  3314  		cancel()
  3315  		s.Close()
  3316  	}
  3317  }
  3318  
  3319  func TestSchedulerHostPort(t *testing.T) {
  3320  	ctx := context.Background()
  3321  	node1 := &api.Node{
  3322  		ID: "nodeid1",
  3323  		Spec: api.NodeSpec{
  3324  			Annotations: api.Annotations{
  3325  				Name: "node1",
  3326  			},
  3327  		},
  3328  		Status: api.NodeStatus{
  3329  			State: api.NodeStatus_READY,
  3330  		},
  3331  	}
  3332  	node2 := &api.Node{
  3333  		ID: "nodeid2",
  3334  		Spec: api.NodeSpec{
  3335  			Annotations: api.Annotations{
  3336  				Name: "node2",
  3337  			},
  3338  		},
  3339  		Status: api.NodeStatus{
  3340  			State: api.NodeStatus_READY,
  3341  		},
  3342  	}
  3343  
  3344  	task1 := &api.Task{
  3345  		ID:           "id1",
  3346  		ServiceID:    "serviceID1",
  3347  		DesiredState: api.TaskStateRunning,
  3348  		Spec: api.TaskSpec{
  3349  			Runtime: &api.TaskSpec_Container{
  3350  				Container: &api.ContainerSpec{},
  3351  			},
  3352  		},
  3353  		ServiceAnnotations: api.Annotations{
  3354  			Name: "name1",
  3355  		},
  3356  		Status: api.TaskStatus{
  3357  			State: api.TaskStatePending,
  3358  		},
  3359  		Endpoint: &api.Endpoint{
  3360  			Ports: []*api.PortConfig{
  3361  				{
  3362  					PublishMode:   api.PublishModeHost,
  3363  					PublishedPort: 58,
  3364  					Protocol:      api.ProtocolTCP,
  3365  				},
  3366  			},
  3367  		},
  3368  	}
  3369  	task2 := &api.Task{
  3370  		ID:           "id2",
  3371  		ServiceID:    "serviceID1",
  3372  		DesiredState: api.TaskStateRunning,
  3373  		Spec: api.TaskSpec{
  3374  			Runtime: &api.TaskSpec_Container{
  3375  				Container: &api.ContainerSpec{},
  3376  			},
  3377  		},
  3378  		ServiceAnnotations: api.Annotations{
  3379  			Name: "name2",
  3380  		},
  3381  		Status: api.TaskStatus{
  3382  			State: api.TaskStatePending,
  3383  		},
  3384  		Endpoint: &api.Endpoint{
  3385  			Ports: []*api.PortConfig{
  3386  				{
  3387  					PublishMode:   api.PublishModeHost,
  3388  					PublishedPort: 58,
  3389  					Protocol:      api.ProtocolUDP,
  3390  				},
  3391  			},
  3392  		},
  3393  	}
  3394  	task3 := &api.Task{
  3395  		ID:           "id3",
  3396  		ServiceID:    "serviceID1",
  3397  		DesiredState: api.TaskStateRunning,
  3398  		Spec: api.TaskSpec{
  3399  			Runtime: &api.TaskSpec_Container{
  3400  				Container: &api.ContainerSpec{},
  3401  			},
  3402  		},
  3403  		ServiceAnnotations: api.Annotations{
  3404  			Name: "name3",
  3405  		},
  3406  		Status: api.TaskStatus{
  3407  			State: api.TaskStatePending,
  3408  		},
  3409  		Endpoint: &api.Endpoint{
  3410  			Ports: []*api.PortConfig{
  3411  				{
  3412  					PublishMode:   api.PublishModeHost,
  3413  					PublishedPort: 58,
  3414  					Protocol:      api.ProtocolUDP,
  3415  				},
  3416  				{
  3417  					PublishMode:   api.PublishModeHost,
  3418  					PublishedPort: 58,
  3419  					Protocol:      api.ProtocolTCP,
  3420  				},
  3421  			},
  3422  		},
  3423  	}
  3424  
  3425  	service1 := &api.Service{
  3426  		ID: "serviceID1",
  3427  	}
  3428  
  3429  	s := store.NewMemoryStore(nil)
  3430  	assert.NotNil(t, s)
  3431  	defer s.Close()
  3432  
  3433  	err := s.Update(func(tx store.Tx) error {
  3434  		// Add initial node, service and task
  3435  		assert.NoError(t, store.CreateService(tx, service1))
  3436  		assert.NoError(t, store.CreateTask(tx, task1))
  3437  		assert.NoError(t, store.CreateTask(tx, task2))
  3438  		return nil
  3439  	})
  3440  	assert.NoError(t, err)
  3441  
  3442  	scheduler := New(s)
  3443  
  3444  	watch, cancel := state.Watch(s.WatchQueue(), api.EventUpdateTask{})
  3445  	defer cancel()
  3446  
  3447  	go func() {
  3448  		assert.NoError(t, scheduler.Run(ctx))
  3449  	}()
  3450  	defer scheduler.Stop()
  3451  
  3452  	// Tasks shouldn't be scheduled because there are no nodes.
  3453  	watchAssignmentFailure(t, watch)
  3454  	watchAssignmentFailure(t, watch)
  3455  
  3456  	err = s.Update(func(tx store.Tx) error {
  3457  		// Add initial node and task
  3458  		assert.NoError(t, store.CreateNode(tx, node1))
  3459  		assert.NoError(t, store.CreateNode(tx, node2))
  3460  		return nil
  3461  	})
  3462  	assert.NoError(t, err)
  3463  
  3464  	// Tasks 1 and 2 should be assigned to different nodes.
  3465  	assignment1 := watchAssignment(t, watch)
  3466  	assignment2 := watchAssignment(t, watch)
  3467  	assert.True(t, assignment1 != assignment2)
  3468  
  3469  	// Task 3 should not be schedulable.
  3470  	err = s.Update(func(tx store.Tx) error {
  3471  		assert.NoError(t, store.CreateTask(tx, task3))
  3472  		return nil
  3473  	})
  3474  	assert.NoError(t, err)
  3475  
  3476  	failure := watchAssignmentFailure(t, watch)
  3477  	assert.Equal(t, "no suitable node (host-mode port already in use on 2 nodes)", failure.Status.Err)
  3478  }
  3479  
  3480  func TestSchedulerMaxReplicas(t *testing.T) {
  3481  	ctx := context.Background()
  3482  	node1 := &api.Node{
  3483  		ID: "nodeid1",
  3484  		Spec: api.NodeSpec{
  3485  			Annotations: api.Annotations{
  3486  				Name: "node1",
  3487  			},
  3488  		},
  3489  		Status: api.NodeStatus{
  3490  			State: api.NodeStatus_READY,
  3491  		},
  3492  	}
  3493  	node2 := &api.Node{
  3494  		ID: "nodeid2",
  3495  		Spec: api.NodeSpec{
  3496  			Annotations: api.Annotations{
  3497  				Name: "node2",
  3498  			},
  3499  		},
  3500  		Status: api.NodeStatus{
  3501  			State: api.NodeStatus_READY,
  3502  		},
  3503  	}
  3504  	task1 := &api.Task{
  3505  		ID:           "id1",
  3506  		ServiceID:    "serviceID1",
  3507  		DesiredState: api.TaskStateRunning,
  3508  		Spec: api.TaskSpec{
  3509  			Runtime: &api.TaskSpec_Container{
  3510  				Container: &api.ContainerSpec{},
  3511  			},
  3512  			Placement: &api.Placement{
  3513  				MaxReplicas: 1,
  3514  			},
  3515  		},
  3516  		ServiceAnnotations: api.Annotations{
  3517  			Name: "name1",
  3518  		},
  3519  		Status: api.TaskStatus{
  3520  			State: api.TaskStatePending,
  3521  		},
  3522  	}
  3523  	task2 := &api.Task{
  3524  		ID:           "id2",
  3525  		ServiceID:    "serviceID1",
  3526  		DesiredState: api.TaskStateRunning,
  3527  		Spec: api.TaskSpec{
  3528  			Runtime: &api.TaskSpec_Container{
  3529  				Container: &api.ContainerSpec{},
  3530  			},
  3531  			Placement: &api.Placement{
  3532  				MaxReplicas: 1,
  3533  			},
  3534  		},
  3535  		ServiceAnnotations: api.Annotations{
  3536  			Name: "name2",
  3537  		},
  3538  		Status: api.TaskStatus{
  3539  			State: api.TaskStatePending,
  3540  		},
  3541  	}
  3542  	task3 := &api.Task{
  3543  		ID:           "id3",
  3544  		ServiceID:    "serviceID1",
  3545  		DesiredState: api.TaskStateRunning,
  3546  		Spec: api.TaskSpec{
  3547  			Runtime: &api.TaskSpec_Container{
  3548  				Container: &api.ContainerSpec{},
  3549  			},
  3550  			Placement: &api.Placement{
  3551  				MaxReplicas: 1,
  3552  			},
  3553  		},
  3554  		ServiceAnnotations: api.Annotations{
  3555  			Name: "name3",
  3556  		},
  3557  		Status: api.TaskStatus{
  3558  			State: api.TaskStatePending,
  3559  		},
  3560  	}
  3561  	service1 := &api.Service{
  3562  		ID: "serviceID1",
  3563  	}
  3564  
  3565  	s := store.NewMemoryStore(nil)
  3566  	assert.NotNil(t, s)
  3567  	defer s.Close()
  3568  
  3569  	err := s.Update(func(tx store.Tx) error {
  3570  		// Add initial node, service and task
  3571  		assert.NoError(t, store.CreateService(tx, service1))
  3572  		assert.NoError(t, store.CreateTask(tx, task1))
  3573  		assert.NoError(t, store.CreateTask(tx, task2))
  3574  		return nil
  3575  	})
  3576  	assert.NoError(t, err)
  3577  
  3578  	scheduler := New(s)
  3579  
  3580  	watch, cancel := state.Watch(s.WatchQueue(), api.EventUpdateTask{})
  3581  	defer cancel()
  3582  
  3583  	go func() {
  3584  		assert.NoError(t, scheduler.Run(ctx))
  3585  	}()
  3586  	defer scheduler.Stop()
  3587  
  3588  	// Tasks shouldn't be scheduled because there are no nodes.
  3589  	watchAssignmentFailure(t, watch)
  3590  	watchAssignmentFailure(t, watch)
  3591  
  3592  	err = s.Update(func(tx store.Tx) error {
  3593  		// Add initial node and task
  3594  		assert.NoError(t, store.CreateNode(tx, node1))
  3595  		assert.NoError(t, store.CreateNode(tx, node2))
  3596  		return nil
  3597  	})
  3598  	assert.NoError(t, err)
  3599  
  3600  	// Tasks 1 and 2 should be assigned to different nodes.
  3601  	assignment1 := watchAssignment(t, watch)
  3602  	assignment2 := watchAssignment(t, watch)
  3603  	assert.True(t, assignment1 != assignment2)
  3604  
  3605  	// Task 3 should not be schedulable.
  3606  	err = s.Update(func(tx store.Tx) error {
  3607  		assert.NoError(t, store.CreateTask(tx, task3))
  3608  		return nil
  3609  	})
  3610  	assert.NoError(t, err)
  3611  
  3612  	failure := watchAssignmentFailure(t, watch)
  3613  	assert.Equal(t, "no suitable node (max replicas per node limit exceed)", failure.Status.Err)
  3614  
  3615  	// Add third node to get task 3 scheduled
  3616  	node3 := &api.Node{
  3617  		ID: "nodeid3",
  3618  		Spec: api.NodeSpec{
  3619  			Annotations: api.Annotations{
  3620  				Name: "node3",
  3621  			},
  3622  		},
  3623  		Status: api.NodeStatus{
  3624  			State: api.NodeStatus_READY,
  3625  		},
  3626  	}
  3627  	err = s.Update(func(tx store.Tx) error {
  3628  		assert.NoError(t, store.CreateNode(tx, node3))
  3629  		return nil
  3630  	})
  3631  	assert.NoError(t, err)
  3632  
  3633  	// Create four more tasks to node 1
  3634  	task4 := &api.Task{
  3635  		ID:           "id4",
  3636  		ServiceID:    "serviceID1",
  3637  		DesiredState: api.TaskStateRunning,
  3638  		Spec: api.TaskSpec{
  3639  			Runtime: &api.TaskSpec_Container{
  3640  				Container: &api.ContainerSpec{},
  3641  			},
  3642  			Placement: &api.Placement{
  3643  				Constraints: []string{"node.hostname==node1"},
  3644  				MaxReplicas: 3,
  3645  			},
  3646  		},
  3647  		ServiceAnnotations: api.Annotations{
  3648  			Name: "name4",
  3649  		},
  3650  		Status: api.TaskStatus{
  3651  			State: api.TaskStatePending,
  3652  		},
  3653  	}
  3654  	task5 := &api.Task{
  3655  		ID:           "id5",
  3656  		ServiceID:    "serviceID1",
  3657  		DesiredState: api.TaskStateRunning,
  3658  		Spec: api.TaskSpec{
  3659  			Runtime: &api.TaskSpec_Container{
  3660  				Container: &api.ContainerSpec{},
  3661  			},
  3662  			Placement: &api.Placement{
  3663  				Constraints: []string{"node.hostname==node1"},
  3664  				MaxReplicas: 3,
  3665  			},
  3666  		},
  3667  		ServiceAnnotations: api.Annotations{
  3668  			Name: "name5",
  3669  		},
  3670  		Status: api.TaskStatus{
  3671  			State: api.TaskStatePending,
  3672  		},
  3673  	}
  3674  	task6 := &api.Task{
  3675  		ID:           "id6",
  3676  		ServiceID:    "serviceID1",
  3677  		DesiredState: api.TaskStateRunning,
  3678  		Spec: api.TaskSpec{
  3679  			Runtime: &api.TaskSpec_Container{
  3680  				Container: &api.ContainerSpec{},
  3681  			},
  3682  			Placement: &api.Placement{
  3683  				Constraints: []string{"node.hostname==node1"},
  3684  				MaxReplicas: 3,
  3685  			},
  3686  		},
  3687  		ServiceAnnotations: api.Annotations{
  3688  			Name: "name6",
  3689  		},
  3690  		Status: api.TaskStatus{
  3691  			State: api.TaskStatePending,
  3692  		},
  3693  	}
  3694  	task7 := &api.Task{
  3695  		ID:           "id7",
  3696  		ServiceID:    "serviceID1",
  3697  		DesiredState: api.TaskStateRunning,
  3698  		Spec: api.TaskSpec{
  3699  			Runtime: &api.TaskSpec_Container{
  3700  				Container: &api.ContainerSpec{},
  3701  			},
  3702  			Placement: &api.Placement{
  3703  				Constraints: []string{"node.hostname==node1"},
  3704  				MaxReplicas: 3,
  3705  			},
  3706  		},
  3707  		ServiceAnnotations: api.Annotations{
  3708  			Name: "name7",
  3709  		},
  3710  		Status: api.TaskStatus{
  3711  			State: api.TaskStatePending,
  3712  		},
  3713  	}
  3714  	err = s.Update(func(tx store.Tx) error {
  3715  		assert.NoError(t, store.CreateTask(tx, task4))
  3716  		assert.NoError(t, store.CreateTask(tx, task5))
  3717  		assert.NoError(t, store.CreateTask(tx, task6))
  3718  		return nil
  3719  	})
  3720  	assert.NoError(t, err)
  3721  
  3722  	// Task 7 should not be schedulable.
  3723  	err = s.Update(func(tx store.Tx) error {
  3724  		assert.NoError(t, store.CreateTask(tx, task7))
  3725  		return nil
  3726  	})
  3727  	assert.NoError(t, err)
  3728  
  3729  	failure = watchAssignmentFailure(t, watch)
  3730  	assert.Equal(t, "no suitable node (scheduling constraints not satisfied on 3 nodes)", failure.Status.Err)
  3731  }