github.com/zoomfoo/nomad@v0.8.5-0.20180907175415-f28fd3a1a056/scheduler/reconcile_test.go

package scheduler

import (
	"fmt"
	"reflect"
	"regexp"
	"strconv"
	"testing"
	"time"

	"github.com/hashicorp/nomad/helper"
	"github.com/hashicorp/nomad/helper/testlog"
	"github.com/hashicorp/nomad/helper/uuid"
	"github.com/hashicorp/nomad/nomad/mock"
	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/kr/pretty"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
)

var (
	canaryUpdate = &structs.UpdateStrategy{
		Canary:          2,
		MaxParallel:     2,
		HealthCheck:     structs.UpdateStrategyHealthCheck_Checks,
		MinHealthyTime:  10 * time.Second,
		HealthyDeadline: 10 * time.Minute,
		Stagger:         31 * time.Second,
	}

	noCanaryUpdate = &structs.UpdateStrategy{
		MaxParallel:     4,
		HealthCheck:     structs.UpdateStrategyHealthCheck_Checks,
		MinHealthyTime:  10 * time.Second,
		HealthyDeadline: 10 * time.Minute,
		Stagger:         31 * time.Second,
	}
)
// allocUpdateFnIgnore is an allocUpdateType that ignores the existing
// allocation (no update needed).
func allocUpdateFnIgnore(*structs.Allocation, *structs.Job, *structs.TaskGroup) (bool, bool, *structs.Allocation) {
	return true, false, nil
}

// allocUpdateFnDestructive is an allocUpdateType that requires the existing
// allocation to be destructively updated.
func allocUpdateFnDestructive(*structs.Allocation, *structs.Job, *structs.TaskGroup) (bool, bool, *structs.Allocation) {
	return false, true, nil
}

// allocUpdateFnInplace is an allocUpdateType that updates the existing
// allocation in place.
func allocUpdateFnInplace(existing *structs.Allocation, _ *structs.Job, newTG *structs.TaskGroup) (bool, bool, *structs.Allocation) {
	// Create a shallow copy
	newAlloc := existing.CopySkipJob()
	newAlloc.TaskResources = make(map[string]*structs.Resources)

	// Use the new task resources but keep the network from the old
	for _, task := range newTG.Tasks {
		r := task.Resources.Copy()
		r.Networks = existing.TaskResources[task.Name].Networks
		newAlloc.TaskResources[task.Name] = r
	}

	return false, false, newAlloc
}

// allocUpdateFnMock dispatches to a per-allocation update function by ID,
// falling back to unhandled for any allocation not in the handled map.
func allocUpdateFnMock(handled map[string]allocUpdateType, unhandled allocUpdateType) allocUpdateType {
	return func(existing *structs.Allocation, newJob *structs.Job, newTG *structs.TaskGroup) (bool, bool, *structs.Allocation) {
		if fn, ok := handled[existing.ID]; ok {
			return fn(existing, newJob, newTG)
		}

		return unhandled(existing, newJob, newTG)
	}
}
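
// A minimal usage sketch (illustrative only; the allocation ID below is
// hypothetical):
//
//	updateFn := allocUpdateFnMock(
//		map[string]allocUpdateType{"<alloc-id>": allocUpdateFnDestructive},
//		allocUpdateFnIgnore,
//	)
//
// updateFn destructively updates that one allocation and ignores all others.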

var (
	// allocationIndexRegex is a regular expression to find the allocation index.
	allocationIndexRegex = regexp.MustCompile(".+\\[(\\d+)\\]$")
)

// allocNameToIndex returns the index of the allocation.
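// For example, "myjob.web[11]" yields index 11; names without a trailing
// "[index]", or with an index that fails to parse, yield 0.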
func allocNameToIndex(name string) uint {
	matches := allocationIndexRegex.FindStringSubmatch(name)
	if len(matches) != 2 {
		return 0
	}

	index, err := strconv.Atoi(matches[1])
	if err != nil {
		return 0
	}

	return uint(index)
}

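// assertNamesHaveIndexes asserts that the given allocation names carry exactly
// the expected multiset of indexes, no more and no fewer.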
func assertNamesHaveIndexes(t *testing.T, indexes []int, names []string) {
	t.Helper()
	m := make(map[uint]int)
	for _, i := range indexes {
		m[uint(i)]++
	}

	for _, n := range names {
		index := allocNameToIndex(n)
		val, contained := m[index]
		if !contained {
			t.Fatalf("Unexpected index %d from name %s\nAll names: %v", index, n, names)
		}

		val--
		if val < 0 {
			t.Fatalf("Index %d repeated too many times\nAll names: %v", index, names)
		}
		m[index] = val
	}

	for k, remainder := range m {
		if remainder != 0 {
			t.Fatalf("Index %d has %d expected uses remaining\nAll names: %v", k, remainder, names)
		}
	}
}

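// assertNoCanariesStopped fails the test if any canary placed by the
// deployment appears in the stop set.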
func assertNoCanariesStopped(t *testing.T, d *structs.Deployment, stop []allocStopResult) {
	t.Helper()
	canaryIndex := make(map[string]struct{})
	for _, state := range d.TaskGroups {
		for _, c := range state.PlacedCanaries {
			canaryIndex[c] = struct{}{}
		}
	}

	for _, s := range stop {
		if _, ok := canaryIndex[s.alloc.ID]; ok {
			t.Fatalf("Stopping canary alloc %q %q", s.alloc.ID, s.alloc.Name)
		}
	}
}

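// assertPlaceResultsHavePreviousAllocs asserts that exactly numPrevious
// placements link back to a previous allocation with a matching name.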
func assertPlaceResultsHavePreviousAllocs(t *testing.T, numPrevious int, place []allocPlaceResult) {
	t.Helper()
	names := make(map[string]struct{}, numPrevious)

	found := 0
	for _, p := range place {
		if _, ok := names[p.name]; ok {
			t.Fatalf("Name %q already placed", p.name)
		}
		names[p.name] = struct{}{}

		if p.previousAlloc == nil {
			continue
		}

		if act := p.previousAlloc.Name; p.name != act {
			t.Fatalf("Name mismatch on previous alloc; got %q; want %q", act, p.name)
		}
		found++
	}
	if numPrevious != found {
		t.Fatalf("wanted %d; got %d placements with previous allocs", numPrevious, found)
	}
}

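// assertPlacementsAreRescheduled asserts that exactly numRescheduled
// placements with a previous allocation have the reschedule flag set.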
func assertPlacementsAreRescheduled(t *testing.T, numRescheduled int, place []allocPlaceResult) {
	t.Helper()
	names := make(map[string]struct{}, numRescheduled)

	found := 0
	for _, p := range place {
		if _, ok := names[p.name]; ok {
			t.Fatalf("Name %q already placed", p.name)
		}
		names[p.name] = struct{}{}

		if p.previousAlloc == nil {
			continue
		}
		if p.reschedule {
			found++
		}
	}
	if numRescheduled != found {
		t.Fatalf("wanted %d; got %d placements that are rescheduled", numRescheduled, found)
	}
}

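// intRange expands pairs of inclusive (start, end) bounds into a flat slice
// of indexes; for example, intRange(0, 2, 5, 5) yields [0, 1, 2, 5]. It
// returns nil when given an odd number of arguments.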
func intRange(pairs ...int) []int {
	if len(pairs)%2 != 0 {
		return nil
	}

	var r []int
	for i := 0; i < len(pairs); i += 2 {
		for j := pairs[i]; j <= pairs[i+1]; j++ {
			r = append(r, j)
		}
	}
	return r
}

func placeResultsToNames(place []allocPlaceResult) []string {
	names := make([]string, 0, len(place))
	for _, p := range place {
		names = append(names, p.name)
	}
	return names
}

func destructiveResultsToNames(destructive []allocDestructiveResult) []string {
	names := make([]string, 0, len(destructive))
	for _, d := range destructive {
		names = append(names, d.placeName)
	}
	return names
}

func stopResultsToNames(stop []allocStopResult) []string {
	names := make([]string, 0, len(stop))
	for _, s := range stop {
		names = append(names, s.alloc.Name)
	}
	return names
}

func attributeUpdatesToNames(attributeUpdates map[string]*structs.Allocation) []string {
	names := make([]string, 0, len(attributeUpdates))
	for _, a := range attributeUpdates {
		names = append(names, a.Name)
	}
	return names
}

func allocsToNames(allocs []*structs.Allocation) []string {
	names := make([]string, 0, len(allocs))
	for _, a := range allocs {
		names = append(names, a.Name)
	}
	return names
}

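// resultExpectation captures the expected outputs of a single reconciler run:
// the deployment to create (if any), deployment status updates, counts per
// result bucket, and the desired task group update annotations.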
type resultExpectation struct {
	createDeployment  *structs.Deployment
	deploymentUpdates []*structs.DeploymentStatusUpdate
	place             int
	destructive       int
	inplace           int
	attributeUpdates  int
	stop              int
	desiredTGUpdates  map[string]*structs.DesiredUpdates
}

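// assertResults compares the reconciler's results against a resultExpectation
// and fails the test on any mismatch.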
func assertResults(t *testing.T, r *reconcileResults, exp *resultExpectation) {
	t.Helper()
	assert := assert.New(t)

	if exp.createDeployment != nil && r.deployment == nil {
		t.Errorf("Expected a created deployment; got none")
	} else if exp.createDeployment == nil && r.deployment != nil {
		t.Errorf("Expected no created deployment; got %#v", r.deployment)
	} else if exp.createDeployment != nil && r.deployment != nil {
		// Clear the deployment IDs before comparing
		r.deployment.ID, exp.createDeployment.ID = "", ""
		if !reflect.DeepEqual(r.deployment, exp.createDeployment) {
			t.Errorf("Unexpected created deployment; got\n%#v\nwant\n%#v\nDiff: %v",
				r.deployment, exp.createDeployment, pretty.Diff(r.deployment, exp.createDeployment))
		}
	}

	assert.EqualValues(exp.deploymentUpdates, r.deploymentUpdates, "Expected Deployment Updates")
	assert.Len(r.place, exp.place, "Expected Placements")
	assert.Len(r.destructiveUpdate, exp.destructive, "Expected Destructive")
	assert.Len(r.inplaceUpdate, exp.inplace, "Expected Inplace Updates")
	assert.Len(r.attributeUpdates, exp.attributeUpdates, "Expected Attribute Updates")
	assert.Len(r.stop, exp.stop, "Expected Stops")
	assert.EqualValues(exp.desiredTGUpdates, r.desiredTGUpdates, "Expected Desired TG Update Annotations")
}

// Tests the reconciler properly handles placements for a job that has no
// existing allocations
func TestReconciler_Place_NoExisting(t *testing.T) {
	job := mock.Job()
	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnIgnore, false, job.ID, job, nil, nil, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             10,
		inplace:           0,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place: 10,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 9), placeResultsToNames(r.place))
}

// Tests the reconciler properly handles placements for a job that has some
// existing allocations
func TestReconciler_Place_Existing(t *testing.T) {
	job := mock.Job()

	// Create 5 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 5; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             5,
		inplace:           0,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:  5,
				Ignore: 5,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(5, 9), placeResultsToNames(r.place))
}

// Tests the reconciler properly handles stopping allocations for a job that has
// scaled down
func TestReconciler_ScaleDown_Partial(t *testing.T) {
	// Has desired 10
	job := mock.Job()

	// Create 20 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 20; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             0,
		inplace:           0,
		stop:              10,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Ignore: 10,
				Stop:   10,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(10, 19), stopResultsToNames(r.stop))
}

// Tests the reconciler properly handles stopping allocations for a job that has
// scaled down to zero desired
func TestReconciler_ScaleDown_Zero(t *testing.T) {
	// Set desired 0
	job := mock.Job()
	job.TaskGroups[0].Count = 0

	// Create 20 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 20; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             0,
		inplace:           0,
		stop:              20,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Stop: 20,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 19), stopResultsToNames(r.stop))
}

// Tests the reconciler properly handles stopping allocations for a job that has
// scaled down to zero desired where allocs have duplicate names
func TestReconciler_ScaleDown_Zero_DuplicateNames(t *testing.T) {
	// Set desired 0
	job := mock.Job()
	job.TaskGroups[0].Count = 0

	// Create 20 existing allocations
	var allocs []*structs.Allocation
	var expectedStopped []int
	for i := 0; i < 20; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i%2))
		allocs = append(allocs, alloc)
		expectedStopped = append(expectedStopped, i%2)
	}

	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             0,
		inplace:           0,
		stop:              20,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Stop: 20,
			},
		},
	})

	assertNamesHaveIndexes(t, expectedStopped, stopResultsToNames(r.stop))
}

// Tests the reconciler properly handles in-place updates of allocations
func TestReconciler_Inplace(t *testing.T) {
	job := mock.Job()

	// Create 10 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnInplace, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             0,
		inplace:           10,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				InPlaceUpdate: 10,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 9), allocsToNames(r.inplaceUpdate))
}

// Tests the reconciler properly handles in-place updates of allocations while
// scaling up
func TestReconciler_Inplace_ScaleUp(t *testing.T) {
	// Set desired 15
	job := mock.Job()
	job.TaskGroups[0].Count = 15

	// Create 10 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnInplace, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             5,
		inplace:           10,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:         5,
				InPlaceUpdate: 10,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 9), allocsToNames(r.inplaceUpdate))
	assertNamesHaveIndexes(t, intRange(10, 14), placeResultsToNames(r.place))
}

// Tests the reconciler properly handles in-place updates of allocations while
// scaling down
func TestReconciler_Inplace_ScaleDown(t *testing.T) {
	// Set desired 5
	job := mock.Job()
	job.TaskGroups[0].Count = 5

	// Create 10 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnInplace, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             0,
		inplace:           5,
		stop:              5,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Stop:          5,
				InPlaceUpdate: 5,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 4), allocsToNames(r.inplaceUpdate))
	assertNamesHaveIndexes(t, intRange(5, 9), stopResultsToNames(r.stop))
}

// Tests the reconciler properly handles destructive updates of allocations
func TestReconciler_Destructive(t *testing.T) {
	job := mock.Job()

	// Create 10 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		destructive:       10,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				DestructiveUpdate: 10,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 9), destructiveResultsToNames(r.destructiveUpdate))
}

// Tests the reconciler properly handles destructive updates of allocations
// while scaling up
func TestReconciler_Destructive_ScaleUp(t *testing.T) {
	// Set desired 15
	job := mock.Job()
	job.TaskGroups[0].Count = 15

	// Create 10 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             5,
		destructive:       10,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:             5,
				DestructiveUpdate: 10,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 9), destructiveResultsToNames(r.destructiveUpdate))
	assertNamesHaveIndexes(t, intRange(10, 14), placeResultsToNames(r.place))
}

// Tests the reconciler properly handles destructive updates of allocations
// while scaling down
func TestReconciler_Destructive_ScaleDown(t *testing.T) {
	// Set desired 5
	job := mock.Job()
	job.TaskGroups[0].Count = 5

	// Create 10 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		destructive:       5,
		stop:              5,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Stop:              5,
				DestructiveUpdate: 5,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(5, 9), stopResultsToNames(r.stop))
	assertNamesHaveIndexes(t, intRange(0, 4), destructiveResultsToNames(r.destructiveUpdate))
}

// Tests the reconciler properly handles lost nodes with allocations
func TestReconciler_LostNode(t *testing.T) {
	job := mock.Job()

	// Create 10 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	// Build a map of tainted nodes
	tainted := make(map[string]*structs.Node, 2)
	for i := 0; i < 2; i++ {
		n := mock.Node()
		n.ID = allocs[i].NodeID
		n.Status = structs.NodeStatusDown
		tainted[n.ID] = n
	}

	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, tainted, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             2,
		inplace:           0,
		stop:              2,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:  2,
				Stop:   2,
				Ignore: 8,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop))
	assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place))
}

// Tests the reconciler properly handles lost nodes with allocations while
// scaling up
func TestReconciler_LostNode_ScaleUp(t *testing.T) {
	// Set desired 15
	job := mock.Job()
	job.TaskGroups[0].Count = 15

	// Create 10 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	// Build a map of tainted nodes
	tainted := make(map[string]*structs.Node, 2)
	for i := 0; i < 2; i++ {
		n := mock.Node()
		n.ID = allocs[i].NodeID
		n.Status = structs.NodeStatusDown
		tainted[n.ID] = n
	}

	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, tainted, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             7,
		inplace:           0,
		stop:              2,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:  7,
				Stop:   2,
				Ignore: 8,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop))
	assertNamesHaveIndexes(t, intRange(0, 1, 10, 14), placeResultsToNames(r.place))
}

// Tests the reconciler properly handles lost nodes with allocations while
// scaling down
func TestReconciler_LostNode_ScaleDown(t *testing.T) {
	// Set desired 5
	job := mock.Job()
	job.TaskGroups[0].Count = 5

	// Create 10 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	// Build a map of tainted nodes
	tainted := make(map[string]*structs.Node, 2)
	for i := 0; i < 2; i++ {
		n := mock.Node()
		n.ID = allocs[i].NodeID
		n.Status = structs.NodeStatusDown
		tainted[n.ID] = n
	}

	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, tainted, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             0,
		inplace:           0,
		stop:              5,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Stop:   5,
				Ignore: 5,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 1, 7, 9), stopResultsToNames(r.stop))
}

// Tests the reconciler properly handles draining nodes with allocations
func TestReconciler_DrainNode(t *testing.T) {
	job := mock.Job()

	// Create 10 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	// Build a map of tainted nodes
	tainted := make(map[string]*structs.Node, 2)
	for i := 0; i < 2; i++ {
		n := mock.Node()
		n.ID = allocs[i].NodeID
		allocs[i].DesiredTransition.Migrate = helper.BoolToPtr(true)
		n.Drain = true
		tainted[n.ID] = n
	}

	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, tainted, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             2,
		inplace:           0,
		stop:              2,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Migrate: 2,
				Ignore:  8,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop))
	assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place))
	assertPlaceResultsHavePreviousAllocs(t, 2, r.place)
	// These should not have the reschedule field set
	assertPlacementsAreRescheduled(t, 0, r.place)
}

// Tests the reconciler properly handles draining nodes with allocations while
// scaling up
func TestReconciler_DrainNode_ScaleUp(t *testing.T) {
	// Set desired 15
	job := mock.Job()
	job.TaskGroups[0].Count = 15

	// Create 10 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	// Build a map of tainted nodes
	tainted := make(map[string]*structs.Node, 2)
	for i := 0; i < 2; i++ {
		n := mock.Node()
		n.ID = allocs[i].NodeID
		allocs[i].DesiredTransition.Migrate = helper.BoolToPtr(true)
		n.Drain = true
		tainted[n.ID] = n
	}

	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, tainted, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             7,
		inplace:           0,
		stop:              2,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:   5,
				Migrate: 2,
				Ignore:  8,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop))
	assertNamesHaveIndexes(t, intRange(0, 1, 10, 14), placeResultsToNames(r.place))
	assertPlaceResultsHavePreviousAllocs(t, 2, r.place)
	// These should not have the reschedule field set
	assertPlacementsAreRescheduled(t, 0, r.place)
}

// Tests the reconciler properly handles draining nodes with allocations while
// scaling down
func TestReconciler_DrainNode_ScaleDown(t *testing.T) {
	// Set desired 8
	job := mock.Job()
	job.TaskGroups[0].Count = 8

	// Create 10 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	// Build a map of tainted nodes
	tainted := make(map[string]*structs.Node, 3)
	for i := 0; i < 3; i++ {
		n := mock.Node()
		n.ID = allocs[i].NodeID
		allocs[i].DesiredTransition.Migrate = helper.BoolToPtr(true)
		n.Drain = true
		tainted[n.ID] = n
	}

	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, tainted, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             1,
		inplace:           0,
		stop:              3,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Migrate: 1,
				Stop:    2,
				Ignore:  7,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 2), stopResultsToNames(r.stop))
	assertNamesHaveIndexes(t, intRange(0, 0), placeResultsToNames(r.place))
	assertPlaceResultsHavePreviousAllocs(t, 1, r.place)
	// These should not have the reschedule field set
	assertPlacementsAreRescheduled(t, 0, r.place)
}

// Tests the reconciler properly handles a task group being removed
func TestReconciler_RemovedTG(t *testing.T) {
	job := mock.Job()

	// Create 10 allocations for a tg that no longer exists
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	oldName := job.TaskGroups[0].Name
	newName := "different"
	job.TaskGroups[0].Name = newName

	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             10,
		inplace:           0,
		stop:              10,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			oldName: {
				Stop: 10,
			},
			newName: {
				Place: 10,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 9), stopResultsToNames(r.stop))
	assertNamesHaveIndexes(t, intRange(0, 9), placeResultsToNames(r.place))
}

// Tests the reconciler properly handles a job in a stopped state, including
// the case where the job is nil
func TestReconciler_JobStopped(t *testing.T) {
	job := mock.Job()
	job.Stop = true

	cases := []struct {
		name             string
		job              *structs.Job
		jobID, taskGroup string
	}{
		{
			name:      "stopped job",
			job:       job,
			jobID:     job.ID,
			taskGroup: job.TaskGroups[0].Name,
		},
		{
			name:      "nil job",
			job:       nil,
			jobID:     "foo",
			taskGroup: "bar",
		},
	}

	for _, c := range cases {
		t.Run(c.name, func(t *testing.T) {
			// Create 10 allocations
			var allocs []*structs.Allocation
			for i := 0; i < 10; i++ {
				alloc := mock.Alloc()
				alloc.Job = c.job
				alloc.JobID = c.jobID
				alloc.NodeID = uuid.Generate()
				alloc.Name = structs.AllocName(c.jobID, c.taskGroup, uint(i))
				alloc.TaskGroup = c.taskGroup
				allocs = append(allocs, alloc)
			}

			reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnIgnore, false, c.jobID, c.job, nil, allocs, nil, "")
			r := reconciler.Compute()

			// Assert the correct results
			assertResults(t, r, &resultExpectation{
				createDeployment:  nil,
				deploymentUpdates: nil,
				place:             0,
				inplace:           0,
				stop:              10,
				desiredTGUpdates: map[string]*structs.DesiredUpdates{
					c.taskGroup: {
						Stop: 10,
					},
				},
			})

			assertNamesHaveIndexes(t, intRange(0, 9), stopResultsToNames(r.stop))
		})
	}
}

// Tests the reconciler properly handles jobs with multiple task groups
func TestReconciler_MultiTG(t *testing.T) {
	job := mock.Job()
	tg2 := job.TaskGroups[0].Copy()
	tg2.Name = "foo"
	job.TaskGroups = append(job.TaskGroups, tg2)

	// Create 2 existing allocations for the first tg
	var allocs []*structs.Allocation
	for i := 0; i < 2; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             18,
		inplace:           0,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:  8,
				Ignore: 2,
			},
			tg2.Name: {
				Place: 10,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(2, 9, 0, 9), placeResultsToNames(r.place))
}

// Tests the reconciler properly handles jobs with multiple task groups with
// only one having an update stanza and a deployment already being created
func TestReconciler_MultiTG_SingleUpdateStanza(t *testing.T) {
	job := mock.Job()
	tg2 := job.TaskGroups[0].Copy()
	tg2.Name = "foo"
	job.TaskGroups = append(job.TaskGroups, tg2)
	job.TaskGroups[0].Update = noCanaryUpdate

	// Create all the allocs
	var allocs []*structs.Allocation
	for i := 0; i < 2; i++ {
		for j := 0; j < 10; j++ {
			alloc := mock.Alloc()
			alloc.Job = job
			alloc.JobID = job.ID
			alloc.NodeID = uuid.Generate()
			alloc.Name = structs.AllocName(job.ID, job.TaskGroups[i].Name, uint(j))
			alloc.TaskGroup = job.TaskGroups[i].Name
			allocs = append(allocs, alloc)
		}
	}

	d := structs.NewDeployment(job)
	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
		DesiredTotal: 10,
	}

	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnIgnore, false, job.ID, job, d, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             0,
		inplace:           0,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Ignore: 10,
			},
			tg2.Name: {
				Ignore: 10,
			},
		},
	})
}

// Tests delayed rescheduling of failed batch allocations
func TestReconciler_RescheduleLater_Batch(t *testing.T) {
	require := require.New(t)

	// Set desired 4
	job := mock.Job()
	job.TaskGroups[0].Count = 4
	now := time.Now()

	// Set up reschedule policy
	delayDur := 15 * time.Second
	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{Attempts: 3, Interval: 24 * time.Hour, Delay: delayDur, DelayFunction: "constant"}
	tgName := job.TaskGroups[0].Name

	// Create 6 existing allocations - 2 running, 1 complete and 3 failed
	var allocs []*structs.Allocation
	for i := 0; i < 6; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
		alloc.ClientStatus = structs.AllocClientStatusRunning
	}

	// Mark 3 as failed with restart tracking info
	allocs[0].ClientStatus = structs.AllocClientStatusFailed
	allocs[0].NextAllocation = allocs[1].ID
	allocs[1].ClientStatus = structs.AllocClientStatusFailed
	allocs[1].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
		{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
			PrevAllocID: allocs[0].ID,
			PrevNodeID:  uuid.Generate(),
		},
	}}
	allocs[1].NextAllocation = allocs[2].ID
	allocs[2].ClientStatus = structs.AllocClientStatusFailed
	allocs[2].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
		StartedAt:  now.Add(-1 * time.Hour),
		FinishedAt: now}}
	allocs[2].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
		{RescheduleTime: time.Now().Add(-2 * time.Hour).UTC().UnixNano(),
			PrevAllocID: allocs[0].ID,
			PrevNodeID:  uuid.Generate(),
		},
		{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
			PrevAllocID: allocs[1].ID,
			PrevNodeID:  uuid.Generate(),
		},
	}}

	// Mark one as complete
	allocs[5].ClientStatus = structs.AllocClientStatusComplete

	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnIgnore, true, job.ID, job, nil, allocs, nil, uuid.Generate())
	r := reconciler.Compute()

	// Two reschedule attempts were already made, one more can be made at a future time
	// Verify that the follow up eval has the expected waitUntil time
	evals := r.desiredFollowupEvals[tgName]
	require.NotNil(evals)
	require.Equal(1, len(evals))
	require.Equal(now.Add(delayDur), evals[0].WaitUntil)

	// Alloc 5 should not be replaced because it is terminal
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             0,
		inplace:           0,
		attributeUpdates:  1,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:         0,
				InPlaceUpdate: 0,
				Ignore:        4,
			},
		},
	})
	assertNamesHaveIndexes(t, intRange(2, 2), attributeUpdatesToNames(r.attributeUpdates))

	// Verify that the followup evalID field is set correctly
	var annotated *structs.Allocation
	for _, a := range r.attributeUpdates {
		annotated = a
	}
	require.Equal(evals[0].ID, annotated.FollowupEvalID)
}

// Tests delayed rescheduling of failed batch allocations and batching of allocs
// with fail times that are close together
func TestReconciler_RescheduleLaterWithBatchedEvals_Batch(t *testing.T) {
	require := require.New(t)

	// Set desired 10
	job := mock.Job()
	job.TaskGroups[0].Count = 10
	now := time.Now()

	// Set up reschedule policy
	delayDur := 15 * time.Second
	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{Attempts: 3, Interval: 24 * time.Hour, Delay: delayDur, DelayFunction: "constant"}
	tgName := job.TaskGroups[0].Name

	// Create 10 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
		alloc.ClientStatus = structs.AllocClientStatusRunning
	}

	// Mark 5 as failed with fail times very close together
	for i := 0; i < 5; i++ {
		allocs[i].ClientStatus = structs.AllocClientStatusFailed
		allocs[i].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
			StartedAt:  now.Add(-1 * time.Hour),
			FinishedAt: now.Add(time.Duration(50*i) * time.Millisecond)}}
	}

	// Mark two more as failed several seconds later
	for i := 5; i < 7; i++ {
		allocs[i].ClientStatus = structs.AllocClientStatusFailed
		allocs[i].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
			StartedAt:  now.Add(-1 * time.Hour),
			FinishedAt: now.Add(10 * time.Second)}}
	}

	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnIgnore, true, job.ID, job, nil, allocs, nil, uuid.Generate())
	r := reconciler.Compute()

	// Verify that two follow up evals were created
	evals := r.desiredFollowupEvals[tgName]
	require.NotNil(evals)
	require.Equal(2, len(evals))

	// Verify expected WaitUntil values for both batched evals
	require.Equal(now.Add(delayDur), evals[0].WaitUntil)
	secondBatchDuration := delayDur + 10*time.Second
	require.Equal(now.Add(secondBatchDuration), evals[1].WaitUntil)

	// All failed allocs are rescheduled at a later time via follow up evals,
	// so none are replaced in this pass
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             0,
		inplace:           0,
		attributeUpdates:  7,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:         0,
				InPlaceUpdate: 0,
				Ignore:        10,
			},
		},
	})
	assertNamesHaveIndexes(t, intRange(0, 6), attributeUpdatesToNames(r.attributeUpdates))

	// Verify that the followup evalID field is set correctly
	for _, alloc := range r.attributeUpdates {
		if allocNameToIndex(alloc.Name) < 5 {
			require.Equal(evals[0].ID, alloc.FollowupEvalID)
		} else if allocNameToIndex(alloc.Name) < 7 {
			require.Equal(evals[1].ID, alloc.FollowupEvalID)
		} else {
			t.Fatalf("Unexpected alloc name in attribute update results %v", alloc.Name)
		}
	}
}

// Tests rescheduling failed batch allocations
func TestReconciler_RescheduleNow_Batch(t *testing.T) {
	require := require.New(t)
	// Set desired 4
	job := mock.Job()
	job.TaskGroups[0].Count = 4
	now := time.Now()
	// Set up reschedule policy
	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{Attempts: 3, Interval: 24 * time.Hour, Delay: 5 * time.Second, DelayFunction: "constant"}
	tgName := job.TaskGroups[0].Name
	// Create 6 existing allocations - 2 running, 1 complete and 3 failed
	var allocs []*structs.Allocation
	for i := 0; i < 6; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
		alloc.ClientStatus = structs.AllocClientStatusRunning
	}
	// Mark 3 as failed with restart tracking info
	allocs[0].ClientStatus = structs.AllocClientStatusFailed
	allocs[0].NextAllocation = allocs[1].ID
	allocs[1].ClientStatus = structs.AllocClientStatusFailed
	allocs[1].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
		{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
			PrevAllocID: allocs[0].ID,
			PrevNodeID:  uuid.Generate(),
		},
	}}
	allocs[1].NextAllocation = allocs[2].ID
	allocs[2].ClientStatus = structs.AllocClientStatusFailed
	allocs[2].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
		StartedAt:  now.Add(-1 * time.Hour),
		FinishedAt: now.Add(-5 * time.Second)}}
	allocs[2].FollowupEvalID = uuid.Generate()
	allocs[2].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
		{RescheduleTime: time.Now().Add(-2 * time.Hour).UTC().UnixNano(),
			PrevAllocID: allocs[0].ID,
			PrevNodeID:  uuid.Generate(),
		},
		{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
			PrevAllocID: allocs[1].ID,
			PrevNodeID:  uuid.Generate(),
		},
	}}
	// Mark one as complete
	allocs[5].ClientStatus = structs.AllocClientStatusComplete

	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnIgnore, true, job.ID, job, nil, allocs, nil, "")
	reconciler.now = now
	r := reconciler.Compute()

	// Verify that no follow up evals were created
	evals := r.desiredFollowupEvals[tgName]
	require.Nil(evals)

	// Two reschedule attempts were made, one more can be made now
	// Alloc 5 should not be replaced because it is terminal
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             1,
		inplace:           0,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:  1,
				Ignore: 3,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(2, 2), placeResultsToNames(r.place))
	assertPlaceResultsHavePreviousAllocs(t, 1, r.place)
	assertPlacementsAreRescheduled(t, 1, r.place)
}

// Tests delayed rescheduling of failed service allocations
func TestReconciler_RescheduleLater_Service(t *testing.T) {
	require := require.New(t)

	// Set desired 5
	job := mock.Job()
	job.TaskGroups[0].Count = 5
	tgName := job.TaskGroups[0].Name
	now := time.Now()

	// Set up reschedule policy
	delayDur := 15 * time.Second
	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{Attempts: 1, Interval: 24 * time.Hour, Delay: delayDur, MaxDelay: 1 * time.Hour}

	// Create 5 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 5; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
		alloc.ClientStatus = structs.AllocClientStatusRunning
	}

	// Mark two as failed
	allocs[0].ClientStatus = structs.AllocClientStatusFailed

	// Mark one of them as already rescheduled once
	allocs[0].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
		{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
			PrevAllocID: uuid.Generate(),
			PrevNodeID:  uuid.Generate(),
		},
	}}
	allocs[1].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
		StartedAt:  now.Add(-1 * time.Hour),
		FinishedAt: now}}
	allocs[1].ClientStatus = structs.AllocClientStatusFailed

	// Mark one as desired state stop
	allocs[4].DesiredStatus = structs.AllocDesiredStatusStop

	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, uuid.Generate())
	r := reconciler.Compute()

	// Should place a new placement and create a follow up eval for the delayed reschedule
	// Verify that the follow up eval has the expected waitUntil time
	evals := r.desiredFollowupEvals[tgName]
	require.NotNil(evals)
	require.Equal(1, len(evals))
	require.Equal(now.Add(delayDur), evals[0].WaitUntil)

	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             1,
		inplace:           0,
		attributeUpdates:  1,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:         1,
				InPlaceUpdate: 0,
				Ignore:        4,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(4, 4), placeResultsToNames(r.place))
	assertNamesHaveIndexes(t, intRange(1, 1), attributeUpdatesToNames(r.attributeUpdates))

	// Verify that the followup evalID field is set correctly
	var annotated *structs.Allocation
	for _, a := range r.attributeUpdates {
		annotated = a
	}
	require.Equal(evals[0].ID, annotated.FollowupEvalID)
}
  1514  
  1515  // Tests service allocations with client status complete
  1516  func TestReconciler_Service_ClientStatusComplete(t *testing.T) {
  1517  	// Set desired 5
  1518  	job := mock.Job()
  1519  	job.TaskGroups[0].Count = 5
  1520  
  1521  	// Set up reschedule policy
  1522  	delayDur := 15 * time.Second
  1523  	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
  1524  		Attempts: 1,
  1525  		Interval: 24 * time.Hour,
  1526  		Delay:    delayDur,
  1527  		MaxDelay: 1 * time.Hour,
  1528  	}
  1529  
  1530  	// Create 5 existing allocations
  1531  	var allocs []*structs.Allocation
  1532  	for i := 0; i < 5; i++ {
  1533  		alloc := mock.Alloc()
  1534  		alloc.Job = job
  1535  		alloc.JobID = job.ID
  1536  		alloc.NodeID = uuid.Generate()
  1537  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  1538  		allocs = append(allocs, alloc)
  1539  		alloc.ClientStatus = structs.AllocClientStatusRunning
  1540  		alloc.DesiredStatus = structs.AllocDesiredStatusRun
  1541  	}
  1542  
  1543  	// Mark one as client status complete
  1544  	allocs[4].ClientStatus = structs.AllocClientStatusComplete
  1545  
  1546  	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
  1547  	r := reconciler.Compute()
  1548  
  1549  	// Should place a replacement for the alloc that was marked complete
  1550  	assertResults(t, r, &resultExpectation{
  1551  		createDeployment:  nil,
  1552  		deploymentUpdates: nil,
  1553  		place:             1,
  1554  		inplace:           0,
  1555  		stop:              0,
  1556  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  1557  			job.TaskGroups[0].Name: {
  1558  				Place:         1,
  1559  				InPlaceUpdate: 0,
  1560  				Ignore:        4,
  1561  			},
  1562  		},
  1563  	})
  1564  
  1565  	assertNamesHaveIndexes(t, intRange(4, 4), placeResultsToNames(r.place))
  1566  
  1567  }
  1568  
  1569  // Tests service job placement for an allocation with desired status stop and client status failed
  1570  func TestReconciler_Service_DesiredStop_ClientStatusComplete(t *testing.T) {
  1571  	// Set desired 5
  1572  	job := mock.Job()
  1573  	job.TaskGroups[0].Count = 5
  1574  
  1575  	// Set up reschedule policy
  1576  	delayDur := 15 * time.Second
  1577  	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
  1578  		Attempts: 1,
  1579  		Interval: 24 * time.Hour,
  1580  		Delay:    delayDur,
  1581  		MaxDelay: 1 * time.Hour,
  1582  	}
  1583  
  1584  	// Create 5 existing allocations
  1585  	var allocs []*structs.Allocation
  1586  	for i := 0; i < 5; i++ {
  1587  		alloc := mock.Alloc()
  1588  		alloc.Job = job
  1589  		alloc.JobID = job.ID
  1590  		alloc.NodeID = uuid.Generate()
  1591  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  1592  		allocs = append(allocs, alloc)
  1593  		alloc.ClientStatus = structs.AllocClientStatusRunning
  1594  		alloc.DesiredStatus = structs.AllocDesiredStatusRun
  1595  	}
  1596  
  1597  	// Mark one as failed but with desired status stop
  1598  	// Should not trigger rescheduling logic but should trigger a placement
  1599  	allocs[4].ClientStatus = structs.AllocClientStatusFailed
  1600  	allocs[4].DesiredStatus = structs.AllocDesiredStatusStop
  1601  
  1602  	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
  1603  	r := reconciler.Compute()
  1604  
  1605  	// Should place a replacement for the alloc that was stopped
  1606  	assertResults(t, r, &resultExpectation{
  1607  		createDeployment:  nil,
  1608  		deploymentUpdates: nil,
  1609  		place:             1,
  1610  		inplace:           0,
  1611  		stop:              0,
  1612  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  1613  			job.TaskGroups[0].Name: {
  1614  				Place:         1,
  1615  				InPlaceUpdate: 0,
  1616  				Ignore:        4,
  1617  			},
  1618  		},
  1619  	})
  1620  
  1621  	assertNamesHaveIndexes(t, intRange(4, 4), placeResultsToNames(r.place))
  1622  
  1623  	// Should not have any follow up evals created
  1624  	require := require.New(t)
  1625  	require.Equal(0, len(r.desiredFollowupEvals))
  1626  }
  1627  
  1628  // Tests immediate rescheduling of failed service allocations, including one with desired state stop
  1629  func TestReconciler_RescheduleNow_Service(t *testing.T) {
  1630  	require := require.New(t)
  1631  
  1632  	// Set desired 5
  1633  	job := mock.Job()
  1634  	job.TaskGroups[0].Count = 5
  1635  	tgName := job.TaskGroups[0].Name
  1636  	now := time.Now()
  1637  
  1638  	// Set up reschedule policy and update stanza
  1639  	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
  1640  		Attempts:      1,
  1641  		Interval:      24 * time.Hour,
  1642  		Delay:         5 * time.Second,
  1643  		DelayFunction: "",
  1644  		MaxDelay:      1 * time.Hour,
  1645  		Unlimited:     false,
  1646  	}
  1647  	job.TaskGroups[0].Update = noCanaryUpdate
  1648  
  1649  	// Create 5 existing allocations
  1650  	var allocs []*structs.Allocation
  1651  	for i := 0; i < 5; i++ {
  1652  		alloc := mock.Alloc()
  1653  		alloc.Job = job
  1654  		alloc.JobID = job.ID
  1655  		alloc.NodeID = uuid.Generate()
  1656  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  1657  		allocs = append(allocs, alloc)
  1658  		alloc.ClientStatus = structs.AllocClientStatusRunning
  1659  	}
  1660  
  1661  	// Mark two as failed
  1662  	allocs[0].ClientStatus = structs.AllocClientStatusFailed
  1663  
  1664  	// Mark one of them as already rescheduled once
  1665  	allocs[0].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
  1666  		{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
  1667  			PrevAllocID: uuid.Generate(),
  1668  			PrevNodeID:  uuid.Generate(),
  1669  		},
  1670  	}}
  1671  	allocs[1].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
  1672  		StartedAt:  now.Add(-1 * time.Hour),
  1673  		FinishedAt: now.Add(-10 * time.Second)}}
  1674  	allocs[1].ClientStatus = structs.AllocClientStatusFailed
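        	// allocs[1] finished 10 seconds ago, already past the 5 second reschedule
        	// delay, so it should be replaced immediately with no follow up eval.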
  1675  
  1676  	// Mark one as desired state stop
  1677  	allocs[4].DesiredStatus = structs.AllocDesiredStatusStop
  1678  
  1679  	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
  1680  	r := reconciler.Compute()
  1681  
  1682  	// Verify that no follow up evals were created
  1683  	evals := r.desiredFollowupEvals[tgName]
  1684  	require.Nil(evals)
  1685  
  1686  	// Verify that one rescheduled alloc and one replacement for terminal alloc were placed
  1687  	assertResults(t, r, &resultExpectation{
  1688  		createDeployment:  nil,
  1689  		deploymentUpdates: nil,
  1690  		place:             2,
  1691  		inplace:           0,
  1692  		stop:              0,
  1693  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  1694  			job.TaskGroups[0].Name: {
  1695  				Place:  2,
  1696  				Ignore: 3,
  1697  			},
  1698  		},
  1699  	})
  1700  
  1701  	// Rescheduled allocs should have previous allocs
  1702  	assertNamesHaveIndexes(t, intRange(1, 1, 4, 4), placeResultsToNames(r.place))
  1703  	assertPlaceResultsHavePreviousAllocs(t, 1, r.place)
  1704  	assertPlacementsAreRescheduled(t, 1, r.place)
  1705  }
  1706  
  1707  // Tests rescheduling failed service allocations when there's clock drift (up to a second)
  1708  func TestReconciler_RescheduleNow_WithinAllowedTimeWindow(t *testing.T) {
  1709  	require := require.New(t)
  1710  
  1711  	// Set desired 5
  1712  	job := mock.Job()
  1713  	job.TaskGroups[0].Count = 5
  1714  	tgName := job.TaskGroups[0].Name
  1715  	now := time.Now()
  1716  
  1717  	// Set up reschedule policy and update stanza
  1718  	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
  1719  		Attempts:      1,
  1720  		Interval:      24 * time.Hour,
  1721  		Delay:         5 * time.Second,
  1722  		DelayFunction: "",
  1723  		MaxDelay:      1 * time.Hour,
  1724  		Unlimited:     false,
  1725  	}
  1726  	job.TaskGroups[0].Update = noCanaryUpdate
  1727  
  1728  	// Create 5 existing allocations
  1729  	var allocs []*structs.Allocation
  1730  	for i := 0; i < 5; i++ {
  1731  		alloc := mock.Alloc()
  1732  		alloc.Job = job
  1733  		alloc.JobID = job.ID
  1734  		alloc.NodeID = uuid.Generate()
  1735  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  1736  		allocs = append(allocs, alloc)
  1737  		alloc.ClientStatus = structs.AllocClientStatusRunning
  1738  	}
  1739  
  1740  	// Mark two as failed
  1741  	allocs[0].ClientStatus = structs.AllocClientStatusFailed
  1742  
  1743  	// Mark one of them as already rescheduled once
  1744  	allocs[0].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
  1745  		{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
  1746  			PrevAllocID: uuid.Generate(),
  1747  			PrevNodeID:  uuid.Generate(),
  1748  		},
  1749  	}}
  1750  	// Set fail time to 4 seconds ago which falls within the reschedule window
  1751  	allocs[1].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
  1752  		StartedAt:  now.Add(-1 * time.Hour),
  1753  		FinishedAt: now.Add(-4 * time.Second)}}
  1754  	allocs[1].ClientStatus = structs.AllocClientStatusFailed
  1755  
  1756  	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
  1757  	reconciler.now = now
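        	// Pin the reconciler's clock: the failure is 4s old and the delay is 5s,
        	// so the reschedule time is still 1s in the future. Presumably the
        	// reconciler tolerates up to a second of clock drift and treats the
        	// reschedule as due now rather than creating a follow up eval.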
  1758  	r := reconciler.Compute()
  1759  
  1760  	// Verify that no follow up evals were created
  1761  	evals := r.desiredFollowupEvals[tgName]
  1762  	require.Nil(evals)
  1763  
  1764  	// Verify that one rescheduled alloc was placed
  1765  	assertResults(t, r, &resultExpectation{
  1766  		createDeployment:  nil,
  1767  		deploymentUpdates: nil,
  1768  		place:             1,
  1769  		inplace:           0,
  1770  		stop:              0,
  1771  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  1772  			job.TaskGroups[0].Name: {
  1773  				Place:  1,
  1774  				Ignore: 4,
  1775  			},
  1776  		},
  1777  	})
  1778  
  1779  	// Rescheduled allocs should have previous allocs
  1780  	assertNamesHaveIndexes(t, intRange(1, 1), placeResultsToNames(r.place))
  1781  	assertPlaceResultsHavePreviousAllocs(t, 1, r.place)
  1782  	assertPlacementsAreRescheduled(t, 1, r.place)
  1783  }
  1784  
  1785  // Tests rescheduling failed service allocations when the eval ID matches and there's a large clock drift
  1786  func TestReconciler_RescheduleNow_EvalIDMatch(t *testing.T) {
  1787  	require := require.New(t)
  1788  
  1789  	// Set desired 5
  1790  	job := mock.Job()
  1791  	job.TaskGroups[0].Count = 5
  1792  	tgName := job.TaskGroups[0].Name
  1793  	now := time.Now()
  1794  
  1795  	// Set up reschedule policy and update stanza
  1796  	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
  1797  		Attempts:      1,
  1798  		Interval:      24 * time.Hour,
  1799  		Delay:         5 * time.Second,
  1800  		DelayFunction: "",
  1801  		MaxDelay:      1 * time.Hour,
  1802  		Unlimited:     false,
  1803  	}
  1804  	job.TaskGroups[0].Update = noCanaryUpdate
  1805  
  1806  	// Create 5 existing allocations
  1807  	var allocs []*structs.Allocation
  1808  	for i := 0; i < 5; i++ {
  1809  		alloc := mock.Alloc()
  1810  		alloc.Job = job
  1811  		alloc.JobID = job.ID
  1812  		alloc.NodeID = uuid.Generate()
  1813  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  1814  		allocs = append(allocs, alloc)
  1815  		alloc.ClientStatus = structs.AllocClientStatusRunning
  1816  	}
  1817  
  1818  	// Mark two as failed
  1819  	allocs[0].ClientStatus = structs.AllocClientStatusFailed
  1820  
  1821  	// Mark one of them as already rescheduled once
  1822  	allocs[0].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
  1823  		{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
  1824  			PrevAllocID: uuid.Generate(),
  1825  			PrevNodeID:  uuid.Generate(),
  1826  		},
  1827  	}}
  1828  	// Set the fail time to 5 seconds ago and attach the follow up eval ID
  1829  	evalID := uuid.Generate()
  1830  	allocs[1].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
  1831  		StartedAt:  now.Add(-1 * time.Hour),
  1832  		FinishedAt: now.Add(-5 * time.Second)}}
  1833  	allocs[1].ClientStatus = structs.AllocClientStatusFailed
  1834  	allocs[1].FollowupEvalID = evalID
  1835  
  1836  	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, evalID)
  1837  	reconciler.now = now.Add(-30 * time.Second)
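        	// From the reconciler's (skewed) clock the 5s delay has not elapsed, but
        	// the alloc's FollowupEvalID matches the eval ID the reconciler was
        	// created with, so it should still be rescheduled now.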
  1838  	r := reconciler.Compute()
  1839  
  1840  	// Verify that no follow up evals were created
  1841  	evals := r.desiredFollowupEvals[tgName]
  1842  	require.Nil(evals)
  1843  
  1844  	// Verify that one rescheduled alloc was placed
  1845  	assertResults(t, r, &resultExpectation{
  1846  		createDeployment:  nil,
  1847  		deploymentUpdates: nil,
  1848  		place:             1,
  1849  		inplace:           0,
  1850  		stop:              0,
  1851  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  1852  			job.TaskGroups[0].Name: {
  1853  				Place:  1,
  1854  				Ignore: 4,
  1855  			},
  1856  		},
  1857  	})
  1858  
  1859  	// Rescheduled allocs should have previous allocs
  1860  	assertNamesHaveIndexes(t, intRange(1, 1), placeResultsToNames(r.place))
  1861  	assertPlaceResultsHavePreviousAllocs(t, 1, r.place)
  1862  	assertPlacementsAreRescheduled(t, 1, r.place)
  1863  }
  1864  
  1865  // Tests rescheduling failed service allocations when there are canaries
  1866  func TestReconciler_RescheduleNow_Service_WithCanaries(t *testing.T) {
  1867  	require := require.New(t)
  1868  
  1869  	// Set desired 5
  1870  	job := mock.Job()
  1871  	job.TaskGroups[0].Count = 5
  1872  	tgName := job.TaskGroups[0].Name
  1873  	now := time.Now()
  1874  
  1875  	// Set up reschedule policy and update stanza
  1876  	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
  1877  		Attempts:      1,
  1878  		Interval:      24 * time.Hour,
  1879  		Delay:         5 * time.Second,
  1880  		DelayFunction: "",
  1881  		MaxDelay:      1 * time.Hour,
  1882  		Unlimited:     false,
  1883  	}
  1884  	job.TaskGroups[0].Update = canaryUpdate
  1885  
  1886  	job2 := job.Copy()
  1887  	job2.Version++
  1888  
  1889  	d := structs.NewDeployment(job2)
  1890  	d.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion
  1891  	s := &structs.DeploymentState{
  1892  		DesiredCanaries: 2,
  1893  		DesiredTotal:    5,
  1894  	}
  1895  	d.TaskGroups[job.TaskGroups[0].Name] = s
  1896  
  1897  	// Create 5 existing allocations
  1898  	var allocs []*structs.Allocation
  1899  	for i := 0; i < 5; i++ {
  1900  		alloc := mock.Alloc()
  1901  		alloc.Job = job
  1902  		alloc.JobID = job.ID
  1903  		alloc.NodeID = uuid.Generate()
  1904  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  1905  		allocs = append(allocs, alloc)
  1906  		alloc.ClientStatus = structs.AllocClientStatusRunning
  1907  	}
  1908  
  1909  	// Mark three as failed
  1910  	allocs[0].ClientStatus = structs.AllocClientStatusFailed
  1911  
  1912  	// Mark one of them as already rescheduled once
  1913  	allocs[0].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
  1914  		{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
  1915  			PrevAllocID: uuid.Generate(),
  1916  			PrevNodeID:  uuid.Generate(),
  1917  		},
  1918  	}}
  1919  	allocs[1].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
  1920  		StartedAt:  now.Add(-1 * time.Hour),
  1921  		FinishedAt: now.Add(-10 * time.Second)}}
  1922  	allocs[1].ClientStatus = structs.AllocClientStatusFailed
  1923  
  1924  	// Mark a third one as failed
  1925  	allocs[4].ClientStatus = structs.AllocClientStatusFailed
  1926  
  1927  	// Create 2 canary allocations
  1928  	for i := 0; i < 2; i++ {
  1929  		alloc := mock.Alloc()
  1930  		alloc.Job = job
  1931  		alloc.JobID = job.ID
  1932  		alloc.NodeID = uuid.Generate()
  1933  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  1934  		alloc.ClientStatus = structs.AllocClientStatusRunning
  1935  		alloc.DeploymentID = d.ID
  1936  		alloc.DeploymentStatus = &structs.AllocDeploymentStatus{
  1937  			Canary:  true,
  1938  			Healthy: helper.BoolToPtr(false),
  1939  		}
  1940  		s.PlacedCanaries = append(s.PlacedCanaries, alloc.ID)
  1941  		allocs = append(allocs, alloc)
  1942  	}
  1943  
  1944  	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnIgnore, false, job.ID, job2, d, allocs, nil, "")
  1945  	r := reconciler.Compute()
  1946  
  1947  	// Verify that no follow up evals were created
  1948  	evals := r.desiredFollowupEvals[tgName]
  1949  	require.Nil(evals)
  1950  
  1951  	// Verify that the two remaining failed allocs were rescheduled; allocs[0] has exhausted its reschedule attempts
  1952  	assertResults(t, r, &resultExpectation{
  1953  		createDeployment:  nil,
  1954  		deploymentUpdates: nil,
  1955  		place:             2,
  1956  		inplace:           0,
  1957  		stop:              0,
  1958  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  1959  			job.TaskGroups[0].Name: {
  1960  				Place:  2,
  1961  				Ignore: 5,
  1962  			},
  1963  		},
  1964  	})
  1965  
  1966  	// Rescheduled allocs should have previous allocs
  1967  	assertNamesHaveIndexes(t, intRange(1, 1, 4, 4), placeResultsToNames(r.place))
  1968  	assertPlaceResultsHavePreviousAllocs(t, 2, r.place)
  1969  	assertPlacementsAreRescheduled(t, 2, r.place)
  1970  }
  1971  
  1972  // Tests rescheduling failed canary service allocations
  1973  func TestReconciler_RescheduleNow_Service_Canaries(t *testing.T) {
  1974  	require := require.New(t)
  1975  
  1976  	// Set desired 5
  1977  	job := mock.Job()
  1978  	job.TaskGroups[0].Count = 5
  1979  	tgName := job.TaskGroups[0].Name
  1980  	now := time.Now()
  1981  
  1982  	// Set up reschedule policy and update stanza
  1983  	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
  1984  		Delay:         5 * time.Second,
  1985  		DelayFunction: "constant",
  1986  		MaxDelay:      1 * time.Hour,
  1987  		Unlimited:     true,
  1988  	}
  1989  	job.TaskGroups[0].Update = canaryUpdate
  1990  
  1991  	job2 := job.Copy()
  1992  	job2.Version++
  1993  
  1994  	d := structs.NewDeployment(job2)
  1995  	d.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion
  1996  	s := &structs.DeploymentState{
  1997  		DesiredCanaries: 2,
  1998  		DesiredTotal:    5,
  1999  	}
  2000  	d.TaskGroups[job.TaskGroups[0].Name] = s
  2001  
  2002  	// Create 5 existing allocations
  2003  	var allocs []*structs.Allocation
  2004  	for i := 0; i < 5; i++ {
  2005  		alloc := mock.Alloc()
  2006  		alloc.Job = job
  2007  		alloc.JobID = job.ID
  2008  		alloc.NodeID = uuid.Generate()
  2009  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2010  		allocs = append(allocs, alloc)
  2011  		alloc.ClientStatus = structs.AllocClientStatusRunning
  2012  	}
  2013  
  2014  	// Create 2 running canary allocations that are not yet marked healthy
  2015  	for i := 0; i < 2; i++ {
  2016  		alloc := mock.Alloc()
  2017  		alloc.Job = job
  2018  		alloc.JobID = job.ID
  2019  		alloc.NodeID = uuid.Generate()
  2020  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2021  		alloc.ClientStatus = structs.AllocClientStatusRunning
  2022  		alloc.DeploymentID = d.ID
  2023  		alloc.DeploymentStatus = &structs.AllocDeploymentStatus{
  2024  			Canary:  true,
  2025  			Healthy: helper.BoolToPtr(false),
  2026  		}
  2027  		s.PlacedCanaries = append(s.PlacedCanaries, alloc.ID)
  2028  		allocs = append(allocs, alloc)
  2029  	}
  2030  
  2031  	// Mark the canaries as failed
  2032  	allocs[5].ClientStatus = structs.AllocClientStatusFailed
  2033  	allocs[5].DesiredTransition.Reschedule = helper.BoolToPtr(true)
  2034  
  2035  	// Mark one of them as already rescheduled once
  2036  	allocs[5].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
  2037  		{RescheduleTime: now.Add(-1 * time.Hour).UTC().UnixNano(),
  2038  			PrevAllocID: uuid.Generate(),
  2039  			PrevNodeID:  uuid.Generate(),
  2040  		},
  2041  	}}
  2042  
  2043  	allocs[6].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
  2044  		StartedAt:  now.Add(-1 * time.Hour),
  2045  		FinishedAt: now.Add(-10 * time.Second)}}
  2046  	allocs[6].ClientStatus = structs.AllocClientStatusFailed
  2047  	allocs[6].DesiredTransition.Reschedule = helper.BoolToPtr(true)
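        	// Both failed canaries carry DesiredTransition.Reschedule, which is
        	// presumably what allows the reconciler to replace a canary while its
        	// deployment is still running.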
  2048  
  2049  	// Create 4 unhealthy canary allocations that have already been replaced
  2050  	for i := 0; i < 4; i++ {
  2051  		alloc := mock.Alloc()
  2052  		alloc.Job = job
  2053  		alloc.JobID = job.ID
  2054  		alloc.NodeID = uuid.Generate()
  2055  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i%2))
  2056  		alloc.ClientStatus = structs.AllocClientStatusFailed
  2057  		alloc.DeploymentID = d.ID
  2058  		alloc.DeploymentStatus = &structs.AllocDeploymentStatus{
  2059  			Canary:  true,
  2060  			Healthy: helper.BoolToPtr(false),
  2061  		}
  2062  		s.PlacedCanaries = append(s.PlacedCanaries, alloc.ID)
  2063  		allocs = append(allocs, alloc)
  2064  	}
  2065  
  2066  	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnIgnore, false, job.ID, job2, d, allocs, nil, "")
  2067  	reconciler.now = now
  2068  	r := reconciler.Compute()
  2069  
  2070  	// Verify that no follow up evals were created
  2071  	evals := r.desiredFollowupEvals[tgName]
  2072  	require.Nil(evals)
  2073  
  2074  	// Verify that the two failed canaries were rescheduled
  2075  	assertResults(t, r, &resultExpectation{
  2076  		createDeployment:  nil,
  2077  		deploymentUpdates: nil,
  2078  		place:             2,
  2079  		inplace:           0,
  2080  		stop:              0,
  2081  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  2082  			job.TaskGroups[0].Name: {
  2083  				Place:  2,
  2084  				Ignore: 9,
  2085  			},
  2086  		},
  2087  	})
  2088  
  2089  	// Rescheduled allocs should have previous allocs
  2090  	assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place))
  2091  	assertPlaceResultsHavePreviousAllocs(t, 2, r.place)
  2092  	assertPlacementsAreRescheduled(t, 2, r.place)
  2093  }
  2094  
  2095  // Tests rescheduling failed canary service allocations when one has reached its
  2096  // reschedule limit
  2097  func TestReconciler_RescheduleNow_Service_Canaries_Limit(t *testing.T) {
  2098  	require := require.New(t)
  2099  
  2100  	// Set desired 5
  2101  	job := mock.Job()
  2102  	job.TaskGroups[0].Count = 5
  2103  	tgName := job.TaskGroups[0].Name
  2104  	now := time.Now()
  2105  
  2106  	// Set up reschedule policy and update stanza
  2107  	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
  2108  		Attempts:      1,
  2109  		Interval:      24 * time.Hour,
  2110  		Delay:         5 * time.Second,
  2111  		DelayFunction: "",
  2112  		MaxDelay:      1 * time.Hour,
  2113  		Unlimited:     false,
  2114  	}
  2115  	job.TaskGroups[0].Update = canaryUpdate
  2116  
  2117  	job2 := job.Copy()
  2118  	job2.Version++
  2119  
  2120  	d := structs.NewDeployment(job2)
  2121  	d.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion
  2122  	s := &structs.DeploymentState{
  2123  		DesiredCanaries: 2,
  2124  		DesiredTotal:    5,
  2125  	}
  2126  	d.TaskGroups[job.TaskGroups[0].Name] = s
  2127  
  2128  	// Create 5 existing allocations
  2129  	var allocs []*structs.Allocation
  2130  	for i := 0; i < 5; i++ {
  2131  		alloc := mock.Alloc()
  2132  		alloc.Job = job
  2133  		alloc.JobID = job.ID
  2134  		alloc.NodeID = uuid.Generate()
  2135  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2136  		allocs = append(allocs, alloc)
  2137  		alloc.ClientStatus = structs.AllocClientStatusRunning
  2138  	}
  2139  
  2140  	// Create 2 running canary allocations that are not yet marked healthy
  2141  	for i := 0; i < 2; i++ {
  2142  		alloc := mock.Alloc()
  2143  		alloc.Job = job
  2144  		alloc.JobID = job.ID
  2145  		alloc.NodeID = uuid.Generate()
  2146  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2147  		alloc.ClientStatus = structs.AllocClientStatusRunning
  2148  		alloc.DeploymentID = d.ID
  2149  		alloc.DeploymentStatus = &structs.AllocDeploymentStatus{
  2150  			Canary:  true,
  2151  			Healthy: helper.BoolToPtr(false),
  2152  		}
  2153  		s.PlacedCanaries = append(s.PlacedCanaries, alloc.ID)
  2154  		allocs = append(allocs, alloc)
  2155  	}
  2156  
  2157  	// Mark the canaries as failed
  2158  	allocs[5].ClientStatus = structs.AllocClientStatusFailed
  2159  	allocs[5].DesiredTransition.Reschedule = helper.BoolToPtr(true)
  2160  
  2161  	// Mark one of them as already rescheduled once
  2162  	allocs[5].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
  2163  		{RescheduleTime: now.Add(-1 * time.Hour).UTC().UnixNano(),
  2164  			PrevAllocID: uuid.Generate(),
  2165  			PrevNodeID:  uuid.Generate(),
  2166  		},
  2167  	}}
  2168  
  2169  	allocs[6].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
  2170  		StartedAt:  now.Add(-1 * time.Hour),
  2171  		FinishedAt: now.Add(-10 * time.Second)}}
  2172  	allocs[6].ClientStatus = structs.AllocClientStatusFailed
  2173  	allocs[6].DesiredTransition.Reschedule = helper.BoolToPtr(true)
  2174  
  2175  	// Create 4 unhealthy canary allocations that have already been replaced
  2176  	for i := 0; i < 4; i++ {
  2177  		alloc := mock.Alloc()
  2178  		alloc.Job = job
  2179  		alloc.JobID = job.ID
  2180  		alloc.NodeID = uuid.Generate()
  2181  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i%2))
  2182  		alloc.ClientStatus = structs.AllocClientStatusFailed
  2183  		alloc.DeploymentID = d.ID
  2184  		alloc.DeploymentStatus = &structs.AllocDeploymentStatus{
  2185  			Canary:  true,
  2186  			Healthy: helper.BoolToPtr(false),
  2187  		}
  2188  		s.PlacedCanaries = append(s.PlacedCanaries, alloc.ID)
  2189  		allocs = append(allocs, alloc)
  2190  	}
  2191  
  2192  	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnIgnore, false, job.ID, job2, d, allocs, nil, "")
  2193  	reconciler.now = now
  2194  	r := reconciler.Compute()
  2195  
  2196  	// Verify that no follow up evals were created
  2197  	evals := r.desiredFollowupEvals[tgName]
  2198  	require.Nil(evals)
  2199  
  2200  	// Verify that only one failed canary was rescheduled; the other has exhausted its reschedule attempts
  2201  	assertResults(t, r, &resultExpectation{
  2202  		createDeployment:  nil,
  2203  		deploymentUpdates: nil,
  2204  		place:             1,
  2205  		inplace:           0,
  2206  		stop:              0,
  2207  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  2208  			job.TaskGroups[0].Name: {
  2209  				Place:  1,
  2210  				Ignore: 10,
  2211  			},
  2212  		},
  2213  	})
  2214  
  2215  	// Rescheduled allocs should have previous allocs
  2216  	assertNamesHaveIndexes(t, intRange(1, 1), placeResultsToNames(r.place))
  2217  	assertPlaceResultsHavePreviousAllocs(t, 1, r.place)
  2218  	assertPlacementsAreRescheduled(t, 1, r.place)
  2219  }
  2220  
  2221  // Tests that failed service allocations which were already rescheduled are not rescheduled again
  2222  func TestReconciler_DontReschedule_PreviouslyRescheduled(t *testing.T) {
  2223  	// Set desired 5
  2224  	job := mock.Job()
  2225  	job.TaskGroups[0].Count = 5
  2226  
  2227  	// Set up reschedule policy
  2228  	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{Attempts: 5, Interval: 24 * time.Hour}
  2229  
  2230  	// Create 7 existing allocations
  2231  	var allocs []*structs.Allocation
  2232  	for i := 0; i < 7; i++ {
  2233  		alloc := mock.Alloc()
  2234  		alloc.Job = job
  2235  		alloc.JobID = job.ID
  2236  		alloc.NodeID = uuid.Generate()
  2237  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2238  		allocs = append(allocs, alloc)
  2239  		alloc.ClientStatus = structs.AllocClientStatusRunning
  2240  	}
  2241  	// Mark two as failed and rescheduled
  2242  	allocs[0].ClientStatus = structs.AllocClientStatusFailed
  2243  	allocs[0].ID = allocs[1].ID
  2244  	allocs[1].ClientStatus = structs.AllocClientStatusFailed
  2245  	allocs[1].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
  2246  		{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
  2247  			PrevAllocID: uuid.Generate(),
  2248  			PrevNodeID:  uuid.Generate(),
  2249  		},
  2250  	}}
  2251  	allocs[1].NextAllocation = allocs[2].ID
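        	// The RescheduleTracker and NextAllocation on allocs[1] record that it
        	// was already replaced, so the reconciler should not reschedule it again.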
  2252  
  2253  	// Mark one as desired state stop
  2254  	allocs[4].DesiredStatus = structs.AllocDesiredStatusStop
  2255  
  2256  	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
  2257  	r := reconciler.Compute()
  2258  
  2259  	// Should place 1 - a new placement to make up the desired count of 5; the
  2260  	// previously rescheduled failed allocs are not rescheduled again
  2261  	assertResults(t, r, &resultExpectation{
  2262  		createDeployment:  nil,
  2263  		deploymentUpdates: nil,
  2264  		place:             1,
  2265  		inplace:           0,
  2266  		stop:              0,
  2267  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  2268  			job.TaskGroups[0].Name: {
  2269  				Place:  1,
  2270  				Ignore: 4,
  2271  			},
  2272  		},
  2273  	})
  2274  
  2275  	// Name index 0 is used for the replacement because allocs[0] shares allocs[1]'s ID, leaving index 0 as the lowest free name index after de-duplication
  2276  	assertNamesHaveIndexes(t, intRange(0, 0), placeResultsToNames(r.place))
  2277  }
  2278  
  2279  // Tests the reconciler cancels an old deployment when the job is being stopped
  2280  func TestReconciler_CancelDeployment_JobStop(t *testing.T) {
  2281  	job := mock.Job()
  2282  	job.Stop = true
  2283  
  2284  	running := structs.NewDeployment(job)
  2285  	failed := structs.NewDeployment(job)
  2286  	failed.Status = structs.DeploymentStatusFailed
  2287  
  2288  	cases := []struct {
  2289  		name             string
  2290  		job              *structs.Job
  2291  		jobID, taskGroup string
  2292  		deployment       *structs.Deployment
  2293  		cancel           bool
  2294  	}{
  2295  		{
  2296  			name:       "stopped job, running deployment",
  2297  			job:        job,
  2298  			jobID:      job.ID,
  2299  			taskGroup:  job.TaskGroups[0].Name,
  2300  			deployment: running,
  2301  			cancel:     true,
  2302  		},
  2303  		{
  2304  			name:       "nil job, running deployment",
  2305  			job:        nil,
  2306  			jobID:      "foo",
  2307  			taskGroup:  "bar",
  2308  			deployment: running,
  2309  			cancel:     true,
  2310  		},
  2311  		{
  2312  			name:       "stopped job, failed deployment",
  2313  			job:        job,
  2314  			jobID:      job.ID,
  2315  			taskGroup:  job.TaskGroups[0].Name,
  2316  			deployment: failed,
  2317  			cancel:     false,
  2318  		},
  2319  		{
  2320  			name:       "nil job, failed deployment",
  2321  			job:        nil,
  2322  			jobID:      "foo",
  2323  			taskGroup:  "bar",
  2324  			deployment: failed,
  2325  			cancel:     false,
  2326  		},
  2327  	}
  2328  
  2329  	for _, c := range cases {
  2330  		t.Run(c.name, func(t *testing.T) {
  2331  			// Create 10 allocations
  2332  			var allocs []*structs.Allocation
  2333  			for i := 0; i < 10; i++ {
  2334  				alloc := mock.Alloc()
  2335  				alloc.Job = c.job
  2336  				alloc.JobID = c.jobID
  2337  				alloc.NodeID = uuid.Generate()
  2338  				alloc.Name = structs.AllocName(c.jobID, c.taskGroup, uint(i))
  2339  				alloc.TaskGroup = c.taskGroup
  2340  				allocs = append(allocs, alloc)
  2341  			}
  2342  
  2343  			reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnIgnore, false, c.jobID, c.job, c.deployment, allocs, nil, "")
  2344  			r := reconciler.Compute()
  2345  
  2346  			var updates []*structs.DeploymentStatusUpdate
  2347  			if c.cancel {
  2348  				updates = []*structs.DeploymentStatusUpdate{
  2349  					{
  2350  						DeploymentID:      c.deployment.ID,
  2351  						Status:            structs.DeploymentStatusCancelled,
  2352  						StatusDescription: structs.DeploymentStatusDescriptionStoppedJob,
  2353  					},
  2354  				}
  2355  			}
  2356  
  2357  			// Assert the correct results
  2358  			assertResults(t, r, &resultExpectation{
  2359  				createDeployment:  nil,
  2360  				deploymentUpdates: updates,
  2361  				place:             0,
  2362  				inplace:           0,
  2363  				stop:              10,
  2364  				desiredTGUpdates: map[string]*structs.DesiredUpdates{
  2365  					c.taskGroup: {
  2366  						Stop: 10,
  2367  					},
  2368  				},
  2369  			})
  2370  
  2371  			assertNamesHaveIndexes(t, intRange(0, 9), stopResultsToNames(r.stop))
  2372  		})
  2373  	}
  2374  }
  2375  
  2376  // Tests the reconciler cancels an old deployment when the job is updated
  2377  func TestReconciler_CancelDeployment_JobUpdate(t *testing.T) {
  2378  	// Create a base job
  2379  	job := mock.Job()
  2380  
  2381  	// Create two deployments
  2382  	running := structs.NewDeployment(job)
  2383  	failed := structs.NewDeployment(job)
  2384  	failed.Status = structs.DeploymentStatusFailed
  2385  
  2386  	// Make the job newer than the deployment
  2387  	job.Version += 10
  2388  
  2389  	cases := []struct {
  2390  		name       string
  2391  		deployment *structs.Deployment
  2392  		cancel     bool
  2393  	}{
  2394  		{
  2395  			name:       "running deployment",
  2396  			deployment: running,
  2397  			cancel:     true,
  2398  		},
  2399  		{
  2400  			name:       "failed deployment",
  2401  			deployment: failed,
  2402  			cancel:     false,
  2403  		},
  2404  	}
  2405  
  2406  	for _, c := range cases {
  2407  		t.Run(c.name, func(t *testing.T) {
  2408  			// Create 10 allocations
  2409  			var allocs []*structs.Allocation
  2410  			for i := 0; i < 10; i++ {
  2411  				alloc := mock.Alloc()
  2412  				alloc.Job = job
  2413  				alloc.JobID = job.ID
  2414  				alloc.NodeID = uuid.Generate()
  2415  				alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2416  				alloc.TaskGroup = job.TaskGroups[0].Name
  2417  				allocs = append(allocs, alloc)
  2418  			}
  2419  
  2420  			reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnIgnore, false, job.ID, job, c.deployment, allocs, nil, "")
  2421  			r := reconciler.Compute()
  2422  
  2423  			var updates []*structs.DeploymentStatusUpdate
  2424  			if c.cancel {
  2425  				updates = []*structs.DeploymentStatusUpdate{
  2426  					{
  2427  						DeploymentID:      c.deployment.ID,
  2428  						Status:            structs.DeploymentStatusCancelled,
  2429  						StatusDescription: structs.DeploymentStatusDescriptionNewerJob,
  2430  					},
  2431  				}
  2432  			}
  2433  
  2434  			// Assert the correct results
  2435  			assertResults(t, r, &resultExpectation{
  2436  				createDeployment:  nil,
  2437  				deploymentUpdates: updates,
  2438  				place:             0,
  2439  				inplace:           0,
  2440  				stop:              0,
  2441  				desiredTGUpdates: map[string]*structs.DesiredUpdates{
  2442  					job.TaskGroups[0].Name: {
  2443  						Ignore: 10,
  2444  					},
  2445  				},
  2446  			})
  2447  		})
  2448  	}
  2449  }
  2450  
  2451  // Tests the reconciler creates a deployment and does a rolling upgrade with
  2452  // destructive changes
  2453  func TestReconciler_CreateDeployment_RollingUpgrade_Destructive(t *testing.T) {
  2454  	job := mock.Job()
  2455  	job.TaskGroups[0].Update = noCanaryUpdate
  2456  
  2457  	// Create 10 allocations from the old job
  2458  	var allocs []*structs.Allocation
  2459  	for i := 0; i < 10; i++ {
  2460  		alloc := mock.Alloc()
  2461  		alloc.Job = job
  2462  		alloc.JobID = job.ID
  2463  		alloc.NodeID = uuid.Generate()
  2464  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2465  		alloc.TaskGroup = job.TaskGroups[0].Name
  2466  		allocs = append(allocs, alloc)
  2467  	}
  2468  
  2469  	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "")
  2470  	r := reconciler.Compute()
  2471  
  2472  	d := structs.NewDeployment(job)
  2473  	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  2474  		DesiredTotal: 10,
  2475  	}
  2476  
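        	// noCanaryUpdate sets MaxParallel to 4, so the rolling upgrade should
        	// destructively update 4 allocs in this pass and ignore the other 6.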
  2477  	// Assert the correct results
  2478  	assertResults(t, r, &resultExpectation{
  2479  		createDeployment:  d,
  2480  		deploymentUpdates: nil,
  2481  		destructive:       4,
  2482  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  2483  			job.TaskGroups[0].Name: {
  2484  				DestructiveUpdate: 4,
  2485  				Ignore:            6,
  2486  			},
  2487  		},
  2488  	})
  2489  
  2490  	assertNamesHaveIndexes(t, intRange(0, 3), destructiveResultsToNames(r.destructiveUpdate))
  2491  }
  2492  
  2493  // Tests the reconciler creates a deployment for inplace updates
  2494  func TestReconciler_CreateDeployment_RollingUpgrade_Inplace(t *testing.T) {
  2495  	jobOld := mock.Job()
  2496  	job := jobOld.Copy()
  2497  	job.Version++
  2498  	job.TaskGroups[0].Update = noCanaryUpdate
  2499  
  2500  	// Create 10 allocations from the old job
  2501  	var allocs []*structs.Allocation
  2502  	for i := 0; i < 10; i++ {
  2503  		alloc := mock.Alloc()
  2504  		alloc.Job = jobOld
  2505  		alloc.JobID = job.ID
  2506  		alloc.NodeID = uuid.Generate()
  2507  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2508  		alloc.TaskGroup = job.TaskGroups[0].Name
  2509  		allocs = append(allocs, alloc)
  2510  	}
  2511  
  2512  	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnInplace, false, job.ID, job, nil, allocs, nil, "")
  2513  	r := reconciler.Compute()
  2514  
  2515  	d := structs.NewDeployment(job)
  2516  	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  2517  		DesiredTotal: 10,
  2518  	}
  2519  
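        	// In-place updates don't restart tasks, so unlike destructive updates
        	// they aren't throttled by MaxParallel: all 10 allocs update at once.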
  2520  	// Assert the correct results
  2521  	assertResults(t, r, &resultExpectation{
  2522  		createDeployment:  d,
  2523  		deploymentUpdates: nil,
  2524  		place:             0,
  2525  		inplace:           10,
  2526  		stop:              0,
  2527  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  2528  			job.TaskGroups[0].Name: {
  2529  				InPlaceUpdate: 10,
  2530  			},
  2531  		},
  2532  	})
  2533  }
  2534  
  2535  // Tests the reconciler creates a deployment when the job has a newer create index
  2536  func TestReconciler_CreateDeployment_NewerCreateIndex(t *testing.T) {
  2537  	jobOld := mock.Job()
  2538  	job := jobOld.Copy()
  2539  	job.TaskGroups[0].Update = noCanaryUpdate
  2540  	job.CreateIndex += 100
  2541  
  2542  	// Create 5 allocations from the old job
  2543  	var allocs []*structs.Allocation
  2544  	for i := 0; i < 5; i++ {
  2545  		alloc := mock.Alloc()
  2546  		alloc.Job = jobOld
  2547  		alloc.JobID = jobOld.ID
  2548  		alloc.NodeID = uuid.Generate()
  2549  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2550  		alloc.TaskGroup = job.TaskGroups[0].Name
  2551  		allocs = append(allocs, alloc)
  2552  	}
  2553  
  2554  	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
  2555  	r := reconciler.Compute()
  2556  
  2557  	d := structs.NewDeployment(job)
  2558  	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  2559  		DesiredTotal: 5,
  2560  	}
  2561  
  2562  	// Assert the correct results
  2563  	assertResults(t, r, &resultExpectation{
  2564  		createDeployment:  d,
  2565  		deploymentUpdates: nil,
  2566  		place:             5,
  2567  		destructive:       0,
  2568  		inplace:           0,
  2569  		stop:              0,
  2570  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  2571  			job.TaskGroups[0].Name: {
  2572  				InPlaceUpdate:     0,
  2573  				Ignore:            5,
  2574  				Place:             5,
  2575  				DestructiveUpdate: 0,
  2576  			},
  2577  		},
  2578  	})
  2579  }
  2580  
  2581  // Tests the reconciler doesn't create a deployment if there are no changes
  2582  func TestReconciler_DontCreateDeployment_NoChanges(t *testing.T) {
  2583  	job := mock.Job()
  2584  	job.TaskGroups[0].Update = noCanaryUpdate
  2585  
  2586  	// Create 10 allocations from the job
  2587  	var allocs []*structs.Allocation
  2588  	for i := 0; i < 10; i++ {
  2589  		alloc := mock.Alloc()
  2590  		alloc.Job = job
  2591  		alloc.JobID = job.ID
  2592  		alloc.NodeID = uuid.Generate()
  2593  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2594  		alloc.TaskGroup = job.TaskGroups[0].Name
  2595  		allocs = append(allocs, alloc)
  2596  	}
  2597  
  2598  	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
  2599  	r := reconciler.Compute()
  2600  
  2601  	// Assert the correct results
  2602  	assertResults(t, r, &resultExpectation{
  2603  		createDeployment:  nil,
  2604  		deploymentUpdates: nil,
  2605  		place:             0,
  2606  		inplace:           0,
  2607  		stop:              0,
  2608  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  2609  			job.TaskGroups[0].Name: {
  2610  				DestructiveUpdate: 0,
  2611  				Ignore:            10,
  2612  			},
  2613  		},
  2614  	})
  2615  }
  2616  
  2617  // Tests the reconciler doesn't place any more canaries when the deployment is
  2618  // paused or failed
  2619  func TestReconciler_PausedOrFailedDeployment_NoMoreCanaries(t *testing.T) {
  2620  	job := mock.Job()
  2621  	job.TaskGroups[0].Update = canaryUpdate
  2622  
  2623  	cases := []struct {
  2624  		name             string
  2625  		deploymentStatus string
  2626  		stop             uint64
  2627  	}{
  2628  		{
  2629  			name:             "paused deployment",
  2630  			deploymentStatus: structs.DeploymentStatusPaused,
  2631  			stop:             0,
  2632  		},
  2633  		{
  2634  			name:             "failed deployment",
  2635  			deploymentStatus: structs.DeploymentStatusFailed,
  2636  			stop:             1,
  2637  		},
  2638  	}
  2639  
  2640  	for _, c := range cases {
  2641  		t.Run(c.name, func(t *testing.T) {
  2642  			// Create a deployment that is paused/failed and has placed one canary
  2643  			d := structs.NewDeployment(job)
  2644  			d.Status = c.deploymentStatus
  2645  			d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  2646  				Promoted:        false,
  2647  				DesiredCanaries: 2,
  2648  				DesiredTotal:    10,
  2649  				PlacedAllocs:    1,
  2650  			}
  2651  
  2652  			// Create 10 allocations for the original job
  2653  			var allocs []*structs.Allocation
  2654  			for i := 0; i < 10; i++ {
  2655  				alloc := mock.Alloc()
  2656  				alloc.Job = job
  2657  				alloc.JobID = job.ID
  2658  				alloc.NodeID = uuid.Generate()
  2659  				alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2660  				alloc.TaskGroup = job.TaskGroups[0].Name
  2661  				allocs = append(allocs, alloc)
  2662  			}
  2663  
  2664  			// Create one canary
  2665  			canary := mock.Alloc()
  2666  			canary.Job = job
  2667  			canary.JobID = job.ID
  2668  			canary.NodeID = uuid.Generate()
  2669  			canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, 0)
  2670  			canary.TaskGroup = job.TaskGroups[0].Name
  2671  			canary.DeploymentID = d.ID
  2672  			allocs = append(allocs, canary)
  2673  			d.TaskGroups[canary.TaskGroup].PlacedCanaries = []string{canary.ID}
  2674  
  2675  			mockUpdateFn := allocUpdateFnMock(map[string]allocUpdateType{canary.ID: allocUpdateFnIgnore}, allocUpdateFnDestructive)
  2676  			reconciler := NewAllocReconciler(testlog.Logger(t), mockUpdateFn, false, job.ID, job, d, allocs, nil, "")
  2677  			r := reconciler.Compute()
  2678  
  2679  			// Assert the correct results
  2680  			assertResults(t, r, &resultExpectation{
  2681  				createDeployment:  nil,
  2682  				deploymentUpdates: nil,
  2683  				place:             0,
  2684  				inplace:           0,
  2685  				stop:              int(c.stop),
  2686  				desiredTGUpdates: map[string]*structs.DesiredUpdates{
  2687  					job.TaskGroups[0].Name: {
  2688  						Ignore: 11 - c.stop,
  2689  						Stop:   c.stop,
  2690  					},
  2691  				},
  2692  			})
  2693  		})
  2694  	}
  2695  }
  2696  
  2697  // Tests the reconciler doesn't place any more allocs when the deployment is
  2698  // paused or failed
  2699  func TestReconciler_PausedOrFailedDeployment_NoMorePlacements(t *testing.T) {
  2700  	job := mock.Job()
  2701  	job.TaskGroups[0].Update = noCanaryUpdate
  2702  	job.TaskGroups[0].Count = 15
  2703  
  2704  	cases := []struct {
  2705  		name             string
  2706  		deploymentStatus string
  2707  	}{
  2708  		{
  2709  			name:             "paused deployment",
  2710  			deploymentStatus: structs.DeploymentStatusPaused,
  2711  		},
  2712  		{
  2713  			name:             "failed deployment",
  2714  			deploymentStatus: structs.DeploymentStatusFailed,
  2715  		},
  2716  	}
  2717  
  2718  	for _, c := range cases {
  2719  		t.Run(c.name, func(t *testing.T) {
  2720  			// Create a deployment that is paused/failed and has placed 10 of the 15 desired allocs
  2721  			d := structs.NewDeployment(job)
  2722  			d.Status = c.deploymentStatus
  2723  			d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  2724  				Promoted:     false,
  2725  				DesiredTotal: 15,
  2726  				PlacedAllocs: 10,
  2727  			}
  2728  
  2729  			// Create 10 allocations for the new job
  2730  			var allocs []*structs.Allocation
  2731  			for i := 0; i < 10; i++ {
  2732  				alloc := mock.Alloc()
  2733  				alloc.Job = job
  2734  				alloc.JobID = job.ID
  2735  				alloc.NodeID = uuid.Generate()
  2736  				alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2737  				alloc.TaskGroup = job.TaskGroups[0].Name
  2738  				allocs = append(allocs, alloc)
  2739  			}
  2740  
  2741  			reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnIgnore, false, job.ID, job, d, allocs, nil, "")
  2742  			r := reconciler.Compute()
  2743  
  2744  			// Assert the correct results
  2745  			assertResults(t, r, &resultExpectation{
  2746  				createDeployment:  nil,
  2747  				deploymentUpdates: nil,
  2748  				place:             0,
  2749  				inplace:           0,
  2750  				stop:              0,
  2751  				desiredTGUpdates: map[string]*structs.DesiredUpdates{
  2752  					job.TaskGroups[0].Name: {
  2753  						Ignore: 10,
  2754  					},
  2755  				},
  2756  			})
  2757  		})
  2758  	}
  2759  }
  2760  
  2761  // Tests the reconciler doesn't do any more destructive updates when the
  2762  // deployment is paused or failed
  2763  func TestReconciler_PausedOrFailedDeployment_NoMoreDestructiveUpdates(t *testing.T) {
  2764  	job := mock.Job()
  2765  	job.TaskGroups[0].Update = noCanaryUpdate
  2766  
  2767  	cases := []struct {
  2768  		name             string
  2769  		deploymentStatus string
  2770  	}{
  2771  		{
  2772  			name:             "paused deployment",
  2773  			deploymentStatus: structs.DeploymentStatusPaused,
  2774  		},
  2775  		{
  2776  			name:             "failed deployment",
  2777  			deploymentStatus: structs.DeploymentStatusFailed,
  2778  		},
  2779  	}
  2780  
  2781  	for _, c := range cases {
  2782  		t.Run(c.name, func(t *testing.T) {
  2783  			// Create a deployment that is paused/failed and has placed one alloc for the new job
  2784  			d := structs.NewDeployment(job)
  2785  			d.Status = c.deploymentStatus
  2786  			d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  2787  				Promoted:     false,
  2788  				DesiredTotal: 10,
  2789  				PlacedAllocs: 1,
  2790  			}
  2791  
  2792  			// Create 9 allocations for the original job
  2793  			var allocs []*structs.Allocation
  2794  			for i := 1; i < 10; i++ {
  2795  				alloc := mock.Alloc()
  2796  				alloc.Job = job
  2797  				alloc.JobID = job.ID
  2798  				alloc.NodeID = uuid.Generate()
  2799  				alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2800  				alloc.TaskGroup = job.TaskGroups[0].Name
  2801  				allocs = append(allocs, alloc)
  2802  			}
  2803  
  2804  			// Create one for the new job
  2805  			newAlloc := mock.Alloc()
  2806  			newAlloc.Job = job
  2807  			newAlloc.JobID = job.ID
  2808  			newAlloc.NodeID = uuid.Generate()
  2809  			newAlloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, 0)
  2810  			newAlloc.TaskGroup = job.TaskGroups[0].Name
  2811  			newAlloc.DeploymentID = d.ID
  2812  			allocs = append(allocs, newAlloc)
  2813  
  2814  			mockUpdateFn := allocUpdateFnMock(map[string]allocUpdateType{newAlloc.ID: allocUpdateFnIgnore}, allocUpdateFnDestructive)
  2815  			reconciler := NewAllocReconciler(testlog.Logger(t), mockUpdateFn, false, job.ID, job, d, allocs, nil, "")
  2816  			r := reconciler.Compute()
  2817  
  2818  			// Assert the correct results
  2819  			assertResults(t, r, &resultExpectation{
  2820  				createDeployment:  nil,
  2821  				deploymentUpdates: nil,
  2822  				place:             0,
  2823  				inplace:           0,
  2824  				stop:              0,
  2825  				desiredTGUpdates: map[string]*structs.DesiredUpdates{
  2826  					job.TaskGroups[0].Name: {
  2827  						Ignore: 10,
  2828  					},
  2829  				},
  2830  			})
  2831  		})
  2832  	}
  2833  }
  2834  
  2835  // Tests the reconciler handles migrations correctly when a deployment is paused
  2836  // or failed
  2837  func TestReconciler_PausedOrFailedDeployment_Migrations(t *testing.T) {
  2838  	job := mock.Job()
  2839  	job.TaskGroups[0].Update = noCanaryUpdate
  2840  
  2841  	cases := []struct {
  2842  		name              string
  2843  		deploymentStatus  string
  2844  		place             int
  2845  		stop              int
  2846  		ignoreAnnotation  uint64
  2847  		migrateAnnotation uint64
  2848  		stopAnnotation    uint64
  2849  	}{
  2850  		{
  2851  			name:             "paused deployment",
  2852  			deploymentStatus: structs.DeploymentStatusPaused,
  2853  			place:            0,
  2854  			stop:             3,
  2855  			ignoreAnnotation: 5,
  2856  			stopAnnotation:   3,
  2857  		},
  2858  		{
  2859  			name:              "failed deployment",
  2860  			deploymentStatus:  structs.DeploymentStatusFailed,
  2861  			place:             0,
  2862  			stop:              3,
  2863  			ignoreAnnotation:  5,
  2864  			migrateAnnotation: 0,
  2865  			stopAnnotation:    3,
  2866  		},
  2867  	}
  2868  
  2869  	for _, c := range cases {
  2870  		t.Run(c.name, func(t *testing.T) {
  2871  			// Create a deployment that is paused/failed and has placed 8 of the 10 desired allocs
  2872  			d := structs.NewDeployment(job)
  2873  			d.Status = c.deploymentStatus
  2874  			d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  2875  				Promoted:     false,
  2876  				DesiredTotal: 10,
  2877  				PlacedAllocs: 8,
  2878  			}
  2879  
  2880  			// Create 8 allocations in the deployment
  2881  			var allocs []*structs.Allocation
  2882  			for i := 0; i < 8; i++ {
  2883  				alloc := mock.Alloc()
  2884  				alloc.Job = job
  2885  				alloc.JobID = job.ID
  2886  				alloc.NodeID = uuid.Generate()
  2887  				alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2888  				alloc.TaskGroup = job.TaskGroups[0].Name
  2889  				alloc.DeploymentID = d.ID
  2890  				allocs = append(allocs, alloc)
  2891  			}
  2892  
  2893  			// Build a map of tainted nodes
  2894  			tainted := make(map[string]*structs.Node, 3)
  2895  			for i := 0; i < 3; i++ {
  2896  				n := mock.Node()
  2897  				n.ID = allocs[i].NodeID
  2898  				allocs[i].DesiredTransition.Migrate = helper.BoolToPtr(true)
  2899  				n.Drain = true
  2900  				tainted[n.ID] = n
  2901  			}
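        			// The drained allocs are marked for migration, but because the
        			// deployment is paused or failed the reconciler only stops them
        			// and does not place replacements.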
  2902  
  2903  			reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnIgnore, false, job.ID, job, d, allocs, tainted, "")
  2904  			r := reconciler.Compute()
  2905  
  2906  			// Assert the correct results
  2907  			assertResults(t, r, &resultExpectation{
  2908  				createDeployment:  nil,
  2909  				deploymentUpdates: nil,
  2910  				place:             c.place,
  2911  				inplace:           0,
  2912  				stop:              c.stop,
  2913  				desiredTGUpdates: map[string]*structs.DesiredUpdates{
  2914  					job.TaskGroups[0].Name: {
  2915  						Migrate: c.migrateAnnotation,
  2916  						Ignore:  c.ignoreAnnotation,
  2917  						Stop:    c.stopAnnotation,
  2918  					},
  2919  				},
  2920  			})
  2921  		})
  2922  	}
  2923  }
  2924  
  2925  // Tests the reconciler handles migrating a canary correctly on a draining node
  2926  func TestReconciler_DrainNode_Canary(t *testing.T) {
  2927  	job := mock.Job()
  2928  	job.TaskGroups[0].Update = canaryUpdate
  2929  
  2930  	// Create a deployment that has placed two canaries
  2931  	d := structs.NewDeployment(job)
  2932  	s := &structs.DeploymentState{
  2933  		Promoted:        false,
  2934  		DesiredTotal:    10,
  2935  		DesiredCanaries: 2,
  2936  		PlacedAllocs:    2,
  2937  	}
  2938  	d.TaskGroups[job.TaskGroups[0].Name] = s
  2939  
  2940  	// Create 10 allocations from the old job
  2941  	var allocs []*structs.Allocation
  2942  	for i := 0; i < 10; i++ {
  2943  		alloc := mock.Alloc()
  2944  		alloc.Job = job
  2945  		alloc.JobID = job.ID
  2946  		alloc.NodeID = uuid.Generate()
  2947  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2948  		alloc.TaskGroup = job.TaskGroups[0].Name
  2949  		allocs = append(allocs, alloc)
  2950  	}
  2951  
  2952  	// Create two canaries for the new job
  2953  	handled := make(map[string]allocUpdateType)
  2954  	for i := 0; i < 2; i++ {
  2955  		// Create one canary
  2956  		canary := mock.Alloc()
  2957  		canary.Job = job
  2958  		canary.JobID = job.ID
  2959  		canary.NodeID = uuid.Generate()
  2960  		canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2961  		canary.TaskGroup = job.TaskGroups[0].Name
  2962  		canary.DeploymentID = d.ID
  2963  		s.PlacedCanaries = append(s.PlacedCanaries, canary.ID)
  2964  		allocs = append(allocs, canary)
  2965  		handled[canary.ID] = allocUpdateFnIgnore
  2966  	}
  2967  
  2968  	// Build a map of tainted nodes that contains the last canary
  2969  	tainted := make(map[string]*structs.Node, 1)
  2970  	n := mock.Node()
  2971  	n.ID = allocs[11].NodeID
  2972  	allocs[11].DesiredTransition.Migrate = helper.BoolToPtr(true)
  2973  	n.Drain = true
  2974  	tainted[n.ID] = n
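        	// The drained canary should be stopped and a replacement canary placed
        	// with the same name index, keeping the deployment at two placed canaries.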
  2975  
  2976  	mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive)
  2977  	reconciler := NewAllocReconciler(testlog.Logger(t), mockUpdateFn, false, job.ID, job, d, allocs, tainted, "")
  2978  	r := reconciler.Compute()
  2979  
  2980  	// Assert the correct results
  2981  	assertResults(t, r, &resultExpectation{
  2982  		createDeployment:  nil,
  2983  		deploymentUpdates: nil,
  2984  		place:             1,
  2985  		inplace:           0,
  2986  		stop:              1,
  2987  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  2988  			job.TaskGroups[0].Name: {
  2989  				Canary: 1,
  2990  				Ignore: 11,
  2991  			},
  2992  		},
  2993  	})
  2994  	assertNamesHaveIndexes(t, intRange(1, 1), stopResultsToNames(r.stop))
  2995  	assertNamesHaveIndexes(t, intRange(1, 1), placeResultsToNames(r.place))
  2996  }
  2997  
  2998  // Tests the reconciler handles replacing a canary that was on a lost node
  2999  func TestReconciler_LostNode_Canary(t *testing.T) {
  3000  	job := mock.Job()
  3001  	job.TaskGroups[0].Update = canaryUpdate
  3002  
  3003  	// Create a deployment that has placed two canaries
  3004  	d := structs.NewDeployment(job)
  3005  	s := &structs.DeploymentState{
  3006  		Promoted:        false,
  3007  		DesiredTotal:    10,
  3008  		DesiredCanaries: 2,
  3009  		PlacedAllocs:    2,
  3010  	}
  3011  	d.TaskGroups[job.TaskGroups[0].Name] = s
  3012  
  3013  	// Create 10 allocations from the old job
  3014  	var allocs []*structs.Allocation
  3015  	for i := 0; i < 10; i++ {
  3016  		alloc := mock.Alloc()
  3017  		alloc.Job = job
  3018  		alloc.JobID = job.ID
  3019  		alloc.NodeID = uuid.Generate()
  3020  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3021  		alloc.TaskGroup = job.TaskGroups[0].Name
  3022  		allocs = append(allocs, alloc)
  3023  	}
  3024  
  3025  	// Create two canaries for the new job
  3026  	handled := make(map[string]allocUpdateType)
  3027  	for i := 0; i < 2; i++ {
  3028  		// Create one canary
  3029  		canary := mock.Alloc()
  3030  		canary.Job = job
  3031  		canary.JobID = job.ID
  3032  		canary.NodeID = uuid.Generate()
  3033  		canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3034  		canary.TaskGroup = job.TaskGroups[0].Name
  3035  		s.PlacedCanaries = append(s.PlacedCanaries, canary.ID)
  3036  		canary.DeploymentID = d.ID
  3037  		allocs = append(allocs, canary)
  3038  		handled[canary.ID] = allocUpdateFnIgnore
  3039  	}
  3040  
  3041  	// Build a map of tainted nodes that contains the last canary
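        	// Unlike the drain case above, the node is down, so the canary is
        	// lost and replaced without a DesiredTransition being set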
  3042  	tainted := make(map[string]*structs.Node, 1)
  3043  	n := mock.Node()
  3044  	n.ID = allocs[11].NodeID
  3045  	n.Status = structs.NodeStatusDown
  3046  	tainted[n.ID] = n
  3047  
  3048  	mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive)
  3049  	reconciler := NewAllocReconciler(testlog.Logger(t), mockUpdateFn, false, job.ID, job, d, allocs, tainted, "")
  3050  	r := reconciler.Compute()
  3051  
  3052  	// Assert the correct results
  3053  	assertResults(t, r, &resultExpectation{
  3054  		createDeployment:  nil,
  3055  		deploymentUpdates: nil,
  3056  		place:             1,
  3057  		inplace:           0,
  3058  		stop:              1,
  3059  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  3060  			job.TaskGroups[0].Name: {
  3061  				Canary: 1,
  3062  				Ignore: 11,
  3063  			},
  3064  		},
  3065  	})
  3066  
  3067  	assertNamesHaveIndexes(t, intRange(1, 1), stopResultsToNames(r.stop))
  3068  	assertNamesHaveIndexes(t, intRange(1, 1), placeResultsToNames(r.place))
  3069  }
  3070  
  3071  // Tests the reconciler handles stopping canaries from older deployments
  3072  func TestReconciler_StopOldCanaries(t *testing.T) {
  3073  	job := mock.Job()
  3074  	job.TaskGroups[0].Update = canaryUpdate
  3075  
  3076  	// Create an old deployment that has placed some canaries
  3077  	d := structs.NewDeployment(job)
  3078  	s := &structs.DeploymentState{
  3079  		Promoted:        false,
  3080  		DesiredTotal:    10,
  3081  		DesiredCanaries: 2,
  3082  		PlacedAllocs:    2,
  3083  	}
  3084  	d.TaskGroups[job.TaskGroups[0].Name] = s
  3085  
  3086  	// Bump the job version so the existing deployment and its
        	// canaries belong to an older version of the job
  3087  	job.Version += 10
  3088  
  3089  	// Create 10 allocations from the old job
  3090  	var allocs []*structs.Allocation
  3091  	for i := 0; i < 10; i++ {
  3092  		alloc := mock.Alloc()
  3093  		alloc.Job = job
  3094  		alloc.JobID = job.ID
  3095  		alloc.NodeID = uuid.Generate()
  3096  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3097  		alloc.TaskGroup = job.TaskGroups[0].Name
  3098  		allocs = append(allocs, alloc)
  3099  	}
  3100  
  3101  	// Create canaries
  3102  	for i := 0; i < 2; i++ {
  3103  		// Create one canary
  3104  		canary := mock.Alloc()
  3105  		canary.Job = job
  3106  		canary.JobID = job.ID
  3107  		canary.NodeID = uuid.Generate()
  3108  		canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3109  		canary.TaskGroup = job.TaskGroups[0].Name
  3110  		s.PlacedCanaries = append(s.PlacedCanaries, canary.ID)
  3111  		canary.DeploymentID = d.ID
  3112  		allocs = append(allocs, canary)
  3113  	}
  3114  
  3115  	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnDestructive, false, job.ID, job, d, allocs, nil, "")
  3116  	r := reconciler.Compute()
  3117  
  3118  	newD := structs.NewDeployment(job)
  3119  	newD.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion
  3120  	newD.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  3121  		DesiredCanaries: 2,
  3122  		DesiredTotal:    10,
  3123  	}
  3124  
  3125  	// Assert the correct results
  3126  	assertResults(t, r, &resultExpectation{
  3127  		createDeployment: newD,
  3128  		deploymentUpdates: []*structs.DeploymentStatusUpdate{
  3129  			{
  3130  				DeploymentID:      d.ID,
  3131  				Status:            structs.DeploymentStatusCancelled,
  3132  				StatusDescription: structs.DeploymentStatusDescriptionNewerJob,
  3133  			},
  3134  		},
  3135  		place:   2,
  3136  		inplace: 0,
  3137  		stop:    2,
  3138  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  3139  			job.TaskGroups[0].Name: {
  3140  				Canary: 2,
  3141  				Stop:   2,
  3142  				Ignore: 10,
  3143  			},
  3144  		},
  3145  	})
  3146  
  3147  	assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop))
  3148  	assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place))
  3149  }
  3150  
  3151  // Tests the reconciler creates new canaries when the job changes
  3152  func TestReconciler_NewCanaries(t *testing.T) {
  3153  	job := mock.Job()
  3154  	job.TaskGroups[0].Update = canaryUpdate
  3155  
  3156  	// Create 10 allocations from the old job
  3157  	var allocs []*structs.Allocation
  3158  	for i := 0; i < 10; i++ {
  3159  		alloc := mock.Alloc()
  3160  		alloc.Job = job
  3161  		alloc.JobID = job.ID
  3162  		alloc.NodeID = uuid.Generate()
  3163  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3164  		alloc.TaskGroup = job.TaskGroups[0].Name
  3165  		allocs = append(allocs, alloc)
  3166  	}
  3167  
  3168  	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "")
  3169  	r := reconciler.Compute()
  3170  
  3171  	newD := structs.NewDeployment(job)
  3172  	newD.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion
  3173  	newD.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  3174  		DesiredCanaries: 2,
  3175  		DesiredTotal:    10,
  3176  	}
  3177  
  3178  	// Assert the correct results
  3179  	assertResults(t, r, &resultExpectation{
  3180  		createDeployment:  newD,
  3181  		deploymentUpdates: nil,
  3182  		place:             2,
  3183  		inplace:           0,
  3184  		stop:              0,
  3185  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  3186  			job.TaskGroups[0].Name: {
  3187  				Canary: 2,
  3188  				Ignore: 10,
  3189  			},
  3190  		},
  3191  	})
  3192  
  3193  	assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place))
  3194  }
  3195  
  3196  // Tests the reconciler creates new canaries when the job changes and the
  3197  // canary count is greater than the task group count
  3198  func TestReconciler_NewCanaries_CountGreater(t *testing.T) {
  3199  	job := mock.Job()
  3200  	job.TaskGroups[0].Count = 3
  3201  	job.TaskGroups[0].Update = canaryUpdate.Copy()
  3202  	job.TaskGroups[0].Update.Canary = 7
  3203  
  3204  	// Create 3 allocations from the old job
  3205  	var allocs []*structs.Allocation
  3206  	for i := 0; i < 3; i++ {
  3207  		alloc := mock.Alloc()
  3208  		alloc.Job = job
  3209  		alloc.JobID = job.ID
  3210  		alloc.NodeID = uuid.Generate()
  3211  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3212  		alloc.TaskGroup = job.TaskGroups[0].Name
  3213  		allocs = append(allocs, alloc)
  3214  	}
  3215  
  3216  	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "")
  3217  	r := reconciler.Compute()
  3218  
  3219  	newD := structs.NewDeployment(job)
  3220  	newD.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion
  3221  	state := &structs.DeploymentState{
  3222  		DesiredCanaries: 7,
  3223  		DesiredTotal:    3,
  3224  	}
  3225  	newD.TaskGroups[job.TaskGroups[0].Name] = state
  3226  
  3227  	// Assert the correct results
  3228  	assertResults(t, r, &resultExpectation{
  3229  		createDeployment:  newD,
  3230  		deploymentUpdates: nil,
  3231  		place:             7,
  3232  		inplace:           0,
  3233  		stop:              0,
  3234  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  3235  			job.TaskGroups[0].Name: {
  3236  				Canary: 7,
  3237  				Ignore: 3,
  3238  			},
  3239  		},
  3240  	})
  3241  
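        	// intRange expands [start, end] pairs into inclusive index ranges,
        	// so (0, 2, 3, 6) should cover name indexes 0 through 6 — one per
        	// canary placed above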
  3242  	assertNamesHaveIndexes(t, intRange(0, 2, 3, 6), placeResultsToNames(r.place))
  3243  }
  3244  
  3245  // Tests the reconciler creates new canaries when the job changes for multiple
  3246  // task groups
  3247  func TestReconciler_NewCanaries_MultiTG(t *testing.T) {
  3248  	job := mock.Job()
  3249  	job.TaskGroups[0].Update = canaryUpdate
  3250  	job.TaskGroups = append(job.TaskGroups, job.TaskGroups[0].Copy())
  3251  	job.TaskGroups[0].Name = "tg2"
  3252  
  3253  	// Create 10 allocations from the old job for each tg
  3254  	var allocs []*structs.Allocation
  3255  	for j := 0; j < 2; j++ {
  3256  		for i := 0; i < 10; i++ {
  3257  			alloc := mock.Alloc()
  3258  			alloc.Job = job
  3259  			alloc.JobID = job.ID
  3260  			alloc.NodeID = uuid.Generate()
  3261  			alloc.Name = structs.AllocName(job.ID, job.TaskGroups[j].Name, uint(i))
  3262  			alloc.TaskGroup = job.TaskGroups[j].Name
  3263  			allocs = append(allocs, alloc)
  3264  		}
  3265  	}
  3266  
  3267  	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "")
  3268  	r := reconciler.Compute()
  3269  
  3270  	newD := structs.NewDeployment(job)
  3271  	newD.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion
  3272  	state := &structs.DeploymentState{
  3273  		DesiredCanaries: 2,
  3274  		DesiredTotal:    10,
  3275  	}
  3276  	newD.TaskGroups[job.TaskGroups[0].Name] = state
  3277  	newD.TaskGroups[job.TaskGroups[1].Name] = state.Copy()
  3278  
  3279  	// Assert the correct results
  3280  	assertResults(t, r, &resultExpectation{
  3281  		createDeployment:  newD,
  3282  		deploymentUpdates: nil,
  3283  		place:             4,
  3284  		inplace:           0,
  3285  		stop:              0,
  3286  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  3287  			job.TaskGroups[0].Name: {
  3288  				Canary: 2,
  3289  				Ignore: 10,
  3290  			},
  3291  			job.TaskGroups[1].Name: {
  3292  				Canary: 2,
  3293  				Ignore: 10,
  3294  			},
  3295  		},
  3296  	})
  3297  
  3298  	assertNamesHaveIndexes(t, intRange(0, 1, 0, 1), placeResultsToNames(r.place))
  3299  }
  3300  
  3301  // Tests the reconciler creates new canaries when the job changes and scales up
  3302  func TestReconciler_NewCanaries_ScaleUp(t *testing.T) {
  3303  	// Scale the job up to 15
  3304  	job := mock.Job()
  3305  	job.TaskGroups[0].Update = canaryUpdate
  3306  	job.TaskGroups[0].Count = 15
  3307  
  3308  	// Create 10 allocations from the old job
  3309  	var allocs []*structs.Allocation
  3310  	for i := 0; i < 10; i++ {
  3311  		alloc := mock.Alloc()
  3312  		alloc.Job = job
  3313  		alloc.JobID = job.ID
  3314  		alloc.NodeID = uuid.Generate()
  3315  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3316  		alloc.TaskGroup = job.TaskGroups[0].Name
  3317  		allocs = append(allocs, alloc)
  3318  	}
  3319  
  3320  	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "")
  3321  	r := reconciler.Compute()
  3322  
  3323  	newD := structs.NewDeployment(job)
  3324  	newD.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion
  3325  	newD.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  3326  		DesiredCanaries: 2,
  3327  		DesiredTotal:    15,
  3328  	}
  3329  
  3330  	// Assert the correct results
  3331  	assertResults(t, r, &resultExpectation{
  3332  		createDeployment:  newD,
  3333  		deploymentUpdates: nil,
  3334  		place:             2,
  3335  		inplace:           0,
  3336  		stop:              0,
  3337  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  3338  			job.TaskGroups[0].Name: {
  3339  				Canary: 2,
  3340  				Ignore: 10,
  3341  			},
  3342  		},
  3343  	})
  3344  
  3345  	assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place))
  3346  }
  3347  
  3348  // Tests the reconciler creates new canaries when the job changes and scales
  3349  // down
  3350  func TestReconciler_NewCanaries_ScaleDown(t *testing.T) {
  3351  	// Scale the job down to 5
  3352  	job := mock.Job()
  3353  	job.TaskGroups[0].Update = canaryUpdate
  3354  	job.TaskGroups[0].Count = 5
  3355  
  3356  	// Create 10 allocations from the old job
  3357  	var allocs []*structs.Allocation
  3358  	for i := 0; i < 10; i++ {
  3359  		alloc := mock.Alloc()
  3360  		alloc.Job = job
  3361  		alloc.JobID = job.ID
  3362  		alloc.NodeID = uuid.Generate()
  3363  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3364  		alloc.TaskGroup = job.TaskGroups[0].Name
  3365  		allocs = append(allocs, alloc)
  3366  	}
  3367  
  3368  	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "")
  3369  	r := reconciler.Compute()
  3370  
  3371  	newD := structs.NewDeployment(job)
  3372  	newD.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion
  3373  	newD.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  3374  		DesiredCanaries: 2,
  3375  		DesiredTotal:    5,
  3376  	}
  3377  
  3378  	// Assert the correct results
  3379  	assertResults(t, r, &resultExpectation{
  3380  		createDeployment:  newD,
  3381  		deploymentUpdates: nil,
  3382  		place:             2,
  3383  		inplace:           0,
  3384  		stop:              5,
  3385  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  3386  			job.TaskGroups[0].Name: {
  3387  				Canary: 2,
  3388  				Stop:   5,
  3389  				Ignore: 5,
  3390  			},
  3391  		},
  3392  	})
  3393  
  3394  	assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place))
  3395  	assertNamesHaveIndexes(t, intRange(5, 9), stopResultsToNames(r.stop))
  3396  }
  3397  
  3398  // Tests the reconciler handles filling the names of partially placed canaries
  3399  func TestReconciler_NewCanaries_FillNames(t *testing.T) {
  3400  	job := mock.Job()
  3401  	job.TaskGroups[0].Update = &structs.UpdateStrategy{
  3402  		Canary:          4,
  3403  		MaxParallel:     2,
  3404  		HealthCheck:     structs.UpdateStrategyHealthCheck_Checks,
  3405  		MinHealthyTime:  10 * time.Second,
  3406  		HealthyDeadline: 10 * time.Minute,
  3407  	}
  3408  
  3409  	// Create an existing deployment that has placed some canaries
  3410  	d := structs.NewDeployment(job)
  3411  	s := &structs.DeploymentState{
  3412  		Promoted:        false,
  3413  		DesiredTotal:    10,
  3414  		DesiredCanaries: 4,
  3415  		PlacedAllocs:    2,
  3416  	}
  3417  	d.TaskGroups[job.TaskGroups[0].Name] = s
  3418  
  3419  	// Create 10 allocations from the old job
  3420  	var allocs []*structs.Allocation
  3421  	for i := 0; i < 10; i++ {
  3422  		alloc := mock.Alloc()
  3423  		alloc.Job = job
  3424  		alloc.JobID = job.ID
  3425  		alloc.NodeID = uuid.Generate()
  3426  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3427  		alloc.TaskGroup = job.TaskGroups[0].Name
  3428  		allocs = append(allocs, alloc)
  3429  	}
  3430  
  3431  	// Create canaries but pick names at the ends
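        	// (the i += 3 step yields name indexes 0 and 3, leaving 1 and 2 unfilled)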
  3432  	for i := 0; i < 4; i += 3 {
  3433  		// Create one canary
  3434  		canary := mock.Alloc()
  3435  		canary.Job = job
  3436  		canary.JobID = job.ID
  3437  		canary.NodeID = uuid.Generate()
  3438  		canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3439  		canary.TaskGroup = job.TaskGroups[0].Name
  3440  		s.PlacedCanaries = append(s.PlacedCanaries, canary.ID)
  3441  		canary.DeploymentID = d.ID
  3442  		allocs = append(allocs, canary)
  3443  	}
  3444  
  3445  	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnDestructive, false, job.ID, job, d, allocs, nil, "")
  3446  	r := reconciler.Compute()
  3447  
  3448  	// Assert the correct results
  3449  	assertResults(t, r, &resultExpectation{
  3450  		createDeployment:  nil,
  3451  		deploymentUpdates: nil,
  3452  		place:             2,
  3453  		inplace:           0,
  3454  		stop:              0,
  3455  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  3456  			job.TaskGroups[0].Name: {
  3457  				Canary: 2,
  3458  				Ignore: 12,
  3459  			},
  3460  		},
  3461  	})
  3462  
  3463  	assertNamesHaveIndexes(t, intRange(1, 2), placeResultsToNames(r.place))
  3464  }
  3465  
  3466  // Tests the reconciler handles canary promotion by unblocking max_parallel
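        // Once promoted, the two canaries replace the old allocs with the same name
        // indexes (0 and 1), freeing MaxParallel (2) destructive updates for the rest.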
  3467  func TestReconciler_PromoteCanaries_Unblock(t *testing.T) {
  3468  	job := mock.Job()
  3469  	job.TaskGroups[0].Update = canaryUpdate
  3470  
  3471  	// Create an existing deployment that has placed some canaries and mark them
  3472  	// promoted
  3473  	d := structs.NewDeployment(job)
  3474  	s := &structs.DeploymentState{
  3475  		Promoted:        true,
  3476  		DesiredTotal:    10,
  3477  		DesiredCanaries: 2,
  3478  		PlacedAllocs:    2,
  3479  	}
  3480  	d.TaskGroups[job.TaskGroups[0].Name] = s
  3481  
  3482  	// Create 10 allocations from the old job
  3483  	var allocs []*structs.Allocation
  3484  	for i := 0; i < 10; i++ {
  3485  		alloc := mock.Alloc()
  3486  		alloc.Job = job
  3487  		alloc.JobID = job.ID
  3488  		alloc.NodeID = uuid.Generate()
  3489  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3490  		alloc.TaskGroup = job.TaskGroups[0].Name
  3491  		allocs = append(allocs, alloc)
  3492  	}
  3493  
  3494  	// Create the canaries
  3495  	handled := make(map[string]allocUpdateType)
  3496  	for i := 0; i < 2; i++ {
  3497  		// Create one canary
  3498  		canary := mock.Alloc()
  3499  		canary.Job = job
  3500  		canary.JobID = job.ID
  3501  		canary.NodeID = uuid.Generate()
  3502  		canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3503  		canary.TaskGroup = job.TaskGroups[0].Name
  3504  		s.PlacedCanaries = append(s.PlacedCanaries, canary.ID)
  3505  		canary.DeploymentID = d.ID
  3506  		canary.DeploymentStatus = &structs.AllocDeploymentStatus{
  3507  			Healthy: helper.BoolToPtr(true),
  3508  		}
  3509  		allocs = append(allocs, canary)
  3510  		handled[canary.ID] = allocUpdateFnIgnore
  3511  	}
  3512  
  3513  	mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive)
  3514  	reconciler := NewAllocReconciler(testlog.Logger(t), mockUpdateFn, false, job.ID, job, d, allocs, nil, "")
  3515  	r := reconciler.Compute()
  3516  
  3517  	// Assert the correct results
  3518  	assertResults(t, r, &resultExpectation{
  3519  		createDeployment:  nil,
  3520  		deploymentUpdates: nil,
  3521  		destructive:       2,
  3522  		stop:              2,
  3523  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  3524  			job.TaskGroups[0].Name: {
  3525  				Stop:              2,
  3526  				DestructiveUpdate: 2,
  3527  				Ignore:            8,
  3528  			},
  3529  		},
  3530  	})
  3531  
  3532  	assertNoCanariesStopped(t, d, r.stop)
  3533  	assertNamesHaveIndexes(t, intRange(2, 3), destructiveResultsToNames(r.destructiveUpdate))
  3534  	assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop))
  3535  }
  3536  
  3537  // Tests the reconciler correctly handles canary promotion when the canary
  3538  // count equals the total
  3539  func TestReconciler_PromoteCanaries_CanariesEqualCount(t *testing.T) {
  3540  	job := mock.Job()
  3541  	job.TaskGroups[0].Update = canaryUpdate
  3542  	job.TaskGroups[0].Count = 2
  3543  
  3544  	// Create an existing deployment that has placed some canaries and mark them
  3545  	// promoted
  3546  	d := structs.NewDeployment(job)
  3547  	s := &structs.DeploymentState{
  3548  		Promoted:        true,
  3549  		DesiredTotal:    2,
  3550  		DesiredCanaries: 2,
  3551  		PlacedAllocs:    2,
  3552  		HealthyAllocs:   2,
  3553  	}
  3554  	d.TaskGroups[job.TaskGroups[0].Name] = s
  3555  
  3556  	// Create 2 allocations from the old job
  3557  	var allocs []*structs.Allocation
  3558  	for i := 0; i < 2; i++ {
  3559  		alloc := mock.Alloc()
  3560  		alloc.Job = job
  3561  		alloc.JobID = job.ID
  3562  		alloc.NodeID = uuid.Generate()
  3563  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3564  		alloc.TaskGroup = job.TaskGroups[0].Name
  3565  		allocs = append(allocs, alloc)
  3566  	}
  3567  
  3568  	// Create the canaries
  3569  	handled := make(map[string]allocUpdateType)
  3570  	for i := 0; i < 2; i++ {
  3571  		// Create one canary
  3572  		canary := mock.Alloc()
  3573  		canary.Job = job
  3574  		canary.JobID = job.ID
  3575  		canary.NodeID = uuid.Generate()
  3576  		canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3577  		canary.TaskGroup = job.TaskGroups[0].Name
  3578  		s.PlacedCanaries = append(s.PlacedCanaries, canary.ID)
  3579  		canary.DeploymentID = d.ID
  3580  		canary.DeploymentStatus = &structs.AllocDeploymentStatus{
  3581  			Healthy: helper.BoolToPtr(true),
  3582  		}
  3583  		allocs = append(allocs, canary)
  3584  		handled[canary.ID] = allocUpdateFnIgnore
  3585  	}
  3586  
  3587  	mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive)
  3588  	reconciler := NewAllocReconciler(testlog.Logger(t), mockUpdateFn, false, job.ID, job, d, allocs, nil, "")
  3589  	r := reconciler.Compute()
  3590  
  3591  	updates := []*structs.DeploymentStatusUpdate{
  3592  		{
  3593  			DeploymentID:      d.ID,
  3594  			Status:            structs.DeploymentStatusSuccessful,
  3595  			StatusDescription: structs.DeploymentStatusDescriptionSuccessful,
  3596  		},
  3597  	}
  3598  
  3599  	// Assert the correct results
  3600  	assertResults(t, r, &resultExpectation{
  3601  		createDeployment:  nil,
  3602  		deploymentUpdates: updates,
  3603  		place:             0,
  3604  		inplace:           0,
  3605  		stop:              2,
  3606  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  3607  			job.TaskGroups[0].Name: {
  3608  				Stop:   2,
  3609  				Ignore: 2,
  3610  			},
  3611  		},
  3612  	})
  3613  
  3614  	assertNoCanariesStopped(t, d, r.stop)
  3615  	assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop))
  3616  }
  3617  
  3618  // Tests the reconciler checks the health of placed allocs to determine how
  3619  // much of the deployment's max_parallel limit is available
  3620  func TestReconciler_DeploymentLimit_HealthAccounting(t *testing.T) {
  3621  	job := mock.Job()
  3622  	job.TaskGroups[0].Update = noCanaryUpdate
  3623  
  3624  	cases := []struct {
  3625  		healthy int
  3626  	}{
  3627  		{
  3628  			healthy: 0,
  3629  		},
  3630  		{
  3631  			healthy: 1,
  3632  		},
  3633  		{
  3634  			healthy: 2,
  3635  		},
  3636  		{
  3637  			healthy: 3,
  3638  		},
  3639  		{
  3640  			healthy: 4,
  3641  		},
  3642  	}
  3643  
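        	// Each case seeds the deployment with 4 placed allocs and marks
        	// c.healthy of them healthy; the reconciler should allow exactly
        	// that many further destructive updates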
  3644  	for _, c := range cases {
  3645  		t.Run(fmt.Sprintf("%d healthy", c.healthy), func(t *testing.T) {
  3646  			// Create an existing promoted deployment that has
  3647  			// placed some allocs
  3648  			d := structs.NewDeployment(job)
  3649  			d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  3650  				Promoted:     true,
  3651  				DesiredTotal: 10,
  3652  				PlacedAllocs: 4,
  3653  			}
  3654  
  3655  			// Create 6 allocations from the old job
  3656  			var allocs []*structs.Allocation
  3657  			for i := 4; i < 10; i++ {
  3658  				alloc := mock.Alloc()
  3659  				alloc.Job = job
  3660  				alloc.JobID = job.ID
  3661  				alloc.NodeID = uuid.Generate()
  3662  				alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3663  				alloc.TaskGroup = job.TaskGroups[0].Name
  3664  				allocs = append(allocs, alloc)
  3665  			}
  3666  
  3667  			// Create the new allocs
  3668  			handled := make(map[string]allocUpdateType)
  3669  			for i := 0; i < 4; i++ {
  3670  				new := mock.Alloc()
  3671  				new.Job = job
  3672  				new.JobID = job.ID
  3673  				new.NodeID = uuid.Generate()
  3674  				new.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3675  				new.TaskGroup = job.TaskGroups[0].Name
  3676  				new.DeploymentID = d.ID
  3677  				if i < c.healthy {
  3678  					new.DeploymentStatus = &structs.AllocDeploymentStatus{
  3679  						Healthy: helper.BoolToPtr(true),
  3680  					}
  3681  				}
  3682  				allocs = append(allocs, new)
  3683  				handled[new.ID] = allocUpdateFnIgnore
  3684  			}
  3685  
  3686  			mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive)
  3687  			reconciler := NewAllocReconciler(testlog.Logger(t), mockUpdateFn, false, job.ID, job, d, allocs, nil, "")
  3688  			r := reconciler.Compute()
  3689  
  3690  			// Assert the correct results
  3691  			assertResults(t, r, &resultExpectation{
  3692  				createDeployment:  nil,
  3693  				deploymentUpdates: nil,
  3694  				destructive:       c.healthy,
  3695  				desiredTGUpdates: map[string]*structs.DesiredUpdates{
  3696  					job.TaskGroups[0].Name: {
  3697  						DestructiveUpdate: uint64(c.healthy),
  3698  						Ignore:            uint64(10 - c.healthy),
  3699  					},
  3700  				},
  3701  			})
  3702  
  3703  			if c.healthy != 0 {
  3704  				assertNamesHaveIndexes(t, intRange(4, 3+c.healthy), destructiveResultsToNames(r.destructiveUpdate))
  3705  			}
  3706  		})
  3707  	}
  3708  }
  3709  
  3710  // Tests the reconciler handles an alloc on a tainted node during a rolling
  3711  // update
  3712  func TestReconciler_TaintedNode_RollingUpgrade(t *testing.T) {
  3713  	job := mock.Job()
  3714  	job.TaskGroups[0].Update = noCanaryUpdate
  3715  
  3716  	// Create an existing deployment that has some placed allocs
  3717  	d := structs.NewDeployment(job)
  3718  	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  3719  		Promoted:     true,
  3720  		DesiredTotal: 10,
  3721  		PlacedAllocs: 7,
  3722  	}
  3723  
  3724  	// Create 2 allocations from the old job
  3725  	var allocs []*structs.Allocation
  3726  	for i := 8; i < 10; i++ {
  3727  		alloc := mock.Alloc()
  3728  		alloc.Job = job
  3729  		alloc.JobID = job.ID
  3730  		alloc.NodeID = uuid.Generate()
  3731  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3732  		alloc.TaskGroup = job.TaskGroups[0].Name
  3733  		allocs = append(allocs, alloc)
  3734  	}
  3735  
  3736  	// Create the healthy replacements
  3737  	handled := make(map[string]allocUpdateType)
  3738  	for i := 0; i < 8; i++ {
  3739  		new := mock.Alloc()
  3740  		new.Job = job
  3741  		new.JobID = job.ID
  3742  		new.NodeID = uuid.Generate()
  3743  		new.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3744  		new.TaskGroup = job.TaskGroups[0].Name
  3745  		new.DeploymentID = d.ID
  3746  		new.DeploymentStatus = &structs.AllocDeploymentStatus{
  3747  			Healthy: helper.BoolToPtr(true),
  3748  		}
  3749  		allocs = append(allocs, new)
  3750  		handled[new.ID] = allocUpdateFnIgnore
  3751  	}
  3752  
  3753  	// Build a map of tainted nodes
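        	// allocs[2..4] are healthy replacements with name indexes 0-2; the
        	// first node is down (lost) and the other two are draining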
  3754  	tainted := make(map[string]*structs.Node, 3)
  3755  	for i := 0; i < 3; i++ {
  3756  		n := mock.Node()
  3757  		n.ID = allocs[2+i].NodeID
  3758  		if i == 0 {
  3759  			n.Status = structs.NodeStatusDown
  3760  		} else {
  3761  			n.Drain = true
  3762  			allocs[2+i].DesiredTransition.Migrate = helper.BoolToPtr(true)
  3763  		}
  3764  		tainted[n.ID] = n
  3765  	}
  3766  
  3767  	mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive)
  3768  	reconciler := NewAllocReconciler(testlog.Logger(t), mockUpdateFn, false, job.ID, job, d, allocs, tainted, "")
  3769  	r := reconciler.Compute()
  3770  
  3771  	// Assert the correct results
  3772  	assertResults(t, r, &resultExpectation{
  3773  		createDeployment:  nil,
  3774  		deploymentUpdates: nil,
  3775  		place:             3,
  3776  		destructive:       2,
  3777  		stop:              3,
  3778  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  3779  			job.TaskGroups[0].Name: {
  3780  				Place:             1, // Place the lost
  3781  				Stop:              1, // Stop the lost
  3782  				Migrate:           2, // Migrate the tainted
  3783  				DestructiveUpdate: 2,
  3784  				Ignore:            5,
  3785  			},
  3786  		},
  3787  	})
  3788  
  3789  	assertNamesHaveIndexes(t, intRange(8, 9), destructiveResultsToNames(r.destructiveUpdate))
  3790  	assertNamesHaveIndexes(t, intRange(0, 2), placeResultsToNames(r.place))
  3791  	assertNamesHaveIndexes(t, intRange(0, 2), stopResultsToNames(r.stop))
  3792  }
  3793  
  3794  // Tests the reconciler handles a failed deployment and only replaces allocs
  3795  // on lost nodes
  3796  func TestReconciler_FailedDeployment_PlacementLost(t *testing.T) {
  3797  	job := mock.Job()
  3798  	job.TaskGroups[0].Update = noCanaryUpdate
  3799  
  3800  	// Create an existing failed deployment that has some placed allocs
  3801  	d := structs.NewDeployment(job)
  3802  	d.Status = structs.DeploymentStatusFailed
  3803  	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  3804  		Promoted:     true,
  3805  		DesiredTotal: 10,
  3806  		PlacedAllocs: 4,
  3807  	}
  3808  
  3809  	// Create 6 allocations from the old job
  3810  	var allocs []*structs.Allocation
  3811  	for i := 4; i < 10; i++ {
  3812  		alloc := mock.Alloc()
  3813  		alloc.Job = job
  3814  		alloc.JobID = job.ID
  3815  		alloc.NodeID = uuid.Generate()
  3816  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3817  		alloc.TaskGroup = job.TaskGroups[0].Name
  3818  		allocs = append(allocs, alloc)
  3819  	}
  3820  
  3821  	// Create the healthy replacements
  3822  	handled := make(map[string]allocUpdateType)
  3823  	for i := 0; i < 4; i++ {
  3824  		new := mock.Alloc()
  3825  		new.Job = job
  3826  		new.JobID = job.ID
  3827  		new.NodeID = uuid.Generate()
  3828  		new.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3829  		new.TaskGroup = job.TaskGroups[0].Name
  3830  		new.DeploymentID = d.ID
  3831  		new.DeploymentStatus = &structs.AllocDeploymentStatus{
  3832  			Healthy: helper.BoolToPtr(true),
  3833  		}
  3834  		allocs = append(allocs, new)
  3835  		handled[new.ID] = allocUpdateFnIgnore
  3836  	}
  3837  
  3838  	// Build a map of tainted nodes
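        	// allocs[6] is on a down node (lost) and allocs[7] on a draining
        	// one; because the deployment is failed, only the lost alloc is
        	// replaced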
  3839  	tainted := make(map[string]*structs.Node, 2)
  3840  	for i := 0; i < 2; i++ {
  3841  		n := mock.Node()
  3842  		n.ID = allocs[6+i].NodeID
  3843  		if i == 0 {
  3844  			n.Status = structs.NodeStatusDown
  3845  		} else {
  3846  			n.Drain = true
  3847  			allocs[6+i].DesiredTransition.Migrate = helper.BoolToPtr(true)
  3848  		}
  3849  		tainted[n.ID] = n
  3850  	}
  3851  
  3852  	mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive)
  3853  	reconciler := NewAllocReconciler(testlog.Logger(t), mockUpdateFn, false, job.ID, job, d, allocs, tainted, "")
  3854  	r := reconciler.Compute()
  3855  
  3856  	// Assert the correct results
  3857  	assertResults(t, r, &resultExpectation{
  3858  		createDeployment:  nil,
  3859  		deploymentUpdates: nil,
  3860  		place:             1, // Only replace the lost node
  3861  		inplace:           0,
  3862  		stop:              2,
  3863  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  3864  			job.TaskGroups[0].Name: {
  3865  				Place:  1,
  3866  				Stop:   2,
  3867  				Ignore: 8,
  3868  			},
  3869  		},
  3870  	})
  3871  
  3872  	assertNamesHaveIndexes(t, intRange(0, 0), placeResultsToNames(r.place))
  3873  	assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop))
  3874  }
  3875  
  3876  // Tests the reconciler handles a run after a deployment has completed
  3877  // successfully.
  3878  func TestReconciler_CompleteDeployment(t *testing.T) {
  3879  	job := mock.Job()
  3880  	job.TaskGroups[0].Update = canaryUpdate
  3881  
  3882  	d := structs.NewDeployment(job)
  3883  	d.Status = structs.DeploymentStatusSuccessful
  3884  	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  3885  		Promoted:        true,
  3886  		DesiredTotal:    10,
  3887  		DesiredCanaries: 2,
  3888  		PlacedAllocs:    10,
  3889  		HealthyAllocs:   10,
  3890  	}
  3891  
  3892  	// Create allocations from the old job
  3893  	var allocs []*structs.Allocation
  3894  	for i := 0; i < 10; i++ {
  3895  		alloc := mock.Alloc()
  3896  		alloc.Job = job
  3897  		alloc.JobID = job.ID
  3898  		alloc.NodeID = uuid.Generate()
  3899  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3900  		alloc.TaskGroup = job.TaskGroups[0].Name
  3901  		alloc.DeploymentID = d.ID
  3902  		alloc.DeploymentStatus = &structs.AllocDeploymentStatus{
  3903  			Healthy: helper.BoolToPtr(true),
  3904  		}
  3905  		allocs = append(allocs, alloc)
  3906  	}
  3907  
  3908  	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnIgnore, false, job.ID, job, d, allocs, nil, "")
  3909  	r := reconciler.Compute()
  3910  
  3911  	// Assert the correct results
  3912  	assertResults(t, r, &resultExpectation{
  3913  		createDeployment:  nil,
  3914  		deploymentUpdates: nil,
  3915  		place:             0,
  3916  		inplace:           0,
  3917  		stop:              0,
  3918  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  3919  			job.TaskGroups[0].Name: {
  3920  				Ignore: 10,
  3921  			},
  3922  		},
  3923  	})
  3924  }
  3925  
  3926  // Tests that the reconciler marks a deployment as complete once there is
  3927  // nothing left to place even if there are failed allocations that are part of
  3928  // the deployment.
  3929  func TestReconciler_MarkDeploymentComplete_FailedAllocations(t *testing.T) {
  3930  	job := mock.Job()
  3931  	job.TaskGroups[0].Update = noCanaryUpdate
  3932  
  3933  	d := structs.NewDeployment(job)
  3934  	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  3935  		DesiredTotal:  10,
  3936  		PlacedAllocs:  20,
  3937  		HealthyAllocs: 10,
  3938  	}
  3939  
  3940  	// Create 10 healthy allocs and 10 allocs that are failed
  3941  	var allocs []*structs.Allocation
  3942  	for i := 0; i < 20; i++ {
  3943  		alloc := mock.Alloc()
  3944  		alloc.Job = job
  3945  		alloc.JobID = job.ID
  3946  		alloc.NodeID = uuid.Generate()
  3947  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i%10))
  3948  		alloc.TaskGroup = job.TaskGroups[0].Name
  3949  		alloc.DeploymentID = d.ID
  3950  		alloc.DeploymentStatus = &structs.AllocDeploymentStatus{}
  3951  		if i < 10 {
  3952  			alloc.ClientStatus = structs.AllocClientStatusRunning
  3953  			alloc.DeploymentStatus.Healthy = helper.BoolToPtr(true)
  3954  		} else {
  3955  			alloc.DesiredStatus = structs.AllocDesiredStatusStop
  3956  			alloc.ClientStatus = structs.AllocClientStatusFailed
  3957  			alloc.DeploymentStatus.Healthy = helper.BoolToPtr(false)
  3958  		}
  3959  
  3960  		allocs = append(allocs, alloc)
  3961  	}
  3962  
  3963  	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnIgnore, false, job.ID, job, d, allocs, nil, "")
  3964  	r := reconciler.Compute()
  3965  
  3966  	updates := []*structs.DeploymentStatusUpdate{
  3967  		{
  3968  			DeploymentID:      d.ID,
  3969  			Status:            structs.DeploymentStatusSuccessful,
  3970  			StatusDescription: structs.DeploymentStatusDescriptionSuccessful,
  3971  		},
  3972  	}
  3973  
  3974  	// Assert the correct results
  3975  	assertResults(t, r, &resultExpectation{
  3976  		createDeployment:  nil,
  3977  		deploymentUpdates: updates,
  3978  		place:             0,
  3979  		inplace:           0,
  3980  		stop:              0,
  3981  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  3982  			job.TaskGroups[0].Name: {
  3983  				Ignore: 10,
  3984  			},
  3985  		},
  3986  	})
  3987  }
  3988  
  3989  // Test that a failed deployment cancels non-promoted canaries
  3990  func TestReconciler_FailedDeployment_CancelCanaries(t *testing.T) {
  3991  	// Create a job with two task groups
  3992  	job := mock.Job()
  3993  	job.TaskGroups[0].Update = canaryUpdate
  3994  	job.TaskGroups = append(job.TaskGroups, job.TaskGroups[0].Copy())
  3995  	job.TaskGroups[1].Name = "two"
  3996  
  3997  	// Create an existing failed deployment that has promoted one task group
  3998  	d := structs.NewDeployment(job)
  3999  	d.Status = structs.DeploymentStatusFailed
  4000  	s0 := &structs.DeploymentState{
  4001  		Promoted:        true,
  4002  		DesiredTotal:    10,
  4003  		DesiredCanaries: 2,
  4004  		PlacedAllocs:    4,
  4005  	}
  4006  	s1 := &structs.DeploymentState{
  4007  		Promoted:        false,
  4008  		DesiredTotal:    10,
  4009  		DesiredCanaries: 2,
  4010  		PlacedAllocs:    2,
  4011  	}
  4012  	d.TaskGroups[job.TaskGroups[0].Name] = s0
  4013  	d.TaskGroups[job.TaskGroups[1].Name] = s1
  4014  
  4015  	// Create the allocs for each group: healthy replacements from the
        	// new job (the first two are canaries) and the rest from the old job
  4016  	var allocs []*structs.Allocation
  4017  	handled := make(map[string]allocUpdateType)
  4018  	for _, group := range []int{0, 1} {
  4019  		replacements := 4
  4020  		state := s0
  4021  		if group == 1 {
  4022  			replacements = 2
  4023  			state = s1
  4024  		}
  4025  
  4026  		// Create the healthy replacements
  4027  		for i := 0; i < replacements; i++ {
  4028  			new := mock.Alloc()
  4029  			new.Job = job
  4030  			new.JobID = job.ID
  4031  			new.NodeID = uuid.Generate()
  4032  			new.Name = structs.AllocName(job.ID, job.TaskGroups[group].Name, uint(i))
  4033  			new.TaskGroup = job.TaskGroups[group].Name
  4034  			new.DeploymentID = d.ID
  4035  			new.DeploymentStatus = &structs.AllocDeploymentStatus{
  4036  				Healthy: helper.BoolToPtr(true),
  4037  			}
  4038  			allocs = append(allocs, new)
  4039  			handled[new.ID] = allocUpdateFnIgnore
  4040  
  4041  			// Add the alloc to the canary list
  4042  			if i < 2 {
  4043  				state.PlacedCanaries = append(state.PlacedCanaries, new.ID)
  4044  			}
  4045  		}
  4046  		for i := replacements; i < 10; i++ {
  4047  			alloc := mock.Alloc()
  4048  			alloc.Job = job
  4049  			alloc.JobID = job.ID
  4050  			alloc.NodeID = uuid.Generate()
  4051  			alloc.Name = structs.AllocName(job.ID, job.TaskGroups[group].Name, uint(i))
  4052  			alloc.TaskGroup = job.TaskGroups[group].Name
  4053  			allocs = append(allocs, alloc)
  4054  		}
  4055  	}
  4056  
  4057  	mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive)
  4058  	reconciler := NewAllocReconciler(testlog.Logger(t), mockUpdateFn, false, job.ID, job, d, allocs, nil, "")
  4059  	r := reconciler.Compute()
  4060  
  4061  	// Assert the correct results
  4062  	assertResults(t, r, &resultExpectation{
  4063  		createDeployment:  nil,
  4064  		deploymentUpdates: nil,
  4065  		place:             0,
  4066  		inplace:           0,
  4067  		stop:              2,
  4068  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  4069  			job.TaskGroups[0].Name: {
  4070  				Ignore: 10,
  4071  			},
  4072  			job.TaskGroups[1].Name: {
  4073  				Stop:   2,
  4074  				Ignore: 8,
  4075  			},
  4076  		},
  4077  	})
  4078  
  4079  	assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop))
  4080  }
  4081  
  4082  // Tests that updating the job after a failed deployment creates a new
        // deployment and rolls forward
  4083  func TestReconciler_FailedDeployment_NewJob(t *testing.T) {
  4084  	job := mock.Job()
  4085  	job.TaskGroups[0].Update = noCanaryUpdate
  4086  
  4087  	// Create an existing failed deployment that has some placed allocs
  4088  	d := structs.NewDeployment(job)
  4089  	d.Status = structs.DeploymentStatusFailed
  4090  	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  4091  		Promoted:     true,
  4092  		DesiredTotal: 10,
  4093  		PlacedAllocs: 4,
  4094  	}
  4095  
  4096  	// Create 6 allocations from the old job
  4097  	var allocs []*structs.Allocation
  4098  	for i := 4; i < 10; i++ {
  4099  		alloc := mock.Alloc()
  4100  		alloc.Job = job
  4101  		alloc.JobID = job.ID
  4102  		alloc.NodeID = uuid.Generate()
  4103  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  4104  		alloc.TaskGroup = job.TaskGroups[0].Name
  4105  		allocs = append(allocs, alloc)
  4106  	}
  4107  
  4108  	// Create the healthy replacements
  4109  	for i := 0; i < 4; i++ {
  4110  		new := mock.Alloc()
  4111  		new.Job = job
  4112  		new.JobID = job.ID
  4113  		new.NodeID = uuid.Generate()
  4114  		new.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  4115  		new.TaskGroup = job.TaskGroups[0].Name
  4116  		new.DeploymentID = d.ID
  4117  		new.DeploymentStatus = &structs.AllocDeploymentStatus{
  4118  			Healthy: helper.BoolToPtr(true),
  4119  		}
  4120  		allocs = append(allocs, new)
  4121  	}
  4122  
  4123  	// Bump the job version to mimic a new job being submitted
  4124  	jobNew := job.Copy()
  4125  	jobNew.Version += 100
  4126  
  4127  	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnDestructive, false, job.ID, jobNew, d, allocs, nil, "")
  4128  	r := reconciler.Compute()
  4129  
  4130  	dnew := structs.NewDeployment(jobNew)
  4131  	dnew.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  4132  		DesiredTotal: 10,
  4133  	}
  4134  
  4135  	// Assert the correct results
  4136  	assertResults(t, r, &resultExpectation{
  4137  		createDeployment:  dnew,
  4138  		deploymentUpdates: nil,
  4139  		destructive:       4,
  4140  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  4141  			job.TaskGroups[0].Name: {
  4142  				DestructiveUpdate: 4,
  4143  				Ignore:            6,
  4144  			},
  4145  		},
  4146  	})
  4147  
  4148  	assertNamesHaveIndexes(t, intRange(0, 3), destructiveResultsToNames(r.destructiveUpdate))
  4149  }
  4150  
  4151  // Tests the reconciler marks a deployment as complete
  4152  func TestReconciler_MarkDeploymentComplete(t *testing.T) {
  4153  	job := mock.Job()
  4154  	job.TaskGroups[0].Update = noCanaryUpdate
  4155  
  4156  	d := structs.NewDeployment(job)
  4157  	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  4158  		Promoted:      true,
  4159  		DesiredTotal:  10,
  4160  		PlacedAllocs:  10,
  4161  		HealthyAllocs: 10,
  4162  	}
  4163  
  4164  	// Create allocations from the old job
  4165  	var allocs []*structs.Allocation
  4166  	for i := 0; i < 10; i++ {
  4167  		alloc := mock.Alloc()
  4168  		alloc.Job = job
  4169  		alloc.JobID = job.ID
  4170  		alloc.NodeID = uuid.Generate()
  4171  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  4172  		alloc.TaskGroup = job.TaskGroups[0].Name
  4173  		alloc.DeploymentID = d.ID
  4174  		alloc.DeploymentStatus = &structs.AllocDeploymentStatus{
  4175  			Healthy: helper.BoolToPtr(true),
  4176  		}
  4177  		allocs = append(allocs, alloc)
  4178  	}
  4179  
  4180  	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnIgnore, false, job.ID, job, d, allocs, nil, "")
  4181  	r := reconciler.Compute()
  4182  
  4183  	updates := []*structs.DeploymentStatusUpdate{
  4184  		{
  4185  			DeploymentID:      d.ID,
  4186  			Status:            structs.DeploymentStatusSuccessful,
  4187  			StatusDescription: structs.DeploymentStatusDescriptionSuccessful,
  4188  		},
  4189  	}
  4190  
  4191  	// Assert the correct results
  4192  	assertResults(t, r, &resultExpectation{
  4193  		createDeployment:  nil,
  4194  		deploymentUpdates: updates,
  4195  		place:             0,
  4196  		inplace:           0,
  4197  		stop:              0,
  4198  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  4199  			job.TaskGroups[0].Name: {
  4200  				Ignore: 10,
  4201  			},
  4202  		},
  4203  	})
  4204  }
  4205  
  4206  // Tests the reconciler handles the second evaluation of a job change that
  4207  // scales up, where the first eval already created the deployment.
  4208  func TestReconciler_JobChange_ScaleUp_SecondEval(t *testing.T) {
  4209  	// Scale the job up to 30
  4210  	job := mock.Job()
  4211  	job.TaskGroups[0].Update = noCanaryUpdate
  4212  	job.TaskGroups[0].Count = 30
  4213  
  4214  	// Create a deployment that has placed some of the new allocs
  4215  	d := structs.NewDeployment(job)
  4216  	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  4217  		Promoted:     false,
  4218  		DesiredTotal: 30,
  4219  		PlacedAllocs: 20,
  4220  	}
  4221  
  4222  	// Create 10 allocations from the old job
  4223  	var allocs []*structs.Allocation
  4224  	for i := 0; i < 10; i++ {
  4225  		alloc := mock.Alloc()
  4226  		alloc.Job = job
  4227  		alloc.JobID = job.ID
  4228  		alloc.NodeID = uuid.Generate()
  4229  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  4230  		alloc.TaskGroup = job.TaskGroups[0].Name
  4231  		allocs = append(allocs, alloc)
  4232  	}
  4233  
  4234  	// Create 20 allocations from the new job
  4235  	handled := make(map[string]allocUpdateType)
  4236  	for i := 10; i < 30; i++ {
  4237  		alloc := mock.Alloc()
  4238  		alloc.Job = job
  4239  		alloc.JobID = job.ID
  4240  		alloc.DeploymentID = d.ID
  4241  		alloc.NodeID = uuid.Generate()
  4242  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  4243  		alloc.TaskGroup = job.TaskGroups[0].Name
  4244  		allocs = append(allocs, alloc)
  4245  		handled[alloc.ID] = allocUpdateFnIgnore
  4246  	}
  4247  
  4248  	mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive)
  4249  	reconciler := NewAllocReconciler(testlog.Logger(t), mockUpdateFn, false, job.ID, job, d, allocs, nil, "")
  4250  	r := reconciler.Compute()
  4251  
  4252  	// Assert the correct results
  4253  	assertResults(t, r, &resultExpectation{
  4254  		createDeployment:  nil,
  4255  		deploymentUpdates: nil,
  4256  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  4257  			job.TaskGroups[0].Name: {
  4258  				// All should be ignored because nothing has been marked as
  4259  				// healthy.
  4260  				Ignore: 30,
  4261  			},
  4262  		},
  4263  	})
  4264  }
  4265  
  4266  // Tests the reconciler doesn't stop allocations when doing a rolling upgrade
  4267  // where the count of the old job allocs is < desired count.
  4268  func TestReconciler_RollingUpgrade_MissingAllocs(t *testing.T) {
  4269  	job := mock.Job()
  4270  	job.TaskGroups[0].Update = noCanaryUpdate
  4271  
  4272  	// Create 7 allocations from the old job
  4273  	var allocs []*structs.Allocation
  4274  	for i := 0; i < 7; i++ {
  4275  		alloc := mock.Alloc()
  4276  		alloc.Job = job
  4277  		alloc.JobID = job.ID
  4278  		alloc.NodeID = uuid.Generate()
  4279  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  4280  		alloc.TaskGroup = job.TaskGroups[0].Name
  4281  		allocs = append(allocs, alloc)
  4282  	}
  4283  
  4284  	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "")
  4285  	r := reconciler.Compute()
  4286  
  4287  	d := structs.NewDeployment(job)
  4288  	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  4289  		DesiredTotal: 10,
  4290  	}
  4291  
  4292  	// Assert the correct results
  4293  	assertResults(t, r, &resultExpectation{
  4294  		createDeployment:  d,
  4295  		deploymentUpdates: nil,
  4296  		place:             3,
  4297  		destructive:       1,
  4298  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  4299  			job.TaskGroups[0].Name: {
  4300  				Place:             3,
  4301  				DestructiveUpdate: 1,
  4302  				Ignore:            6,
  4303  			},
  4304  		},
  4305  	})
  4306  
  4307  	assertNamesHaveIndexes(t, intRange(7, 9), placeResultsToNames(r.place))
  4308  	assertNamesHaveIndexes(t, intRange(0, 0), destructiveResultsToNames(r.destructiveUpdate))
  4309  }
  4310  
  4311  // Tests that the reconciler handles rerunning a batch job in the case that the
  4312  // allocations are from an older instance of the job.
  4313  func TestReconciler_Batch_Rerun(t *testing.T) {
  4314  	job := mock.Job()
  4315  	job.Type = structs.JobTypeBatch
  4316  	job.TaskGroups[0].Update = nil
  4317  
  4318  	// Create 10 allocations from the old job and have them be complete
  4319  	var allocs []*structs.Allocation
  4320  	for i := 0; i < 10; i++ {
  4321  		alloc := mock.Alloc()
  4322  		alloc.Job = job
  4323  		alloc.JobID = job.ID
  4324  		alloc.NodeID = uuid.Generate()
  4325  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  4326  		alloc.TaskGroup = job.TaskGroups[0].Name
  4327  		alloc.ClientStatus = structs.AllocClientStatusComplete
  4328  		alloc.DesiredStatus = structs.AllocDesiredStatusStop
  4329  		allocs = append(allocs, alloc)
  4330  	}
  4331  
  4332  	// Create a copy of the job that is "new"
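        	// Bumping CreateIndex marks it as a distinct instance of the batch
        	// job, so the completed allocs from the old instance don't count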
  4333  	job2 := job.Copy()
  4334  	job2.CreateIndex++
  4335  
  4336  	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnIgnore, true, job2.ID, job2, nil, allocs, nil, "")
  4337  	r := reconciler.Compute()
  4338  
  4339  	// Assert the correct results
  4340  	assertResults(t, r, &resultExpectation{
  4341  		createDeployment:  nil,
  4342  		deploymentUpdates: nil,
  4343  		place:             10,
  4344  		destructive:       0,
  4345  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  4346  			job.TaskGroups[0].Name: {
  4347  				Place:             10,
  4348  				DestructiveUpdate: 0,
  4349  				Ignore:            10,
  4350  			},
  4351  		},
  4352  	})
  4353  
  4354  	assertNamesHaveIndexes(t, intRange(0, 9), placeResultsToNames(r.place))
  4355  }
  4356  
  4357  // Test that a failed deployment will not result in rescheduling failed allocations
  4358  func TestReconciler_FailedDeployment_DontReschedule(t *testing.T) {
  4359  	job := mock.Job()
  4360  	job.TaskGroups[0].Update = noCanaryUpdate
  4361  
  4362  	tgName := job.TaskGroups[0].Name
  4363  	now := time.Now()
  4364  	// Create an existing failed deployment that has some placed allocs
  4365  	d := structs.NewDeployment(job)
  4366  	d.Status = structs.DeploymentStatusFailed
  4367  	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  4368  		Promoted:     true,
  4369  		DesiredTotal: 5,
  4370  		PlacedAllocs: 4,
  4371  	}
  4372  
  4373  	// Create 4 allocations and mark two as failed
  4374  	var allocs []*structs.Allocation
  4375  	for i := 0; i < 4; i++ {
  4376  		alloc := mock.Alloc()
  4377  		alloc.Job = job
  4378  		alloc.JobID = job.ID
  4379  		alloc.NodeID = uuid.Generate()
  4380  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  4381  		alloc.TaskGroup = job.TaskGroups[0].Name
  4382  		alloc.DeploymentID = d.ID
  4383  		allocs = append(allocs, alloc)
  4384  	}
  4385  
  4386  	// Mark two allocations as failed and reschedulable now
  4387  	allocs[2].ClientStatus = structs.AllocClientStatusFailed
  4388  	allocs[2].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
  4389  		StartedAt:  now.Add(-1 * time.Hour),
  4390  		FinishedAt: now.Add(-10 * time.Second)}}
  4391  
  4392  	allocs[3].ClientStatus = structs.AllocClientStatusFailed
  4393  	allocs[3].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
  4394  		StartedAt:  now.Add(-1 * time.Hour),
  4395  		FinishedAt: now.Add(-10 * time.Second)}}
  4396  
  4397  	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnDestructive, false, job.ID, job, d, allocs, nil, "")
  4398  	r := reconciler.Compute()
  4399  
  4400  	// Assert that no rescheduled placements were created
  4401  	assertResults(t, r, &resultExpectation{
  4402  		place:             0,
  4403  		createDeployment:  nil,
  4404  		deploymentUpdates: nil,
  4405  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  4406  			job.TaskGroups[0].Name: {
  4407  				Ignore: 2,
  4408  			},
  4409  		},
  4410  	})
  4411  }
  4412  
  4413  // Test that a running deployment with failed allocs will not result in
  4414  // rescheduling failed allocations unless they are marked as reschedulable.
  4415  func TestReconciler_DeploymentWithFailedAllocs_DontReschedule(t *testing.T) {
  4416  	job := mock.Job()
  4417  	job.TaskGroups[0].Update = noCanaryUpdate
  4418  	tgName := job.TaskGroups[0].Name
  4419  	now := time.Now()
  4420  
  4421  	// Mock deployment with failed allocs, but deployment watcher hasn't marked it as failed yet
  4422  	d := structs.NewDeployment(job)
  4423  	d.Status = structs.DeploymentStatusRunning
  4424  	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  4425  		Promoted:     false,
  4426  		DesiredTotal: 10,
  4427  		PlacedAllocs: 10,
  4428  	}
  4429  
  4430  	// Create 10 allocations
  4431  	var allocs []*structs.Allocation
  4432  	for i := 0; i < 10; i++ {
  4433  		alloc := mock.Alloc()
  4434  		alloc.Job = job
  4435  		alloc.JobID = job.ID
  4436  		alloc.NodeID = uuid.Generate()
  4437  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  4438  		alloc.TaskGroup = job.TaskGroups[0].Name
  4439  		alloc.DeploymentID = d.ID
  4440  		alloc.ClientStatus = structs.AllocClientStatusFailed
  4441  		alloc.TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
  4442  			StartedAt:  now.Add(-1 * time.Hour),
  4443  			FinishedAt: now.Add(-10 * time.Second)}}
  4444  		allocs = append(allocs, alloc)
  4445  	}
  4446  
  4447  	// Mark half of them as reschedulable
  4448  	for i := 0; i < 5; i++ {
  4449  		allocs[i].DesiredTransition.Reschedule = helper.BoolToPtr(true)
  4450  	}
  4451  
  4452  	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnDestructive, false, job.ID, job, d, allocs, nil, "")
  4453  	r := reconciler.Compute()
  4454  
  4455  	// Assert that only the allocs marked reschedulable were placed
  4456  	assertResults(t, r, &resultExpectation{
  4457  		place:             5,
  4458  		createDeployment:  nil,
  4459  		deploymentUpdates: nil,
  4460  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  4461  			job.TaskGroups[0].Name: {
  4462  				Place:  5,
  4463  				Ignore: 5,
  4464  			},
  4465  		},
  4466  	})
  4467  }
  4468  
  4469  // Tests that a fully healthy deployment is marked successful and that
        // failed, stopped allocs from an earlier job version are ignored
  4470  func TestReconciler_FailedDeployment_AutoRevert_CancelCanaries(t *testing.T) {
  4471  	// Create a job
  4472  	job := mock.Job()
  4473  	job.TaskGroups[0].Count = 3
  4474  	job.TaskGroups[0].Update = &structs.UpdateStrategy{
  4475  		Canary:          3,
  4476  		MaxParallel:     2,
  4477  		HealthCheck:     structs.UpdateStrategyHealthCheck_Checks,
  4478  		MinHealthyTime:  10 * time.Second,
  4479  		HealthyDeadline: 10 * time.Minute,
  4480  		Stagger:         31 * time.Second,
  4481  	}
  4482  
  4483  	// Create v1 of the job
  4484  	jobv1 := job.Copy()
  4485  	jobv1.Version = 1
  4486  	jobv1.TaskGroups[0].Meta = map[string]string{"version": "1"}
  4487  
  4488  	// Create v2 of the job
  4489  	jobv2 := job.Copy()
  4490  	jobv2.Version = 2
  4491  	jobv2.TaskGroups[0].Meta = map[string]string{"version": "2"}
  4492  
  4493  	d := structs.NewDeployment(jobv2)
  4494  	state := &structs.DeploymentState{
  4495  		Promoted:      true,
  4496  		DesiredTotal:  3,
  4497  		PlacedAllocs:  3,
  4498  		HealthyAllocs: 3,
  4499  	}
  4500  	d.TaskGroups[job.TaskGroups[0].Name] = state
  4501  
  4502  	// Create the healthy, running allocs from v2 of the job
  4503  	var allocs []*structs.Allocation
  4504  	for i := 0; i < 3; i++ {
  4505  		new := mock.Alloc()
  4506  		new.Job = jobv2
  4507  		new.JobID = job.ID
  4508  		new.NodeID = uuid.Generate()
  4509  		new.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  4510  		new.TaskGroup = job.TaskGroups[0].Name
  4511  		new.DeploymentID = d.ID
  4512  		new.DeploymentStatus = &structs.AllocDeploymentStatus{
  4513  			Healthy: helper.BoolToPtr(true),
  4514  		}
  4515  		new.ClientStatus = structs.AllocClientStatusRunning
  4516  		allocs = append(allocs, new)
  4518  	}
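        	// Create the failed, already stopped allocs from v1 of the job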
  4519  	for i := 0; i < 3; i++ {
  4520  		alloc := mock.Alloc()
  4521  		alloc.Job = jobv1
  4522  		alloc.JobID = jobv1.ID
  4523  		alloc.NodeID = uuid.Generate()
  4524  		alloc.Name = structs.AllocName(jobv1.ID, jobv1.TaskGroups[0].Name, uint(i))
  4525  		alloc.TaskGroup = job.TaskGroups[0].Name
  4526  		alloc.DeploymentID = uuid.Generate()
  4527  		alloc.DeploymentStatus = &structs.AllocDeploymentStatus{
  4528  			Healthy: helper.BoolToPtr(false),
  4529  		}
  4530  		alloc.DesiredStatus = structs.AllocDesiredStatusStop
  4531  		alloc.ClientStatus = structs.AllocClientStatusFailed
  4532  		allocs = append(allocs, alloc)
  4533  	}
  4534  
  4535  	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnIgnore, false, job.ID, jobv2, d, allocs, nil, "")
  4536  	r := reconciler.Compute()
  4537  
  4538  	updates := []*structs.DeploymentStatusUpdate{
  4539  		{
  4540  			DeploymentID:      d.ID,
  4541  			Status:            structs.DeploymentStatusSuccessful,
  4542  			StatusDescription: structs.DeploymentStatusDescriptionSuccessful,
  4543  		},
  4544  	}
  4545  
  4546  	// Assert the deployment is marked successful and the existing allocs are ignored
  4547  	assertResults(t, r, &resultExpectation{
  4548  		createDeployment:  nil,
  4549  		deploymentUpdates: updates,
  4550  		place:             0,
  4551  		inplace:           0,
  4552  		stop:              0,
  4553  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  4554  			job.TaskGroups[0].Name: {
  4555  				Stop:          0,
  4556  				InPlaceUpdate: 0,
  4557  				Ignore:        3,
  4558  			},
  4559  		},
  4560  	})
  4561  }
  4562  
  4563  // Test that a successful deployment with failed allocs will result in
  4564  // rescheduling failed allocations
  4565  func TestReconciler_SuccessfulDeploymentWithFailedAllocs_Reschedule(t *testing.T) {
  4566  	job := mock.Job()
  4567  	job.TaskGroups[0].Update = noCanaryUpdate
  4568  	tgName := job.TaskGroups[0].Name
  4569  	now := time.Now()
  4570  
  4571  	// Mock a deployment that has already been marked successful; its allocs failed afterwards
  4572  	d := structs.NewDeployment(job)
  4573  	d.Status = structs.DeploymentStatusSuccessful
  4574  	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  4575  		Promoted:     false,
  4576  		DesiredTotal: 10,
  4577  		PlacedAllocs: 10,
  4578  	}
  4579  
  4580  	// Create 10 allocations
  4581  	var allocs []*structs.Allocation
  4582  	for i := 0; i < 10; i++ {
  4583  		alloc := mock.Alloc()
  4584  		alloc.Job = job
  4585  		alloc.JobID = job.ID
  4586  		alloc.NodeID = uuid.Generate()
  4587  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  4588  		alloc.TaskGroup = job.TaskGroups[0].Name
  4589  		alloc.DeploymentID = d.ID
  4590  		alloc.ClientStatus = structs.AllocClientStatusFailed
  4591  		alloc.TaskStates = map[string]*structs.TaskState{tgName: {State: structs.TaskStateDead,
  4592  			StartedAt:  now.Add(-1 * time.Hour),
  4593  			FinishedAt: now.Add(-10 * time.Second)}}
  4594  		allocs = append(allocs, alloc)
  4595  	}
  4596  
  4597  	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnDestructive, false, job.ID, job, d, allocs, nil, "")
  4598  	r := reconciler.Compute()
  4599  
  4600  	// Assert that rescheduled placements were created
  4601  	assertResults(t, r, &resultExpectation{
  4602  		place:             10,
  4603  		createDeployment:  nil,
  4604  		deploymentUpdates: nil,
  4605  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  4606  			job.TaskGroups[0].Name: {
  4607  				Place:  10,
  4608  				Ignore: 0,
  4609  			},
  4610  		},
  4611  	})
  4612  	assertPlaceResultsHavePreviousAllocs(t, 10, r.place)
  4613  }
  4614  
  4615  // Tests force rescheduling a failed alloc that is past its reschedule limit
  4616  func TestReconciler_ForceReschedule_Service(t *testing.T) {
  4617  	require := require.New(t)
  4618  
  4619  	// Set the desired count to 5
  4620  	job := mock.Job()
  4621  	job.TaskGroups[0].Count = 5
  4622  	tgName := job.TaskGroups[0].Name
  4623  
  4624  	// Set up reschedule policy and update stanza
  4625  	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
  4626  		Attempts:      1,
  4627  		Interval:      24 * time.Hour,
  4628  		Delay:         5 * time.Second,
  4629  		DelayFunction: "constant",
  4630  		MaxDelay:      1 * time.Hour,
  4631  		Unlimited:     false,
  4632  	}
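        	// One attempt per 24h window; the reschedule event below consumes it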
  4633  	job.TaskGroups[0].Update = noCanaryUpdate
  4634  
  4635  	// Create 5 existing allocations
  4636  	var allocs []*structs.Allocation
  4637  	for i := 0; i < 5; i++ {
  4638  		alloc := mock.Alloc()
  4639  		alloc.Job = job
  4640  		alloc.JobID = job.ID
  4641  		alloc.NodeID = uuid.Generate()
  4642  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  4643  		alloc.ClientStatus = structs.AllocClientStatusRunning
  4644  		allocs = append(allocs, alloc)
  4645  	}
  4646  
  4647  	// Mark one alloc as failed and past its reschedule limit, so it is not normally eligible to reschedule
  4648  	allocs[0].ClientStatus = structs.AllocClientStatusFailed
  4649  	allocs[0].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
  4650  		{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
  4651  			PrevAllocID: uuid.Generate(),
  4652  			PrevNodeID:  uuid.Generate(),
  4653  		},
  4654  	}}
  4655  
  4656  	// Mark DesiredTransition ForceReschedule to override the exhausted reschedule policy
  4657  	allocs[0].DesiredTransition = structs.DesiredTransition{ForceReschedule: helper.BoolToPtr(true)}
  4658  
  4659  	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
  4660  	r := reconciler.Compute()
  4661  
  4662  	// Verify that no follow-up evals were created; the forced reschedule is placed immediately
  4663  	evals := r.desiredFollowupEvals[tgName]
  4664  	require.Nil(evals)
  4665  
  4666  	// Verify that one rescheduled alloc was created because of the forced reschedule
  4667  	assertResults(t, r, &resultExpectation{
  4668  		createDeployment:  nil,
  4669  		deploymentUpdates: nil,
  4670  		place:             1,
  4671  		inplace:           0,
  4672  		stop:              0,
  4673  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  4674  			job.TaskGroups[0].Name: {
  4675  				Place:  1,
  4676  				Ignore: 4,
  4677  			},
  4678  		},
  4679  	})
  4680  
  4681  	// Rescheduled allocs should have previous allocs
  4682  	assertNamesHaveIndexes(t, intRange(0, 0), placeResultsToNames(r.place))
  4683  	assertPlaceResultsHavePreviousAllocs(t, 1, r.place)
  4684  	assertPlacementsAreRescheduled(t, 1, r.place)
  4685  }