github.com/smithx10/nomad@v0.9.1-rc1/scheduler/reconcile_test.go

package scheduler

import (
	"fmt"
	"reflect"
	"regexp"
	"strconv"
	"testing"
	"time"

	"github.com/hashicorp/nomad/helper"
	"github.com/hashicorp/nomad/helper/testlog"
	"github.com/hashicorp/nomad/helper/uuid"
	"github.com/hashicorp/nomad/nomad/mock"
	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/kr/pretty"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
)

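// Fixture update strategies shared by the tests below: one that deploys with
// canaries and one that does not.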
var (
	canaryUpdate = &structs.UpdateStrategy{
		Canary:          2,
		MaxParallel:     2,
		HealthCheck:     structs.UpdateStrategyHealthCheck_Checks,
		MinHealthyTime:  10 * time.Second,
		HealthyDeadline: 10 * time.Minute,
		Stagger:         31 * time.Second,
	}

	noCanaryUpdate = &structs.UpdateStrategy{
		MaxParallel:     4,
		HealthCheck:     structs.UpdateStrategyHealthCheck_Checks,
		MinHealthyTime:  10 * time.Second,
		HealthyDeadline: 10 * time.Minute,
		Stagger:         31 * time.Second,
	}
)

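// The alloc update fixtures below implement allocUpdateType, the hook the
// reconciler calls to decide how a job change affects an existing allocation.
// The returns are (ignore, destructive, updated): leave the alloc as-is,
// replace it destructively, or update it in place with the returned alloc.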
func allocUpdateFnIgnore(*structs.Allocation, *structs.Job, *structs.TaskGroup) (bool, bool, *structs.Allocation) {
	return true, false, nil
}

func allocUpdateFnDestructive(*structs.Allocation, *structs.Job, *structs.TaskGroup) (bool, bool, *structs.Allocation) {
	return false, true, nil
}

func allocUpdateFnInplace(existing *structs.Allocation, _ *structs.Job, newTG *structs.TaskGroup) (bool, bool, *structs.Allocation) {
	// Create a shallow copy
	newAlloc := existing.CopySkipJob()
	newAlloc.AllocatedResources = &structs.AllocatedResources{
		Tasks: map[string]*structs.AllocatedTaskResources{},
		Shared: structs.AllocatedSharedResources{
			DiskMB: int64(newTG.EphemeralDisk.SizeMB),
		},
	}

	// Use the new task resources but keep the network from the old
	for _, task := range newTG.Tasks {
		networks := existing.AllocatedResources.Tasks[task.Name].Copy().Networks
		newAlloc.AllocatedResources.Tasks[task.Name] = &structs.AllocatedTaskResources{
			Cpu: structs.AllocatedCpuResources{
				CpuShares: int64(task.Resources.CPU),
			},
			Memory: structs.AllocatedMemoryResources{
				MemoryMB: int64(task.Resources.MemoryMB),
			},
			Networks: networks,
		}
	}

	return false, false, newAlloc
}

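// allocUpdateFnMock builds an allocUpdateType that dispatches to a
// per-allocation handler by ID, falling back to unhandled for any allocation
// without an entry.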
func allocUpdateFnMock(handled map[string]allocUpdateType, unhandled allocUpdateType) allocUpdateType {
	return func(existing *structs.Allocation, newJob *structs.Job, newTG *structs.TaskGroup) (bool, bool, *structs.Allocation) {
		if fn, ok := handled[existing.ID]; ok {
			return fn(existing, newJob, newTG)
		}

		return unhandled(existing, newJob, newTG)
	}
}

var (
	// allocationIndexRegex is a regular expression to find the allocation index.
	allocationIndexRegex = regexp.MustCompile(`.+\[(\d+)\]$`)
)

// allocNameToIndex returns the index of the allocation.
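// For example, "my-job.web[3]" yields 3; names that do not match yield 0.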
func allocNameToIndex(name string) uint {
	matches := allocationIndexRegex.FindStringSubmatch(name)
	if len(matches) != 2 {
		return 0
	}

	index, err := strconv.Atoi(matches[1])
	if err != nil {
		return 0
	}

	return uint(index)
}

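// assertNamesHaveIndexes asserts that the given allocation names carry
// exactly the expected indexes, counting duplicates.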
func assertNamesHaveIndexes(t *testing.T, indexes []int, names []string) {
	t.Helper()
	m := make(map[uint]int)
	for _, i := range indexes {
		m[uint(i)]++
	}

	for _, n := range names {
		index := allocNameToIndex(n)
		val, contained := m[index]
		if !contained {
			t.Fatalf("Unexpected index %d from name %s\nAll names: %v", index, n, names)
		}

		val--
		if val < 0 {
			t.Fatalf("Index %d repeated too many times\nAll names: %v", index, names)
		}
		m[index] = val
	}

	for k, remainder := range m {
		if remainder != 0 {
			t.Fatalf("Index %d has %d expected uses remaining\nAll names: %v", k, remainder, names)
		}
	}
}

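// assertNoCanariesStopped asserts that none of the stopped allocations is a
// canary placed by the deployment.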
func assertNoCanariesStopped(t *testing.T, d *structs.Deployment, stop []allocStopResult) {
	t.Helper()
	canaryIndex := make(map[string]struct{})
	for _, state := range d.TaskGroups {
		for _, c := range state.PlacedCanaries {
			canaryIndex[c] = struct{}{}
		}
	}

	for _, s := range stop {
		if _, ok := canaryIndex[s.alloc.ID]; ok {
			t.Fatalf("Stopping canary alloc %q %q", s.alloc.ID, s.alloc.Name)
		}
	}
}

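// assertPlaceResultsHavePreviousAllocs asserts that exactly numPrevious
// placements link back to a previous allocation with a matching name.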
func assertPlaceResultsHavePreviousAllocs(t *testing.T, numPrevious int, place []allocPlaceResult) {
	t.Helper()
	names := make(map[string]struct{}, numPrevious)

	found := 0
	for _, p := range place {
		if _, ok := names[p.name]; ok {
			t.Fatalf("Name %q already placed", p.name)
		}
		names[p.name] = struct{}{}

		if p.previousAlloc == nil {
			continue
		}

		if act := p.previousAlloc.Name; p.name != act {
			t.Fatalf("Name mismatch on previous alloc; got %q; want %q", act, p.name)
		}
		found++
	}
	if numPrevious != found {
		t.Fatalf("wanted %d; got %d placements with previous allocs", numPrevious, found)
	}
}

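// assertPlacementsAreRescheduled asserts that exactly numRescheduled
// placements are marked as reschedules of a previous allocation.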
func assertPlacementsAreRescheduled(t *testing.T, numRescheduled int, place []allocPlaceResult) {
	t.Helper()
	names := make(map[string]struct{}, numRescheduled)

	found := 0
	for _, p := range place {
		if _, ok := names[p.name]; ok {
			t.Fatalf("Name %q already placed", p.name)
		}
		names[p.name] = struct{}{}

		if p.previousAlloc == nil {
			continue
		}
		if p.reschedule {
			found++
		}
	}
	if numRescheduled != found {
		t.Fatalf("wanted %d; got %d placements that are rescheduled", numRescheduled, found)
	}
}

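// intRange expands (start, end) pairs into a flat slice of inclusive ranges,
// e.g. intRange(0, 2, 5, 6) yields [0 1 2 5 6].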
func intRange(pairs ...int) []int {
	if len(pairs)%2 != 0 {
		return nil
	}

	var r []int
	for i := 0; i < len(pairs); i += 2 {
		for j := pairs[i]; j <= pairs[i+1]; j++ {
			r = append(r, j)
		}
	}
	return r
}

func placeResultsToNames(place []allocPlaceResult) []string {
	names := make([]string, 0, len(place))
	for _, p := range place {
		names = append(names, p.name)
	}
	return names
}

func destructiveResultsToNames(destructive []allocDestructiveResult) []string {
	names := make([]string, 0, len(destructive))
	for _, d := range destructive {
		names = append(names, d.placeName)
	}
	return names
}

func stopResultsToNames(stop []allocStopResult) []string {
	names := make([]string, 0, len(stop))
	for _, s := range stop {
		names = append(names, s.alloc.Name)
	}
	return names
}

func attributeUpdatesToNames(attributeUpdates map[string]*structs.Allocation) []string {
	names := make([]string, 0, len(attributeUpdates))
	for _, a := range attributeUpdates {
		names = append(names, a.Name)
	}
	return names
}

func allocsToNames(allocs []*structs.Allocation) []string {
	names := make([]string, 0, len(allocs))
	for _, a := range allocs {
		names = append(names, a.Name)
	}
	return names
}

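// resultExpectation captures what a reconcile run is expected to produce:
// the deployment to create, deployment status updates, counts of each kind
// of change, and the desired-update annotations per task group.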
type resultExpectation struct {
	createDeployment  *structs.Deployment
	deploymentUpdates []*structs.DeploymentStatusUpdate
	place             int
	destructive       int
	inplace           int
	attributeUpdates  int
	stop              int
	desiredTGUpdates  map[string]*structs.DesiredUpdates
}

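// assertResults compares a reconcileResults against a resultExpectation,
// diffing the created deployment when one is expected.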
func assertResults(t *testing.T, r *reconcileResults, exp *resultExpectation) {
	t.Helper()
	assert := assert.New(t)

	if exp.createDeployment != nil && r.deployment == nil {
		t.Errorf("Expected a created deployment; got none")
	} else if exp.createDeployment == nil && r.deployment != nil {
		t.Errorf("Expected no created deployment; got %#v", r.deployment)
	} else if exp.createDeployment != nil && r.deployment != nil {
		// Clear the deployment ID
		r.deployment.ID, exp.createDeployment.ID = "", ""
		if !reflect.DeepEqual(r.deployment, exp.createDeployment) {
			t.Errorf("Unexpected created deployment; got\n%#v\nwant\n%#v\nDiff: %v",
				r.deployment, exp.createDeployment, pretty.Diff(r.deployment, exp.createDeployment))
		}
	}

	assert.EqualValues(exp.deploymentUpdates, r.deploymentUpdates, "Expected Deployment Updates")
	assert.Len(r.place, exp.place, "Expected Placements")
	assert.Len(r.destructiveUpdate, exp.destructive, "Expected Destructive")
	assert.Len(r.inplaceUpdate, exp.inplace, "Expected Inplace Updates")
	assert.Len(r.attributeUpdates, exp.attributeUpdates, "Expected Attribute Updates")
	assert.Len(r.stop, exp.stop, "Expected Stops")
	assert.EqualValues(exp.desiredTGUpdates, r.desiredTGUpdates, "Expected Desired TG Update Annotations")
}

// Tests the reconciler properly handles placements for a job that has no
// existing allocations
func TestReconciler_Place_NoExisting(t *testing.T) {
	job := mock.Job()
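	// NewAllocReconciler args: logger, alloc update fn, batch job flag,
	// job ID, job, current deployment, existing allocs, tainted nodes, and
	// the triggering eval ID.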
	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, nil, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             10,
		inplace:           0,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place: 10,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 9), placeResultsToNames(r.place))
}

// Tests the reconciler properly handles placements for a job that has some
// existing allocations
func TestReconciler_Place_Existing(t *testing.T) {
	job := mock.Job()

	// Create 5 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 5; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             5,
		inplace:           0,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:  5,
				Ignore: 5,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(5, 9), placeResultsToNames(r.place))
}

// Tests the reconciler properly handles stopping allocations for a job that has
// scaled down
func TestReconciler_ScaleDown_Partial(t *testing.T) {
	// Has desired 10
	job := mock.Job()

	// Create 20 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 20; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             0,
		inplace:           0,
		stop:              10,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Ignore: 10,
				Stop:   10,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(10, 19), stopResultsToNames(r.stop))
}

// Tests the reconciler properly handles stopping allocations for a job that has
// scaled down to zero desired
func TestReconciler_ScaleDown_Zero(t *testing.T) {
	// Set desired 0
	job := mock.Job()
	job.TaskGroups[0].Count = 0

	// Create 20 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 20; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             0,
		inplace:           0,
		stop:              20,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Stop: 20,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 19), stopResultsToNames(r.stop))
}

// Tests the reconciler properly handles stopping allocations for a job that has
// scaled down to zero desired where allocs have duplicate names
func TestReconciler_ScaleDown_Zero_DuplicateNames(t *testing.T) {
	// Set desired 0
	job := mock.Job()
	job.TaskGroups[0].Count = 0

	// Create 20 existing allocations
	var allocs []*structs.Allocation
	var expectedStopped []int
	for i := 0; i < 20; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i%2))
		allocs = append(allocs, alloc)
		expectedStopped = append(expectedStopped, i%2)
	}

	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             0,
		inplace:           0,
		stop:              20,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Stop: 20,
			},
		},
	})

	assertNamesHaveIndexes(t, expectedStopped, stopResultsToNames(r.stop))
}

// Tests the reconciler properly handles inplace upgrading allocations
func TestReconciler_Inplace(t *testing.T) {
	job := mock.Job()

	// Create 10 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnInplace, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             0,
		inplace:           10,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				InPlaceUpdate: 10,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 9), allocsToNames(r.inplaceUpdate))
}

// Tests the reconciler properly handles inplace upgrading allocations while
// scaling up
func TestReconciler_Inplace_ScaleUp(t *testing.T) {
	// Set desired 15
	job := mock.Job()
	job.TaskGroups[0].Count = 15

	// Create 10 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnInplace, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             5,
		inplace:           10,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:         5,
				InPlaceUpdate: 10,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 9), allocsToNames(r.inplaceUpdate))
	assertNamesHaveIndexes(t, intRange(10, 14), placeResultsToNames(r.place))
}

// Tests the reconciler properly handles inplace upgrading allocations while
// scaling down
func TestReconciler_Inplace_ScaleDown(t *testing.T) {
	// Set desired 5
	job := mock.Job()
	job.TaskGroups[0].Count = 5

	// Create 10 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnInplace, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             0,
		inplace:           5,
		stop:              5,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Stop:          5,
				InPlaceUpdate: 5,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 4), allocsToNames(r.inplaceUpdate))
	assertNamesHaveIndexes(t, intRange(5, 9), stopResultsToNames(r.stop))
}

// Tests the reconciler properly handles destructive upgrading allocations
func TestReconciler_Destructive(t *testing.T) {
	job := mock.Job()

	// Create 10 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		destructive:       10,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				DestructiveUpdate: 10,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 9), destructiveResultsToNames(r.destructiveUpdate))
}

// Tests the reconciler properly handles destructive upgrading allocations while
// scaling up
func TestReconciler_Destructive_ScaleUp(t *testing.T) {
	// Set desired 15
	job := mock.Job()
	job.TaskGroups[0].Count = 15

	// Create 10 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             5,
		destructive:       10,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:             5,
				DestructiveUpdate: 10,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 9), destructiveResultsToNames(r.destructiveUpdate))
	assertNamesHaveIndexes(t, intRange(10, 14), placeResultsToNames(r.place))
}

// Tests the reconciler properly handles destructive upgrading allocations while
// scaling down
func TestReconciler_Destructive_ScaleDown(t *testing.T) {
	// Set desired 5
	job := mock.Job()
	job.TaskGroups[0].Count = 5

	// Create 10 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		destructive:       5,
		stop:              5,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Stop:              5,
				DestructiveUpdate: 5,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(5, 9), stopResultsToNames(r.stop))
	assertNamesHaveIndexes(t, intRange(0, 4), destructiveResultsToNames(r.destructiveUpdate))
}

// Tests the reconciler properly handles lost nodes with allocations
func TestReconciler_LostNode(t *testing.T) {
	job := mock.Job()

	// Create 10 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	// Build a map of tainted nodes
	tainted := make(map[string]*structs.Node, 2)
	for i := 0; i < 2; i++ {
		n := mock.Node()
		n.ID = allocs[i].NodeID
		n.Status = structs.NodeStatusDown
		tainted[n.ID] = n
	}

	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, tainted, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             2,
		inplace:           0,
		stop:              2,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:  2,
				Stop:   2,
				Ignore: 8,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop))
	assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place))
}

// Tests the reconciler properly handles lost nodes with allocations while
// scaling up
func TestReconciler_LostNode_ScaleUp(t *testing.T) {
	// Set desired 15
	job := mock.Job()
	job.TaskGroups[0].Count = 15

	// Create 10 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	// Build a map of tainted nodes
	tainted := make(map[string]*structs.Node, 2)
	for i := 0; i < 2; i++ {
		n := mock.Node()
		n.ID = allocs[i].NodeID
		n.Status = structs.NodeStatusDown
		tainted[n.ID] = n
	}

	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, tainted, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             7,
		inplace:           0,
		stop:              2,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:  7,
				Stop:   2,
				Ignore: 8,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop))
	assertNamesHaveIndexes(t, intRange(0, 1, 10, 14), placeResultsToNames(r.place))
}

// Tests the reconciler properly handles lost nodes with allocations while
// scaling down
func TestReconciler_LostNode_ScaleDown(t *testing.T) {
	// Set desired 5
	job := mock.Job()
	job.TaskGroups[0].Count = 5

	// Create 10 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	// Build a map of tainted nodes
	tainted := make(map[string]*structs.Node, 2)
	for i := 0; i < 2; i++ {
		n := mock.Node()
		n.ID = allocs[i].NodeID
		n.Status = structs.NodeStatusDown
		tainted[n.ID] = n
	}

	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, tainted, "")
	r := reconciler.Compute()

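	// The two lost allocs are stopped, and the scale-down stops the three
	// highest-indexed remaining allocs.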
	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             0,
		inplace:           0,
		stop:              5,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Stop:   5,
				Ignore: 5,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 1, 7, 9), stopResultsToNames(r.stop))
}

// Tests the reconciler properly handles draining nodes with allocations
func TestReconciler_DrainNode(t *testing.T) {
	job := mock.Job()

	// Create 10 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	// Build a map of tainted nodes
	tainted := make(map[string]*structs.Node, 2)
	for i := 0; i < 2; i++ {
		n := mock.Node()
		n.ID = allocs[i].NodeID
		allocs[i].DesiredTransition.Migrate = helper.BoolToPtr(true)
		n.Drain = true
		tainted[n.ID] = n
	}

	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, tainted, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             2,
		inplace:           0,
		stop:              2,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Migrate: 2,
				Ignore:  8,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop))
	assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place))
	assertPlaceResultsHavePreviousAllocs(t, 2, r.place)
	// These should not have the reschedule field set
	assertPlacementsAreRescheduled(t, 0, r.place)
}

// Tests the reconciler properly handles draining nodes with allocations while
// scaling up
func TestReconciler_DrainNode_ScaleUp(t *testing.T) {
	// Set desired 15
	job := mock.Job()
	job.TaskGroups[0].Count = 15

	// Create 10 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	// Build a map of tainted nodes
	tainted := make(map[string]*structs.Node, 2)
	for i := 0; i < 2; i++ {
		n := mock.Node()
		n.ID = allocs[i].NodeID
		allocs[i].DesiredTransition.Migrate = helper.BoolToPtr(true)
		n.Drain = true
		tainted[n.ID] = n
	}

	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, tainted, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             7,
		inplace:           0,
		stop:              2,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:   5,
				Migrate: 2,
				Ignore:  8,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop))
	assertNamesHaveIndexes(t, intRange(0, 1, 10, 14), placeResultsToNames(r.place))
	assertPlaceResultsHavePreviousAllocs(t, 2, r.place)
	// These should not have the reschedule field set
	assertPlacementsAreRescheduled(t, 0, r.place)
}

// Tests the reconciler properly handles draining nodes with allocations while
// scaling down
func TestReconciler_DrainNode_ScaleDown(t *testing.T) {
	// Set desired 8
	job := mock.Job()
	job.TaskGroups[0].Count = 8

	// Create 10 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	// Build a map of tainted nodes
	tainted := make(map[string]*structs.Node, 3)
	for i := 0; i < 3; i++ {
		n := mock.Node()
		n.ID = allocs[i].NodeID
		allocs[i].DesiredTransition.Migrate = helper.BoolToPtr(true)
		n.Drain = true
		tainted[n.ID] = n
	}

	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, tainted, "")
	r := reconciler.Compute()

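	// Of the three draining allocs, two satisfy the scale-down and are
	// stopped outright; only one needs to be migrated.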
	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             1,
		inplace:           0,
		stop:              3,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Migrate: 1,
				Stop:    2,
				Ignore:  7,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 2), stopResultsToNames(r.stop))
	assertNamesHaveIndexes(t, intRange(0, 0), placeResultsToNames(r.place))
	assertPlaceResultsHavePreviousAllocs(t, 1, r.place)
	// These should not have the reschedule field set
	assertPlacementsAreRescheduled(t, 0, r.place)
}

// Tests the reconciler properly handles a task group being removed
func TestReconciler_RemovedTG(t *testing.T) {
	job := mock.Job()

	// Create 10 allocations for a tg that no longer exists
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	oldName := job.TaskGroups[0].Name
	newName := "different"
	job.TaskGroups[0].Name = newName

	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             10,
		inplace:           0,
		stop:              10,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			oldName: {
				Stop: 10,
			},
			newName: {
				Place: 10,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 9), stopResultsToNames(r.stop))
	assertNamesHaveIndexes(t, intRange(0, 9), placeResultsToNames(r.place))
}

// Tests the reconciler properly handles a job that is stopped or nil
func TestReconciler_JobStopped(t *testing.T) {
	job := mock.Job()
	job.Stop = true

	cases := []struct {
		name             string
		job              *structs.Job
		jobID, taskGroup string
	}{
		{
			name:      "stopped job",
			job:       job,
			jobID:     job.ID,
			taskGroup: job.TaskGroups[0].Name,
		},
		{
			name:      "nil job",
			job:       nil,
			jobID:     "foo",
			taskGroup: "bar",
		},
	}

	for _, c := range cases {
		t.Run(c.name, func(t *testing.T) {
			// Create 10 allocations
			var allocs []*structs.Allocation
			for i := 0; i < 10; i++ {
				alloc := mock.Alloc()
				alloc.Job = c.job
				alloc.JobID = c.jobID
				alloc.NodeID = uuid.Generate()
				alloc.Name = structs.AllocName(c.jobID, c.taskGroup, uint(i))
				alloc.TaskGroup = c.taskGroup
				allocs = append(allocs, alloc)
			}

			reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, c.jobID, c.job, nil, allocs, nil, "")
			r := reconciler.Compute()

			// Assert the correct results
			assertResults(t, r, &resultExpectation{
				createDeployment:  nil,
				deploymentUpdates: nil,
				place:             0,
				inplace:           0,
				stop:              10,
				desiredTGUpdates: map[string]*structs.DesiredUpdates{
					c.taskGroup: {
						Stop: 10,
					},
				},
			})

			assertNamesHaveIndexes(t, intRange(0, 9), stopResultsToNames(r.stop))
		})
	}
}

// Tests the reconciler doesn't update allocs in terminal state
// when job is stopped or nil
func TestReconciler_JobStopped_TerminalAllocs(t *testing.T) {
	job := mock.Job()
	job.Stop = true

	cases := []struct {
		name             string
		job              *structs.Job
		jobID, taskGroup string
	}{
		{
			name:      "stopped job",
			job:       job,
			jobID:     job.ID,
			taskGroup: job.TaskGroups[0].Name,
		},
		{
			name:      "nil job",
			job:       nil,
			jobID:     "foo",
			taskGroup: "bar",
		},
	}

	for _, c := range cases {
		t.Run(c.name, func(t *testing.T) {
			// Create 10 terminal allocations
			var allocs []*structs.Allocation
			for i := 0; i < 10; i++ {
				alloc := mock.Alloc()
				alloc.Job = c.job
				alloc.JobID = c.jobID
				alloc.NodeID = uuid.Generate()
				alloc.Name = structs.AllocName(c.jobID, c.taskGroup, uint(i))
				alloc.TaskGroup = c.taskGroup
				if i%2 == 0 {
					alloc.DesiredStatus = structs.AllocDesiredStatusStop
				} else {
					alloc.ClientStatus = structs.AllocClientStatusFailed
				}
				allocs = append(allocs, alloc)
			}

			reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, c.jobID, c.job, nil, allocs, nil, "")
			r := reconciler.Compute()
			require.Len(t, r.stop, 0)
			// Assert the correct results
			assertResults(t, r, &resultExpectation{
				createDeployment:  nil,
				deploymentUpdates: nil,
				place:             0,
				inplace:           0,
				stop:              0,
				desiredTGUpdates: map[string]*structs.DesiredUpdates{
					c.taskGroup: {},
				},
			})
		})
	}
}

// Tests the reconciler properly handles jobs with multiple task groups
func TestReconciler_MultiTG(t *testing.T) {
	job := mock.Job()
	tg2 := job.TaskGroups[0].Copy()
	tg2.Name = "foo"
	job.TaskGroups = append(job.TaskGroups, tg2)

	// Create 2 existing allocations for the first tg
	var allocs []*structs.Allocation
	for i := 0; i < 2; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             18,
		inplace:           0,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:  8,
				Ignore: 2,
			},
			tg2.Name: {
				Place: 10,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(2, 9, 0, 9), placeResultsToNames(r.place))
}

// Tests the reconciler properly handles jobs with multiple task groups with
// only one having an update stanza and a deployment already being created
func TestReconciler_MultiTG_SingleUpdateStanza(t *testing.T) {
	job := mock.Job()
	tg2 := job.TaskGroups[0].Copy()
	tg2.Name = "foo"
	job.TaskGroups = append(job.TaskGroups, tg2)
	job.TaskGroups[0].Update = noCanaryUpdate

	// Create all the allocs
	var allocs []*structs.Allocation
	for i := 0; i < 2; i++ {
		for j := 0; j < 10; j++ {
			alloc := mock.Alloc()
			alloc.Job = job
			alloc.JobID = job.ID
			alloc.NodeID = uuid.Generate()
			alloc.Name = structs.AllocName(job.ID, job.TaskGroups[i].Name, uint(j))
			alloc.TaskGroup = job.TaskGroups[i].Name
			allocs = append(allocs, alloc)
		}
	}

	d := structs.NewDeployment(job)
	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
		DesiredTotal: 10,
	}

	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, d, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             0,
		inplace:           0,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Ignore: 10,
			},
			tg2.Name: {
				Ignore: 10,
			},
		},
	})
}

// Tests delayed rescheduling of failed batch allocations
func TestReconciler_RescheduleLater_Batch(t *testing.T) {
	require := require.New(t)

	// Set desired 4
	job := mock.Job()
	job.TaskGroups[0].Count = 4
	now := time.Now()

	// Set up reschedule policy
	delayDur := 15 * time.Second
	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{Attempts: 3, Interval: 24 * time.Hour, Delay: delayDur, DelayFunction: "constant"}
	tgName := job.TaskGroups[0].Name

	// Create 6 existing allocations - 2 running, 1 complete and 3 failed
	var allocs []*structs.Allocation
	for i := 0; i < 6; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
		alloc.ClientStatus = structs.AllocClientStatusRunning
	}

	// Mark 3 as failed with restart tracking info
	allocs[0].ClientStatus = structs.AllocClientStatusFailed
	allocs[0].NextAllocation = allocs[1].ID
	allocs[1].ClientStatus = structs.AllocClientStatusFailed
	allocs[1].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
		{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
			PrevAllocID: allocs[0].ID,
			PrevNodeID:  uuid.Generate(),
		},
	}}
	allocs[1].NextAllocation = allocs[2].ID
	allocs[2].ClientStatus = structs.AllocClientStatusFailed
	allocs[2].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
		StartedAt:  now.Add(-1 * time.Hour),
		FinishedAt: now}}
	allocs[2].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
		{RescheduleTime: time.Now().Add(-2 * time.Hour).UTC().UnixNano(),
			PrevAllocID: allocs[0].ID,
			PrevNodeID:  uuid.Generate(),
		},
		{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
			PrevAllocID: allocs[1].ID,
			PrevNodeID:  uuid.Generate(),
		},
	}}

	// Mark one as complete
	allocs[5].ClientStatus = structs.AllocClientStatusComplete

	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, true, job.ID, job, nil, allocs, nil, uuid.Generate())
	r := reconciler.Compute()

	// Two reschedule attempts were already made, one more can be made at a future time
	// Verify that the follow up eval has the expected waitUntil time
	evals := r.desiredFollowupEvals[tgName]
	require.NotNil(evals)
	require.Equal(1, len(evals))
	require.Equal(now.Add(delayDur), evals[0].WaitUntil)

	// Alloc 5 should not be replaced because it is terminal
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             0,
		inplace:           0,
		attributeUpdates:  1,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:         0,
				InPlaceUpdate: 0,
				Ignore:        4,
			},
		},
	})
	assertNamesHaveIndexes(t, intRange(2, 2), attributeUpdatesToNames(r.attributeUpdates))

	// Verify that the followup evalID field is set correctly
	var annotated *structs.Allocation
	for _, a := range r.attributeUpdates {
		annotated = a
	}
	require.Equal(evals[0].ID, annotated.FollowupEvalID)
}

// Tests delayed rescheduling of failed batch allocations and batching of allocs
// with fail times that are close together
func TestReconciler_RescheduleLaterWithBatchedEvals_Batch(t *testing.T) {
	require := require.New(t)

	// Set desired 10
	job := mock.Job()
	job.TaskGroups[0].Count = 10
	now := time.Now()

	// Set up reschedule policy
	delayDur := 15 * time.Second
	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{Attempts: 3, Interval: 24 * time.Hour, Delay: delayDur, DelayFunction: "constant"}
	tgName := job.TaskGroups[0].Name

	// Create 10 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
		alloc.ClientStatus = structs.AllocClientStatusRunning
	}

	// Mark 5 as failed with fail times very close together
	for i := 0; i < 5; i++ {
		allocs[i].ClientStatus = structs.AllocClientStatusFailed
		allocs[i].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
			StartedAt:  now.Add(-1 * time.Hour),
			FinishedAt: now.Add(time.Duration(50*i) * time.Millisecond)}}
	}

	// Mark two more as failed several seconds later
	for i := 5; i < 7; i++ {
		allocs[i].ClientStatus = structs.AllocClientStatusFailed
		allocs[i].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
			StartedAt:  now.Add(-1 * time.Hour),
			FinishedAt: now.Add(10 * time.Second)}}
	}

	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, true, job.ID, job, nil, allocs, nil, uuid.Generate())
	r := reconciler.Compute()

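	// Allocs that fail within a short window of one another are batched into
	// a single followup eval, so the five closely-spaced failures share one
	// eval and the two later failures share another.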
	// Verify that two follow up evals were created
	evals := r.desiredFollowupEvals[tgName]
	require.NotNil(evals)
	require.Equal(2, len(evals))

	// Verify expected WaitUntil values for both batched evals
	require.Equal(now.Add(delayDur), evals[0].WaitUntil)
	secondBatchDuration := delayDur + 10*time.Second
	require.Equal(now.Add(secondBatchDuration), evals[1].WaitUntil)

	// Nothing is replaced yet; all seven failed allocs are annotated with
	// followup evals and are otherwise ignored
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             0,
		inplace:           0,
		attributeUpdates:  7,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:         0,
				InPlaceUpdate: 0,
				Ignore:        10,
			},
		},
	})
	assertNamesHaveIndexes(t, intRange(0, 6), attributeUpdatesToNames(r.attributeUpdates))

	// Verify that the followup evalID field is set correctly
	for _, alloc := range r.attributeUpdates {
		if allocNameToIndex(alloc.Name) < 5 {
			require.Equal(evals[0].ID, alloc.FollowupEvalID)
		} else if allocNameToIndex(alloc.Name) < 7 {
			require.Equal(evals[1].ID, alloc.FollowupEvalID)
		} else {
			t.Fatalf("Unexpected alloc name in attribute updates %v", alloc.Name)
		}
	}
}

// Tests rescheduling failed batch allocations
func TestReconciler_RescheduleNow_Batch(t *testing.T) {
	require := require.New(t)
	// Set desired 4
	job := mock.Job()
	job.TaskGroups[0].Count = 4
	now := time.Now()
	// Set up reschedule policy
	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{Attempts: 3, Interval: 24 * time.Hour, Delay: 5 * time.Second, DelayFunction: "constant"}
	tgName := job.TaskGroups[0].Name
	// Create 6 existing allocations - 2 running, 1 complete and 3 failed
	var allocs []*structs.Allocation
	for i := 0; i < 6; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
		alloc.ClientStatus = structs.AllocClientStatusRunning
	}
	// Mark 3 as failed with restart tracking info
	allocs[0].ClientStatus = structs.AllocClientStatusFailed
	allocs[0].NextAllocation = allocs[1].ID
	allocs[1].ClientStatus = structs.AllocClientStatusFailed
	allocs[1].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
		{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
			PrevAllocID: allocs[0].ID,
			PrevNodeID:  uuid.Generate(),
		},
	}}
	allocs[1].NextAllocation = allocs[2].ID
	allocs[2].ClientStatus = structs.AllocClientStatusFailed
	allocs[2].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
		StartedAt:  now.Add(-1 * time.Hour),
		FinishedAt: now.Add(-5 * time.Second)}}
	allocs[2].FollowupEvalID = uuid.Generate()
	allocs[2].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
		{RescheduleTime: time.Now().Add(-2 * time.Hour).UTC().UnixNano(),
			PrevAllocID: allocs[0].ID,
			PrevNodeID:  uuid.Generate(),
		},
		{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
			PrevAllocID: allocs[1].ID,
			PrevNodeID:  uuid.Generate(),
		},
	}}
	// Mark one as complete
	allocs[5].ClientStatus = structs.AllocClientStatusComplete

	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, true, job.ID, job, nil, allocs, nil, "")
	reconciler.now = now
	r := reconciler.Compute()

	// Verify that no follow up evals were created
	evals := r.desiredFollowupEvals[tgName]
	require.Nil(evals)

	// Two reschedule attempts were made, one more can be made now
	// Alloc 5 should not be replaced because it is terminal
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             1,
		inplace:           0,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:  1,
				Ignore: 3,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(2, 2), placeResultsToNames(r.place))
	assertPlaceResultsHavePreviousAllocs(t, 1, r.place)
	assertPlacementsAreRescheduled(t, 1, r.place)
}

// Tests delayed rescheduling of failed service allocations, including one
// alloc with desired state stop
  1509  func TestReconciler_RescheduleLater_Service(t *testing.T) {
  1510  	require := require.New(t)
  1511  
  1512  	// Set desired 5
  1513  	job := mock.Job()
  1514  	job.TaskGroups[0].Count = 5
  1515  	tgName := job.TaskGroups[0].Name
  1516  	now := time.Now()
  1517  
  1518  	// Set up reschedule policy
  1519  	delayDur := 15 * time.Second
  1520  	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{Attempts: 1, Interval: 24 * time.Hour, Delay: delayDur, MaxDelay: 1 * time.Hour}
  1521  
  1522  	// Create 5 existing allocations
  1523  	var allocs []*structs.Allocation
  1524  	for i := 0; i < 5; i++ {
  1525  		alloc := mock.Alloc()
  1526  		alloc.Job = job
  1527  		alloc.JobID = job.ID
  1528  		alloc.NodeID = uuid.Generate()
  1529  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  1530  		allocs = append(allocs, alloc)
  1531  		alloc.ClientStatus = structs.AllocClientStatusRunning
  1532  	}
  1533  
  1534  	// Mark two as failed
  1535  	allocs[0].ClientStatus = structs.AllocClientStatusFailed
  1536  
  1537  	// Mark one of them as already rescheduled once
  1538  	allocs[0].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
  1539  		{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
  1540  			PrevAllocID: uuid.Generate(),
  1541  			PrevNodeID:  uuid.Generate(),
  1542  		},
  1543  	}}
  1544  	allocs[1].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
  1545  		StartedAt:  now.Add(-1 * time.Hour),
  1546  		FinishedAt: now}}
  1547  	allocs[1].ClientStatus = structs.AllocClientStatusFailed
  1548  
  1549  	// Mark one as desired state stop
  1550  	allocs[4].DesiredStatus = structs.AllocDesiredStatusStop
  1551  
  1552  	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, uuid.Generate())
  1553  	r := reconciler.Compute()
  1554  
  1555  	// Should create one placement and a follow up eval for the delayed reschedule
  1556  	// Verify that the follow up eval has the expected waitUntil time
  1557  	evals := r.desiredFollowupEvals[tgName]
  1558  	require.NotNil(evals)
  1559  	require.Equal(1, len(evals))
  1560  	require.Equal(now.Add(delayDur), evals[0].WaitUntil)
  1561  
  1562  	assertResults(t, r, &resultExpectation{
  1563  		createDeployment:  nil,
  1564  		deploymentUpdates: nil,
  1565  		place:             1,
  1566  		inplace:           0,
  1567  		attributeUpdates:  1,
  1568  		stop:              0,
  1569  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  1570  			job.TaskGroups[0].Name: {
  1571  				Place:         1,
  1572  				InPlaceUpdate: 0,
  1573  				Ignore:        4,
  1574  			},
  1575  		},
  1576  	})
  1577  
  1578  	assertNamesHaveIndexes(t, intRange(4, 4), placeResultsToNames(r.place))
  1579  	assertNamesHaveIndexes(t, intRange(1, 1), attributeUpdatesToNames(r.attributeUpdates))
  1580  
  1581  	// Verify that the followup evalID field is set correctly
  1582  	var annotated *structs.Allocation
  1583  	for _, a := range r.attributeUpdates {
  1584  		annotated = a
  1585  	}
  1586  	require.Equal(evals[0].ID, annotated.FollowupEvalID)
  1587  }
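
        // The waitUntil assertion above holds because, for a delayed reschedule,
        // the follow up eval is scheduled relative to the failure time recorded
        // in the alloc's task state plus the policy delay. A sketch of the
        // expected value, under that assumption (allocs[1].FinishedAt is `now`
        // in this test):
        //
        //	expectedWait := allocs[1].TaskStates[tgName].FinishedAt.Add(delayDur)
        //	// expectedWait == now.Add(delayDur), matching the require above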
  1588  
  1589  // Tests service allocations with client status complete
  1590  func TestReconciler_Service_ClientStatusComplete(t *testing.T) {
  1591  	// Set desired 5
  1592  	job := mock.Job()
  1593  	job.TaskGroups[0].Count = 5
  1594  
  1595  	// Set up reschedule policy
  1596  	delayDur := 15 * time.Second
  1597  	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
  1598  		Attempts: 1,
  1599  		Interval: 24 * time.Hour,
  1600  		Delay:    delayDur,
  1601  		MaxDelay: 1 * time.Hour,
  1602  	}
  1603  
  1604  	// Create 5 existing allocations
  1605  	var allocs []*structs.Allocation
  1606  	for i := 0; i < 5; i++ {
  1607  		alloc := mock.Alloc()
  1608  		alloc.Job = job
  1609  		alloc.JobID = job.ID
  1610  		alloc.NodeID = uuid.Generate()
  1611  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  1612  		allocs = append(allocs, alloc)
  1613  		alloc.ClientStatus = structs.AllocClientStatusRunning
  1614  		alloc.DesiredStatus = structs.AllocDesiredStatusRun
  1615  	}
  1616  
  1617  	// Mark one as client status complete
  1618  	allocs[4].ClientStatus = structs.AllocClientStatusComplete
  1619  
  1620  	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
  1621  	r := reconciler.Compute()
  1622  
  1623  	// Should place a new placement for the alloc that was marked complete
  1624  	assertResults(t, r, &resultExpectation{
  1625  		createDeployment:  nil,
  1626  		deploymentUpdates: nil,
  1627  		place:             1,
  1628  		inplace:           0,
  1629  		stop:              0,
  1630  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  1631  			job.TaskGroups[0].Name: {
  1632  				Place:         1,
  1633  				InPlaceUpdate: 0,
  1634  				Ignore:        4,
  1635  			},
  1636  		},
  1637  	})
  1638  
  1639  	assertNamesHaveIndexes(t, intRange(4, 4), placeResultsToNames(r.place))
  1640  
  1641  }
  1642  
  1643  // Tests service job placement with desired status stop and client status failed
  1644  func TestReconciler_Service_DesiredStop_ClientStatusComplete(t *testing.T) {
  1645  	// Set desired 5
  1646  	job := mock.Job()
  1647  	job.TaskGroups[0].Count = 5
  1648  
  1649  	// Set up reschedule policy
  1650  	delayDur := 15 * time.Second
  1651  	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
  1652  		Attempts: 1,
  1653  		Interval: 24 * time.Hour,
  1654  		Delay:    delayDur,
  1655  		MaxDelay: 1 * time.Hour,
  1656  	}
  1657  
  1658  	// Create 5 existing allocations
  1659  	var allocs []*structs.Allocation
  1660  	for i := 0; i < 5; i++ {
  1661  		alloc := mock.Alloc()
  1662  		alloc.Job = job
  1663  		alloc.JobID = job.ID
  1664  		alloc.NodeID = uuid.Generate()
  1665  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  1666  		allocs = append(allocs, alloc)
  1667  		alloc.ClientStatus = structs.AllocClientStatusRunning
  1668  		alloc.DesiredStatus = structs.AllocDesiredStatusRun
  1669  	}
  1670  
  1671  	// Mark one as failed but with desired status stop
  1672  	// Should not trigger rescheduling logic but should trigger a placement
  1673  	allocs[4].ClientStatus = structs.AllocClientStatusFailed
  1674  	allocs[4].DesiredStatus = structs.AllocDesiredStatusStop
  1675  
  1676  	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
  1677  	r := reconciler.Compute()
  1678  
  1679  	// Should place a new placement for the alloc that was marked stopped
  1680  	assertResults(t, r, &resultExpectation{
  1681  		createDeployment:  nil,
  1682  		deploymentUpdates: nil,
  1683  		place:             1,
  1684  		inplace:           0,
  1685  		stop:              0,
  1686  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  1687  			job.TaskGroups[0].Name: {
  1688  				Place:         1,
  1689  				InPlaceUpdate: 0,
  1690  				Ignore:        4,
  1691  			},
  1692  		},
  1693  	})
  1694  
  1695  	assertNamesHaveIndexes(t, intRange(4, 4), placeResultsToNames(r.place))
  1696  
  1697  	// Should not have any follow up evals created
  1698  	require := require.New(t)
  1699  	require.Equal(0, len(r.desiredFollowupEvals))
  1700  }
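
        // The "RescheduleNow" tests that follow hinge on the gap between an
        // alloc's FinishedAt and the policy Delay. A sketch of the decision,
        // assuming the reconciler compares the next eligible time to its own
        // clock:
        //
        //	eligibleAt := finishedAt.Add(policy.Delay)
        //	rescheduleNow := !eligibleAt.After(reconcilerNow)
        //	// otherwise a follow up eval with WaitUntil = eligibleAt is created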
  1701  
  1702  // Tests immediate rescheduling of failed service allocations
  1703  func TestReconciler_RescheduleNow_Service(t *testing.T) {
  1704  	require := require.New(t)
  1705  
  1706  	// Set desired 5
  1707  	job := mock.Job()
  1708  	job.TaskGroups[0].Count = 5
  1709  	tgName := job.TaskGroups[0].Name
  1710  	now := time.Now()
  1711  
  1712  	// Set up reschedule policy and update stanza
  1713  	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
  1714  		Attempts:      1,
  1715  		Interval:      24 * time.Hour,
  1716  		Delay:         5 * time.Second,
  1717  		DelayFunction: "",
  1718  		MaxDelay:      1 * time.Hour,
  1719  		Unlimited:     false,
  1720  	}
  1721  	job.TaskGroups[0].Update = noCanaryUpdate
  1722  
  1723  	// Create 5 existing allocations
  1724  	var allocs []*structs.Allocation
  1725  	for i := 0; i < 5; i++ {
  1726  		alloc := mock.Alloc()
  1727  		alloc.Job = job
  1728  		alloc.JobID = job.ID
  1729  		alloc.NodeID = uuid.Generate()
  1730  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  1731  		allocs = append(allocs, alloc)
  1732  		alloc.ClientStatus = structs.AllocClientStatusRunning
  1733  	}
  1734  
  1735  	// Mark two as failed
  1736  	allocs[0].ClientStatus = structs.AllocClientStatusFailed
  1737  
  1738  	// Mark one of them as already rescheduled once
  1739  	allocs[0].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
  1740  		{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
  1741  			PrevAllocID: uuid.Generate(),
  1742  			PrevNodeID:  uuid.Generate(),
  1743  		},
  1744  	}}
  1745  	allocs[1].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
  1746  		StartedAt:  now.Add(-1 * time.Hour),
  1747  		FinishedAt: now.Add(-10 * time.Second)}}
  1748  	allocs[1].ClientStatus = structs.AllocClientStatusFailed
  1749  
  1750  	// Mark one as desired state stop
  1751  	allocs[4].DesiredStatus = structs.AllocDesiredStatusStop
  1752  
  1753  	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
  1754  	r := reconciler.Compute()
  1755  
  1756  	// Verify that no follow up evals were created
  1757  	evals := r.desiredFollowupEvals[tgName]
  1758  	require.Nil(evals)
  1759  
  1760  	// Verify that one rescheduled alloc and one replacement for terminal alloc were placed
  1761  	assertResults(t, r, &resultExpectation{
  1762  		createDeployment:  nil,
  1763  		deploymentUpdates: nil,
  1764  		place:             2,
  1765  		inplace:           0,
  1766  		stop:              0,
  1767  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  1768  			job.TaskGroups[0].Name: {
  1769  				Place:  2,
  1770  				Ignore: 3,
  1771  			},
  1772  		},
  1773  	})
  1774  
  1775  	// Rescheduled allocs should have previous allocs
  1776  	assertNamesHaveIndexes(t, intRange(1, 1, 4, 4), placeResultsToNames(r.place))
  1777  	assertPlaceResultsHavePreviousAllocs(t, 1, r.place)
  1778  	assertPlacementsAreRescheduled(t, 1, r.place)
  1779  }
  1780  
  1781  // Tests rescheduling failed service allocations when there's clock drift (up to a second)
  1782  func TestReconciler_RescheduleNow_WithinAllowedTimeWindow(t *testing.T) {
  1783  	require := require.New(t)
  1784  
  1785  	// Set desired 5
  1786  	job := mock.Job()
  1787  	job.TaskGroups[0].Count = 5
  1788  	tgName := job.TaskGroups[0].Name
  1789  	now := time.Now()
  1790  
  1791  	// Set up reschedule policy and update stanza
  1792  	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
  1793  		Attempts:      1,
  1794  		Interval:      24 * time.Hour,
  1795  		Delay:         5 * time.Second,
  1796  		DelayFunction: "",
  1797  		MaxDelay:      1 * time.Hour,
  1798  		Unlimited:     false,
  1799  	}
  1800  	job.TaskGroups[0].Update = noCanaryUpdate
  1801  
  1802  	// Create 5 existing allocations
  1803  	var allocs []*structs.Allocation
  1804  	for i := 0; i < 5; i++ {
  1805  		alloc := mock.Alloc()
  1806  		alloc.Job = job
  1807  		alloc.JobID = job.ID
  1808  		alloc.NodeID = uuid.Generate()
  1809  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  1810  		allocs = append(allocs, alloc)
  1811  		alloc.ClientStatus = structs.AllocClientStatusRunning
  1812  	}
  1813  
  1814  	// Mark one as failed
  1815  	allocs[0].ClientStatus = structs.AllocClientStatusFailed
  1816  
  1817  	// Mark one of them as already rescheduled once
  1818  	allocs[0].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
  1819  		{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
  1820  			PrevAllocID: uuid.Generate(),
  1821  			PrevNodeID:  uuid.Generate(),
  1822  		},
  1823  	}}
  1824  	// Set fail time to 4 seconds ago which falls within the reschedule window
  1825  	allocs[1].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
  1826  		StartedAt:  now.Add(-1 * time.Hour),
  1827  		FinishedAt: now.Add(-4 * time.Second)}}
  1828  	allocs[1].ClientStatus = structs.AllocClientStatusFailed
  1829  
  1830  	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
  1831  	reconciler.now = now
  1832  	r := reconciler.Compute()
  1833  
  1834  	// Verify that no follow up evals were created
  1835  	evals := r.desiredFollowupEvals[tgName]
  1836  	require.Nil(evals)
  1837  
  1838  	// Verify that one rescheduled alloc was placed
  1839  	assertResults(t, r, &resultExpectation{
  1840  		createDeployment:  nil,
  1841  		deploymentUpdates: nil,
  1842  		place:             1,
  1843  		inplace:           0,
  1844  		stop:              0,
  1845  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  1846  			job.TaskGroups[0].Name: {
  1847  				Place:  1,
  1848  				Ignore: 4,
  1849  			},
  1850  		},
  1851  	})
  1852  
  1853  	// Rescheduled allocs should have previous allocs
  1854  	assertNamesHaveIndexes(t, intRange(1, 1), placeResultsToNames(r.place))
  1855  	assertPlaceResultsHavePreviousAllocs(t, 1, r.place)
  1856  	assertPlacementsAreRescheduled(t, 1, r.place)
  1857  }
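
        // Arithmetic behind the test above: the alloc failed 4s ago and the
        // policy delay is 5s, so the next eligible reschedule time is 1s in the
        // future. The reconciler still treats that as "now" because it tolerates
        // a small amount of clock drift (up to about a second, per the test
        // name; the exact window size is an assumption, not asserted here):
        //
        //	eligibleAt := now.Add(-4 * time.Second).Add(5 * time.Second) // now + 1s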
  1858  
  1859  // Tests rescheduling failed service allocations when the eval ID matches and there's a large clock drift
  1860  func TestReconciler_RescheduleNow_EvalIDMatch(t *testing.T) {
  1861  	require := require.New(t)
  1862  
  1863  	// Set desired 5
  1864  	job := mock.Job()
  1865  	job.TaskGroups[0].Count = 5
  1866  	tgName := job.TaskGroups[0].Name
  1867  	now := time.Now()
  1868  
  1869  	// Set up reschedule policy and update stanza
  1870  	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
  1871  		Attempts:      1,
  1872  		Interval:      24 * time.Hour,
  1873  		Delay:         5 * time.Second,
  1874  		DelayFunction: "",
  1875  		MaxDelay:      1 * time.Hour,
  1876  		Unlimited:     false,
  1877  	}
  1878  	job.TaskGroups[0].Update = noCanaryUpdate
  1879  
  1880  	// Create 5 existing allocations
  1881  	var allocs []*structs.Allocation
  1882  	for i := 0; i < 5; i++ {
  1883  		alloc := mock.Alloc()
  1884  		alloc.Job = job
  1885  		alloc.JobID = job.ID
  1886  		alloc.NodeID = uuid.Generate()
  1887  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  1888  		allocs = append(allocs, alloc)
  1889  		alloc.ClientStatus = structs.AllocClientStatusRunning
  1890  	}
  1891  
  1892  	// Mark one as failed
  1893  	allocs[0].ClientStatus = structs.AllocClientStatusFailed
  1894  
  1895  	// Mark one of them as already rescheduled once
  1896  	allocs[0].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
  1897  		{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
  1898  			PrevAllocID: uuid.Generate(),
  1899  			PrevNodeID:  uuid.Generate(),
  1900  		},
  1901  	}}
  1902  	// Set fail time to 5 seconds ago and eval ID
  1903  	evalID := uuid.Generate()
  1904  	allocs[1].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
  1905  		StartedAt:  now.Add(-1 * time.Hour),
  1906  		FinishedAt: now.Add(-5 * time.Second)}}
  1907  	allocs[1].ClientStatus = structs.AllocClientStatusFailed
  1908  	allocs[1].FollowupEvalID = evalID
  1909  
  1910  	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, evalID)
  1911  	reconciler.now = now.Add(-30 * time.Second)
  1912  	r := reconciler.Compute()
  1913  
  1914  	// Verify that no follow up evals were created
  1915  	evals := r.desiredFollowupEvals[tgName]
  1916  	require.Nil(evals)
  1917  
  1918  	// Verify that one rescheduled alloc was placed
  1919  	assertResults(t, r, &resultExpectation{
  1920  		createDeployment:  nil,
  1921  		deploymentUpdates: nil,
  1922  		place:             1,
  1923  		inplace:           0,
  1924  		stop:              0,
  1925  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  1926  			job.TaskGroups[0].Name: {
  1927  				Place:  1,
  1928  				Ignore: 4,
  1929  			},
  1930  		},
  1931  	})
  1932  
  1933  	// Rescheduled allocs should have previous allocs
  1934  	assertNamesHaveIndexes(t, intRange(1, 1), placeResultsToNames(r.place))
  1935  	assertPlaceResultsHavePreviousAllocs(t, 1, r.place)
  1936  	assertPlacementsAreRescheduled(t, 1, r.place)
  1937  }
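
        // Here the reconciler's clock is set 30s behind the failure time, which
        // would normally defer the reschedule to a follow up eval. It proceeds
        // immediately because the alloc's FollowupEvalID matches the eval being
        // processed, i.e. this evaluation *is* the follow up. A sketch of the
        // combined condition, as an assumption about the implementation:
        //
        //	rescheduleNow := alloc.FollowupEvalID == evalID || !eligibleAt.After(reconcilerNow)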
  1938  
  1939  // Tests rescheduling failed service allocations when there are canaries
  1940  func TestReconciler_RescheduleNow_Service_WithCanaries(t *testing.T) {
  1941  	require := require.New(t)
  1942  
  1943  	// Set desired 5
  1944  	job := mock.Job()
  1945  	job.TaskGroups[0].Count = 5
  1946  	tgName := job.TaskGroups[0].Name
  1947  	now := time.Now()
  1948  
  1949  	// Set up reschedule policy and update stanza
  1950  	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
  1951  		Attempts:      1,
  1952  		Interval:      24 * time.Hour,
  1953  		Delay:         5 * time.Second,
  1954  		DelayFunction: "",
  1955  		MaxDelay:      1 * time.Hour,
  1956  		Unlimited:     false,
  1957  	}
  1958  	job.TaskGroups[0].Update = canaryUpdate
  1959  
  1960  	job2 := job.Copy()
  1961  	job2.Version++
  1962  
  1963  	d := structs.NewDeployment(job2)
  1964  	d.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion
  1965  	s := &structs.DeploymentState{
  1966  		DesiredCanaries: 2,
  1967  		DesiredTotal:    5,
  1968  	}
  1969  	d.TaskGroups[job.TaskGroups[0].Name] = s
  1970  
  1971  	// Create 5 existing allocations
  1972  	var allocs []*structs.Allocation
  1973  	for i := 0; i < 5; i++ {
  1974  		alloc := mock.Alloc()
  1975  		alloc.Job = job
  1976  		alloc.JobID = job.ID
  1977  		alloc.NodeID = uuid.Generate()
  1978  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  1979  		allocs = append(allocs, alloc)
  1980  		alloc.ClientStatus = structs.AllocClientStatusRunning
  1981  	}
  1982  
  1983  	// Mark three as failed
  1984  	allocs[0].ClientStatus = structs.AllocClientStatusFailed
  1985  
  1986  	// Mark one of them as already rescheduled once
  1987  	allocs[0].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
  1988  		{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
  1989  			PrevAllocID: uuid.Generate(),
  1990  			PrevNodeID:  uuid.Generate(),
  1991  		},
  1992  	}}
  1993  	allocs[1].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
  1994  		StartedAt:  now.Add(-1 * time.Hour),
  1995  		FinishedAt: now.Add(-10 * time.Second)}}
  1996  	allocs[1].ClientStatus = structs.AllocClientStatusFailed
  1997  
  1998  	// Mark one more as failed (the third)
  1999  	allocs[4].ClientStatus = structs.AllocClientStatusFailed
  2000  
  2001  	// Create 2 canary allocations
  2002  	for i := 0; i < 2; i++ {
  2003  		alloc := mock.Alloc()
  2004  		alloc.Job = job
  2005  		alloc.JobID = job.ID
  2006  		alloc.NodeID = uuid.Generate()
  2007  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2008  		alloc.ClientStatus = structs.AllocClientStatusRunning
  2009  		alloc.DeploymentID = d.ID
  2010  		alloc.DeploymentStatus = &structs.AllocDeploymentStatus{
  2011  			Canary:  true,
  2012  			Healthy: helper.BoolToPtr(false),
  2013  		}
  2014  		s.PlacedCanaries = append(s.PlacedCanaries, alloc.ID)
  2015  		allocs = append(allocs, alloc)
  2016  	}
  2017  
  2018  	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job2, d, allocs, nil, "")
  2019  	r := reconciler.Compute()
  2020  
  2021  	// Verify that no follow up evals were created
  2022  	evals := r.desiredFollowupEvals[tgName]
  2023  	require.Nil(evals)
  2024  
  2025  	// Verify that two rescheduled allocs were placed
  2026  	assertResults(t, r, &resultExpectation{
  2027  		createDeployment:  nil,
  2028  		deploymentUpdates: nil,
  2029  		place:             2,
  2030  		inplace:           0,
  2031  		stop:              0,
  2032  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  2033  			job.TaskGroups[0].Name: {
  2034  				Place:  2,
  2035  				Ignore: 5,
  2036  			},
  2037  		},
  2038  	})
  2039  
  2040  	// Rescheduled allocs should have previous allocs
  2041  	assertNamesHaveIndexes(t, intRange(1, 1, 4, 4), placeResultsToNames(r.place))
  2042  	assertPlaceResultsHavePreviousAllocs(t, 2, r.place)
  2043  	assertPlacementsAreRescheduled(t, 2, r.place)
  2044  }
  2045  
  2046  // Tests rescheduling failed canary service allocations
  2047  func TestReconciler_RescheduleNow_Service_Canaries(t *testing.T) {
  2048  	require := require.New(t)
  2049  
  2050  	// Set desired 5
  2051  	job := mock.Job()
  2052  	job.TaskGroups[0].Count = 5
  2053  	tgName := job.TaskGroups[0].Name
  2054  	now := time.Now()
  2055  
  2056  	// Set up reschedule policy and update stanza
  2057  	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
  2058  		Delay:         5 * time.Second,
  2059  		DelayFunction: "constant",
  2060  		MaxDelay:      1 * time.Hour,
  2061  		Unlimited:     true,
  2062  	}
  2063  	job.TaskGroups[0].Update = canaryUpdate
  2064  
  2065  	job2 := job.Copy()
  2066  	job2.Version++
  2067  
  2068  	d := structs.NewDeployment(job2)
  2069  	d.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion
  2070  	s := &structs.DeploymentState{
  2071  		DesiredCanaries: 2,
  2072  		DesiredTotal:    5,
  2073  	}
  2074  	d.TaskGroups[job.TaskGroups[0].Name] = s
  2075  
  2076  	// Create 5 existing allocations
  2077  	var allocs []*structs.Allocation
  2078  	for i := 0; i < 5; i++ {
  2079  		alloc := mock.Alloc()
  2080  		alloc.Job = job
  2081  		alloc.JobID = job.ID
  2082  		alloc.NodeID = uuid.Generate()
  2083  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2084  		allocs = append(allocs, alloc)
  2085  		alloc.ClientStatus = structs.AllocClientStatusRunning
  2086  	}
  2087  
  2088  	// Create 2 running canary allocations (not yet marked healthy)
  2089  	for i := 0; i < 2; i++ {
  2090  		alloc := mock.Alloc()
  2091  		alloc.Job = job
  2092  		alloc.JobID = job.ID
  2093  		alloc.NodeID = uuid.Generate()
  2094  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2095  		alloc.ClientStatus = structs.AllocClientStatusRunning
  2096  		alloc.DeploymentID = d.ID
  2097  		alloc.DeploymentStatus = &structs.AllocDeploymentStatus{
  2098  			Canary:  true,
  2099  			Healthy: helper.BoolToPtr(false),
  2100  		}
  2101  		s.PlacedCanaries = append(s.PlacedCanaries, alloc.ID)
  2102  		allocs = append(allocs, alloc)
  2103  	}
  2104  
  2105  	// Mark the canaries as failed
  2106  	allocs[5].ClientStatus = structs.AllocClientStatusFailed
  2107  	allocs[5].DesiredTransition.Reschedule = helper.BoolToPtr(true)
  2108  
  2109  	// Mark one of them as already rescheduled once
  2110  	allocs[5].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
  2111  		{RescheduleTime: now.Add(-1 * time.Hour).UTC().UnixNano(),
  2112  			PrevAllocID: uuid.Generate(),
  2113  			PrevNodeID:  uuid.Generate(),
  2114  		},
  2115  	}}
  2116  
  2117  	allocs[6].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
  2118  		StartedAt:  now.Add(-1 * time.Hour),
  2119  		FinishedAt: now.Add(-10 * time.Second)}}
  2120  	allocs[6].ClientStatus = structs.AllocClientStatusFailed
  2121  	allocs[6].DesiredTransition.Reschedule = helper.BoolToPtr(true)
  2122  
  2123  	// Create 4 unhealthy canary allocations that have already been replaced
  2124  	for i := 0; i < 4; i++ {
  2125  		alloc := mock.Alloc()
  2126  		alloc.Job = job
  2127  		alloc.JobID = job.ID
  2128  		alloc.NodeID = uuid.Generate()
  2129  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i%2))
  2130  		alloc.ClientStatus = structs.AllocClientStatusFailed
  2131  		alloc.DeploymentID = d.ID
  2132  		alloc.DeploymentStatus = &structs.AllocDeploymentStatus{
  2133  			Canary:  true,
  2134  			Healthy: helper.BoolToPtr(false),
  2135  		}
  2136  		s.PlacedCanaries = append(s.PlacedCanaries, alloc.ID)
  2137  		allocs = append(allocs, alloc)
  2138  	}
  2139  
  2140  	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job2, d, allocs, nil, "")
  2141  	reconciler.now = now
  2142  	r := reconciler.Compute()
  2143  
  2144  	// Verify that no follow up evals were created
  2145  	evals := r.desiredFollowupEvals[tgName]
  2146  	require.Nil(evals)
  2147  
  2148  	// Verify that both failed canaries were rescheduled and placed
  2149  	assertResults(t, r, &resultExpectation{
  2150  		createDeployment:  nil,
  2151  		deploymentUpdates: nil,
  2152  		place:             2,
  2153  		inplace:           0,
  2154  		stop:              0,
  2155  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  2156  			job.TaskGroups[0].Name: {
  2157  				Place:  2,
  2158  				Ignore: 9,
  2159  			},
  2160  		},
  2161  	})
  2162  
  2163  	// Rescheduled allocs should have previous allocs
  2164  	assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place))
  2165  	assertPlaceResultsHavePreviousAllocs(t, 2, r.place)
  2166  	assertPlacementsAreRescheduled(t, 2, r.place)
  2167  }
  2168  
  2169  // Tests rescheduling failed canary service allocations when one has reached its
  2170  // reschedule limit
  2171  func TestReconciler_RescheduleNow_Service_Canaries_Limit(t *testing.T) {
  2172  	require := require.New(t)
  2173  
  2174  	// Set desired 5
  2175  	job := mock.Job()
  2176  	job.TaskGroups[0].Count = 5
  2177  	tgName := job.TaskGroups[0].Name
  2178  	now := time.Now()
  2179  
  2180  	// Set up reschedule policy and update stanza
  2181  	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
  2182  		Attempts:      1,
  2183  		Interval:      24 * time.Hour,
  2184  		Delay:         5 * time.Second,
  2185  		DelayFunction: "",
  2186  		MaxDelay:      1 * time.Hour,
  2187  		Unlimited:     false,
  2188  	}
  2189  	job.TaskGroups[0].Update = canaryUpdate
  2190  
  2191  	job2 := job.Copy()
  2192  	job2.Version++
  2193  
  2194  	d := structs.NewDeployment(job2)
  2195  	d.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion
  2196  	s := &structs.DeploymentState{
  2197  		DesiredCanaries: 2,
  2198  		DesiredTotal:    5,
  2199  	}
  2200  	d.TaskGroups[job.TaskGroups[0].Name] = s
  2201  
  2202  	// Create 5 existing allocations
  2203  	var allocs []*structs.Allocation
  2204  	for i := 0; i < 5; i++ {
  2205  		alloc := mock.Alloc()
  2206  		alloc.Job = job
  2207  		alloc.JobID = job.ID
  2208  		alloc.NodeID = uuid.Generate()
  2209  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2210  		allocs = append(allocs, alloc)
  2211  		alloc.ClientStatus = structs.AllocClientStatusRunning
  2212  	}
  2213  
  2214  	// Create 2 running canary allocations (not yet marked healthy)
  2215  	for i := 0; i < 2; i++ {
  2216  		alloc := mock.Alloc()
  2217  		alloc.Job = job
  2218  		alloc.JobID = job.ID
  2219  		alloc.NodeID = uuid.Generate()
  2220  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2221  		alloc.ClientStatus = structs.AllocClientStatusRunning
  2222  		alloc.DeploymentID = d.ID
  2223  		alloc.DeploymentStatus = &structs.AllocDeploymentStatus{
  2224  			Canary:  true,
  2225  			Healthy: helper.BoolToPtr(false),
  2226  		}
  2227  		s.PlacedCanaries = append(s.PlacedCanaries, alloc.ID)
  2228  		allocs = append(allocs, alloc)
  2229  	}
  2230  
  2231  	// Mark the canaries as failed
  2232  	allocs[5].ClientStatus = structs.AllocClientStatusFailed
  2233  	allocs[5].DesiredTransition.Reschedule = helper.BoolToPtr(true)
  2234  
  2235  	// Mark one of them as already rescheduled once
  2236  	allocs[5].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
  2237  		{RescheduleTime: now.Add(-1 * time.Hour).UTC().UnixNano(),
  2238  			PrevAllocID: uuid.Generate(),
  2239  			PrevNodeID:  uuid.Generate(),
  2240  		},
  2241  	}}
  2242  
  2243  	allocs[6].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
  2244  		StartedAt:  now.Add(-1 * time.Hour),
  2245  		FinishedAt: now.Add(-10 * time.Second)}}
  2246  	allocs[6].ClientStatus = structs.AllocClientStatusFailed
  2247  	allocs[6].DesiredTransition.Reschedule = helper.BoolToPtr(true)
  2248  
  2249  	// Create 4 unhealthy canary allocations that have already been replaced
  2250  	for i := 0; i < 4; i++ {
  2251  		alloc := mock.Alloc()
  2252  		alloc.Job = job
  2253  		alloc.JobID = job.ID
  2254  		alloc.NodeID = uuid.Generate()
  2255  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i%2))
  2256  		alloc.ClientStatus = structs.AllocClientStatusFailed
  2257  		alloc.DeploymentID = d.ID
  2258  		alloc.DeploymentStatus = &structs.AllocDeploymentStatus{
  2259  			Canary:  true,
  2260  			Healthy: helper.BoolToPtr(false),
  2261  		}
  2262  		s.PlacedCanaries = append(s.PlacedCanaries, alloc.ID)
  2263  		allocs = append(allocs, alloc)
  2264  	}
  2265  
  2266  	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job2, d, allocs, nil, "")
  2267  	reconciler.now = now
  2268  	r := reconciler.Compute()
  2269  
  2270  	// Verify that no follow up evals were created
  2271  	evals := r.desiredFollowupEvals[tgName]
  2272  	require.Nil(evals)
  2273  
  2274  	// Verify that only the canary under its reschedule limit was replaced
  2275  	assertResults(t, r, &resultExpectation{
  2276  		createDeployment:  nil,
  2277  		deploymentUpdates: nil,
  2278  		place:             1,
  2279  		inplace:           0,
  2280  		stop:              0,
  2281  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  2282  			job.TaskGroups[0].Name: {
  2283  				Place:  1,
  2284  				Ignore: 10,
  2285  			},
  2286  		},
  2287  	})
  2288  
  2289  	// Rescheduled allocs should have previous allocs
  2290  	assertNamesHaveIndexes(t, intRange(1, 1), placeResultsToNames(r.place))
  2291  	assertPlaceResultsHavePreviousAllocs(t, 1, r.place)
  2292  	assertPlacementsAreRescheduled(t, 1, r.place)
  2293  }
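
        // Both canary tests above set DesiredTransition.Reschedule on the failed
        // canaries before reconciling, presumably mirroring what the deployment
        // watcher would do; without that flag the failed canaries would be
        // ignored rather than rescheduled (an assumption consistent with the
        // setups above):
        //
        //	alloc.DesiredTransition.Reschedule = helper.BoolToPtr(true)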
  2294  
  2295  // Tests that failed service allocations which were already rescheduled are not rescheduled again
  2296  func TestReconciler_DontReschedule_PreviouslyRescheduled(t *testing.T) {
  2297  	// Set desired 5
  2298  	job := mock.Job()
  2299  	job.TaskGroups[0].Count = 5
  2300  
  2301  	// Set up reschedule policy
  2302  	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{Attempts: 5, Interval: 24 * time.Hour}
  2303  
  2304  	// Create 7 existing allocations
  2305  	var allocs []*structs.Allocation
  2306  	for i := 0; i < 7; i++ {
  2307  		alloc := mock.Alloc()
  2308  		alloc.Job = job
  2309  		alloc.JobID = job.ID
  2310  		alloc.NodeID = uuid.Generate()
  2311  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2312  		allocs = append(allocs, alloc)
  2313  		alloc.ClientStatus = structs.AllocClientStatusRunning
  2314  	}
  2315  	// Mark two as failed and rescheduled
  2316  	allocs[0].ClientStatus = structs.AllocClientStatusFailed
  2317  	allocs[0].ID = allocs[1].ID
  2318  	allocs[1].ClientStatus = structs.AllocClientStatusFailed
  2319  	allocs[1].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
  2320  		{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
  2321  			PrevAllocID: uuid.Generate(),
  2322  			PrevNodeID:  uuid.Generate(),
  2323  		},
  2324  	}}
  2325  	allocs[1].NextAllocation = allocs[2].ID
  2326  
  2327  	// Mark one as desired state stop
  2328  	allocs[4].DesiredStatus = structs.AllocDesiredStatusStop
  2329  
  2330  	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
  2331  	r := reconciler.Compute()
  2332  
  2333  	// Should place 1 - a new placement to make up the desired count of 5;
  2334  	// previously rescheduled failed allocs are not rescheduled again
  2335  	assertResults(t, r, &resultExpectation{
  2336  		createDeployment:  nil,
  2337  		deploymentUpdates: nil,
  2338  		place:             1,
  2339  		inplace:           0,
  2340  		stop:              0,
  2341  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  2342  			job.TaskGroups[0].Name: {
  2343  				Place:  1,
  2344  				Ignore: 4,
  2345  			},
  2346  		},
  2347  	})
  2348  
  2349  	// name index 0 is used for the replacement because it's the lowest index not held by an active allocation
  2350  	assertNamesHaveIndexes(t, intRange(0, 0), placeResultsToNames(r.place))
  2351  }
  2352  
  2353  // Tests the reconciler cancels an old deployment when the job is being stopped
  2354  func TestReconciler_CancelDeployment_JobStop(t *testing.T) {
  2355  	job := mock.Job()
  2356  	job.Stop = true
  2357  
  2358  	running := structs.NewDeployment(job)
  2359  	failed := structs.NewDeployment(job)
  2360  	failed.Status = structs.DeploymentStatusFailed
  2361  
  2362  	cases := []struct {
  2363  		name             string
  2364  		job              *structs.Job
  2365  		jobID, taskGroup string
  2366  		deployment       *structs.Deployment
  2367  		cancel           bool
  2368  	}{
  2369  		{
  2370  			name:       "stopped job, running deployment",
  2371  			job:        job,
  2372  			jobID:      job.ID,
  2373  			taskGroup:  job.TaskGroups[0].Name,
  2374  			deployment: running,
  2375  			cancel:     true,
  2376  		},
  2377  		{
  2378  			name:       "nil job, running deployment",
  2379  			job:        nil,
  2380  			jobID:      "foo",
  2381  			taskGroup:  "bar",
  2382  			deployment: running,
  2383  			cancel:     true,
  2384  		},
  2385  		{
  2386  			name:       "stopped job, failed deployment",
  2387  			job:        job,
  2388  			jobID:      job.ID,
  2389  			taskGroup:  job.TaskGroups[0].Name,
  2390  			deployment: failed,
  2391  			cancel:     false,
  2392  		},
  2393  		{
  2394  			name:       "nil job, failed deployment",
  2395  			job:        nil,
  2396  			jobID:      "foo",
  2397  			taskGroup:  "bar",
  2398  			deployment: failed,
  2399  			cancel:     false,
  2400  		},
  2401  	}
  2402  
  2403  	for _, c := range cases {
  2404  		t.Run(c.name, func(t *testing.T) {
  2405  			// Create 10 allocations
  2406  			var allocs []*structs.Allocation
  2407  			for i := 0; i < 10; i++ {
  2408  				alloc := mock.Alloc()
  2409  				alloc.Job = c.job
  2410  				alloc.JobID = c.jobID
  2411  				alloc.NodeID = uuid.Generate()
  2412  				alloc.Name = structs.AllocName(c.jobID, c.taskGroup, uint(i))
  2413  				alloc.TaskGroup = c.taskGroup
  2414  				allocs = append(allocs, alloc)
  2415  			}
  2416  
  2417  			reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, c.jobID, c.job, c.deployment, allocs, nil, "")
  2418  			r := reconciler.Compute()
  2419  
  2420  			var updates []*structs.DeploymentStatusUpdate
  2421  			if c.cancel {
  2422  				updates = []*structs.DeploymentStatusUpdate{
  2423  					{
  2424  						DeploymentID:      c.deployment.ID,
  2425  						Status:            structs.DeploymentStatusCancelled,
  2426  						StatusDescription: structs.DeploymentStatusDescriptionStoppedJob,
  2427  					},
  2428  				}
  2429  			}
  2430  
  2431  			// Assert the correct results
  2432  			assertResults(t, r, &resultExpectation{
  2433  				createDeployment:  nil,
  2434  				deploymentUpdates: updates,
  2435  				place:             0,
  2436  				inplace:           0,
  2437  				stop:              10,
  2438  				desiredTGUpdates: map[string]*structs.DesiredUpdates{
  2439  					c.taskGroup: {
  2440  						Stop: 10,
  2441  					},
  2442  				},
  2443  			})
  2444  
  2445  			assertNamesHaveIndexes(t, intRange(0, 9), stopResultsToNames(r.stop))
  2446  		})
  2447  	}
  2448  }
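
        // The table above encodes the cancellation rule: a deployment is
        // cancelled when the job is stopped or missing, but only while the
        // deployment is still active. A sketch, assuming Deployment.Active
        // reports non-terminal status:
        //
        //	cancel := (job == nil || job.Stop) && deployment.Active()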
  2449  
  2450  // Tests the reconciler cancels an old deployment when the job is updated
  2451  func TestReconciler_CancelDeployment_JobUpdate(t *testing.T) {
  2452  	// Create a base job
  2453  	job := mock.Job()
  2454  
  2455  	// Create two deployments
  2456  	running := structs.NewDeployment(job)
  2457  	failed := structs.NewDeployment(job)
  2458  	failed.Status = structs.DeploymentStatusFailed
  2459  
  2460  	// Make the job newer than the deployment
  2461  	job.Version += 10
  2462  
  2463  	cases := []struct {
  2464  		name       string
  2465  		deployment *structs.Deployment
  2466  		cancel     bool
  2467  	}{
  2468  		{
  2469  			name:       "running deployment",
  2470  			deployment: running,
  2471  			cancel:     true,
  2472  		},
  2473  		{
  2474  			name:       "failed deployment",
  2475  			deployment: failed,
  2476  			cancel:     false,
  2477  		},
  2478  	}
  2479  
  2480  	for _, c := range cases {
  2481  		t.Run(c.name, func(t *testing.T) {
  2482  			// Create 10 allocations
  2483  			var allocs []*structs.Allocation
  2484  			for i := 0; i < 10; i++ {
  2485  				alloc := mock.Alloc()
  2486  				alloc.Job = job
  2487  				alloc.JobID = job.ID
  2488  				alloc.NodeID = uuid.Generate()
  2489  				alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2490  				alloc.TaskGroup = job.TaskGroups[0].Name
  2491  				allocs = append(allocs, alloc)
  2492  			}
  2493  
  2494  			reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, c.deployment, allocs, nil, "")
  2495  			r := reconciler.Compute()
  2496  
  2497  			var updates []*structs.DeploymentStatusUpdate
  2498  			if c.cancel {
  2499  				updates = []*structs.DeploymentStatusUpdate{
  2500  					{
  2501  						DeploymentID:      c.deployment.ID,
  2502  						Status:            structs.DeploymentStatusCancelled,
  2503  						StatusDescription: structs.DeploymentStatusDescriptionNewerJob,
  2504  					},
  2505  				}
  2506  			}
  2507  
  2508  			// Assert the correct results
  2509  			assertResults(t, r, &resultExpectation{
  2510  				createDeployment:  nil,
  2511  				deploymentUpdates: updates,
  2512  				place:             0,
  2513  				inplace:           0,
  2514  				stop:              0,
  2515  				desiredTGUpdates: map[string]*structs.DesiredUpdates{
  2516  					job.TaskGroups[0].Name: {
  2517  						Ignore: 10,
  2518  					},
  2519  				},
  2520  			})
  2521  		})
  2522  	}
  2523  }
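
        // This variant keys off job versions instead: an active deployment
        // created for an older job version is cancelled with the "newer job"
        // status description. Sketch, assuming the deployment records the job
        // version it was created against:
        //
        //	cancel := deployment.Active() && job.Version > deployment.JobVersion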
  2524  
  2525  // Tests the reconciler creates a deployment and does a rolling upgrade with
  2526  // destructive changes
  2527  func TestReconciler_CreateDeployment_RollingUpgrade_Destructive(t *testing.T) {
  2528  	job := mock.Job()
  2529  	job.TaskGroups[0].Update = noCanaryUpdate
  2530  
  2531  	// Create 10 allocations from the old job
  2532  	var allocs []*structs.Allocation
  2533  	for i := 0; i < 10; i++ {
  2534  		alloc := mock.Alloc()
  2535  		alloc.Job = job
  2536  		alloc.JobID = job.ID
  2537  		alloc.NodeID = uuid.Generate()
  2538  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2539  		alloc.TaskGroup = job.TaskGroups[0].Name
  2540  		allocs = append(allocs, alloc)
  2541  	}
  2542  
  2543  	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "")
  2544  	r := reconciler.Compute()
  2545  
  2546  	d := structs.NewDeployment(job)
  2547  	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  2548  		DesiredTotal: 10,
  2549  	}
  2550  
  2551  	// Assert the correct results
  2552  	assertResults(t, r, &resultExpectation{
  2553  		createDeployment:  d,
  2554  		deploymentUpdates: nil,
  2555  		destructive:       4,
  2556  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  2557  			job.TaskGroups[0].Name: {
  2558  				DestructiveUpdate: 4,
  2559  				Ignore:            6,
  2560  			},
  2561  		},
  2562  	})
  2563  
  2564  	assertNamesHaveIndexes(t, intRange(0, 3), destructiveResultsToNames(r.destructiveUpdate))
  2565  }
  2566  
  2567  // Tests the reconciler creates a deployment for inplace updates
  2568  func TestReconciler_CreateDeployment_RollingUpgrade_Inplace(t *testing.T) {
  2569  	jobOld := mock.Job()
  2570  	job := jobOld.Copy()
  2571  	job.Version++
  2572  	job.TaskGroups[0].Update = noCanaryUpdate
  2573  
  2574  	// Create 10 allocations from the old job
  2575  	var allocs []*structs.Allocation
  2576  	for i := 0; i < 10; i++ {
  2577  		alloc := mock.Alloc()
  2578  		alloc.Job = jobOld
  2579  		alloc.JobID = job.ID
  2580  		alloc.NodeID = uuid.Generate()
  2581  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2582  		alloc.TaskGroup = job.TaskGroups[0].Name
  2583  		allocs = append(allocs, alloc)
  2584  	}
  2585  
  2586  	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnInplace, false, job.ID, job, nil, allocs, nil, "")
  2587  	r := reconciler.Compute()
  2588  
  2589  	d := structs.NewDeployment(job)
  2590  	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  2591  		DesiredTotal: 10,
  2592  	}
  2593  
  2594  	// Assert the correct results
  2595  	assertResults(t, r, &resultExpectation{
  2596  		createDeployment:  d,
  2597  		deploymentUpdates: nil,
  2598  		place:             0,
  2599  		inplace:           10,
  2600  		stop:              0,
  2601  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  2602  			job.TaskGroups[0].Name: {
  2603  				InPlaceUpdate: 10,
  2604  			},
  2605  		},
  2606  	})
  2607  }
  2608  
  2609  // Tests the reconciler creates a deployment when the job has a newer create index
  2610  func TestReconciler_CreateDeployment_NewerCreateIndex(t *testing.T) {
  2611  	jobOld := mock.Job()
  2612  	job := jobOld.Copy()
  2613  	job.TaskGroups[0].Update = noCanaryUpdate
  2614  	job.CreateIndex += 100
  2615  
  2616  	// Create 5 allocations from the old job
  2617  	var allocs []*structs.Allocation
  2618  	for i := 0; i < 5; i++ {
  2619  		alloc := mock.Alloc()
  2620  		alloc.Job = jobOld
  2621  		alloc.JobID = jobOld.ID
  2622  		alloc.NodeID = uuid.Generate()
  2623  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2624  		alloc.TaskGroup = job.TaskGroups[0].Name
  2625  		allocs = append(allocs, alloc)
  2626  	}
  2627  
  2628  	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
  2629  	r := reconciler.Compute()
  2630  
  2631  	d := structs.NewDeployment(job)
  2632  	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  2633  		DesiredTotal: 5,
  2634  	}
  2635  
  2636  	// Assert the correct results
  2637  	assertResults(t, r, &resultExpectation{
  2638  		createDeployment:  d,
  2639  		deploymentUpdates: nil,
  2640  		place:             5,
  2641  		destructive:       0,
  2642  		inplace:           0,
  2643  		stop:              0,
  2644  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  2645  			job.TaskGroups[0].Name: {
  2646  				InPlaceUpdate:     0,
  2647  				Ignore:            5,
  2648  				Place:             5,
  2649  				DestructiveUpdate: 0,
  2650  			},
  2651  		},
  2652  	})
  2653  }
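
        // A bumped CreateIndex means the job was purged and re-registered, so
        // even though the specs match, the reconciler treats the existing allocs
        // as belonging to a different job: it places 5 fresh allocs under a new
        // deployment and leaves the old ones ignored. Sketch of the comparison,
        // as an assumption about the implementation:
        //
        //	treatAsNewJob := job.CreateIndex != alloc.Job.CreateIndex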
  2654  
  2655  // Tests the reconciler doesn't create a deployment if there are no changes
  2656  func TestReconciler_DontCreateDeployment_NoChanges(t *testing.T) {
  2657  	job := mock.Job()
  2658  	job.TaskGroups[0].Update = noCanaryUpdate
  2659  
  2660  	// Create 10 allocations from the job
  2661  	var allocs []*structs.Allocation
  2662  	for i := 0; i < 10; i++ {
  2663  		alloc := mock.Alloc()
  2664  		alloc.Job = job
  2665  		alloc.JobID = job.ID
  2666  		alloc.NodeID = uuid.Generate()
  2667  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2668  		alloc.TaskGroup = job.TaskGroups[0].Name
  2669  		allocs = append(allocs, alloc)
  2670  	}
  2671  
  2672  	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
  2673  	r := reconciler.Compute()
  2674  
  2675  	// Assert the correct results
  2676  	assertResults(t, r, &resultExpectation{
  2677  		createDeployment:  nil,
  2678  		deploymentUpdates: nil,
  2679  		place:             0,
  2680  		inplace:           0,
  2681  		stop:              0,
  2682  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  2683  			job.TaskGroups[0].Name: {
  2684  				DestructiveUpdate: 0,
  2685  				Ignore:            10,
  2686  			},
  2687  		},
  2688  	})
  2689  }
  2690  
  2691  // Tests the reconciler doesn't place any more canaries when the deployment is
  2692  // paused or failed
  2693  func TestReconciler_PausedOrFailedDeployment_NoMoreCanaries(t *testing.T) {
  2694  	job := mock.Job()
  2695  	job.TaskGroups[0].Update = canaryUpdate
  2696  
  2697  	cases := []struct {
  2698  		name             string
  2699  		deploymentStatus string
  2700  		stop             uint64
  2701  	}{
  2702  		{
  2703  			name:             "paused deployment",
  2704  			deploymentStatus: structs.DeploymentStatusPaused,
  2705  			stop:             0,
  2706  		},
  2707  		{
  2708  			name:             "failed deployment",
  2709  			deploymentStatus: structs.DeploymentStatusFailed,
  2710  			stop:             1,
  2711  		},
  2712  	}
  2713  
  2714  	for _, c := range cases {
  2715  		t.Run(c.name, func(t *testing.T) {
  2716  			// Create a deployment that is paused/failed and has placed some canaries
  2717  			d := structs.NewDeployment(job)
  2718  			d.Status = c.deploymentStatus
  2719  			d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  2720  				Promoted:        false,
  2721  				DesiredCanaries: 2,
  2722  				DesiredTotal:    10,
  2723  				PlacedAllocs:    1,
  2724  			}
  2725  
  2726  			// Create 10 allocations for the original job
  2727  			var allocs []*structs.Allocation
  2728  			for i := 0; i < 10; i++ {
  2729  				alloc := mock.Alloc()
  2730  				alloc.Job = job
  2731  				alloc.JobID = job.ID
  2732  				alloc.NodeID = uuid.Generate()
  2733  				alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2734  				alloc.TaskGroup = job.TaskGroups[0].Name
  2735  				allocs = append(allocs, alloc)
  2736  			}
  2737  
  2738  			// Create one canary
  2739  			canary := mock.Alloc()
  2740  			canary.Job = job
  2741  			canary.JobID = job.ID
  2742  			canary.NodeID = uuid.Generate()
  2743  			canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, 0)
  2744  			canary.TaskGroup = job.TaskGroups[0].Name
  2745  			canary.DeploymentID = d.ID
  2746  			allocs = append(allocs, canary)
  2747  			d.TaskGroups[canary.TaskGroup].PlacedCanaries = []string{canary.ID}
  2748  
  2749  			mockUpdateFn := allocUpdateFnMock(map[string]allocUpdateType{canary.ID: allocUpdateFnIgnore}, allocUpdateFnDestructive)
  2750  			reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, d, allocs, nil, "")
  2751  			r := reconciler.Compute()
  2752  
  2753  			// Assert the correct results
  2754  			assertResults(t, r, &resultExpectation{
  2755  				createDeployment:  nil,
  2756  				deploymentUpdates: nil,
  2757  				place:             0,
  2758  				inplace:           0,
  2759  				stop:              int(c.stop),
  2760  				desiredTGUpdates: map[string]*structs.DesiredUpdates{
  2761  					job.TaskGroups[0].Name: {
  2762  						Ignore: 11 - c.stop,
  2763  						Stop:   c.stop,
  2764  					},
  2765  				},
  2766  			})
  2767  		})
  2768  	}
  2769  }
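
        // Note the asymmetry in the expectations above: a failed deployment
        // stops the canary it placed (stop: 1) while a paused deployment leaves
        // it running, consistent with pausing being resumable while a failed
        // deployment abandons the new version.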
  2770  
  2771  // Tests the reconciler doesn't place any more allocs when the deployment is
  2772  // paused or failed
  2773  func TestReconciler_PausedOrFailedDeployment_NoMorePlacements(t *testing.T) {
  2774  	job := mock.Job()
  2775  	job.TaskGroups[0].Update = noCanaryUpdate
  2776  	job.TaskGroups[0].Count = 15
  2777  
  2778  	cases := []struct {
  2779  		name             string
  2780  		deploymentStatus string
  2781  	}{
  2782  		{
  2783  			name:             "paused deployment",
  2784  			deploymentStatus: structs.DeploymentStatusPaused,
  2785  		},
  2786  		{
  2787  			name:             "failed deployment",
  2788  			deploymentStatus: structs.DeploymentStatusFailed,
  2789  		},
  2790  	}
  2791  
  2792  	for _, c := range cases {
  2793  		t.Run(c.name, func(t *testing.T) {
  2794  			// Create a deployment that is paused/failed and has placed some allocs
  2795  			d := structs.NewDeployment(job)
  2796  			d.Status = c.deploymentStatus
  2797  			d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  2798  				Promoted:     false,
  2799  				DesiredTotal: 15,
  2800  				PlacedAllocs: 10,
  2801  			}
  2802  
  2803  			// Create 10 allocations for the new job
  2804  			var allocs []*structs.Allocation
  2805  			for i := 0; i < 10; i++ {
  2806  				alloc := mock.Alloc()
  2807  				alloc.Job = job
  2808  				alloc.JobID = job.ID
  2809  				alloc.NodeID = uuid.Generate()
  2810  				alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2811  				alloc.TaskGroup = job.TaskGroups[0].Name
  2812  				allocs = append(allocs, alloc)
  2813  			}
  2814  
  2815  			reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, d, allocs, nil, "")
  2816  			r := reconciler.Compute()
  2817  
  2818  			// Assert the correct results
  2819  			assertResults(t, r, &resultExpectation{
  2820  				createDeployment:  nil,
  2821  				deploymentUpdates: nil,
  2822  				place:             0,
  2823  				inplace:           0,
  2824  				stop:              0,
  2825  				desiredTGUpdates: map[string]*structs.DesiredUpdates{
  2826  					job.TaskGroups[0].Name: {
  2827  						Ignore: 10,
  2828  					},
  2829  				},
  2830  			})
  2831  		})
  2832  	}
  2833  }
  2834  
  2835  // Tests the reconciler doesn't do any more destructive updates when the
  2836  // deployment is paused or failed
  2837  func TestReconciler_PausedOrFailedDeployment_NoMoreDestructiveUpdates(t *testing.T) {
  2838  	job := mock.Job()
  2839  	job.TaskGroups[0].Update = noCanaryUpdate
  2840  
  2841  	cases := []struct {
  2842  		name             string
  2843  		deploymentStatus string
  2844  	}{
  2845  		{
  2846  			name:             "paused deployment",
  2847  			deploymentStatus: structs.DeploymentStatusPaused,
  2848  		},
  2849  		{
  2850  			name:             "failed deployment",
  2851  			deploymentStatus: structs.DeploymentStatusFailed,
  2852  		},
  2853  	}
  2854  
  2855  	for _, c := range cases {
  2856  		t.Run(c.name, func(t *testing.T) {
  2857  			// Create a deployment that is paused/failed and has placed one alloc
  2858  			d := structs.NewDeployment(job)
  2859  			d.Status = c.deploymentStatus
  2860  			d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  2861  				Promoted:     false,
  2862  				DesiredTotal: 10,
  2863  				PlacedAllocs: 1,
  2864  			}
  2865  
  2866  			// Create 9 allocations for the original job
  2867  			var allocs []*structs.Allocation
  2868  			for i := 1; i < 10; i++ {
  2869  				alloc := mock.Alloc()
  2870  				alloc.Job = job
  2871  				alloc.JobID = job.ID
  2872  				alloc.NodeID = uuid.Generate()
  2873  				alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2874  				alloc.TaskGroup = job.TaskGroups[0].Name
  2875  				allocs = append(allocs, alloc)
  2876  			}
  2877  
  2878  			// Create one for the new job
  2879  			newAlloc := mock.Alloc()
  2880  			newAlloc.Job = job
  2881  			newAlloc.JobID = job.ID
  2882  			newAlloc.NodeID = uuid.Generate()
  2883  			newAlloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, 0)
  2884  			newAlloc.TaskGroup = job.TaskGroups[0].Name
  2885  			newAlloc.DeploymentID = d.ID
  2886  			allocs = append(allocs, newAlloc)
  2887  
  2888  			mockUpdateFn := allocUpdateFnMock(map[string]allocUpdateType{newAlloc.ID: allocUpdateFnIgnore}, allocUpdateFnDestructive)
  2889  			reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, d, allocs, nil, "")
  2890  			r := reconciler.Compute()
  2891  
  2892  			// Assert the correct results
  2893  			assertResults(t, r, &resultExpectation{
  2894  				createDeployment:  nil,
  2895  				deploymentUpdates: nil,
  2896  				place:             0,
  2897  				inplace:           0,
  2898  				stop:              0,
  2899  				desiredTGUpdates: map[string]*structs.DesiredUpdates{
  2900  					job.TaskGroups[0].Name: {
  2901  						Ignore: 10,
  2902  					},
  2903  				},
  2904  			})
  2905  		})
  2906  	}
  2907  }
  2908  
  2909  // Tests the reconciler handles migrating a canary correctly on a draining node
  2910  func TestReconciler_DrainNode_Canary(t *testing.T) {
  2911  	job := mock.Job()
  2912  	job.TaskGroups[0].Update = canaryUpdate
  2913  
  2914  	// Create a running deployment that has placed some canaries
  2915  	d := structs.NewDeployment(job)
  2916  	s := &structs.DeploymentState{
  2917  		Promoted:        false,
  2918  		DesiredTotal:    10,
  2919  		DesiredCanaries: 2,
  2920  		PlacedAllocs:    2,
  2921  	}
  2922  	d.TaskGroups[job.TaskGroups[0].Name] = s
  2923  
  2924  	// Create 10 allocations from the old job
  2925  	var allocs []*structs.Allocation
  2926  	for i := 0; i < 10; i++ {
  2927  		alloc := mock.Alloc()
  2928  		alloc.Job = job
  2929  		alloc.JobID = job.ID
  2930  		alloc.NodeID = uuid.Generate()
  2931  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2932  		alloc.TaskGroup = job.TaskGroups[0].Name
  2933  		allocs = append(allocs, alloc)
  2934  	}
  2935  
  2936  	// Create two canaries for the new job
  2937  	handled := make(map[string]allocUpdateType)
  2938  	for i := 0; i < 2; i++ {
  2939  		// Create one canary
  2940  		canary := mock.Alloc()
  2941  		canary.Job = job
  2942  		canary.JobID = job.ID
  2943  		canary.NodeID = uuid.Generate()
  2944  		canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2945  		canary.TaskGroup = job.TaskGroups[0].Name
  2946  		canary.DeploymentID = d.ID
  2947  		s.PlacedCanaries = append(s.PlacedCanaries, canary.ID)
  2948  		allocs = append(allocs, canary)
  2949  		handled[canary.ID] = allocUpdateFnIgnore
  2950  	}
  2951  
  2952  	// Build a map of tainted nodes that contains the last canary
  2953  	tainted := make(map[string]*structs.Node, 1)
  2954  	n := mock.Node()
  2955  	n.ID = allocs[11].NodeID
  2956  	allocs[11].DesiredTransition.Migrate = helper.BoolToPtr(true)
  2957  	n.Drain = true
  2958  	tainted[n.ID] = n
  2959  
  2960  	mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive)
  2961  	reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, d, allocs, tainted, "")
  2962  	r := reconciler.Compute()
  2963  
  2964  	// Assert the correct results
  2965  	assertResults(t, r, &resultExpectation{
  2966  		createDeployment:  nil,
  2967  		deploymentUpdates: nil,
  2968  		place:             1,
  2969  		inplace:           0,
  2970  		stop:              1,
  2971  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  2972  			job.TaskGroups[0].Name: {
  2973  				Canary: 1,
  2974  				Ignore: 11,
  2975  			},
  2976  		},
  2977  	})
  2978  	assertNamesHaveIndexes(t, intRange(1, 1), stopResultsToNames(r.stop))
  2979  	assertNamesHaveIndexes(t, intRange(1, 1), placeResultsToNames(r.place))
  2980  }
  2981  
  2982  // Tests the reconciler handles replacing a canary correctly on a lost node
  2983  func TestReconciler_LostNode_Canary(t *testing.T) {
  2984  	job := mock.Job()
  2985  	job.TaskGroups[0].Update = canaryUpdate
  2986  
  2987  	// Create a running deployment that has placed some canaries
  2988  	d := structs.NewDeployment(job)
  2989  	s := &structs.DeploymentState{
  2990  		Promoted:        false,
  2991  		DesiredTotal:    10,
  2992  		DesiredCanaries: 2,
  2993  		PlacedAllocs:    2,
  2994  	}
  2995  	d.TaskGroups[job.TaskGroups[0].Name] = s
  2996  
  2997  	// Create 10 allocations from the old job
  2998  	var allocs []*structs.Allocation
  2999  	for i := 0; i < 10; i++ {
  3000  		alloc := mock.Alloc()
  3001  		alloc.Job = job
  3002  		alloc.JobID = job.ID
  3003  		alloc.NodeID = uuid.Generate()
  3004  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3005  		alloc.TaskGroup = job.TaskGroups[0].Name
  3006  		allocs = append(allocs, alloc)
  3007  	}
  3008  
  3009  	// Create two canaries for the new job
  3010  	handled := make(map[string]allocUpdateType)
  3011  	for i := 0; i < 2; i++ {
  3012  		// Create one canary
  3013  		canary := mock.Alloc()
  3014  		canary.Job = job
  3015  		canary.JobID = job.ID
  3016  		canary.NodeID = uuid.Generate()
  3017  		canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3018  		canary.TaskGroup = job.TaskGroups[0].Name
  3019  		s.PlacedCanaries = append(s.PlacedCanaries, canary.ID)
  3020  		canary.DeploymentID = d.ID
  3021  		allocs = append(allocs, canary)
  3022  		handled[canary.ID] = allocUpdateFnIgnore
  3023  	}
  3024  
  3025  	// Build a map of tainted nodes that contains the last canary's node
  3026  	tainted := make(map[string]*structs.Node, 1)
  3027  	n := mock.Node()
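        	// allocs[11] is the second canary; marking its node down makes the canary
        	// lost, so no DesiredTransition.Migrate is needed (unlike the drain case)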
  3028  	n.ID = allocs[11].NodeID
  3029  	n.Status = structs.NodeStatusDown
  3030  	tainted[n.ID] = n
  3031  
  3032  	mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive)
  3033  	reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, d, allocs, tainted, "")
  3034  	r := reconciler.Compute()
  3035  
  3036  	// Assert the correct results
  3037  	assertResults(t, r, &resultExpectation{
  3038  		createDeployment:  nil,
  3039  		deploymentUpdates: nil,
  3040  		place:             1,
  3041  		inplace:           0,
  3042  		stop:              1,
  3043  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  3044  			job.TaskGroups[0].Name: {
  3045  				Canary: 1,
  3046  				Ignore: 11,
  3047  			},
  3048  		},
  3049  	})
  3050  
  3051  	assertNamesHaveIndexes(t, intRange(1, 1), stopResultsToNames(r.stop))
  3052  	assertNamesHaveIndexes(t, intRange(1, 1), placeResultsToNames(r.place))
  3053  }
  3054  
  3055  // Tests the reconciler handles stopping canaries from older deployments
  3056  func TestReconciler_StopOldCanaries(t *testing.T) {
  3057  	job := mock.Job()
  3058  	job.TaskGroups[0].Update = canaryUpdate
  3059  
  3060  	// Create an old deployment that has placed some canaries
  3061  	d := structs.NewDeployment(job)
  3062  	s := &structs.DeploymentState{
  3063  		Promoted:        false,
  3064  		DesiredTotal:    10,
  3065  		DesiredCanaries: 2,
  3066  		PlacedAllocs:    2,
  3067  	}
  3068  	d.TaskGroups[job.TaskGroups[0].Name] = s
  3069  
  3070  	// Update the job version so the deployment above belongs to an older job
  3071  	job.Version += 10
  3072  
  3073  	// Create 10 allocations from the old job
  3074  	var allocs []*structs.Allocation
  3075  	for i := 0; i < 10; i++ {
  3076  		alloc := mock.Alloc()
  3077  		alloc.Job = job
  3078  		alloc.JobID = job.ID
  3079  		alloc.NodeID = uuid.Generate()
  3080  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3081  		alloc.TaskGroup = job.TaskGroups[0].Name
  3082  		allocs = append(allocs, alloc)
  3083  	}
  3084  
  3085  	// Create canaries
  3086  	for i := 0; i < 2; i++ {
  3087  		// Create one canary
  3088  		canary := mock.Alloc()
  3089  		canary.Job = job
  3090  		canary.JobID = job.ID
  3091  		canary.NodeID = uuid.Generate()
  3092  		canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3093  		canary.TaskGroup = job.TaskGroups[0].Name
  3094  		s.PlacedCanaries = append(s.PlacedCanaries, canary.ID)
  3095  		canary.DeploymentID = d.ID
  3096  		allocs = append(allocs, canary)
  3097  	}
  3098  
  3099  	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, d, allocs, nil, "")
  3100  	r := reconciler.Compute()
  3101  
  3102  	newD := structs.NewDeployment(job)
  3103  	newD.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion
  3104  	newD.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  3105  		DesiredCanaries: 2,
  3106  		DesiredTotal:    10,
  3107  	}
  3108  
  3109  	// Assert the correct results
  3110  	assertResults(t, r, &resultExpectation{
  3111  		createDeployment: newD,
  3112  		deploymentUpdates: []*structs.DeploymentStatusUpdate{
  3113  			{
  3114  				DeploymentID:      d.ID,
  3115  				Status:            structs.DeploymentStatusCancelled,
  3116  				StatusDescription: structs.DeploymentStatusDescriptionNewerJob,
  3117  			},
  3118  		},
  3119  		place:   2,
  3120  		inplace: 0,
  3121  		stop:    2,
  3122  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  3123  			job.TaskGroups[0].Name: {
  3124  				Canary: 2,
  3125  				Stop:   2,
  3126  				Ignore: 10,
  3127  			},
  3128  		},
  3129  	})
  3130  
  3131  	assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop))
  3132  	assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place))
  3133  }
  3134  
  3135  // Tests the reconciler creates new canaries when the job changes
  3136  func TestReconciler_NewCanaries(t *testing.T) {
  3137  	job := mock.Job()
  3138  	job.TaskGroups[0].Update = canaryUpdate
  3139  
  3140  	// Create 10 allocations from the old job
  3141  	var allocs []*structs.Allocation
  3142  	for i := 0; i < 10; i++ {
  3143  		alloc := mock.Alloc()
  3144  		alloc.Job = job
  3145  		alloc.JobID = job.ID
  3146  		alloc.NodeID = uuid.Generate()
  3147  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3148  		alloc.TaskGroup = job.TaskGroups[0].Name
  3149  		allocs = append(allocs, alloc)
  3150  	}
  3151  
  3152  	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "")
  3153  	r := reconciler.Compute()
  3154  
  3155  	newD := structs.NewDeployment(job)
  3156  	newD.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion
  3157  	newD.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  3158  		DesiredCanaries: 2,
  3159  		DesiredTotal:    10,
  3160  	}
  3161  
  3162  	// Assert the correct results
  3163  	assertResults(t, r, &resultExpectation{
  3164  		createDeployment:  newD,
  3165  		deploymentUpdates: nil,
  3166  		place:             2,
  3167  		inplace:           0,
  3168  		stop:              0,
  3169  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  3170  			job.TaskGroups[0].Name: {
  3171  				Canary: 2,
  3172  				Ignore: 10,
  3173  			},
  3174  		},
  3175  	})
  3176  
  3177  	assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place))
  3178  }
  3179  
  3180  // Tests the reconciler creates new canaries when the job changes and the
  3181  // canary count is greater than the task group count
  3182  func TestReconciler_NewCanaries_CountGreater(t *testing.T) {
  3183  	job := mock.Job()
  3184  	job.TaskGroups[0].Count = 3
  3185  	job.TaskGroups[0].Update = canaryUpdate.Copy()
  3186  	job.TaskGroups[0].Update.Canary = 7
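        	// The canary count (7) exceeds the group count (3), so all 7 canaries
        	// should still be placed while the 3 existing allocs are ignored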
  3187  
  3188  	// Create 3 allocations from the old job
  3189  	var allocs []*structs.Allocation
  3190  	for i := 0; i < 3; i++ {
  3191  		alloc := mock.Alloc()
  3192  		alloc.Job = job
  3193  		alloc.JobID = job.ID
  3194  		alloc.NodeID = uuid.Generate()
  3195  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3196  		alloc.TaskGroup = job.TaskGroups[0].Name
  3197  		allocs = append(allocs, alloc)
  3198  	}
  3199  
  3200  	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "")
  3201  	r := reconciler.Compute()
  3202  
  3203  	newD := structs.NewDeployment(job)
  3204  	newD.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion
  3205  	state := &structs.DeploymentState{
  3206  		DesiredCanaries: 7,
  3207  		DesiredTotal:    3,
  3208  	}
  3209  	newD.TaskGroups[job.TaskGroups[0].Name] = state
  3210  
  3211  	// Assert the correct results
  3212  	assertResults(t, r, &resultExpectation{
  3213  		createDeployment:  newD,
  3214  		deploymentUpdates: nil,
  3215  		place:             7,
  3216  		inplace:           0,
  3217  		stop:              0,
  3218  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  3219  			job.TaskGroups[0].Name: {
  3220  				Canary: 7,
  3221  				Ignore: 3,
  3222  			},
  3223  		},
  3224  	})
  3225  
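        	// The canary names reuse indexes 0-2, matching the existing allocs, and
        	// extend to 3-6 for the surplus canaries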
  3226  	assertNamesHaveIndexes(t, intRange(0, 2, 3, 6), placeResultsToNames(r.place))
  3227  }
  3228  
  3229  // Tests the reconciler creates new canaries when the job changes for multiple
  3230  // task groups
  3231  func TestReconciler_NewCanaries_MultiTG(t *testing.T) {
  3232  	job := mock.Job()
  3233  	job.TaskGroups[0].Update = canaryUpdate
  3234  	job.TaskGroups = append(job.TaskGroups, job.TaskGroups[0].Copy())
  3235  	job.TaskGroups[0].Name = "tg2"
  3236  
  3237  	// Create 10 allocations from the old job for each tg
  3238  	var allocs []*structs.Allocation
  3239  	for j := 0; j < 2; j++ {
  3240  		for i := 0; i < 10; i++ {
  3241  			alloc := mock.Alloc()
  3242  			alloc.Job = job
  3243  			alloc.JobID = job.ID
  3244  			alloc.NodeID = uuid.Generate()
  3245  			alloc.Name = structs.AllocName(job.ID, job.TaskGroups[j].Name, uint(i))
  3246  			alloc.TaskGroup = job.TaskGroups[j].Name
  3247  			allocs = append(allocs, alloc)
  3248  		}
  3249  	}
  3250  
  3251  	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "")
  3252  	r := reconciler.Compute()
  3253  
  3254  	newD := structs.NewDeployment(job)
  3255  	newD.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion
  3256  	state := &structs.DeploymentState{
  3257  		DesiredCanaries: 2,
  3258  		DesiredTotal:    10,
  3259  	}
  3260  	newD.TaskGroups[job.TaskGroups[0].Name] = state
  3261  	newD.TaskGroups[job.TaskGroups[1].Name] = state.Copy()
  3262  
  3263  	// Assert the correct results
  3264  	assertResults(t, r, &resultExpectation{
  3265  		createDeployment:  newD,
  3266  		deploymentUpdates: nil,
  3267  		place:             4,
  3268  		inplace:           0,
  3269  		stop:              0,
  3270  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  3271  			job.TaskGroups[0].Name: {
  3272  				Canary: 2,
  3273  				Ignore: 10,
  3274  			},
  3275  			job.TaskGroups[1].Name: {
  3276  				Canary: 2,
  3277  				Ignore: 10,
  3278  			},
  3279  		},
  3280  	})
  3281  
  3282  	assertNamesHaveIndexes(t, intRange(0, 1, 0, 1), placeResultsToNames(r.place))
  3283  }
  3284  
  3285  // Tests the reconciler creates new canaries when the job changes and scales up
  3286  func TestReconciler_NewCanaries_ScaleUp(t *testing.T) {
  3287  	// Scale the job up to 15
  3288  	job := mock.Job()
  3289  	job.TaskGroups[0].Update = canaryUpdate
  3290  	job.TaskGroups[0].Count = 15
  3291  
  3292  	// Create 10 allocations from the old job
  3293  	var allocs []*structs.Allocation
  3294  	for i := 0; i < 10; i++ {
  3295  		alloc := mock.Alloc()
  3296  		alloc.Job = job
  3297  		alloc.JobID = job.ID
  3298  		alloc.NodeID = uuid.Generate()
  3299  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3300  		alloc.TaskGroup = job.TaskGroups[0].Name
  3301  		allocs = append(allocs, alloc)
  3302  	}
  3303  
  3304  	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "")
  3305  	r := reconciler.Compute()
  3306  
  3307  	newD := structs.NewDeployment(job)
  3308  	newD.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion
  3309  	newD.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  3310  		DesiredCanaries: 2,
  3311  		DesiredTotal:    15,
  3312  	}
  3313  
  3314  	// Assert the correct results
  3315  	assertResults(t, r, &resultExpectation{
  3316  		createDeployment:  newD,
  3317  		deploymentUpdates: nil,
  3318  		place:             2,
  3319  		inplace:           0,
  3320  		stop:              0,
  3321  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  3322  			job.TaskGroups[0].Name: {
  3323  				Canary: 2,
  3324  				Ignore: 10,
  3325  			},
  3326  		},
  3327  	})
  3328  
  3329  	assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place))
  3330  }
  3331  
  3332  // Tests the reconciler creates new canaries when the job changes and scales
  3333  // down
  3334  func TestReconciler_NewCanaries_ScaleDown(t *testing.T) {
  3335  	// Scale the job down to 5
  3336  	job := mock.Job()
  3337  	job.TaskGroups[0].Update = canaryUpdate
  3338  	job.TaskGroups[0].Count = 5
  3339  
  3340  	// Create 10 allocations from the old job
  3341  	var allocs []*structs.Allocation
  3342  	for i := 0; i < 10; i++ {
  3343  		alloc := mock.Alloc()
  3344  		alloc.Job = job
  3345  		alloc.JobID = job.ID
  3346  		alloc.NodeID = uuid.Generate()
  3347  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3348  		alloc.TaskGroup = job.TaskGroups[0].Name
  3349  		allocs = append(allocs, alloc)
  3350  	}
  3351  
  3352  	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "")
  3353  	r := reconciler.Compute()
  3354  
  3355  	newD := structs.NewDeployment(job)
  3356  	newD.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion
  3357  	newD.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  3358  		DesiredCanaries: 2,
  3359  		DesiredTotal:    5,
  3360  	}
  3361  
  3362  	// Assert the correct results
  3363  	assertResults(t, r, &resultExpectation{
  3364  		createDeployment:  newD,
  3365  		deploymentUpdates: nil,
  3366  		place:             2,
  3367  		inplace:           0,
  3368  		stop:              5,
  3369  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  3370  			job.TaskGroups[0].Name: {
  3371  				Canary: 2,
  3372  				Stop:   5,
  3373  				Ignore: 5,
  3374  			},
  3375  		},
  3376  	})
  3377  
  3378  	assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place))
  3379  	assertNamesHaveIndexes(t, intRange(5, 9), stopResultsToNames(r.stop))
  3380  }
  3381  
  3382  // Tests the reconciler handles filling the names of partially placed canaries
  3383  func TestReconciler_NewCanaries_FillNames(t *testing.T) {
  3384  	job := mock.Job()
  3385  	job.TaskGroups[0].Update = &structs.UpdateStrategy{
  3386  		Canary:          4,
  3387  		MaxParallel:     2,
  3388  		HealthCheck:     structs.UpdateStrategyHealthCheck_Checks,
  3389  		MinHealthyTime:  10 * time.Second,
  3390  		HealthyDeadline: 10 * time.Minute,
  3391  	}
  3392  
  3393  	// Create an existing deployment that has placed some canaries
  3394  	d := structs.NewDeployment(job)
  3395  	s := &structs.DeploymentState{
  3396  		Promoted:        false,
  3397  		DesiredTotal:    10,
  3398  		DesiredCanaries: 4,
  3399  		PlacedAllocs:    2,
  3400  	}
  3401  	d.TaskGroups[job.TaskGroups[0].Name] = s
  3402  
  3403  	// Create 10 allocations from the old job
  3404  	var allocs []*structs.Allocation
  3405  	for i := 0; i < 10; i++ {
  3406  		alloc := mock.Alloc()
  3407  		alloc.Job = job
  3408  		alloc.JobID = job.ID
  3409  		alloc.NodeID = uuid.Generate()
  3410  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3411  		alloc.TaskGroup = job.TaskGroups[0].Name
  3412  		allocs = append(allocs, alloc)
  3413  	}
  3414  
  3415  	// Create 2 canaries, picking the name indexes at the two ends (0 and 3) so the reconciler must fill in indexes 1 and 2
  3416  	for i := 0; i < 4; i += 3 {
  3417  		// Create one canary
  3418  		canary := mock.Alloc()
  3419  		canary.Job = job
  3420  		canary.JobID = job.ID
  3421  		canary.NodeID = uuid.Generate()
  3422  		canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3423  		canary.TaskGroup = job.TaskGroups[0].Name
  3424  		s.PlacedCanaries = append(s.PlacedCanaries, canary.ID)
  3425  		canary.DeploymentID = d.ID
  3426  		allocs = append(allocs, canary)
  3427  	}
  3428  
  3429  	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, d, allocs, nil, "")
  3430  	r := reconciler.Compute()
  3431  
  3432  	// Assert the correct results
  3433  	assertResults(t, r, &resultExpectation{
  3434  		createDeployment:  nil,
  3435  		deploymentUpdates: nil,
  3436  		place:             2,
  3437  		inplace:           0,
  3438  		stop:              0,
  3439  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  3440  			job.TaskGroups[0].Name: {
  3441  				Canary: 2,
  3442  				Ignore: 12,
  3443  			},
  3444  		},
  3445  	})
  3446  
  3447  	assertNamesHaveIndexes(t, intRange(1, 2), placeResultsToNames(r.place))
  3448  }
  3449  
  3450  // Tests the reconciler handles canary promotion by unblocking max_parallel
  3451  func TestReconciler_PromoteCanaries_Unblock(t *testing.T) {
  3452  	job := mock.Job()
  3453  	job.TaskGroups[0].Update = canaryUpdate
  3454  
  3455  	// Create an existing deployment that has placed some canaries and mark them
  3456  	// promoted
  3457  	d := structs.NewDeployment(job)
  3458  	s := &structs.DeploymentState{
  3459  		Promoted:        true,
  3460  		DesiredTotal:    10,
  3461  		DesiredCanaries: 2,
  3462  		PlacedAllocs:    2,
  3463  	}
  3464  	d.TaskGroups[job.TaskGroups[0].Name] = s
  3465  
  3466  	// Create 10 allocations from the old job
  3467  	var allocs []*structs.Allocation
  3468  	for i := 0; i < 10; i++ {
  3469  		alloc := mock.Alloc()
  3470  		alloc.Job = job
  3471  		alloc.JobID = job.ID
  3472  		alloc.NodeID = uuid.Generate()
  3473  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3474  		alloc.TaskGroup = job.TaskGroups[0].Name
  3475  		allocs = append(allocs, alloc)
  3476  	}
  3477  
  3478  	// Create the canaries
  3479  	handled := make(map[string]allocUpdateType)
  3480  	for i := 0; i < 2; i++ {
  3481  		// Create one canary
  3482  		canary := mock.Alloc()
  3483  		canary.Job = job
  3484  		canary.JobID = job.ID
  3485  		canary.NodeID = uuid.Generate()
  3486  		canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3487  		canary.TaskGroup = job.TaskGroups[0].Name
  3488  		s.PlacedCanaries = append(s.PlacedCanaries, canary.ID)
  3489  		canary.DeploymentID = d.ID
  3490  		canary.DeploymentStatus = &structs.AllocDeploymentStatus{
  3491  			Healthy: helper.BoolToPtr(true),
  3492  		}
  3493  		allocs = append(allocs, canary)
  3494  		handled[canary.ID] = allocUpdateFnIgnore
  3495  	}
  3496  
  3497  	mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive)
  3498  	reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, d, allocs, nil, "")
  3499  	r := reconciler.Compute()
  3500  
  3501  	// Assert the correct results
  3502  	assertResults(t, r, &resultExpectation{
  3503  		createDeployment:  nil,
  3504  		deploymentUpdates: nil,
  3505  		destructive:       2,
  3506  		stop:              2,
  3507  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  3508  			job.TaskGroups[0].Name: {
  3509  				Stop:              2,
  3510  				DestructiveUpdate: 2,
  3511  				Ignore:            8,
  3512  			},
  3513  		},
  3514  	})
  3515  
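        	// The promoted canaries hold names 0-1, so the old allocs with those names
        	// stop; MaxParallel (2) then allows two destructive updates (names 2-3)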
  3516  	assertNoCanariesStopped(t, d, r.stop)
  3517  	assertNamesHaveIndexes(t, intRange(2, 3), destructiveResultsToNames(r.destructiveUpdate))
  3518  	assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop))
  3519  }
  3520  
  3521  // Tests the reconciler correctly handles canary promotion when the canary
  3522  // count equals the total count
  3523  func TestReconciler_PromoteCanaries_CanariesEqualCount(t *testing.T) {
  3524  	job := mock.Job()
  3525  	job.TaskGroups[0].Update = canaryUpdate
  3526  	job.TaskGroups[0].Count = 2
  3527  
  3528  	// Create an existing deployment that has placed some canaries and mark them
  3529  	// promoted
  3530  	d := structs.NewDeployment(job)
  3531  	s := &structs.DeploymentState{
  3532  		Promoted:        true,
  3533  		DesiredTotal:    2,
  3534  		DesiredCanaries: 2,
  3535  		PlacedAllocs:    2,
  3536  		HealthyAllocs:   2,
  3537  	}
  3538  	d.TaskGroups[job.TaskGroups[0].Name] = s
  3539  
  3540  	// Create 2 allocations from the old job
  3541  	var allocs []*structs.Allocation
  3542  	for i := 0; i < 2; i++ {
  3543  		alloc := mock.Alloc()
  3544  		alloc.Job = job
  3545  		alloc.JobID = job.ID
  3546  		alloc.NodeID = uuid.Generate()
  3547  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3548  		alloc.TaskGroup = job.TaskGroups[0].Name
  3549  		allocs = append(allocs, alloc)
  3550  	}
  3551  
  3552  	// Create the canaries
  3553  	handled := make(map[string]allocUpdateType)
  3554  	for i := 0; i < 2; i++ {
  3555  		// Create one canary
  3556  		canary := mock.Alloc()
  3557  		canary.Job = job
  3558  		canary.JobID = job.ID
  3559  		canary.NodeID = uuid.Generate()
  3560  		canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3561  		canary.TaskGroup = job.TaskGroups[0].Name
  3562  		s.PlacedCanaries = append(s.PlacedCanaries, canary.ID)
  3563  		canary.DeploymentID = d.ID
  3564  		canary.DeploymentStatus = &structs.AllocDeploymentStatus{
  3565  			Healthy: helper.BoolToPtr(true),
  3566  		}
  3567  		allocs = append(allocs, canary)
  3568  		handled[canary.ID] = allocUpdateFnIgnore
  3569  	}
  3570  
  3571  	mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive)
  3572  	reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, d, allocs, nil, "")
  3573  	r := reconciler.Compute()
  3574  
  3575  	updates := []*structs.DeploymentStatusUpdate{
  3576  		{
  3577  			DeploymentID:      d.ID,
  3578  			Status:            structs.DeploymentStatusSuccessful,
  3579  			StatusDescription: structs.DeploymentStatusDescriptionSuccessful,
  3580  		},
  3581  	}
  3582  
  3583  	// Assert the correct results
  3584  	assertResults(t, r, &resultExpectation{
  3585  		createDeployment:  nil,
  3586  		deploymentUpdates: updates,
  3587  		place:             0,
  3588  		inplace:           0,
  3589  		stop:              2,
  3590  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  3591  			job.TaskGroups[0].Name: {
  3592  				Stop:   2,
  3593  				Ignore: 2,
  3594  			},
  3595  		},
  3596  	})
  3597  
  3598  	assertNoCanariesStopped(t, d, r.stop)
  3599  	assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop))
  3600  }
  3601  
  3602  // Tests the reconciler checks the health of placed allocs to determine the
  3603  // limit
  3604  func TestReconciler_DeploymentLimit_HealthAccounting(t *testing.T) {
  3605  	job := mock.Job()
  3606  	job.TaskGroups[0].Update = noCanaryUpdate
  3607  
  3608  	cases := []struct {
  3609  		healthy int
  3610  	}{
  3611  		{
  3612  			healthy: 0,
  3613  		},
  3614  		{
  3615  			healthy: 1,
  3616  		},
  3617  		{
  3618  			healthy: 2,
  3619  		},
  3620  		{
  3621  			healthy: 3,
  3622  		},
  3623  		{
  3624  			healthy: 4,
  3625  		},
  3626  	}
  3627  
  3628  	for _, c := range cases {
  3629  		t.Run(fmt.Sprintf("%d healthy", c.healthy), func(t *testing.T) {
  3630  			// Create an existing deployment that has placed some of the new allocs
  3631  			// and mark it promoted
  3632  			d := structs.NewDeployment(job)
  3633  			d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  3634  				Promoted:     true,
  3635  				DesiredTotal: 10,
  3636  				PlacedAllocs: 4,
  3637  			}
  3638  
  3639  			// Create 6 allocations from the old job
  3640  			var allocs []*structs.Allocation
  3641  			for i := 4; i < 10; i++ {
  3642  				alloc := mock.Alloc()
  3643  				alloc.Job = job
  3644  				alloc.JobID = job.ID
  3645  				alloc.NodeID = uuid.Generate()
  3646  				alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3647  				alloc.TaskGroup = job.TaskGroups[0].Name
  3648  				allocs = append(allocs, alloc)
  3649  			}
  3650  
  3651  			// Create the new allocs
  3652  			handled := make(map[string]allocUpdateType)
  3653  			for i := 0; i < 4; i++ {
  3654  				new := mock.Alloc()
  3655  				new.Job = job
  3656  				new.JobID = job.ID
  3657  				new.NodeID = uuid.Generate()
  3658  				new.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3659  				new.TaskGroup = job.TaskGroups[0].Name
  3660  				new.DeploymentID = d.ID
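        				// Only the first c.healthy placements report healthy; each healthy alloc
        				// frees one of the MaxParallel (4) in-flight slots, which is why exactly
        				// c.healthy destructive updates are expected below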
  3661  				if i < c.healthy {
  3662  					new.DeploymentStatus = &structs.AllocDeploymentStatus{
  3663  						Healthy: helper.BoolToPtr(true),
  3664  					}
  3665  				}
  3666  				allocs = append(allocs, new)
  3667  				handled[new.ID] = allocUpdateFnIgnore
  3668  			}
  3669  
  3670  			mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive)
  3671  			reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, d, allocs, nil, "")
  3672  			r := reconciler.Compute()
  3673  
  3674  			// Assert the correct results
  3675  			assertResults(t, r, &resultExpectation{
  3676  				createDeployment:  nil,
  3677  				deploymentUpdates: nil,
  3678  				destructive:       c.healthy,
  3679  				desiredTGUpdates: map[string]*structs.DesiredUpdates{
  3680  					job.TaskGroups[0].Name: {
  3681  						DestructiveUpdate: uint64(c.healthy),
  3682  						Ignore:            uint64(10 - c.healthy),
  3683  					},
  3684  				},
  3685  			})
  3686  
  3687  			if c.healthy != 0 {
  3688  				assertNamesHaveIndexes(t, intRange(4, 3+c.healthy), destructiveResultsToNames(r.destructiveUpdate))
  3689  			}
  3690  		})
  3691  	}
  3692  }
  3693  
  3694  // Tests the reconciler handles an alloc on a tainted node during a rolling
  3695  // update
  3696  func TestReconciler_TaintedNode_RollingUpgrade(t *testing.T) {
  3697  	job := mock.Job()
  3698  	job.TaskGroups[0].Update = noCanaryUpdate
  3699  
  3700  	// Create an existing deployment that has some placed allocs
  3701  	d := structs.NewDeployment(job)
  3702  	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  3703  		Promoted:     true,
  3704  		DesiredTotal: 10,
  3705  		PlacedAllocs: 7,
  3706  	}
  3707  
  3708  	// Create 2 allocations from the old job
  3709  	var allocs []*structs.Allocation
  3710  	for i := 8; i < 10; i++ {
  3711  		alloc := mock.Alloc()
  3712  		alloc.Job = job
  3713  		alloc.JobID = job.ID
  3714  		alloc.NodeID = uuid.Generate()
  3715  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3716  		alloc.TaskGroup = job.TaskGroups[0].Name
  3717  		allocs = append(allocs, alloc)
  3718  	}
  3719  
  3720  	// Create the healthy replacements
  3721  	handled := make(map[string]allocUpdateType)
  3722  	for i := 0; i < 8; i++ {
  3723  		new := mock.Alloc()
  3724  		new.Job = job
  3725  		new.JobID = job.ID
  3726  		new.NodeID = uuid.Generate()
  3727  		new.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3728  		new.TaskGroup = job.TaskGroups[0].Name
  3729  		new.DeploymentID = d.ID
  3730  		new.DeploymentStatus = &structs.AllocDeploymentStatus{
  3731  			Healthy: helper.BoolToPtr(true),
  3732  		}
  3733  		allocs = append(allocs, new)
  3734  		handled[new.ID] = allocUpdateFnIgnore
  3735  	}
  3736  
  3737  	// Build a map of tainted nodes
  3738  	tainted := make(map[string]*structs.Node, 3)
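        	// allocs[0-1] are the remaining old allocs (names 8-9); allocs[2+i] picks
        	// healthy replacements with names 0-2: one on a down node, two draining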
  3739  	for i := 0; i < 3; i++ {
  3740  		n := mock.Node()
  3741  		n.ID = allocs[2+i].NodeID
  3742  		if i == 0 {
  3743  			n.Status = structs.NodeStatusDown
  3744  		} else {
  3745  			n.Drain = true
  3746  			allocs[2+i].DesiredTransition.Migrate = helper.BoolToPtr(true)
  3747  		}
  3748  		tainted[n.ID] = n
  3749  	}
  3750  
  3751  	mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive)
  3752  	reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, d, allocs, tainted, "")
  3753  	r := reconciler.Compute()
  3754  
  3755  	// Assert the correct results
  3756  	assertResults(t, r, &resultExpectation{
  3757  		createDeployment:  nil,
  3758  		deploymentUpdates: nil,
  3759  		place:             3,
  3760  		destructive:       2,
  3761  		stop:              3,
  3762  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  3763  			job.TaskGroups[0].Name: {
  3764  				Place:             1, // Place the lost
  3765  				Stop:              1, // Stop the lost
  3766  				Migrate:           2, // Migrate the tainted
  3767  				DestructiveUpdate: 2,
  3768  				Ignore:            5,
  3769  			},
  3770  		},
  3771  	})
  3772  
  3773  	assertNamesHaveIndexes(t, intRange(8, 9), destructiveResultsToNames(r.destructiveUpdate))
  3774  	assertNamesHaveIndexes(t, intRange(0, 2), placeResultsToNames(r.place))
  3775  	assertNamesHaveIndexes(t, intRange(0, 2), stopResultsToNames(r.stop))
  3776  }
  3777  
  3778  // Tests the reconciler handles a failed deployment with allocs on tainted
  3779  // nodes
  3780  func TestReconciler_FailedDeployment_TaintedNodes(t *testing.T) {
  3781  	job := mock.Job()
  3782  	job.TaskGroups[0].Update = noCanaryUpdate
  3783  
  3784  	// Create an existing failed deployment that has some placed allocs
  3785  	d := structs.NewDeployment(job)
  3786  	d.Status = structs.DeploymentStatusFailed
  3787  	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  3788  		Promoted:     true,
  3789  		DesiredTotal: 10,
  3790  		PlacedAllocs: 4,
  3791  	}
  3792  
  3793  	// Create 6 allocations from the old job
  3794  	var allocs []*structs.Allocation
  3795  	for i := 4; i < 10; i++ {
  3796  		alloc := mock.Alloc()
  3797  		alloc.Job = job
  3798  		alloc.JobID = job.ID
  3799  		alloc.NodeID = uuid.Generate()
  3800  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3801  		alloc.TaskGroup = job.TaskGroups[0].Name
  3802  		allocs = append(allocs, alloc)
  3803  	}
  3804  
  3805  	// Create the healthy replacements
  3806  	handled := make(map[string]allocUpdateType)
  3807  	for i := 0; i < 4; i++ {
  3808  		new := mock.Alloc()
  3809  		new.Job = job
  3810  		new.JobID = job.ID
  3811  		new.NodeID = uuid.Generate()
  3812  		new.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3813  		new.TaskGroup = job.TaskGroups[0].Name
  3814  		new.DeploymentID = d.ID
  3815  		new.DeploymentStatus = &structs.AllocDeploymentStatus{
  3816  			Healthy: helper.BoolToPtr(true),
  3817  		}
  3818  		allocs = append(allocs, new)
  3819  		handled[new.ID] = allocUpdateFnIgnore
  3820  	}
  3821  
  3822  	// Build a map of tainted nodes
  3823  	tainted := make(map[string]*structs.Node, 2)
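        	// allocs[6+i] selects healthy replacements with names 0 and 1: one lands
        	// on a down node (lost), the other on a draining node (migrated)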
  3824  	for i := 0; i < 2; i++ {
  3825  		n := mock.Node()
  3826  		n.ID = allocs[6+i].NodeID
  3827  		if i == 0 {
  3828  			n.Status = structs.NodeStatusDown
  3829  		} else {
  3830  			n.Drain = true
  3831  			allocs[6+i].DesiredTransition.Migrate = helper.BoolToPtr(true)
  3832  		}
  3833  		tainted[n.ID] = n
  3834  	}
  3835  
  3836  	mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive)
  3837  	reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, d, allocs, tainted, "")
  3838  	r := reconciler.Compute()
  3839  
  3840  	// Assert the correct results
  3841  	assertResults(t, r, &resultExpectation{
  3842  		createDeployment:  nil,
  3843  		deploymentUpdates: nil,
  3844  		place:             2,
  3845  		inplace:           0,
  3846  		stop:              2,
  3847  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  3848  			job.TaskGroups[0].Name: {
  3849  				Place:   1,
  3850  				Migrate: 1,
  3851  				Stop:    1,
  3852  				Ignore:  8,
  3853  			},
  3854  		},
  3855  	})
  3856  
  3857  	assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place))
  3858  	assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop))
  3859  }
  3860  
  3861  // Tests the reconciler handles a run after a deployment has completed
  3862  // successfully.
  3863  func TestReconciler_CompleteDeployment(t *testing.T) {
  3864  	job := mock.Job()
  3865  	job.TaskGroups[0].Update = canaryUpdate
  3866  
  3867  	d := structs.NewDeployment(job)
  3868  	d.Status = structs.DeploymentStatusSuccessful
  3869  	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  3870  		Promoted:        true,
  3871  		DesiredTotal:    10,
  3872  		DesiredCanaries: 2,
  3873  		PlacedAllocs:    10,
  3874  		HealthyAllocs:   10,
  3875  	}
  3876  
  3877  	// Create allocations that are part of the successful deployment
  3878  	var allocs []*structs.Allocation
  3879  	for i := 0; i < 10; i++ {
  3880  		alloc := mock.Alloc()
  3881  		alloc.Job = job
  3882  		alloc.JobID = job.ID
  3883  		alloc.NodeID = uuid.Generate()
  3884  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3885  		alloc.TaskGroup = job.TaskGroups[0].Name
  3886  		alloc.DeploymentID = d.ID
  3887  		alloc.DeploymentStatus = &structs.AllocDeploymentStatus{
  3888  			Healthy: helper.BoolToPtr(true),
  3889  		}
  3890  		allocs = append(allocs, alloc)
  3891  	}
  3892  
  3893  	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, d, allocs, nil, "")
  3894  	r := reconciler.Compute()
  3895  
  3896  	// Assert the correct results
  3897  	assertResults(t, r, &resultExpectation{
  3898  		createDeployment:  nil,
  3899  		deploymentUpdates: nil,
  3900  		place:             0,
  3901  		inplace:           0,
  3902  		stop:              0,
  3903  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  3904  			job.TaskGroups[0].Name: {
  3905  				Ignore: 10,
  3906  			},
  3907  		},
  3908  	})
  3909  }
  3910  
  3911  // Tests that the reconciler marks a deployment as complete once there is
  3912  // nothing left to place even if there are failed allocations that are part of
  3913  // the deployment.
  3914  func TestReconciler_MarkDeploymentComplete_FailedAllocations(t *testing.T) {
  3915  	job := mock.Job()
  3916  	job.TaskGroups[0].Update = noCanaryUpdate
  3917  
  3918  	d := structs.NewDeployment(job)
  3919  	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  3920  		DesiredTotal:  10,
  3921  		PlacedAllocs:  20,
  3922  		HealthyAllocs: 10,
  3923  	}
  3924  
  3925  	// Create 10 healthy allocs and 10 allocs that are failed
  3926  	var allocs []*structs.Allocation
  3927  	for i := 0; i < 20; i++ {
  3928  		alloc := mock.Alloc()
  3929  		alloc.Job = job
  3930  		alloc.JobID = job.ID
  3931  		alloc.NodeID = uuid.Generate()
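        		// i%10 reuses the same ten names, pairing each failed alloc with the
        		// healthy alloc that replaced it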
  3932  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i%10))
  3933  		alloc.TaskGroup = job.TaskGroups[0].Name
  3934  		alloc.DeploymentID = d.ID
  3935  		alloc.DeploymentStatus = &structs.AllocDeploymentStatus{}
  3936  		if i < 10 {
  3937  			alloc.ClientStatus = structs.AllocClientStatusRunning
  3938  			alloc.DeploymentStatus.Healthy = helper.BoolToPtr(true)
  3939  		} else {
  3940  			alloc.DesiredStatus = structs.AllocDesiredStatusStop
  3941  			alloc.ClientStatus = structs.AllocClientStatusFailed
  3942  			alloc.DeploymentStatus.Healthy = helper.BoolToPtr(false)
  3943  		}
  3944  
  3945  		allocs = append(allocs, alloc)
  3946  	}
  3947  
  3948  	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, d, allocs, nil, "")
  3949  	r := reconciler.Compute()
  3950  
  3951  	updates := []*structs.DeploymentStatusUpdate{
  3952  		{
  3953  			DeploymentID:      d.ID,
  3954  			Status:            structs.DeploymentStatusSuccessful,
  3955  			StatusDescription: structs.DeploymentStatusDescriptionSuccessful,
  3956  		},
  3957  	}
  3958  
  3959  	// Assert the correct results
  3960  	assertResults(t, r, &resultExpectation{
  3961  		createDeployment:  nil,
  3962  		deploymentUpdates: updates,
  3963  		place:             0,
  3964  		inplace:           0,
  3965  		stop:              0,
  3966  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  3967  			job.TaskGroups[0].Name: {
  3968  				Ignore: 10,
  3969  			},
  3970  		},
  3971  	})
  3972  }
  3973  
  3974  // Test that a failed deployment cancels non-promoted canaries
  3975  func TestReconciler_FailedDeployment_CancelCanaries(t *testing.T) {
  3976  	// Create a job with two task groups
  3977  	job := mock.Job()
  3978  	job.TaskGroups[0].Update = canaryUpdate
  3979  	job.TaskGroups = append(job.TaskGroups, job.TaskGroups[0].Copy())
  3980  	job.TaskGroups[1].Name = "two"
  3981  
  3982  	// Create an existing failed deployment that has promoted one task group
  3983  	d := structs.NewDeployment(job)
  3984  	d.Status = structs.DeploymentStatusFailed
  3985  	s0 := &structs.DeploymentState{
  3986  		Promoted:        true,
  3987  		DesiredTotal:    10,
  3988  		DesiredCanaries: 2,
  3989  		PlacedAllocs:    4,
  3990  	}
  3991  	s1 := &structs.DeploymentState{
  3992  		Promoted:        false,
  3993  		DesiredTotal:    10,
  3994  		DesiredCanaries: 2,
  3995  		PlacedAllocs:    2,
  3996  	}
  3997  	d.TaskGroups[job.TaskGroups[0].Name] = s0
  3998  	d.TaskGroups[job.TaskGroups[1].Name] = s1
  3999  
  4000  	// Create the allocs for both groups: healthy replacements plus the remaining old-job allocs
  4001  	var allocs []*structs.Allocation
  4002  	handled := make(map[string]allocUpdateType)
  4003  	for _, group := range []int{0, 1} {
  4004  		replacements := 4
  4005  		state := s0
  4006  		if group == 1 {
  4007  			replacements = 2
  4008  			state = s1
  4009  		}
  4010  
  4011  		// Create the healthy replacements
  4012  		for i := 0; i < replacements; i++ {
  4013  			new := mock.Alloc()
  4014  			new.Job = job
  4015  			new.JobID = job.ID
  4016  			new.NodeID = uuid.Generate()
  4017  			new.Name = structs.AllocName(job.ID, job.TaskGroups[group].Name, uint(i))
  4018  			new.TaskGroup = job.TaskGroups[group].Name
  4019  			new.DeploymentID = d.ID
  4020  			new.DeploymentStatus = &structs.AllocDeploymentStatus{
  4021  				Healthy: helper.BoolToPtr(true),
  4022  			}
  4023  			allocs = append(allocs, new)
  4024  			handled[new.ID] = allocUpdateFnIgnore
  4025  
  4026  			// Add the alloc to the canary list
  4027  			if i < 2 {
  4028  				state.PlacedCanaries = append(state.PlacedCanaries, new.ID)
  4029  			}
  4030  		}
  4031  		for i := replacements; i < 10; i++ {
  4032  			alloc := mock.Alloc()
  4033  			alloc.Job = job
  4034  			alloc.JobID = job.ID
  4035  			alloc.NodeID = uuid.Generate()
  4036  			alloc.Name = structs.AllocName(job.ID, job.TaskGroups[group].Name, uint(i))
  4037  			alloc.TaskGroup = job.TaskGroups[group].Name
  4038  			allocs = append(allocs, alloc)
  4039  		}
  4040  	}
  4041  
  4042  	mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive)
  4043  	reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, d, allocs, nil, "")
  4044  	r := reconciler.Compute()
  4045  
  4046  	// Assert the correct results
  4047  	assertResults(t, r, &resultExpectation{
  4048  		createDeployment:  nil,
  4049  		deploymentUpdates: nil,
  4050  		place:             0,
  4051  		inplace:           0,
  4052  		stop:              2,
  4053  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  4054  			job.TaskGroups[0].Name: {
  4055  				Ignore: 10,
  4056  			},
  4057  			job.TaskGroups[1].Name: {
  4058  				Stop:   2,
  4059  				Ignore: 8,
  4060  			},
  4061  		},
  4062  	})
  4063  
  4064  	assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop))
  4065  }
  4066  
  4067  // Tests that updating the job after a failed deployment creates a new deployment
  4068  func TestReconciler_FailedDeployment_NewJob(t *testing.T) {
  4069  	job := mock.Job()
  4070  	job.TaskGroups[0].Update = noCanaryUpdate
  4071  
  4072  	// Create an existing failed deployment that has some placed allocs
  4073  	d := structs.NewDeployment(job)
  4074  	d.Status = structs.DeploymentStatusFailed
  4075  	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  4076  		Promoted:     true,
  4077  		DesiredTotal: 10,
  4078  		PlacedAllocs: 4,
  4079  	}
  4080  
  4081  	// Create 6 allocations from the old job
  4082  	var allocs []*structs.Allocation
  4083  	for i := 4; i < 10; i++ {
  4084  		alloc := mock.Alloc()
  4085  		alloc.Job = job
  4086  		alloc.JobID = job.ID
  4087  		alloc.NodeID = uuid.Generate()
  4088  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  4089  		alloc.TaskGroup = job.TaskGroups[0].Name
  4090  		allocs = append(allocs, alloc)
  4091  	}
  4092  
  4093  	// Create the healthy replacements
  4094  	for i := 0; i < 4; i++ {
  4095  		new := mock.Alloc()
  4096  		new.Job = job
  4097  		new.JobID = job.ID
  4098  		new.NodeID = uuid.Generate()
  4099  		new.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  4100  		new.TaskGroup = job.TaskGroups[0].Name
  4101  		new.DeploymentID = d.ID
  4102  		new.DeploymentStatus = &structs.AllocDeploymentStatus{
  4103  			Healthy: helper.BoolToPtr(true),
  4104  		}
  4105  		allocs = append(allocs, new)
  4106  	}
  4107  
  4108  	// Bump the job version
  4109  	jobNew := job.Copy()
  4110  	jobNew.Version += 100
  4111  
  4112  	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, jobNew, d, allocs, nil, "")
  4113  	r := reconciler.Compute()
  4114  
  4115  	dnew := structs.NewDeployment(jobNew)
  4116  	dnew.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  4117  		DesiredTotal: 10,
  4118  	}
  4119  
  4120  	// Assert the correct results
  4121  	assertResults(t, r, &resultExpectation{
  4122  		createDeployment:  dnew,
  4123  		deploymentUpdates: nil,
  4124  		destructive:       4,
  4125  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  4126  			job.TaskGroups[0].Name: {
  4127  				DestructiveUpdate: 4,
  4128  				Ignore:            6,
  4129  			},
  4130  		},
  4131  	})
  4132  
  4133  	assertNamesHaveIndexes(t, intRange(0, 3), destructiveResultsToNames(r.destructiveUpdate))
  4134  }
  4135  
  4136  // Tests the reconciler marks a deployment as complete
  4137  func TestReconciler_MarkDeploymentComplete(t *testing.T) {
  4138  	job := mock.Job()
  4139  	job.TaskGroups[0].Update = noCanaryUpdate
  4140  
  4141  	d := structs.NewDeployment(job)
  4142  	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  4143  		Promoted:      true,
  4144  		DesiredTotal:  10,
  4145  		PlacedAllocs:  10,
  4146  		HealthyAllocs: 10,
  4147  	}
  4148  
  4149  	// Create healthy allocations that are part of the deployment
  4150  	var allocs []*structs.Allocation
  4151  	for i := 0; i < 10; i++ {
  4152  		alloc := mock.Alloc()
  4153  		alloc.Job = job
  4154  		alloc.JobID = job.ID
  4155  		alloc.NodeID = uuid.Generate()
  4156  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  4157  		alloc.TaskGroup = job.TaskGroups[0].Name
  4158  		alloc.DeploymentID = d.ID
  4159  		alloc.DeploymentStatus = &structs.AllocDeploymentStatus{
  4160  			Healthy: helper.BoolToPtr(true),
  4161  		}
  4162  		allocs = append(allocs, alloc)
  4163  	}
  4164  
  4165  	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, d, allocs, nil, "")
  4166  	r := reconciler.Compute()
  4167  
  4168  	updates := []*structs.DeploymentStatusUpdate{
  4169  		{
  4170  			DeploymentID:      d.ID,
  4171  			Status:            structs.DeploymentStatusSuccessful,
  4172  			StatusDescription: structs.DeploymentStatusDescriptionSuccessful,
  4173  		},
  4174  	}
  4175  
  4176  	// Assert the correct results
  4177  	assertResults(t, r, &resultExpectation{
  4178  		createDeployment:  nil,
  4179  		deploymentUpdates: updates,
  4180  		place:             0,
  4181  		inplace:           0,
  4182  		stop:              0,
  4183  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  4184  			job.TaskGroups[0].Name: {
  4185  				Ignore: 10,
  4186  			},
  4187  		},
  4188  	})
  4189  }
  4190  
  4191  // Tests the reconciler handles changing a job such that a deployment is created
  4192  // while doing a scale up but as the second eval.
  4193  func TestReconciler_JobChange_ScaleUp_SecondEval(t *testing.T) {
  4194  	// Scale the job up to 30
  4195  	job := mock.Job()
  4196  	job.TaskGroups[0].Update = noCanaryUpdate
  4197  	job.TaskGroups[0].Count = 30
  4198  
  4199  	// Create a deployment that has placed 20 of the new allocations
  4200  	d := structs.NewDeployment(job)
  4201  	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  4202  		Promoted:     false,
  4203  		DesiredTotal: 30,
  4204  		PlacedAllocs: 20,
  4205  	}
  4206  
  4207  	// Create 10 allocations from the old job
  4208  	var allocs []*structs.Allocation
  4209  	for i := 0; i < 10; i++ {
  4210  		alloc := mock.Alloc()
  4211  		alloc.Job = job
  4212  		alloc.JobID = job.ID
  4213  		alloc.NodeID = uuid.Generate()
  4214  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  4215  		alloc.TaskGroup = job.TaskGroups[0].Name
  4216  		allocs = append(allocs, alloc)
  4217  	}
  4218  
  4219  	// Create 20 from new job
  4220  	handled := make(map[string]allocUpdateType)
  4221  	for i := 10; i < 30; i++ {
  4222  		alloc := mock.Alloc()
  4223  		alloc.Job = job
  4224  		alloc.JobID = job.ID
  4225  		alloc.DeploymentID = d.ID
  4226  		alloc.NodeID = uuid.Generate()
  4227  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  4228  		alloc.TaskGroup = job.TaskGroups[0].Name
  4229  		allocs = append(allocs, alloc)
  4230  		handled[alloc.ID] = allocUpdateFnIgnore
  4231  	}
  4232  
  4233  	mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive)
  4234  	reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, d, allocs, nil, "")
  4235  	r := reconciler.Compute()
  4236  
  4237  	// Assert the correct results
  4238  	assertResults(t, r, &resultExpectation{
  4239  		createDeployment:  nil,
  4240  		deploymentUpdates: nil,
  4241  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  4242  			job.TaskGroups[0].Name: {
  4243  				// All should be ignored because nothing has been marked as
  4244  				// healthy.
  4245  				Ignore: 30,
  4246  			},
  4247  		},
  4248  	})
  4249  }
  4250  
  4251  // Tests the reconciler doesn't stop allocations when doing a rolling upgrade
  4252  // where the count of the old job allocs is < desired count.
  4253  func TestReconciler_RollingUpgrade_MissingAllocs(t *testing.T) {
  4254  	job := mock.Job()
  4255  	job.TaskGroups[0].Update = noCanaryUpdate
  4256  
  4257  	// Create 7 allocations from the old job
  4258  	var allocs []*structs.Allocation
  4259  	for i := 0; i < 7; i++ {
  4260  		alloc := mock.Alloc()
  4261  		alloc.Job = job
  4262  		alloc.JobID = job.ID
  4263  		alloc.NodeID = uuid.Generate()
  4264  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  4265  		alloc.TaskGroup = job.TaskGroups[0].Name
  4266  		allocs = append(allocs, alloc)
  4267  	}
  4268  
  4269  	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "")
  4270  	r := reconciler.Compute()
  4271  
  4272  	d := structs.NewDeployment(job)
  4273  	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  4274  		DesiredTotal: 10,
  4275  	}
  4276  
  4277  	// Assert the correct results
  4278  	assertResults(t, r, &resultExpectation{
  4279  		createDeployment:  d,
  4280  		deploymentUpdates: nil,
  4281  		place:             3,
  4282  		destructive:       1,
  4283  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  4284  			job.TaskGroups[0].Name: {
  4285  				Place:             3,
  4286  				DestructiveUpdate: 1,
  4287  				Ignore:            6,
  4288  			},
  4289  		},
  4290  	})
  4291  
  4292  	assertNamesHaveIndexes(t, intRange(7, 9), placeResultsToNames(r.place))
  4293  	assertNamesHaveIndexes(t, intRange(0, 0), destructiveResultsToNames(r.destructiveUpdate))
  4294  }
  4295  
  4296  // Tests that the reconciler handles rerunning a batch job in the case that the
  4297  // allocations are from an older instance of the job.
  4298  func TestReconciler_Batch_Rerun(t *testing.T) {
  4299  	job := mock.Job()
  4300  	job.Type = structs.JobTypeBatch
  4301  	job.TaskGroups[0].Update = nil
  4302  
  4303  	// Create 10 allocations from the old job and have them be complete
  4304  	var allocs []*structs.Allocation
  4305  	for i := 0; i < 10; i++ {
  4306  		alloc := mock.Alloc()
  4307  		alloc.Job = job
  4308  		alloc.JobID = job.ID
  4309  		alloc.NodeID = uuid.Generate()
  4310  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  4311  		alloc.TaskGroup = job.TaskGroups[0].Name
  4312  		alloc.ClientStatus = structs.AllocClientStatusComplete
  4313  		alloc.DesiredStatus = structs.AllocDesiredStatusStop
  4314  		allocs = append(allocs, alloc)
  4315  	}
  4316  
  4317  	// Create a copy of the job that is "new"
  4318  	job2 := job.Copy()
  4319  	job2.CreateIndex++
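        	// The bumped CreateIndex marks job2 as a new instance of the batch job, so
        	// the reconciler treats the completed allocs as belonging to the old
        	// instance and places fresh copies of all ten names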
  4320  
  4321  	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, true, job2.ID, job2, nil, allocs, nil, "")
  4322  	r := reconciler.Compute()
  4323  
  4324  	// Assert the correct results
  4325  	assertResults(t, r, &resultExpectation{
  4326  		createDeployment:  nil,
  4327  		deploymentUpdates: nil,
  4328  		place:             10,
  4329  		destructive:       0,
  4330  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  4331  			job.TaskGroups[0].Name: {
  4332  				Place:             10,
  4333  				DestructiveUpdate: 0,
  4334  				Ignore:            10,
  4335  			},
  4336  		},
  4337  	})
  4338  
  4339  	assertNamesHaveIndexes(t, intRange(0, 9), placeResultsToNames(r.place))
  4340  }
  4341  
  4342  // Test that a failed deployment will not result in rescheduling failed allocations
  4343  func TestReconciler_FailedDeployment_DontReschedule(t *testing.T) {
  4344  	job := mock.Job()
  4345  	job.TaskGroups[0].Update = noCanaryUpdate
  4346  
  4347  	tgName := job.TaskGroups[0].Name
  4348  	now := time.Now()
  4349  	// Create an existing failed deployment that has some placed allocs
  4350  	d := structs.NewDeployment(job)
  4351  	d.Status = structs.DeploymentStatusFailed
  4352  	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  4353  		Promoted:     true,
  4354  		DesiredTotal: 5,
  4355  		PlacedAllocs: 4,
  4356  	}
  4357  
  4358  	// Create 4 allocations and mark two as failed
  4359  	var allocs []*structs.Allocation
  4360  	for i := 0; i < 4; i++ {
  4361  		alloc := mock.Alloc()
  4362  		alloc.Job = job
  4363  		alloc.JobID = job.ID
  4364  		alloc.NodeID = uuid.Generate()
  4365  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  4366  		alloc.TaskGroup = job.TaskGroups[0].Name
  4367  		alloc.DeploymentID = d.ID
  4368  		allocs = append(allocs, alloc)
  4369  	}
  4370  
  4371  	// Mark the last two allocations as failed and reschedulable now
  4372  	allocs[2].ClientStatus = structs.AllocClientStatusFailed
  4373  	allocs[2].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
  4374  		StartedAt:  now.Add(-1 * time.Hour),
  4375  		FinishedAt: now.Add(-10 * time.Second)}}
  4376  
  4377  	allocs[3].ClientStatus = structs.AllocClientStatusFailed
  4378  	allocs[3].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
  4379  		StartedAt:  now.Add(-1 * time.Hour),
  4380  		FinishedAt: now.Add(-10 * time.Second)}}
  4381  
  4382  	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, d, allocs, nil, "")
  4383  	r := reconciler.Compute()
  4384  
  4385  	// Assert that no rescheduled placements were created
  4386  	assertResults(t, r, &resultExpectation{
  4387  		place:             0,
  4388  		createDeployment:  nil,
  4389  		deploymentUpdates: nil,
  4390  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  4391  			job.TaskGroups[0].Name: {
  4392  				Ignore: 2,
  4393  			},
  4394  		},
  4395  	})
  4396  }
  4397  
  4398  // Test that a running deployment with failed allocs will not result in
  4399  // rescheduling failed allocations unless they are marked as reschedulable.
  4400  func TestReconciler_DeploymentWithFailedAllocs_DontReschedule(t *testing.T) {
  4401  	job := mock.Job()
  4402  	job.TaskGroups[0].Update = noCanaryUpdate
  4403  	tgName := job.TaskGroups[0].Name
  4404  	now := time.Now()
  4405  
  4406  	// Mock deployment with failed allocs, but deployment watcher hasn't marked it as failed yet
  4407  	d := structs.NewDeployment(job)
  4408  	d.Status = structs.DeploymentStatusRunning
  4409  	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  4410  		Promoted:     false,
  4411  		DesiredTotal: 10,
  4412  		PlacedAllocs: 10,
  4413  	}
  4414  
  4415  	// Create 10 allocations
  4416  	var allocs []*structs.Allocation
  4417  	for i := 0; i < 10; i++ {
  4418  		alloc := mock.Alloc()
  4419  		alloc.Job = job
  4420  		alloc.JobID = job.ID
  4421  		alloc.NodeID = uuid.Generate()
  4422  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  4423  		alloc.TaskGroup = job.TaskGroups[0].Name
  4424  		alloc.DeploymentID = d.ID
  4425  		alloc.ClientStatus = structs.AllocClientStatusFailed
  4426  		alloc.TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
  4427  			StartedAt:  now.Add(-1 * time.Hour),
  4428  			FinishedAt: now.Add(-10 * time.Second)}}
  4429  		allocs = append(allocs, alloc)
  4430  	}
  4431  
  4432  	// Mark half of them as reschedulable
  4433  	for i := 0; i < 5; i++ {
  4434  		allocs[i].DesiredTransition.Reschedule = helper.BoolToPtr(true)
  4435  	}
  4436  
  4437  	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, d, allocs, nil, "")
  4438  	r := reconciler.Compute()
  4439  
  4440  	// Assert that only the allocs marked as reschedulable were replaced
  4441  	assertResults(t, r, &resultExpectation{
  4442  		place:             5,
  4443  		createDeployment:  nil,
  4444  		deploymentUpdates: nil,
  4445  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  4446  			job.TaskGroups[0].Name: {
  4447  				Place:  5,
  4448  				Ignore: 5,
  4449  			},
  4450  		},
  4451  	})
  4452  }
  4453  
  4454  // Tests that a promoted deployment with all healthy allocs is marked successful even when failed allocs from an older job version remain
  4455  func TestReconciler_FailedDeployment_AutoRevert_CancelCanaries(t *testing.T) {
  4456  	// Create a job
  4457  	job := mock.Job()
  4458  	job.TaskGroups[0].Count = 3
  4459  	job.TaskGroups[0].Update = &structs.UpdateStrategy{
  4460  		Canary:          3,
  4461  		MaxParallel:     2,
  4462  		HealthCheck:     structs.UpdateStrategyHealthCheck_Checks,
  4463  		MinHealthyTime:  10 * time.Second,
  4464  		HealthyDeadline: 10 * time.Minute,
  4465  		Stagger:         31 * time.Second,
  4466  	}
  4467  
  4468  	// Create v1 of the job
  4469  	jobv1 := job.Copy()
  4470  	jobv1.Version = 1
  4471  	jobv1.TaskGroups[0].Meta = map[string]string{"version": "1"}
  4472  
  4473  	// Create v2 of the job
  4474  	jobv2 := job.Copy()
  4475  	jobv2.Version = 2
  4476  	jobv2.TaskGroups[0].Meta = map[string]string{"version": "2"}
  4477  
  4478  	d := structs.NewDeployment(jobv2)
  4479  	state := &structs.DeploymentState{
  4480  		Promoted:      true,
  4481  		DesiredTotal:  3,
  4482  		PlacedAllocs:  3,
  4483  		HealthyAllocs: 3,
  4484  	}
  4485  	d.TaskGroups[job.TaskGroups[0].Name] = state
  4486  
  4487  	// Create the current, healthy allocs on jobv2
  4488  	var allocs []*structs.Allocation
  4489  	for i := 0; i < 3; i++ {
  4490  		new := mock.Alloc()
  4491  		new.Job = jobv2
  4492  		new.JobID = job.ID
  4493  		new.NodeID = uuid.Generate()
  4494  		new.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  4495  		new.TaskGroup = job.TaskGroups[0].Name
  4496  		new.DeploymentID = d.ID
  4497  		new.DeploymentStatus = &structs.AllocDeploymentStatus{
  4498  			Healthy: helper.BoolToPtr(true),
  4499  		}
  4500  		new.ClientStatus = structs.AllocClientStatusRunning
  4501  		allocs = append(allocs, new)
  4503  	}
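        	// Create stopped, failed allocs from an older jobv1 deployment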
  4504  	for i := 0; i < 3; i++ {
  4505  		new := mock.Alloc()
  4506  		new.Job = jobv1
  4507  		new.JobID = jobv1.ID
  4508  		new.NodeID = uuid.Generate()
  4509  		new.Name = structs.AllocName(jobv1.ID, jobv1.TaskGroups[0].Name, uint(i))
  4510  		new.TaskGroup = job.TaskGroups[0].Name
  4511  		new.DeploymentID = uuid.Generate()
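        		// The jobv1 allocs reference a different deployment ID, so they do not
        		// count against deployment d's state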
  4512  		new.DeploymentStatus = &structs.AllocDeploymentStatus{
  4513  			Healthy: helper.BoolToPtr(false),
  4514  		}
  4515  		new.DesiredStatus = structs.AllocDesiredStatusStop
  4516  		new.ClientStatus = structs.AllocClientStatusFailed
  4517  		allocs = append(allocs, new)
  4518  	}
  4519  
  4520  	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, jobv2, d, allocs, nil, "")
  4521  	r := reconciler.Compute()
  4522  
  4523  	updates := []*structs.DeploymentStatusUpdate{
  4524  		{
  4525  			DeploymentID:      d.ID,
  4526  			Status:            structs.DeploymentStatusSuccessful,
  4527  			StatusDescription: structs.DeploymentStatusDescriptionSuccessful,
  4528  		},
  4529  	}
  4530  
  4531  	// Assert the deployment is marked successful and the remaining allocs are ignored
  4532  	assertResults(t, r, &resultExpectation{
  4533  		createDeployment:  nil,
  4534  		deploymentUpdates: updates,
  4535  		place:             0,
  4536  		inplace:           0,
  4537  		stop:              0,
  4538  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  4539  			job.TaskGroups[0].Name: {
  4540  				Stop:          0,
  4541  				InPlaceUpdate: 0,
  4542  				Ignore:        3,
  4543  			},
  4544  		},
  4545  	})
  4546  }
  4547  
  4548  // Test that a successful deployment with failed allocs will result in
  4549  // rescheduling failed allocations
  4550  func TestReconciler_SuccessfulDeploymentWithFailedAllocs_Reschedule(t *testing.T) {
  4551  	job := mock.Job()
  4552  	job.TaskGroups[0].Update = noCanaryUpdate
  4553  	tgName := job.TaskGroups[0].Name
  4554  	now := time.Now()
  4555  
  4556  	// Mock a deployment that has already been marked successful; its allocs fail afterward
  4557  	d := structs.NewDeployment(job)
  4558  	d.Status = structs.DeploymentStatusSuccessful
  4559  	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  4560  		Promoted:     false,
  4561  		DesiredTotal: 10,
  4562  		PlacedAllocs: 10,
  4563  	}
  4564  
  4565  	// Create 10 failed allocations tied to the deployment
  4566  	var allocs []*structs.Allocation
  4567  	for i := 0; i < 10; i++ {
  4568  		alloc := mock.Alloc()
  4569  		alloc.Job = job
  4570  		alloc.JobID = job.ID
  4571  		alloc.NodeID = uuid.Generate()
  4572  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  4573  		alloc.TaskGroup = job.TaskGroups[0].Name
  4574  		alloc.DeploymentID = d.ID
  4575  		alloc.ClientStatus = structs.AllocClientStatusFailed
  4576  		alloc.TaskStates = map[string]*structs.TaskState{tgName: {State: structs.TaskStateDead,
  4577  			StartedAt:  now.Add(-1 * time.Hour),
  4578  			FinishedAt: now.Add(-10 * time.Second)}}
  4579  		allocs = append(allocs, alloc)
  4580  	}
  4581  
  4582  	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, d, allocs, nil, "")
  4583  	r := reconciler.Compute()
  4584  
  4585  	// Assert that rescheduled placements were created
  4586  	assertResults(t, r, &resultExpectation{
  4587  		place:             10,
  4588  		createDeployment:  nil,
  4589  		deploymentUpdates: nil,
  4590  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  4591  			job.TaskGroups[0].Name: {
  4592  				Place:  10,
  4593  				Ignore: 0,
  4594  			},
  4595  		},
  4596  	})
  4597  	assertPlaceResultsHavePreviousAllocs(t, 10, r.place)
  4598  }
  4599  
  4600  // Tests force rescheduling a failed alloc that is past its reschedule limit
  4601  func TestReconciler_ForceReschedule_Service(t *testing.T) {
  4602  	require := require.New(t)
  4603  
  4604  	// Set the desired count to 5
  4605  	job := mock.Job()
  4606  	job.TaskGroups[0].Count = 5
  4607  	tgName := job.TaskGroups[0].Name
  4608  
  4609  	// Set up reschedule policy and update stanza
  4610  	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
  4611  		Attempts:      1,
  4612  		Interval:      24 * time.Hour,
  4613  		Delay:         5 * time.Second,
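        		// DelayFunction is irrelevant here: the failed alloc is past its reschedule limit and is force-rescheduled below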
  4614  		DelayFunction: "",
  4615  		MaxDelay:      1 * time.Hour,
  4616  		Unlimited:     false,
  4617  	}
  4618  	job.TaskGroups[0].Update = noCanaryUpdate
  4619  
  4620  	// Create 5 existing allocations
  4621  	var allocs []*structs.Allocation
  4622  	for i := 0; i < 5; i++ {
  4623  		alloc := mock.Alloc()
  4624  		alloc.Job = job
  4625  		alloc.JobID = job.ID
  4626  		alloc.NodeID = uuid.Generate()
  4627  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  4628  		alloc.ClientStatus = structs.AllocClientStatusRunning
  4629  		allocs = append(allocs, alloc)
  4630  	}
  4631  
  4632  	// Mark one as failed and past its reschedule limit so it is not normally eligible to reschedule
  4633  	allocs[0].ClientStatus = structs.AllocClientStatusFailed
  4634  	allocs[0].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
  4635  		{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
  4636  			PrevAllocID: uuid.Generate(),
  4637  			PrevNodeID:  uuid.Generate(),
  4638  		},
  4639  	}}
  4640  
  4641  	// Mark DesiredTransition ForceReschedule
  4642  	allocs[0].DesiredTransition = structs.DesiredTransition{ForceReschedule: helper.BoolToPtr(true)}
  4643  
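        	// Reconcile; the forced reschedule should place a replacement despite the exhausted reschedule policy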
  4644  	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
  4645  	r := reconciler.Compute()
  4646  
  4647  	// Verify that no follow-up evals were created
  4648  	evals := r.desiredFollowupEvals[tgName]
  4649  	require.Nil(evals)
  4650  
  4651  	// Verify that one rescheduled alloc was created because of the forced reschedule
  4652  	assertResults(t, r, &resultExpectation{
  4653  		createDeployment:  nil,
  4654  		deploymentUpdates: nil,
  4655  		place:             1,
  4656  		inplace:           0,
  4657  		stop:              0,
  4658  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  4659  			job.TaskGroups[0].Name: {
  4660  				Place:  1,
  4661  				Ignore: 4,
  4662  			},
  4663  		},
  4664  	})
  4665  
  4666  	// Rescheduled allocs should have previous allocs
  4667  	assertNamesHaveIndexes(t, intRange(0, 0), placeResultsToNames(r.place))
  4668  	assertPlaceResultsHavePreviousAllocs(t, 1, r.place)
  4669  	assertPlacementsAreRescheduled(t, 1, r.place)
  4670  }