github.com/djenriquez/nomad-1@v0.8.1/scheduler/reconcile_test.go

package scheduler

import (
	"fmt"
	"log"
	"os"
	"reflect"
	"regexp"
	"strconv"
	"testing"
	"time"

	"github.com/hashicorp/nomad/helper"
	"github.com/hashicorp/nomad/helper/uuid"
	"github.com/hashicorp/nomad/nomad/mock"
	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/kr/pretty"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
)

/*
Basic Tests:
√  Place when there is nothing in the cluster
√  Place remainder when there is some in the cluster
√  Scale down from n to n-m where n != m
√  Scale down from n to zero
√  Inplace upgrade test
√  Inplace upgrade and scale up test
√  Inplace upgrade and scale down test
√  Destructive upgrade
√  Destructive upgrade and scale up test
√  Destructive upgrade and scale down test
√  Handle lost nodes
√  Handle lost nodes and scale up
√  Handle lost nodes and scale down
√  Handle draining nodes
√  Handle draining nodes and scale up
√  Handle draining nodes and scale down
√  Handle task group being removed
√  Handle job being stopped both as .Stopped and nil
√  Place more than one group
√  Handle delayed rescheduling failed allocs for batch jobs
√  Handle delayed rescheduling failed allocs for service jobs
√  Handle eligible now rescheduling failed allocs for batch jobs
√  Handle eligible now rescheduling failed allocs for service jobs
√  Previously rescheduled allocs should not be rescheduled again
√  Aggregated evaluations for allocations that fail close together

Update stanza Tests:
√  Stopped job cancels any active deployment
√  Stopped job doesn't cancel terminal deployment
√  JobIndex change cancels any active deployment
√  JobIndex change doesn't cancel a terminal deployment
√  Destructive changes create deployment and get rolled out via max_parallelism
√  Don't create a deployment if there are no changes
√  Deployment created by all inplace updates
√  Paused or failed deployment doesn't create any more canaries
√  Paused or failed deployment doesn't do any placements unless replacing lost allocs
√  Paused or failed deployment doesn't do destructive updates
√  Paused deployment does do migrations
√  Failed deployment doesn't do migrations
√  Canary that is on a draining node
√  Canary that is on a lost node
√  Stop old canaries
√  Create new canaries on job change
√  Create new canaries on job change while scaling up
√  Create new canaries on job change while scaling down
√  Fill canaries if partial placement
√  Promote canaries unblocks max_parallel
√  Promote canaries when canaries == count
√  Only place as many as are healthy in deployment
√  Limit calculation accounts for healthy allocs on migrating/lost nodes
√  Failed deployment should not place anything
√  Run after canaries have been promoted, new allocs have been rolled out and there is no deployment
√  Failed deployment cancels non-promoted task groups
√  Failed deployment and updated job works
√  Finished deployment gets marked as complete
√  Handle job change while scaling up
√  Update the job when all allocations from the previous job haven't been placed yet.
√  Paused or failed deployment doesn't do any rescheduling of failed allocs
√  Running deployment with failed allocs doesn't do any rescheduling of failed allocs
*/

var (
	canaryUpdate = &structs.UpdateStrategy{
		Canary:          2,
		MaxParallel:     2,
		HealthCheck:     structs.UpdateStrategyHealthCheck_Checks,
		MinHealthyTime:  10 * time.Second,
		HealthyDeadline: 10 * time.Minute,
		Stagger:         31 * time.Second,
	}

	noCanaryUpdate = &structs.UpdateStrategy{
		MaxParallel:     4,
		HealthCheck:     structs.UpdateStrategyHealthCheck_Checks,
		MinHealthyTime:  10 * time.Second,
		HealthyDeadline: 10 * time.Minute,
		Stagger:         31 * time.Second,
	}
)
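
// The update-stanza tests attach one of these strategies to the job under
// test. A minimal sketch of the pattern (illustrative only, not a test):
//
//	job := mock.Job()
//	job.TaskGroups[0].Update = canaryUpdate // or noCanaryUpdate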

func testLogger() *log.Logger {
	return log.New(os.Stderr, "", log.LstdFlags)
}

func allocUpdateFnIgnore(*structs.Allocation, *structs.Job, *structs.TaskGroup) (bool, bool, *structs.Allocation) {
	return true, false, nil
}

func allocUpdateFnDestructive(*structs.Allocation, *structs.Job, *structs.TaskGroup) (bool, bool, *structs.Allocation) {
	return false, true, nil
}

func allocUpdateFnInplace(existing *structs.Allocation, _ *structs.Job, newTG *structs.TaskGroup) (bool, bool, *structs.Allocation) {
	// Create a shallow copy
	newAlloc := existing.CopySkipJob()
	newAlloc.TaskResources = make(map[string]*structs.Resources)

	// Use the new task resources but keep the network from the old
	for _, task := range newTG.Tasks {
		r := task.Resources.Copy()
		r.Networks = existing.TaskResources[task.Name].Networks
		newAlloc.TaskResources[task.Name] = r
	}

	return false, false, newAlloc
}

func allocUpdateFnMock(handled map[string]allocUpdateType, unhandled allocUpdateType) allocUpdateType {
	return func(existing *structs.Allocation, newJob *structs.Job, newTG *structs.TaskGroup) (bool, bool, *structs.Allocation) {
		if fn, ok := handled[existing.ID]; ok {
			return fn(existing, newJob, newTG)
		}

		return unhandled(existing, newJob, newTG)
	}
}
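
// allocUpdateFnMock lets a test route specific allocation IDs to their own
// update behavior while defaulting everything else. A minimal usage sketch
// (handledAlloc is a hypothetical *structs.Allocation from a test fixture):
//
//	handled := map[string]allocUpdateType{
//		handledAlloc.ID: allocUpdateFnDestructive,
//	}
//	updateFn := allocUpdateFnMock(handled, allocUpdateFnIgnore)
//
// Allocations listed in handled get their specific update function; all
// others fall through to the unhandled default.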

var (
	// allocationIndexRegex is a regular expression to find the allocation index.
	allocationIndexRegex = regexp.MustCompile(".+\\[(\\d+)\\]$")
)

// allocNameToIndex returns the index encoded in the allocation name, or zero
// if no index can be parsed.
func allocNameToIndex(name string) uint {
	matches := allocationIndexRegex.FindStringSubmatch(name)
	if len(matches) != 2 {
		return 0
	}

	index, err := strconv.Atoi(matches[1])
	if err != nil {
		return 0
	}

	return uint(index)
}
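
// Nomad allocation names follow the "<job>.<group>[<index>]" pattern produced
// by structs.AllocName, so for illustration:
//
//	allocNameToIndex("my-job.web[3]") // => 3
//	allocNameToIndex("not-indexed")   // => 0 (regex does not match)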

func assertNamesHaveIndexes(t *testing.T, indexes []int, names []string) {
	t.Helper()
	m := make(map[uint]int)
	for _, i := range indexes {
		m[uint(i)]++
	}

	for _, n := range names {
		index := allocNameToIndex(n)
		val, contained := m[index]
		if !contained {
			t.Fatalf("Unexpected index %d from name %s\nAll names: %v", index, n, names)
		}

		val--
		if val < 0 {
			t.Fatalf("Index %d repeated too many times\nAll names: %v", index, names)
		}
		m[index] = val
	}

	for k, remainder := range m {
		if remainder != 0 {
			t.Fatalf("Index %d has %d unexpected remaining uses\nAll names: %v", k, remainder, names)
		}
	}
}
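
// For example, the following assertion would pass because each expected index
// is used exactly once (the names are illustrative):
//
//	assertNamesHaveIndexes(t, intRange(0, 1), []string{"job.web[0]", "job.web[1]"})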

func assertNoCanariesStopped(t *testing.T, d *structs.Deployment, stop []allocStopResult) {
	t.Helper()
	canaryIndex := make(map[string]struct{})
	for _, state := range d.TaskGroups {
		for _, c := range state.PlacedCanaries {
			canaryIndex[c] = struct{}{}
		}
	}

	for _, s := range stop {
		if _, ok := canaryIndex[s.alloc.ID]; ok {
			t.Fatalf("Stopping canary alloc %q %q", s.alloc.ID, s.alloc.Name)
		}
	}
}

func assertPlaceResultsHavePreviousAllocs(t *testing.T, numPrevious int, place []allocPlaceResult) {
	t.Helper()
	names := make(map[string]struct{}, numPrevious)

	found := 0
	for _, p := range place {
		if _, ok := names[p.name]; ok {
			t.Fatalf("Name %q already placed", p.name)
		}
		names[p.name] = struct{}{}

		if p.previousAlloc == nil {
			continue
		}

		if act := p.previousAlloc.Name; p.name != act {
			t.Fatalf("Name mismatch on previous alloc; got %q; want %q", act, p.name)
		}
		found++
	}
	if numPrevious != found {
		t.Fatalf("wanted %d; got %d placements with previous allocs", numPrevious, found)
	}
}

func assertPlacementsAreRescheduled(t *testing.T, numRescheduled int, place []allocPlaceResult) {
	t.Helper()
	names := make(map[string]struct{}, numRescheduled)

	found := 0
	for _, p := range place {
		if _, ok := names[p.name]; ok {
			t.Fatalf("Name %q already placed", p.name)
		}
		names[p.name] = struct{}{}

		if p.previousAlloc == nil {
			continue
		}
		if p.reschedule {
			found++
		}
	}
	if numRescheduled != found {
		t.Fatalf("wanted %d; got %d placements that are rescheduled", numRescheduled, found)
	}
}

func intRange(pairs ...int) []int {
	if len(pairs)%2 != 0 {
		return nil
	}

	var r []int
	for i := 0; i < len(pairs); i += 2 {
		for j := pairs[i]; j <= pairs[i+1]; j++ {
			r = append(r, j)
		}
	}
	return r
}
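
// A quick illustration of intRange's inclusive (from, to) pairs:
//
//	intRange(0, 2)       // => [0 1 2]
//	intRange(0, 1, 5, 6) // => [0 1 5 6]
//	intRange(1)          // => nil; arguments must come in pairs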

func placeResultsToNames(place []allocPlaceResult) []string {
	names := make([]string, 0, len(place))
	for _, p := range place {
		names = append(names, p.name)
	}
	return names
}

func destructiveResultsToNames(destructive []allocDestructiveResult) []string {
	names := make([]string, 0, len(destructive))
	for _, d := range destructive {
		names = append(names, d.placeName)
	}
	return names
}

func stopResultsToNames(stop []allocStopResult) []string {
	names := make([]string, 0, len(stop))
	for _, s := range stop {
		names = append(names, s.alloc.Name)
	}
	return names
}

func attributeUpdatesToNames(attributeUpdates map[string]*structs.Allocation) []string {
	names := make([]string, 0, len(attributeUpdates))
	for _, a := range attributeUpdates {
		names = append(names, a.Name)
	}
	return names
}

func allocsToNames(allocs []*structs.Allocation) []string {
	names := make([]string, 0, len(allocs))
	for _, a := range allocs {
		names = append(names, a.Name)
	}
	return names
}

type resultExpectation struct {
	createDeployment  *structs.Deployment
	deploymentUpdates []*structs.DeploymentStatusUpdate
	place             int
	destructive       int
	inplace           int
	attributeUpdates  int
	stop              int
	desiredTGUpdates  map[string]*structs.DesiredUpdates
}

func assertResults(t *testing.T, r *reconcileResults, exp *resultExpectation) {
	t.Helper()
	assert := assert.New(t)

	if exp.createDeployment != nil && r.deployment == nil {
		t.Fatalf("Expected a created deployment; got none")
	} else if exp.createDeployment == nil && r.deployment != nil {
		t.Fatalf("Expected no created deployment; got %#v", r.deployment)
	} else if exp.createDeployment != nil && r.deployment != nil {
		// Clear the deployment ID
		r.deployment.ID, exp.createDeployment.ID = "", ""
		if !reflect.DeepEqual(r.deployment, exp.createDeployment) {
			t.Fatalf("Unexpected created deployment; got\n%#v\nwant\n%#v\nDiff: %v",
				r.deployment, exp.createDeployment, pretty.Diff(r.deployment, exp.createDeployment))
		}
	}

	assert.EqualValues(exp.deploymentUpdates, r.deploymentUpdates, "Expected Deployment Updates")
	assert.Len(r.place, exp.place, "Expected Placements")
	assert.Len(r.destructiveUpdate, exp.destructive, "Expected Destructive")
	assert.Len(r.inplaceUpdate, exp.inplace, "Expected Inplace Updates")
	assert.Len(r.attributeUpdates, exp.attributeUpdates, "Expected Attribute Updates")
	assert.Len(r.stop, exp.stop, "Expected Stops")
	assert.EqualValues(exp.desiredTGUpdates, r.desiredTGUpdates, "Expected Desired TG Update Annotations")
}

// Tests the reconciler properly handles placements for a job that has no
// existing allocations
func TestReconciler_Place_NoExisting(t *testing.T) {
	job := mock.Job()
	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, nil, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             10,
		inplace:           0,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place: 10,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 9), placeResultsToNames(r.place))
}

// Tests the reconciler properly handles placements for a job that has some
// existing allocations
func TestReconciler_Place_Existing(t *testing.T) {
	job := mock.Job()

	// Create 5 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 5; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             5,
		inplace:           0,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:  5,
				Ignore: 5,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(5, 9), placeResultsToNames(r.place))
}

// Tests the reconciler properly handles stopping allocations for a job that has
// scaled down
func TestReconciler_ScaleDown_Partial(t *testing.T) {
	// Has desired 10
	job := mock.Job()

	// Create 20 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 20; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             0,
		inplace:           0,
		stop:              10,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Ignore: 10,
				Stop:   10,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(10, 19), stopResultsToNames(r.stop))
}

// Tests the reconciler properly handles stopping allocations for a job that has
// scaled down to zero desired
func TestReconciler_ScaleDown_Zero(t *testing.T) {
	// Set desired 0
	job := mock.Job()
	job.TaskGroups[0].Count = 0

	// Create 20 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 20; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             0,
		inplace:           0,
		stop:              20,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Stop: 20,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 19), stopResultsToNames(r.stop))
}

// Tests the reconciler properly handles stopping allocations for a job that has
// scaled down to zero desired where allocs have duplicate names
func TestReconciler_ScaleDown_Zero_DuplicateNames(t *testing.T) {
	// Set desired 0
	job := mock.Job()
	job.TaskGroups[0].Count = 0

	// Create 20 existing allocations
	var allocs []*structs.Allocation
	var expectedStopped []int
	for i := 0; i < 20; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i%2))
		allocs = append(allocs, alloc)
		expectedStopped = append(expectedStopped, i%2)
	}

	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             0,
		inplace:           0,
		stop:              20,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Stop: 20,
			},
		},
	})

	assertNamesHaveIndexes(t, expectedStopped, stopResultsToNames(r.stop))
}

// Tests the reconciler properly handles inplace upgrading allocations
func TestReconciler_Inplace(t *testing.T) {
	job := mock.Job()

	// Create 10 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnInplace, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             0,
		inplace:           10,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				InPlaceUpdate: 10,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 9), allocsToNames(r.inplaceUpdate))
}

// Tests the reconciler properly handles inplace upgrading allocations while
// scaling up
func TestReconciler_Inplace_ScaleUp(t *testing.T) {
	// Set desired 15
	job := mock.Job()
	job.TaskGroups[0].Count = 15

	// Create 10 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnInplace, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             5,
		inplace:           10,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:         5,
				InPlaceUpdate: 10,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 9), allocsToNames(r.inplaceUpdate))
	assertNamesHaveIndexes(t, intRange(10, 14), placeResultsToNames(r.place))
}

// Tests the reconciler properly handles inplace upgrading allocations while
// scaling down
func TestReconciler_Inplace_ScaleDown(t *testing.T) {
	// Set desired 5
	job := mock.Job()
	job.TaskGroups[0].Count = 5

	// Create 10 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnInplace, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             0,
		inplace:           5,
		stop:              5,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Stop:          5,
				InPlaceUpdate: 5,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 4), allocsToNames(r.inplaceUpdate))
	assertNamesHaveIndexes(t, intRange(5, 9), stopResultsToNames(r.stop))
}

// Tests the reconciler properly handles destructive upgrading allocations
func TestReconciler_Destructive(t *testing.T) {
	job := mock.Job()

	// Create 10 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		destructive:       10,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				DestructiveUpdate: 10,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 9), destructiveResultsToNames(r.destructiveUpdate))
}

// Tests the reconciler properly handles destructive upgrading allocations while
// scaling up
func TestReconciler_Destructive_ScaleUp(t *testing.T) {
	// Set desired 15
	job := mock.Job()
	job.TaskGroups[0].Count = 15

	// Create 10 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             5,
		destructive:       10,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:             5,
				DestructiveUpdate: 10,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 9), destructiveResultsToNames(r.destructiveUpdate))
	assertNamesHaveIndexes(t, intRange(10, 14), placeResultsToNames(r.place))
}

// Tests the reconciler properly handles destructive upgrading allocations while
// scaling down
func TestReconciler_Destructive_ScaleDown(t *testing.T) {
	// Set desired 5
	job := mock.Job()
	job.TaskGroups[0].Count = 5

	// Create 10 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		destructive:       5,
		stop:              5,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Stop:              5,
				DestructiveUpdate: 5,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(5, 9), stopResultsToNames(r.stop))
	assertNamesHaveIndexes(t, intRange(0, 4), destructiveResultsToNames(r.destructiveUpdate))
}

// Tests the reconciler properly handles lost nodes with allocations
func TestReconciler_LostNode(t *testing.T) {
	job := mock.Job()

	// Create 10 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	// Build a map of tainted nodes
	tainted := make(map[string]*structs.Node, 2)
	for i := 0; i < 2; i++ {
		n := mock.Node()
		n.ID = allocs[i].NodeID
		n.Status = structs.NodeStatusDown
		tainted[n.ID] = n
	}

	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, tainted, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             2,
		inplace:           0,
		stop:              2,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:  2,
				Stop:   2,
				Ignore: 8,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop))
	assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place))
}

// Tests the reconciler properly handles lost nodes with allocations while
// scaling up
func TestReconciler_LostNode_ScaleUp(t *testing.T) {
	// Set desired 15
	job := mock.Job()
	job.TaskGroups[0].Count = 15

	// Create 10 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	// Build a map of tainted nodes
	tainted := make(map[string]*structs.Node, 2)
	for i := 0; i < 2; i++ {
		n := mock.Node()
		n.ID = allocs[i].NodeID
		n.Status = structs.NodeStatusDown
		tainted[n.ID] = n
	}

	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, tainted, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             7,
		inplace:           0,
		stop:              2,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:  7,
				Stop:   2,
				Ignore: 8,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop))
	assertNamesHaveIndexes(t, intRange(0, 1, 10, 14), placeResultsToNames(r.place))
}

// Tests the reconciler properly handles lost nodes with allocations while
// scaling down
func TestReconciler_LostNode_ScaleDown(t *testing.T) {
	// Set desired 5
	job := mock.Job()
	job.TaskGroups[0].Count = 5

	// Create 10 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	// Build a map of tainted nodes
	tainted := make(map[string]*structs.Node, 2)
	for i := 0; i < 2; i++ {
		n := mock.Node()
		n.ID = allocs[i].NodeID
		n.Status = structs.NodeStatusDown
		tainted[n.ID] = n
	}

	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, tainted, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             0,
		inplace:           0,
		stop:              5,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Stop:   5,
				Ignore: 5,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 1, 7, 9), stopResultsToNames(r.stop))
}

// Tests the reconciler properly handles draining nodes with allocations
func TestReconciler_DrainNode(t *testing.T) {
	job := mock.Job()

	// Create 10 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	// Build a map of tainted nodes
	tainted := make(map[string]*structs.Node, 2)
	for i := 0; i < 2; i++ {
		n := mock.Node()
		n.ID = allocs[i].NodeID
		allocs[i].DesiredTransition.Migrate = helper.BoolToPtr(true)
		n.Drain = true
		tainted[n.ID] = n
	}

	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, tainted, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             2,
		inplace:           0,
		stop:              2,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Migrate: 2,
				Ignore:  8,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop))
	assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place))
	assertPlaceResultsHavePreviousAllocs(t, 2, r.place)
	// These should not have the reschedule field set
	assertPlacementsAreRescheduled(t, 0, r.place)
}

// Tests the reconciler properly handles draining nodes with allocations while
// scaling up
func TestReconciler_DrainNode_ScaleUp(t *testing.T) {
	// Set desired 15
	job := mock.Job()
	job.TaskGroups[0].Count = 15

	// Create 10 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	// Build a map of tainted nodes
	tainted := make(map[string]*structs.Node, 2)
	for i := 0; i < 2; i++ {
		n := mock.Node()
		n.ID = allocs[i].NodeID
		allocs[i].DesiredTransition.Migrate = helper.BoolToPtr(true)
		n.Drain = true
		tainted[n.ID] = n
	}

	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, tainted, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             7,
		inplace:           0,
		stop:              2,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:   5,
				Migrate: 2,
				Ignore:  8,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop))
	assertNamesHaveIndexes(t, intRange(0, 1, 10, 14), placeResultsToNames(r.place))
	assertPlaceResultsHavePreviousAllocs(t, 2, r.place)
	// These should not have the reschedule field set
	assertPlacementsAreRescheduled(t, 0, r.place)
}

// Tests the reconciler properly handles draining nodes with allocations while
// scaling down
func TestReconciler_DrainNode_ScaleDown(t *testing.T) {
	// Set desired 8
	job := mock.Job()
	job.TaskGroups[0].Count = 8

	// Create 10 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	// Build a map of tainted nodes
	tainted := make(map[string]*structs.Node, 3)
	for i := 0; i < 3; i++ {
		n := mock.Node()
		n.ID = allocs[i].NodeID
		allocs[i].DesiredTransition.Migrate = helper.BoolToPtr(true)
		n.Drain = true
		tainted[n.ID] = n
	}

	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, tainted, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             1,
		inplace:           0,
		stop:              3,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Migrate: 1,
				Stop:    2,
				Ignore:  7,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 2), stopResultsToNames(r.stop))
	assertNamesHaveIndexes(t, intRange(0, 0), placeResultsToNames(r.place))
	assertPlaceResultsHavePreviousAllocs(t, 1, r.place)
	// These should not have the reschedule field set
	assertPlacementsAreRescheduled(t, 0, r.place)
}

// Tests the reconciler properly handles a task group being removed
func TestReconciler_RemovedTG(t *testing.T) {
	job := mock.Job()

	// Create 10 allocations for a tg that no longer exists
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	oldName := job.TaskGroups[0].Name
	newName := "different"
	job.TaskGroups[0].Name = newName

	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             10,
		inplace:           0,
		stop:              10,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			oldName: {
				Stop: 10,
			},
			newName: {
				Place: 10,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 9), stopResultsToNames(r.stop))
	assertNamesHaveIndexes(t, intRange(0, 9), placeResultsToNames(r.place))
}

// Tests the reconciler properly handles a stopped job, both explicitly
// stopped and nil
func TestReconciler_JobStopped(t *testing.T) {
	job := mock.Job()
	job.Stop = true

	cases := []struct {
		name             string
		job              *structs.Job
		jobID, taskGroup string
	}{
		{
			name:      "stopped job",
			job:       job,
			jobID:     job.ID,
			taskGroup: job.TaskGroups[0].Name,
		},
		{
			name:      "nil job",
			job:       nil,
			jobID:     "foo",
			taskGroup: "bar",
		},
	}

	for _, c := range cases {
		t.Run(c.name, func(t *testing.T) {
			// Create 10 allocations
			var allocs []*structs.Allocation
			for i := 0; i < 10; i++ {
				alloc := mock.Alloc()
				alloc.Job = c.job
				alloc.JobID = c.jobID
				alloc.NodeID = uuid.Generate()
				alloc.Name = structs.AllocName(c.jobID, c.taskGroup, uint(i))
				alloc.TaskGroup = c.taskGroup
				allocs = append(allocs, alloc)
			}

			reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, c.jobID, c.job, nil, allocs, nil, "")
			r := reconciler.Compute()

			// Assert the correct results
			assertResults(t, r, &resultExpectation{
				createDeployment:  nil,
				deploymentUpdates: nil,
				place:             0,
				inplace:           0,
				stop:              10,
				desiredTGUpdates: map[string]*structs.DesiredUpdates{
					c.taskGroup: {
						Stop: 10,
					},
				},
			})

			assertNamesHaveIndexes(t, intRange(0, 9), stopResultsToNames(r.stop))
		})
	}
}

// Tests the reconciler properly handles jobs with multiple task groups
func TestReconciler_MultiTG(t *testing.T) {
	job := mock.Job()
	tg2 := job.TaskGroups[0].Copy()
	tg2.Name = "foo"
	job.TaskGroups = append(job.TaskGroups, tg2)

	// Create 2 existing allocations for the first tg
	var allocs []*structs.Allocation
	for i := 0; i < 2; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             18,
		inplace:           0,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:  8,
				Ignore: 2,
			},
			tg2.Name: {
				Place: 10,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(2, 9, 0, 9), placeResultsToNames(r.place))
}

// Tests delayed rescheduling of failed batch allocations
func TestReconciler_RescheduleLater_Batch(t *testing.T) {
	require := require.New(t)

	// Set desired 4
	job := mock.Job()
	job.TaskGroups[0].Count = 4
	now := time.Now()

	// Set up reschedule policy
	delayDur := 15 * time.Second
	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{Attempts: 3, Interval: 24 * time.Hour, Delay: delayDur, DelayFunction: "constant"}
	tgName := job.TaskGroups[0].Name

	// Create 6 existing allocations - 2 running, 1 complete and 3 failed
	var allocs []*structs.Allocation
	for i := 0; i < 6; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
		alloc.ClientStatus = structs.AllocClientStatusRunning
	}

	// Mark 3 as failed with restart tracking info
	allocs[0].ClientStatus = structs.AllocClientStatusFailed
	allocs[0].NextAllocation = allocs[1].ID
	allocs[1].ClientStatus = structs.AllocClientStatusFailed
	allocs[1].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
		{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
			PrevAllocID: allocs[0].ID,
			PrevNodeID:  uuid.Generate(),
		},
	}}
	allocs[1].NextAllocation = allocs[2].ID
	allocs[2].ClientStatus = structs.AllocClientStatusFailed
	allocs[2].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
		StartedAt:  now.Add(-1 * time.Hour),
		FinishedAt: now}}
	allocs[2].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
		{RescheduleTime: time.Now().Add(-2 * time.Hour).UTC().UnixNano(),
			PrevAllocID: allocs[0].ID,
			PrevNodeID:  uuid.Generate(),
		},
		{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
			PrevAllocID: allocs[1].ID,
			PrevNodeID:  uuid.Generate(),
		},
	}}

	// Mark one as complete
	allocs[5].ClientStatus = structs.AllocClientStatusComplete

	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, true, job.ID, job, nil, allocs, nil, uuid.Generate())
	r := reconciler.Compute()

	// Two reschedule attempts were already made, one more can be made at a future time
	// Verify that the follow up eval has the expected waitUntil time
	evals := r.desiredFollowupEvals[tgName]
	require.NotNil(evals)
	require.Equal(1, len(evals))
	require.Equal(now.Add(delayDur), evals[0].WaitUntil)

	// Alloc 5 should not be replaced because it is terminal
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             0,
		inplace:           0,
		attributeUpdates:  1,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:         0,
				InPlaceUpdate: 0,
				Ignore:        4,
			},
		},
	})
	assertNamesHaveIndexes(t, intRange(2, 2), attributeUpdatesToNames(r.attributeUpdates))

	// Verify that the followup evalID field is set correctly
	var annotated *structs.Allocation
	for _, a := range r.attributeUpdates {
		annotated = a
	}
	require.Equal(evals[0].ID, annotated.FollowupEvalID)
}

// Tests delayed rescheduling of failed batch allocations and batching of allocs
// with fail times that are close together
func TestReconciler_RescheduleLaterWithBatchedEvals_Batch(t *testing.T) {
	require := require.New(t)

	// Set desired 10
	job := mock.Job()
	job.TaskGroups[0].Count = 10
	now := time.Now()

	// Set up reschedule policy
	delayDur := 15 * time.Second
	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{Attempts: 3, Interval: 24 * time.Hour, Delay: delayDur, DelayFunction: "constant"}
	tgName := job.TaskGroups[0].Name

	// Create 10 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
		alloc.ClientStatus = structs.AllocClientStatusRunning
	}

	// Mark 5 as failed with fail times very close together
	for i := 0; i < 5; i++ {
		allocs[i].ClientStatus = structs.AllocClientStatusFailed
		allocs[i].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
			StartedAt:  now.Add(-1 * time.Hour),
			FinishedAt: now.Add(time.Duration(50*i) * time.Millisecond)}}
	}

	// Mark two more as failed several seconds later
	for i := 5; i < 7; i++ {
		allocs[i].ClientStatus = structs.AllocClientStatusFailed
		allocs[i].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
			StartedAt:  now.Add(-1 * time.Hour),
			FinishedAt: now.Add(10 * time.Second)}}
	}

	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, true, job.ID, job, nil, allocs, nil, uuid.Generate())
	r := reconciler.Compute()

	// Verify that two follow up evals were created
	evals := r.desiredFollowupEvals[tgName]
	require.NotNil(evals)
	require.Equal(2, len(evals))

	// Verify expected WaitUntil values for both batched evals
	require.Equal(now.Add(delayDur), evals[0].WaitUntil)
	secondBatchDuration := delayDur + 10*time.Second
	require.Equal(now.Add(secondBatchDuration), evals[1].WaitUntil)

	// No replacements should be placed yet; all failed allocs are delayed
	// behind their follow up evals
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             0,
		inplace:           0,
		attributeUpdates:  7,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:         0,
				InPlaceUpdate: 0,
				Ignore:        10,
			},
		},
	})
	assertNamesHaveIndexes(t, intRange(0, 6), attributeUpdatesToNames(r.attributeUpdates))

	// Verify that the followup evalID field is set correctly
	for _, alloc := range r.attributeUpdates {
		if allocNameToIndex(alloc.Name) < 5 {
			require.Equal(evals[0].ID, alloc.FollowupEvalID)
		} else if allocNameToIndex(alloc.Name) < 7 {
			require.Equal(evals[1].ID, alloc.FollowupEvalID)
		} else {
			t.Fatalf("Unexpected alloc name in Inplace results %v", alloc.Name)
		}
	}
}
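
// Context note (not part of the original file): the reconciler groups follow
// up evals for failed allocs whose reschedule times land within a small fixed
// batching window (batchedFailedAllocWindowSize in reconcile.go, 5s in this
// version, if memory serves). That is why the five near-simultaneous failures
// above share one eval while the two failures 10s later share another.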

// Tests rescheduling failed batch allocations that are eligible to be
// rescheduled now
func TestReconciler_RescheduleNow_Batch(t *testing.T) {
	require := require.New(t)
	// Set desired 4
	job := mock.Job()
	job.TaskGroups[0].Count = 4
	now := time.Now()
	// Set up reschedule policy
	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{Attempts: 3, Interval: 24 * time.Hour, Delay: 5 * time.Second, DelayFunction: "constant"}
	tgName := job.TaskGroups[0].Name
	// Create 6 existing allocations - 2 running, 1 complete and 3 failed
	var allocs []*structs.Allocation
	for i := 0; i < 6; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
		alloc.ClientStatus = structs.AllocClientStatusRunning
	}
	// Mark 3 as failed with restart tracking info
	allocs[0].ClientStatus = structs.AllocClientStatusFailed
	allocs[0].NextAllocation = allocs[1].ID
	allocs[1].ClientStatus = structs.AllocClientStatusFailed
	allocs[1].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
		{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
			PrevAllocID: allocs[0].ID,
			PrevNodeID:  uuid.Generate(),
		},
	}}
	allocs[1].NextAllocation = allocs[2].ID
	allocs[2].ClientStatus = structs.AllocClientStatusFailed
	allocs[2].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
		StartedAt:  now.Add(-1 * time.Hour),
		FinishedAt: now.Add(-5 * time.Second)}}
	allocs[2].FollowupEvalID = uuid.Generate()
	allocs[2].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
		{RescheduleTime: time.Now().Add(-2 * time.Hour).UTC().UnixNano(),
			PrevAllocID: allocs[0].ID,
			PrevNodeID:  uuid.Generate(),
		},
		{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
			PrevAllocID: allocs[1].ID,
			PrevNodeID:  uuid.Generate(),
		},
	}}
	// Mark one as complete
	allocs[5].ClientStatus = structs.AllocClientStatusComplete

	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, true, job.ID, job, nil, allocs, nil, "")
	reconciler.now = now
	r := reconciler.Compute()

	// Verify that no follow up evals were created
	evals := r.desiredFollowupEvals[tgName]
	require.Nil(evals)

	// Two reschedule attempts were made, one more can be made now
	// Alloc 5 should not be replaced because it is terminal
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             1,
		inplace:           0,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:  1,
				Ignore: 3,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(2, 2), placeResultsToNames(r.place))
	assertPlaceResultsHavePreviousAllocs(t, 1, r.place)
	assertPlacementsAreRescheduled(t, 1, r.place)
}

// Tests delayed rescheduling of failed service allocations, with one alloc in
// desired state stop
func TestReconciler_RescheduleLater_Service(t *testing.T) {
	require := require.New(t)

	// Set desired 5
	job := mock.Job()
	job.TaskGroups[0].Count = 5
	tgName := job.TaskGroups[0].Name
	now := time.Now()

	// Set up reschedule policy
	delayDur := 15 * time.Second
	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{Attempts: 1, Interval: 24 * time.Hour, Delay: delayDur, MaxDelay: 1 * time.Hour}

	// Create 5 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 5; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
		alloc.ClientStatus = structs.AllocClientStatusRunning
	}

	// Mark two as failed
	allocs[0].ClientStatus = structs.AllocClientStatusFailed

	// Mark one of them as already rescheduled once
	allocs[0].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
		{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
			PrevAllocID: uuid.Generate(),
			PrevNodeID:  uuid.Generate(),
		},
	}}
	allocs[1].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
		StartedAt:  now.Add(-1 * time.Hour),
		FinishedAt: now}}
	allocs[1].ClientStatus = structs.AllocClientStatusFailed

	// Mark one as desired state stop
	allocs[4].DesiredStatus = structs.AllocDesiredStatusStop

	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, uuid.Generate())
	r := reconciler.Compute()

	// Should place a new placement and create a follow up eval for the delayed reschedule
	// Verify that the follow up eval has the expected waitUntil time
	evals := r.desiredFollowupEvals[tgName]
	require.NotNil(evals)
	require.Equal(1, len(evals))
	require.Equal(now.Add(delayDur), evals[0].WaitUntil)

	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             1,
		inplace:           0,
		attributeUpdates:  1,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:         1,
				InPlaceUpdate: 0,
				Ignore:        4,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(4, 4), placeResultsToNames(r.place))
	assertNamesHaveIndexes(t, intRange(1, 1), attributeUpdatesToNames(r.attributeUpdates))

	// Verify that the followup evalID field is set correctly
	var annotated *structs.Allocation
	for _, a := range r.attributeUpdates {
		annotated = a
	}
	require.Equal(evals[0].ID, annotated.FollowupEvalID)
}
  1533  
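        // Illustrative sketch (added for exposition): the test above expects the
        // follow-up eval's WaitUntil to equal the failed alloc's FinishedAt plus the
        // policy Delay. A minimal computation under that assumption, with a
        // hypothetical clamp to MaxDelay (the real scheduler also applies
        // DelayFunction backoff):
        func exampleFollowupWaitUntil(finishedAt time.Time, p *structs.ReschedulePolicy) time.Time {
        	delay := p.Delay
        	if p.MaxDelay > 0 && delay > p.MaxDelay {
        		delay = p.MaxDelay
        	}
        	return finishedAt.Add(delay)
        }
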
  1534  // Tests service allocations with client status complete
  1535  func TestReconciler_Service_ClientStatusComplete(t *testing.T) {
  1536  	// Set desired 5
  1537  	job := mock.Job()
  1538  	job.TaskGroups[0].Count = 5
  1539  
  1540  	// Set up reschedule policy
  1541  	delayDur := 15 * time.Second
  1542  	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
  1543  		Attempts: 1,
  1544  		Interval: 24 * time.Hour,
  1545  		Delay:    delayDur,
  1546  		MaxDelay: 1 * time.Hour,
  1547  	}
  1548  
  1549  	// Create 5 existing allocations
  1550  	var allocs []*structs.Allocation
  1551  	for i := 0; i < 5; i++ {
  1552  		alloc := mock.Alloc()
  1553  		alloc.Job = job
  1554  		alloc.JobID = job.ID
  1555  		alloc.NodeID = uuid.Generate()
  1556  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  1557  		allocs = append(allocs, alloc)
  1558  		alloc.ClientStatus = structs.AllocClientStatusRunning
  1559  		alloc.DesiredStatus = structs.AllocDesiredStatusRun
  1560  	}
  1561  
  1562  	// Mark one as client status complete
  1563  	allocs[4].ClientStatus = structs.AllocClientStatusComplete
  1564  
  1565  	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
  1566  	r := reconciler.Compute()
  1567  
  1568  	// Should place a new placement for the alloc that was marked complete
  1569  	assertResults(t, r, &resultExpectation{
  1570  		createDeployment:  nil,
  1571  		deploymentUpdates: nil,
  1572  		place:             1,
  1573  		inplace:           0,
  1574  		stop:              0,
  1575  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  1576  			job.TaskGroups[0].Name: {
  1577  				Place:         1,
  1578  				InPlaceUpdate: 0,
  1579  				Ignore:        4,
  1580  			},
  1581  		},
  1582  	})
  1583  
  1584  	assertNamesHaveIndexes(t, intRange(4, 4), placeResultsToNames(r.place))
  1585  
  1586  }
  1587  
  1588  // Tests service job placement for an alloc that failed with desired status stop
  1589  func TestReconciler_Service_DesiredStop_ClientStatusComplete(t *testing.T) {
  1590  	// Set desired 5
  1591  	job := mock.Job()
  1592  	job.TaskGroups[0].Count = 5
  1593  
  1594  	// Set up reschedule policy
  1595  	delayDur := 15 * time.Second
  1596  	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
  1597  		Attempts: 1,
  1598  		Interval: 24 * time.Hour,
  1599  		Delay:    delayDur,
  1600  		MaxDelay: 1 * time.Hour,
  1601  	}
  1602  
  1603  	// Create 5 existing allocations
  1604  	var allocs []*structs.Allocation
  1605  	for i := 0; i < 5; i++ {
  1606  		alloc := mock.Alloc()
  1607  		alloc.Job = job
  1608  		alloc.JobID = job.ID
  1609  		alloc.NodeID = uuid.Generate()
  1610  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  1611  		allocs = append(allocs, alloc)
  1612  		alloc.ClientStatus = structs.AllocClientStatusRunning
  1613  		alloc.DesiredStatus = structs.AllocDesiredStatusRun
  1614  	}
  1615  
  1616  	// Mark one as failed but with desired status stop
  1617  	// Should not trigger rescheduling logic but should trigger a placement
  1618  	allocs[4].ClientStatus = structs.AllocClientStatusFailed
  1619  	allocs[4].DesiredStatus = structs.AllocDesiredStatusStop
  1620  
  1621  	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
  1622  	r := reconciler.Compute()
  1623  
  1624  	// Should place a new placement for the alloc that was marked stopped
  1625  	assertResults(t, r, &resultExpectation{
  1626  		createDeployment:  nil,
  1627  		deploymentUpdates: nil,
  1628  		place:             1,
  1629  		inplace:           0,
  1630  		stop:              0,
  1631  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  1632  			job.TaskGroups[0].Name: {
  1633  				Place:         1,
  1634  				InPlaceUpdate: 0,
  1635  				Ignore:        4,
  1636  			},
  1637  		},
  1638  	})
  1639  
  1640  	assertNamesHaveIndexes(t, intRange(4, 4), placeResultsToNames(r.place))
  1641  
  1642  	// Should not have any follow up evals created
  1643  	require := require.New(t)
  1644  	require.Equal(0, len(r.desiredFollowupEvals))
  1645  }
  1646  
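        // Illustrative sketch (added for exposition): the two tests above separate
        // plain replacement from rescheduling. A failed alloc only enters the
        // reschedule path while it is still desired to run; desired-stop and
        // client-complete allocs are simply replaced to satisfy the group count.
        // A minimal predicate under that assumption:
        func exampleEligibleForReschedule(a *structs.Allocation) bool {
        	return a.ClientStatus == structs.AllocClientStatusFailed &&
        		a.DesiredStatus == structs.AllocDesiredStatusRun
        }
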
  1647  // Tests immediate rescheduling of failed service allocations; one alloc also has desired state stop
  1648  func TestReconciler_RescheduleNow_Service(t *testing.T) {
  1649  	require := require.New(t)
  1650  
  1651  	// Set desired 5
  1652  	job := mock.Job()
  1653  	job.TaskGroups[0].Count = 5
  1654  	tgName := job.TaskGroups[0].Name
  1655  	now := time.Now()
  1656  
  1657  	// Set up reschedule policy and update stanza
  1658  	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
  1659  		Attempts:      1,
  1660  		Interval:      24 * time.Hour,
  1661  		Delay:         5 * time.Second,
  1662  		DelayFunction: "",
  1663  		MaxDelay:      1 * time.Hour,
  1664  		Unlimited:     false,
  1665  	}
  1666  	job.TaskGroups[0].Update = noCanaryUpdate
  1667  
  1668  	// Create 5 existing allocations
  1669  	var allocs []*structs.Allocation
  1670  	for i := 0; i < 5; i++ {
  1671  		alloc := mock.Alloc()
  1672  		alloc.Job = job
  1673  		alloc.JobID = job.ID
  1674  		alloc.NodeID = uuid.Generate()
  1675  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  1676  		allocs = append(allocs, alloc)
  1677  		alloc.ClientStatus = structs.AllocClientStatusRunning
  1678  	}
  1679  
  1680  	// Mark two as failed
  1681  	allocs[0].ClientStatus = structs.AllocClientStatusFailed
  1682  
  1683  	// Mark one of them as already rescheduled once
  1684  	allocs[0].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
  1685  		{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
  1686  			PrevAllocID: uuid.Generate(),
  1687  			PrevNodeID:  uuid.Generate(),
  1688  		},
  1689  	}}
  1690  	allocs[1].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
  1691  		StartedAt:  now.Add(-1 * time.Hour),
  1692  		FinishedAt: now.Add(-10 * time.Second)}}
  1693  	allocs[1].ClientStatus = structs.AllocClientStatusFailed
  1694  
  1695  	// Mark one as desired state stop
  1696  	allocs[4].DesiredStatus = structs.AllocDesiredStatusStop
  1697  
  1698  	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
  1699  	r := reconciler.Compute()
  1700  
  1701  	// Verify that no follow up evals were created
  1702  	evals := r.desiredFollowupEvals[tgName]
  1703  	require.Nil(evals)
  1704  
  1705  	// Verify that one rescheduled alloc and one replacement for terminal alloc were placed
  1706  	assertResults(t, r, &resultExpectation{
  1707  		createDeployment:  nil,
  1708  		deploymentUpdates: nil,
  1709  		place:             2,
  1710  		inplace:           0,
  1711  		stop:              0,
  1712  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  1713  			job.TaskGroups[0].Name: {
  1714  				Place:  2,
  1715  				Ignore: 3,
  1716  			},
  1717  		},
  1718  	})
  1719  
  1720  	// Rescheduled allocs should have previous allocs
  1721  	assertNamesHaveIndexes(t, intRange(1, 1, 4, 4), placeResultsToNames(r.place))
  1722  	assertPlaceResultsHavePreviousAllocs(t, 1, r.place)
  1723  	assertPlacementsAreRescheduled(t, 1, r.place)
  1724  }
  1725  
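        // Illustrative sketch (added for exposition): the "reschedule now" behavior
        // above holds because the alloc finished more than Delay ago. A minimal
        // now-versus-later split, ignoring clock drift (covered by the next test):
        func exampleShouldRescheduleNow(finishedAt time.Time, p *structs.ReschedulePolicy, now time.Time) bool {
        	// Eligible immediately once the reschedule delay has fully elapsed;
        	// otherwise a follow-up eval with a WaitUntil time would be created.
        	return !now.Before(finishedAt.Add(p.Delay))
        }
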
  1726  // Tests rescheduling failed service allocations when there's clock drift (up to a second)
  1727  func TestReconciler_RescheduleNow_WithinAllowedTimeWindow(t *testing.T) {
  1728  	require := require.New(t)
  1729  
  1730  	// Set desired 5
  1731  	job := mock.Job()
  1732  	job.TaskGroups[0].Count = 5
  1733  	tgName := job.TaskGroups[0].Name
  1734  	now := time.Now()
  1735  
  1736  	// Set up reschedule policy and update stanza
  1737  	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
  1738  		Attempts:      1,
  1739  		Interval:      24 * time.Hour,
  1740  		Delay:         5 * time.Second,
  1741  		DelayFunction: "",
  1742  		MaxDelay:      1 * time.Hour,
  1743  		Unlimited:     false,
  1744  	}
  1745  	job.TaskGroups[0].Update = noCanaryUpdate
  1746  
  1747  	// Create 5 existing allocations
  1748  	var allocs []*structs.Allocation
  1749  	for i := 0; i < 5; i++ {
  1750  		alloc := mock.Alloc()
  1751  		alloc.Job = job
  1752  		alloc.JobID = job.ID
  1753  		alloc.NodeID = uuid.Generate()
  1754  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  1755  		allocs = append(allocs, alloc)
  1756  		alloc.ClientStatus = structs.AllocClientStatusRunning
  1757  	}
  1758  
  1759  	// Mark two as failed
  1760  	allocs[0].ClientStatus = structs.AllocClientStatusFailed
  1761  
  1762  	// Mark one of them as already rescheduled once
  1763  	allocs[0].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
  1764  		{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
  1765  			PrevAllocID: uuid.Generate(),
  1766  			PrevNodeID:  uuid.Generate(),
  1767  		},
  1768  	}}
  1769  	// Set fail time to 4 seconds ago which falls within the reschedule window
  1770  	allocs[1].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
  1771  		StartedAt:  now.Add(-1 * time.Hour),
  1772  		FinishedAt: now.Add(-4 * time.Second)}}
  1773  	allocs[1].ClientStatus = structs.AllocClientStatusFailed
  1774  
  1775  	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
  1776  	reconciler.now = now
  1777  	r := reconciler.Compute()
  1778  
  1779  	// Verify that no follow up evals were created
  1780  	evals := r.desiredFollowupEvals[tgName]
  1781  	require.Nil(evals)
  1782  
  1783  	// Verify that one rescheduled alloc was placed
  1784  	assertResults(t, r, &resultExpectation{
  1785  		createDeployment:  nil,
  1786  		deploymentUpdates: nil,
  1787  		place:             1,
  1788  		inplace:           0,
  1789  		stop:              0,
  1790  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  1791  			job.TaskGroups[0].Name: {
  1792  				Place:  1,
  1793  				Ignore: 4,
  1794  			},
  1795  		},
  1796  	})
  1797  
  1798  	// Rescheduled allocs should have previous allocs
  1799  	assertNamesHaveIndexes(t, intRange(1, 1), placeResultsToNames(r.place))
  1800  	assertPlaceResultsHavePreviousAllocs(t, 1, r.place)
  1801  	assertPlacementsAreRescheduled(t, 1, r.place)
  1802  }
  1803  
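        // Illustrative sketch (added for exposition): the test above tolerates up to
        // a second of clock drift, so eligibility cannot be a strict comparison. A
        // minimal variant of the previous sketch with that allowance:
        func exampleShouldRescheduleNowWithDrift(finishedAt time.Time, p *structs.ReschedulePolicy, now time.Time) bool {
        	// Treat an alloc whose remaining delay is within one second as ready now.
        	next := finishedAt.Add(p.Delay)
        	return next.Sub(now) <= time.Second
        }
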
  1804  // Tests rescheduling failed service allocations when the eval ID matches and there's a large clock drift
  1805  func TestReconciler_RescheduleNow_EvalIDMatch(t *testing.T) {
  1806  	require := require.New(t)
  1807  
  1808  	// Set desired 5
  1809  	job := mock.Job()
  1810  	job.TaskGroups[0].Count = 5
  1811  	tgName := job.TaskGroups[0].Name
  1812  	now := time.Now()
  1813  
  1814  	// Set up reschedule policy and update stanza
  1815  	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
  1816  		Attempts:      1,
  1817  		Interval:      24 * time.Hour,
  1818  		Delay:         5 * time.Second,
  1819  		DelayFunction: "",
  1820  		MaxDelay:      1 * time.Hour,
  1821  		Unlimited:     false,
  1822  	}
  1823  	job.TaskGroups[0].Update = noCanaryUpdate
  1824  
  1825  	// Create 5 existing allocations
  1826  	var allocs []*structs.Allocation
  1827  	for i := 0; i < 5; i++ {
  1828  		alloc := mock.Alloc()
  1829  		alloc.Job = job
  1830  		alloc.JobID = job.ID
  1831  		alloc.NodeID = uuid.Generate()
  1832  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  1833  		allocs = append(allocs, alloc)
  1834  		alloc.ClientStatus = structs.AllocClientStatusRunning
  1835  	}
  1836  
  1837  	// Mark two as failed
  1838  	allocs[0].ClientStatus = structs.AllocClientStatusFailed
  1839  
  1840  	// Mark one of them as already rescheduled once
  1841  	allocs[0].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
  1842  		{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
  1843  			PrevAllocID: uuid.Generate(),
  1844  			PrevNodeID:  uuid.Generate(),
  1845  		},
  1846  	}}
  1847  	// Set fail time to 5 seconds ago and eval ID
  1848  	evalID := uuid.Generate()
  1849  	allocs[1].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
  1850  		StartedAt:  now.Add(-1 * time.Hour),
  1851  		FinishedAt: now.Add(-5 * time.Second)}}
  1852  	allocs[1].ClientStatus = structs.AllocClientStatusFailed
  1853  	allocs[1].FollowupEvalID = evalID
  1854  
  1855  	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, evalID)
  1856  	reconciler.now = now.Add(-30 * time.Second)
  1857  	r := reconciler.Compute()
  1858  
  1859  	// Verify that no follow up evals were created
  1860  	evals := r.desiredFollowupEvals[tgName]
  1861  	require.Nil(evals)
  1862  
  1863  	// Verify that one rescheduled alloc was placed
  1864  	assertResults(t, r, &resultExpectation{
  1865  		createDeployment:  nil,
  1866  		deploymentUpdates: nil,
  1867  		place:             1,
  1868  		inplace:           0,
  1869  		stop:              0,
  1870  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  1871  			job.TaskGroups[0].Name: {
  1872  				Place:  1,
  1873  				Ignore: 4,
  1874  			},
  1875  		},
  1876  	})
  1877  
  1878  	// Rescheduled allocs should have previous allocs
  1879  	assertNamesHaveIndexes(t, intRange(1, 1), placeResultsToNames(r.place))
  1880  	assertPlaceResultsHavePreviousAllocs(t, 1, r.place)
  1881  	assertPlacementsAreRescheduled(t, 1, r.place)
  1882  }
  1883  
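        // Illustrative sketch (added for exposition): with large clock drift the time
        // comparison alone would defer the reschedule, so the test above relies on the
        // alloc's FollowupEvalID matching the eval currently being processed. A
        // minimal check under that assumption:
        func exampleFollowupEvalMatches(a *structs.Allocation, currentEvalID string) bool {
        	return currentEvalID != "" && a.FollowupEvalID == currentEvalID
        }
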
  1884  // Tests that failed service allocations that were already rescheduled are not rescheduled again
  1885  func TestReconciler_DontReschedule_PreviouslyRescheduled(t *testing.T) {
  1886  	// Set desired 5
  1887  	job := mock.Job()
  1888  	job.TaskGroups[0].Count = 5
  1889  
  1890  	// Set up reschedule policy
  1891  	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{Attempts: 5, Interval: 24 * time.Hour}
  1892  
  1893  	// Create 7 existing allocations
  1894  	var allocs []*structs.Allocation
  1895  	for i := 0; i < 7; i++ {
  1896  		alloc := mock.Alloc()
  1897  		alloc.Job = job
  1898  		alloc.JobID = job.ID
  1899  		alloc.NodeID = uuid.Generate()
  1900  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  1901  		allocs = append(allocs, alloc)
  1902  		alloc.ClientStatus = structs.AllocClientStatusRunning
  1903  	}
  1904  	// Mark two as failed and rescheduled
  1905  	allocs[0].ClientStatus = structs.AllocClientStatusFailed
  1906  	allocs[0].NextAllocation = allocs[1].ID
  1907  	allocs[1].ClientStatus = structs.AllocClientStatusFailed
  1908  	allocs[1].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
  1909  		{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
  1910  			PrevAllocID: uuid.Generate(),
  1911  			PrevNodeID:  uuid.Generate(),
  1912  		},
  1913  	}}
  1914  	allocs[1].NextAllocation = allocs[2].ID
  1915  
  1916  	// Mark one as desired state stop
  1917  	allocs[4].DesiredStatus = structs.AllocDesiredStatusStop
  1918  
  1919  	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
  1920  	r := reconciler.Compute()
  1921  
  1922  	// Should place 1 - a single new placement makes up the desired count of 5;
  1923  	// the previously rescheduled failed allocs are not rescheduled again
  1924  	assertResults(t, r, &resultExpectation{
  1925  		createDeployment:  nil,
  1926  		deploymentUpdates: nil,
  1927  		place:             1,
  1928  		inplace:           0,
  1929  		stop:              0,
  1930  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  1931  			job.TaskGroups[0].Name: {
  1932  				Place:  1,
  1933  				Ignore: 4,
  1934  			},
  1935  		},
  1936  	})
  1937  
  1938  	// Name index 0 is used for the replacement because it is the lowest index freed by a terminal alloc
  1939  	assertNamesHaveIndexes(t, intRange(0, 0), placeResultsToNames(r.place))
  1940  }
  1941  
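        // Illustrative sketch (added for exposition): an alloc that was already
        // rescheduled carries a NextAllocation link to its replacement, which is why
        // the failed allocs above are ignored rather than rescheduled again:
        func exampleAlreadyReplaced(a *structs.Allocation) bool {
        	return a.NextAllocation != ""
        }
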
  1942  // Tests the reconciler cancels an old deployment when the job is being stopped
  1943  func TestReconciler_CancelDeployment_JobStop(t *testing.T) {
  1944  	job := mock.Job()
  1945  	job.Stop = true
  1946  
  1947  	running := structs.NewDeployment(job)
  1948  	failed := structs.NewDeployment(job)
  1949  	failed.Status = structs.DeploymentStatusFailed
  1950  
  1951  	cases := []struct {
  1952  		name             string
  1953  		job              *structs.Job
  1954  		jobID, taskGroup string
  1955  		deployment       *structs.Deployment
  1956  		cancel           bool
  1957  	}{
  1958  		{
  1959  			name:       "stopped job, running deployment",
  1960  			job:        job,
  1961  			jobID:      job.ID,
  1962  			taskGroup:  job.TaskGroups[0].Name,
  1963  			deployment: running,
  1964  			cancel:     true,
  1965  		},
  1966  		{
  1967  			name:       "nil job, running deployment",
  1968  			job:        nil,
  1969  			jobID:      "foo",
  1970  			taskGroup:  "bar",
  1971  			deployment: running,
  1972  			cancel:     true,
  1973  		},
  1974  		{
  1975  			name:       "stopped job, failed deployment",
  1976  			job:        job,
  1977  			jobID:      job.ID,
  1978  			taskGroup:  job.TaskGroups[0].Name,
  1979  			deployment: failed,
  1980  			cancel:     false,
  1981  		},
  1982  		{
  1983  			name:       "nil job, failed deployment",
  1984  			job:        nil,
  1985  			jobID:      "foo",
  1986  			taskGroup:  "bar",
  1987  			deployment: failed,
  1988  			cancel:     false,
  1989  		},
  1990  	}
  1991  
  1992  	for _, c := range cases {
  1993  		t.Run(c.name, func(t *testing.T) {
  1994  			// Create 10 allocations
  1995  			var allocs []*structs.Allocation
  1996  			for i := 0; i < 10; i++ {
  1997  				alloc := mock.Alloc()
  1998  				alloc.Job = c.job
  1999  				alloc.JobID = c.jobID
  2000  				alloc.NodeID = uuid.Generate()
  2001  				alloc.Name = structs.AllocName(c.jobID, c.taskGroup, uint(i))
  2002  				alloc.TaskGroup = c.taskGroup
  2003  				allocs = append(allocs, alloc)
  2004  			}
  2005  
  2006  			reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, c.jobID, c.job, c.deployment, allocs, nil, "")
  2007  			r := reconciler.Compute()
  2008  
  2009  			var updates []*structs.DeploymentStatusUpdate
  2010  			if c.cancel {
  2011  				updates = []*structs.DeploymentStatusUpdate{
  2012  					{
  2013  						DeploymentID:      c.deployment.ID,
  2014  						Status:            structs.DeploymentStatusCancelled,
  2015  						StatusDescription: structs.DeploymentStatusDescriptionStoppedJob,
  2016  					},
  2017  				}
  2018  			}
  2019  
  2020  			// Assert the correct results
  2021  			assertResults(t, r, &resultExpectation{
  2022  				createDeployment:  nil,
  2023  				deploymentUpdates: updates,
  2024  				place:             0,
  2025  				inplace:           0,
  2026  				stop:              10,
  2027  				desiredTGUpdates: map[string]*structs.DesiredUpdates{
  2028  					c.taskGroup: {
  2029  						Stop: 10,
  2030  					},
  2031  				},
  2032  			})
  2033  
  2034  			assertNamesHaveIndexes(t, intRange(0, 9), stopResultsToNames(r.stop))
  2035  		})
  2036  	}
  2037  }
  2038  
  2039  // Tests the reconciler cancels an old deployment when the job is updated
  2040  func TestReconciler_CancelDeployment_JobUpdate(t *testing.T) {
  2041  	// Create a base job
  2042  	job := mock.Job()
  2043  
  2044  	// Create two deployments
  2045  	running := structs.NewDeployment(job)
  2046  	failed := structs.NewDeployment(job)
  2047  	failed.Status = structs.DeploymentStatusFailed
  2048  
  2049  	// Make the job newer than the deployment
  2050  	job.Version += 10
  2051  
  2052  	cases := []struct {
  2053  		name       string
  2054  		deployment *structs.Deployment
  2055  		cancel     bool
  2056  	}{
  2057  		{
  2058  			name:       "running deployment",
  2059  			deployment: running,
  2060  			cancel:     true,
  2061  		},
  2062  		{
  2063  			name:       "failed deployment",
  2064  			deployment: failed,
  2065  			cancel:     false,
  2066  		},
  2067  	}
  2068  
  2069  	for _, c := range cases {
  2070  		t.Run(c.name, func(t *testing.T) {
  2071  			// Create 10 allocations
  2072  			var allocs []*structs.Allocation
  2073  			for i := 0; i < 10; i++ {
  2074  				alloc := mock.Alloc()
  2075  				alloc.Job = job
  2076  				alloc.JobID = job.ID
  2077  				alloc.NodeID = uuid.Generate()
  2078  				alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2079  				alloc.TaskGroup = job.TaskGroups[0].Name
  2080  				allocs = append(allocs, alloc)
  2081  			}
  2082  
  2083  			reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, c.deployment, allocs, nil, "")
  2084  			r := reconciler.Compute()
  2085  
  2086  			var updates []*structs.DeploymentStatusUpdate
  2087  			if c.cancel {
  2088  				updates = []*structs.DeploymentStatusUpdate{
  2089  					{
  2090  						DeploymentID:      c.deployment.ID,
  2091  						Status:            structs.DeploymentStatusCancelled,
  2092  						StatusDescription: structs.DeploymentStatusDescriptionNewerJob,
  2093  					},
  2094  				}
  2095  			}
  2096  
  2097  			// Assert the correct results
  2098  			assertResults(t, r, &resultExpectation{
  2099  				createDeployment:  nil,
  2100  				deploymentUpdates: updates,
  2101  				place:             0,
  2102  				inplace:           0,
  2103  				stop:              0,
  2104  				desiredTGUpdates: map[string]*structs.DesiredUpdates{
  2105  					job.TaskGroups[0].Name: {
  2106  						Ignore: 10,
  2107  					},
  2108  				},
  2109  			})
  2110  		})
  2111  	}
  2112  }
  2113  
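        // Illustrative sketch (added for exposition): both cancellation tests above
        // expect a single status update that differs only in its description. A
        // minimal constructor mirroring those expectations:
        func exampleCancelDeploymentUpdate(d *structs.Deployment, jobStopped bool) *structs.DeploymentStatusUpdate {
        	desc := structs.DeploymentStatusDescriptionNewerJob
        	if jobStopped {
        		desc = structs.DeploymentStatusDescriptionStoppedJob
        	}
        	return &structs.DeploymentStatusUpdate{
        		DeploymentID:      d.ID,
        		Status:            structs.DeploymentStatusCancelled,
        		StatusDescription: desc,
        	}
        }
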
  2114  // Tests the reconciler creates a deployment and does a rolling upgrade with
  2115  // destructive changes
  2116  func TestReconciler_CreateDeployment_RollingUpgrade_Destructive(t *testing.T) {
  2117  	job := mock.Job()
  2118  	job.TaskGroups[0].Update = noCanaryUpdate
  2119  
  2120  	// Create 10 allocations from the old job
  2121  	var allocs []*structs.Allocation
  2122  	for i := 0; i < 10; i++ {
  2123  		alloc := mock.Alloc()
  2124  		alloc.Job = job
  2125  		alloc.JobID = job.ID
  2126  		alloc.NodeID = uuid.Generate()
  2127  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2128  		alloc.TaskGroup = job.TaskGroups[0].Name
  2129  		allocs = append(allocs, alloc)
  2130  	}
  2131  
  2132  	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "")
  2133  	r := reconciler.Compute()
  2134  
  2135  	d := structs.NewDeployment(job)
  2136  	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  2137  		DesiredTotal: 10,
  2138  	}
  2139  
  2140  	// Assert the correct results
  2141  	assertResults(t, r, &resultExpectation{
  2142  		createDeployment:  d,
  2143  		deploymentUpdates: nil,
  2144  		destructive:       4,
  2145  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  2146  			job.TaskGroups[0].Name: {
  2147  				DestructiveUpdate: 4,
  2148  				Ignore:            6,
  2149  			},
  2150  		},
  2151  	})
  2152  
  2153  	assertNamesHaveIndexes(t, intRange(0, 3), destructiveResultsToNames(r.destructiveUpdate))
  2154  }
  2155  
  2156  // Tests the reconciler creates a deployment for inplace updates
  2157  func TestReconciler_CreateDeployment_RollingUpgrade_Inplace(t *testing.T) {
  2158  	jobOld := mock.Job()
  2159  	job := jobOld.Copy()
  2160  	job.Version++
  2161  	job.TaskGroups[0].Update = noCanaryUpdate
  2162  
  2163  	// Create 10 allocations from the old job
  2164  	var allocs []*structs.Allocation
  2165  	for i := 0; i < 10; i++ {
  2166  		alloc := mock.Alloc()
  2167  		alloc.Job = jobOld
  2168  		alloc.JobID = job.ID
  2169  		alloc.NodeID = uuid.Generate()
  2170  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2171  		alloc.TaskGroup = job.TaskGroups[0].Name
  2172  		allocs = append(allocs, alloc)
  2173  	}
  2174  
  2175  	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnInplace, false, job.ID, job, nil, allocs, nil, "")
  2176  	r := reconciler.Compute()
  2177  
  2178  	d := structs.NewDeployment(job)
  2179  	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  2180  		DesiredTotal: 10,
  2181  	}
  2182  
  2183  	// Assert the correct results
  2184  	assertResults(t, r, &resultExpectation{
  2185  		createDeployment:  d,
  2186  		deploymentUpdates: nil,
  2187  		place:             0,
  2188  		inplace:           10,
  2189  		stop:              0,
  2190  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  2191  			job.TaskGroups[0].Name: {
  2192  				InPlaceUpdate: 10,
  2193  			},
  2194  		},
  2195  	})
  2196  }
  2197  
  2198  // Tests the reconciler doesn't create a deployment if there are no changes
  2199  func TestReconciler_DontCreateDeployment_NoChanges(t *testing.T) {
  2200  	job := mock.Job()
  2201  	job.TaskGroups[0].Update = noCanaryUpdate
  2202  
  2203  	// Create 10 allocations from the job
  2204  	var allocs []*structs.Allocation
  2205  	for i := 0; i < 10; i++ {
  2206  		alloc := mock.Alloc()
  2207  		alloc.Job = job
  2208  		alloc.JobID = job.ID
  2209  		alloc.NodeID = uuid.Generate()
  2210  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2211  		alloc.TaskGroup = job.TaskGroups[0].Name
  2212  		allocs = append(allocs, alloc)
  2213  	}
  2214  
  2215  	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
  2216  	r := reconciler.Compute()
  2217  
  2218  	// Assert the correct results
  2219  	assertResults(t, r, &resultExpectation{
  2220  		createDeployment:  nil,
  2221  		deploymentUpdates: nil,
  2222  		place:             0,
  2223  		inplace:           0,
  2224  		stop:              0,
  2225  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  2226  			job.TaskGroups[0].Name: {
  2227  				DestructiveUpdate: 0,
  2228  				Ignore:            10,
  2229  			},
  2230  		},
  2231  	})
  2232  }
  2233  
  2234  // Tests the reconciler doesn't place any more canaries when the deployment is
  2235  // paused or failed
  2236  func TestReconciler_PausedOrFailedDeployment_NoMoreCanaries(t *testing.T) {
  2237  	job := mock.Job()
  2238  	job.TaskGroups[0].Update = canaryUpdate
  2239  
  2240  	cases := []struct {
  2241  		name             string
  2242  		deploymentStatus string
  2243  		stop             uint64
  2244  	}{
  2245  		{
  2246  			name:             "paused deployment",
  2247  			deploymentStatus: structs.DeploymentStatusPaused,
  2248  			stop:             0,
  2249  		},
  2250  		{
  2251  			name:             "failed deployment",
  2252  			deploymentStatus: structs.DeploymentStatusFailed,
  2253  			stop:             1,
  2254  		},
  2255  	}
  2256  
  2257  	for _, c := range cases {
  2258  		t.Run(c.name, func(t *testing.T) {
  2259  			// Create a deployment that is paused/failed and has placed some canaries
  2260  			d := structs.NewDeployment(job)
  2261  			d.Status = c.deploymentStatus
  2262  			d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  2263  				Promoted:        false,
  2264  				DesiredCanaries: 2,
  2265  				DesiredTotal:    10,
  2266  				PlacedAllocs:    1,
  2267  			}
  2268  
  2269  			// Create 10 allocations for the original job
  2270  			var allocs []*structs.Allocation
  2271  			for i := 0; i < 10; i++ {
  2272  				alloc := mock.Alloc()
  2273  				alloc.Job = job
  2274  				alloc.JobID = job.ID
  2275  				alloc.NodeID = uuid.Generate()
  2276  				alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2277  				alloc.TaskGroup = job.TaskGroups[0].Name
  2278  				allocs = append(allocs, alloc)
  2279  			}
  2280  
  2281  			// Create one canary
  2282  			canary := mock.Alloc()
  2283  			canary.Job = job
  2284  			canary.JobID = job.ID
  2285  			canary.NodeID = uuid.Generate()
  2286  			canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, 0)
  2287  			canary.TaskGroup = job.TaskGroups[0].Name
  2288  			canary.DeploymentID = d.ID
  2289  			allocs = append(allocs, canary)
  2290  			d.TaskGroups[canary.TaskGroup].PlacedCanaries = []string{canary.ID}
  2291  
  2292  			mockUpdateFn := allocUpdateFnMock(map[string]allocUpdateType{canary.ID: allocUpdateFnIgnore}, allocUpdateFnDestructive)
  2293  			reconciler := NewAllocReconciler(testLogger(), mockUpdateFn, false, job.ID, job, d, allocs, nil, "")
  2294  			r := reconciler.Compute()
  2295  
  2296  			// Assert the correct results
  2297  			assertResults(t, r, &resultExpectation{
  2298  				createDeployment:  nil,
  2299  				deploymentUpdates: nil,
  2300  				place:             0,
  2301  				inplace:           0,
  2302  				stop:              int(c.stop),
  2303  				desiredTGUpdates: map[string]*structs.DesiredUpdates{
  2304  					job.TaskGroups[0].Name: {
  2305  						Ignore: 11 - c.stop,
  2306  						Stop:   c.stop,
  2307  					},
  2308  				},
  2309  			})
  2310  		})
  2311  	}
  2312  }
  2313  
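        // Illustrative sketch (added for exposition): the paused and failed cases
        // above share one gate - no new canaries or placements while the deployment
        // is in either state. A minimal predicate over the fields these tests use:
        func exampleDeploymentBlocksPlacement(d *structs.Deployment) bool {
        	if d == nil {
        		return false
        	}
        	return d.Status == structs.DeploymentStatusPaused ||
        		d.Status == structs.DeploymentStatusFailed
        }
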
  2314  // Tests the reconciler doesn't place any more allocs when the deployment is
  2315  // paused or failed
  2316  func TestReconciler_PausedOrFailedDeployment_NoMorePlacements(t *testing.T) {
  2317  	job := mock.Job()
  2318  	job.TaskGroups[0].Update = noCanaryUpdate
  2319  	job.TaskGroups[0].Count = 15
  2320  
  2321  	cases := []struct {
  2322  		name             string
  2323  		deploymentStatus string
  2324  	}{
  2325  		{
  2326  			name:             "paused deployment",
  2327  			deploymentStatus: structs.DeploymentStatusPaused,
  2328  		},
  2329  		{
  2330  			name:             "failed deployment",
  2331  			deploymentStatus: structs.DeploymentStatusFailed,
  2332  		},
  2333  	}
  2334  
  2335  	for _, c := range cases {
  2336  		t.Run(c.name, func(t *testing.T) {
  2337  			// Create a deployment that is paused/failed and has placed some allocs
  2338  			d := structs.NewDeployment(job)
  2339  			d.Status = c.deploymentStatus
  2340  			d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  2341  				Promoted:     false,
  2342  				DesiredTotal: 15,
  2343  				PlacedAllocs: 10,
  2344  			}
  2345  
  2346  			// Create 10 allocations for the new job
  2347  			var allocs []*structs.Allocation
  2348  			for i := 0; i < 10; i++ {
  2349  				alloc := mock.Alloc()
  2350  				alloc.Job = job
  2351  				alloc.JobID = job.ID
  2352  				alloc.NodeID = uuid.Generate()
  2353  				alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2354  				alloc.TaskGroup = job.TaskGroups[0].Name
  2355  				allocs = append(allocs, alloc)
  2356  			}
  2357  
  2358  			reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, d, allocs, nil, "")
  2359  			r := reconciler.Compute()
  2360  
  2361  			// Assert the correct results
  2362  			assertResults(t, r, &resultExpectation{
  2363  				createDeployment:  nil,
  2364  				deploymentUpdates: nil,
  2365  				place:             0,
  2366  				inplace:           0,
  2367  				stop:              0,
  2368  				desiredTGUpdates: map[string]*structs.DesiredUpdates{
  2369  					job.TaskGroups[0].Name: {
  2370  						Ignore: 10,
  2371  					},
  2372  				},
  2373  			})
  2374  		})
  2375  	}
  2376  }
  2377  
  2378  // Tests the reconciler doesn't do any more destructive updates when the
  2379  // deployment is paused or failed
  2380  func TestReconciler_PausedOrFailedDeployment_NoMoreDestructiveUpdates(t *testing.T) {
  2381  	job := mock.Job()
  2382  	job.TaskGroups[0].Update = noCanaryUpdate
  2383  
  2384  	cases := []struct {
  2385  		name             string
  2386  		deploymentStatus string
  2387  	}{
  2388  		{
  2389  			name:             "paused deployment",
  2390  			deploymentStatus: structs.DeploymentStatusPaused,
  2391  		},
  2392  		{
  2393  			name:             "failed deployment",
  2394  			deploymentStatus: structs.DeploymentStatusFailed,
  2395  		},
  2396  	}
  2397  
  2398  	for _, c := range cases {
  2399  		t.Run(c.name, func(t *testing.T) {
  2400  			// Create a deployment that is paused/failed and has placed one alloc from the new job
  2401  			d := structs.NewDeployment(job)
  2402  			d.Status = c.deploymentStatus
  2403  			d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  2404  				Promoted:     false,
  2405  				DesiredTotal: 10,
  2406  				PlacedAllocs: 1,
  2407  			}
  2408  
  2409  			// Create 9 allocations for the original job
  2410  			var allocs []*structs.Allocation
  2411  			for i := 1; i < 10; i++ {
  2412  				alloc := mock.Alloc()
  2413  				alloc.Job = job
  2414  				alloc.JobID = job.ID
  2415  				alloc.NodeID = uuid.Generate()
  2416  				alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2417  				alloc.TaskGroup = job.TaskGroups[0].Name
  2418  				allocs = append(allocs, alloc)
  2419  			}
  2420  
  2421  			// Create one for the new job
  2422  			newAlloc := mock.Alloc()
  2423  			newAlloc.Job = job
  2424  			newAlloc.JobID = job.ID
  2425  			newAlloc.NodeID = uuid.Generate()
  2426  			newAlloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, 0)
  2427  			newAlloc.TaskGroup = job.TaskGroups[0].Name
  2428  			newAlloc.DeploymentID = d.ID
  2429  			allocs = append(allocs, newAlloc)
  2430  
  2431  			mockUpdateFn := allocUpdateFnMock(map[string]allocUpdateType{newAlloc.ID: allocUpdateFnIgnore}, allocUpdateFnDestructive)
  2432  			reconciler := NewAllocReconciler(testLogger(), mockUpdateFn, false, job.ID, job, d, allocs, nil, "")
  2433  			r := reconciler.Compute()
  2434  
  2435  			// Assert the correct results
  2436  			assertResults(t, r, &resultExpectation{
  2437  				createDeployment:  nil,
  2438  				deploymentUpdates: nil,
  2439  				place:             0,
  2440  				inplace:           0,
  2441  				stop:              0,
  2442  				desiredTGUpdates: map[string]*structs.DesiredUpdates{
  2443  					job.TaskGroups[0].Name: {
  2444  						Ignore: 10,
  2445  					},
  2446  				},
  2447  			})
  2448  		})
  2449  	}
  2450  }
  2451  
  2452  // Tests the reconciler handles migrations correctly when a deployment is paused
  2453  // or failed
  2454  func TestReconciler_PausedOrFailedDeployment_Migrations(t *testing.T) {
  2455  	job := mock.Job()
  2456  	job.TaskGroups[0].Update = noCanaryUpdate
  2457  
  2458  	cases := []struct {
  2459  		name              string
  2460  		deploymentStatus  string
  2461  		place             int
  2462  		stop              int
  2463  		ignoreAnnotation  uint64
  2464  		migrateAnnotation uint64
  2465  		stopAnnotation    uint64
  2466  	}{
  2467  		{
  2468  			name:             "paused deployment",
  2469  			deploymentStatus: structs.DeploymentStatusPaused,
  2470  			place:            0,
  2471  			stop:             3,
  2472  			ignoreAnnotation: 5,
  2473  			stopAnnotation:   3,
  2474  		},
  2475  		{
  2476  			name:              "failed deployment",
  2477  			deploymentStatus:  structs.DeploymentStatusFailed,
  2478  			place:             0,
  2479  			stop:              3,
  2480  			ignoreAnnotation:  5,
  2481  			migrateAnnotation: 0,
  2482  			stopAnnotation:    3,
  2483  		},
  2484  	}
  2485  
  2486  	for _, c := range cases {
  2487  		t.Run(c.name, func(t *testing.T) {
  2488  			// Create a deployment that is paused/failed and has placed some allocs
  2489  			d := structs.NewDeployment(job)
  2490  			d.Status = c.deploymentStatus
  2491  			d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  2492  				Promoted:     false,
  2493  				DesiredTotal: 10,
  2494  				PlacedAllocs: 8,
  2495  			}
  2496  
  2497  			// Create 8 allocations in the deployment
  2498  			var allocs []*structs.Allocation
  2499  			for i := 0; i < 8; i++ {
  2500  				alloc := mock.Alloc()
  2501  				alloc.Job = job
  2502  				alloc.JobID = job.ID
  2503  				alloc.NodeID = uuid.Generate()
  2504  				alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2505  				alloc.TaskGroup = job.TaskGroups[0].Name
  2506  				alloc.DeploymentID = d.ID
  2507  				allocs = append(allocs, alloc)
  2508  			}
  2509  
  2510  			// Build a map of tainted nodes
  2511  			tainted := make(map[string]*structs.Node, 3)
  2512  			for i := 0; i < 3; i++ {
  2513  				n := mock.Node()
  2514  				n.ID = allocs[i].NodeID
  2515  				allocs[i].DesiredTransition.Migrate = helper.BoolToPtr(true)
  2516  				n.Drain = true
  2517  				tainted[n.ID] = n
  2518  			}
  2519  
  2520  			reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, d, allocs, tainted, "")
  2521  			r := reconciler.Compute()
  2522  
  2523  			// Assert the correct results
  2524  			assertResults(t, r, &resultExpectation{
  2525  				createDeployment:  nil,
  2526  				deploymentUpdates: nil,
  2527  				place:             c.place,
  2528  				inplace:           0,
  2529  				stop:              c.stop,
  2530  				desiredTGUpdates: map[string]*structs.DesiredUpdates{
  2531  					job.TaskGroups[0].Name: {
  2532  						Migrate: c.migrateAnnotation,
  2533  						Ignore:  c.ignoreAnnotation,
  2534  						Stop:    c.stopAnnotation,
  2535  					},
  2536  				},
  2537  			})
  2538  		})
  2539  	}
  2540  }
  2541  
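        // Illustrative sketch (added for exposition): the tests above and below build
        // tainted nodes two ways - draining (allocs migrate) and down (allocs are
        // lost and replaced). A minimal classification over the node fields they set:
        func exampleTaintedNodeDisposition(n *structs.Node) string {
        	switch {
        	case n.Status == structs.NodeStatusDown:
        		return "lost" // replaced, not migrated
        	case n.Drain:
        		return "migrate" // moved off the draining node
        	default:
        		return "healthy"
        	}
        }
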
  2542  // Tests the reconciler handles migrating a canary correctly on a draining node
  2543  func TestReconciler_DrainNode_Canary(t *testing.T) {
  2544  	job := mock.Job()
  2545  	job.TaskGroups[0].Update = canaryUpdate
  2546  
  2547  	// Create a deployment that has placed some canaries
  2548  	d := structs.NewDeployment(job)
  2549  	s := &structs.DeploymentState{
  2550  		Promoted:        false,
  2551  		DesiredTotal:    10,
  2552  		DesiredCanaries: 2,
  2553  		PlacedAllocs:    2,
  2554  	}
  2555  	d.TaskGroups[job.TaskGroups[0].Name] = s
  2556  
  2557  	// Create 10 allocations from the old job
  2558  	var allocs []*structs.Allocation
  2559  	for i := 0; i < 10; i++ {
  2560  		alloc := mock.Alloc()
  2561  		alloc.Job = job
  2562  		alloc.JobID = job.ID
  2563  		alloc.NodeID = uuid.Generate()
  2564  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2565  		alloc.TaskGroup = job.TaskGroups[0].Name
  2566  		allocs = append(allocs, alloc)
  2567  	}
  2568  
  2569  	// Create two canaries for the new job
  2570  	handled := make(map[string]allocUpdateType)
  2571  	for i := 0; i < 2; i++ {
  2572  		// Create one canary
  2573  		canary := mock.Alloc()
  2574  		canary.Job = job
  2575  		canary.JobID = job.ID
  2576  		canary.NodeID = uuid.Generate()
  2577  		canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2578  		canary.TaskGroup = job.TaskGroups[0].Name
  2579  		canary.DeploymentID = d.ID
  2580  		s.PlacedCanaries = append(s.PlacedCanaries, canary.ID)
  2581  		allocs = append(allocs, canary)
  2582  		handled[canary.ID] = allocUpdateFnIgnore
  2583  	}
  2584  
  2585  	// Build a map of tainted nodes that contains the last canary
  2586  	tainted := make(map[string]*structs.Node, 1)
  2587  	n := mock.Node()
  2588  	n.ID = allocs[11].NodeID
  2589  	allocs[11].DesiredTransition.Migrate = helper.BoolToPtr(true)
  2590  	n.Drain = true
  2591  	tainted[n.ID] = n
  2592  
  2593  	mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive)
  2594  	reconciler := NewAllocReconciler(testLogger(), mockUpdateFn, false, job.ID, job, d, allocs, tainted, "")
  2595  	r := reconciler.Compute()
  2596  
  2597  	// Assert the correct results
  2598  	assertResults(t, r, &resultExpectation{
  2599  		createDeployment:  nil,
  2600  		deploymentUpdates: nil,
  2601  		place:             1,
  2602  		inplace:           0,
  2603  		stop:              1,
  2604  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  2605  			job.TaskGroups[0].Name: {
  2606  				Canary: 1,
  2607  				Ignore: 11,
  2608  			},
  2609  		},
  2610  	})
  2611  	assertNamesHaveIndexes(t, intRange(1, 1), stopResultsToNames(r.stop))
  2612  	assertNamesHaveIndexes(t, intRange(1, 1), placeResultsToNames(r.place))
  2613  }
  2614  
  2615  // Tests the reconciler handles replacing a canary correctly on a lost node
  2616  func TestReconciler_LostNode_Canary(t *testing.T) {
  2617  	job := mock.Job()
  2618  	job.TaskGroups[0].Update = canaryUpdate
  2619  
  2620  	// Create a deployment that has placed some canaries
  2621  	d := structs.NewDeployment(job)
  2622  	s := &structs.DeploymentState{
  2623  		Promoted:        false,
  2624  		DesiredTotal:    10,
  2625  		DesiredCanaries: 2,
  2626  		PlacedAllocs:    2,
  2627  	}
  2628  	d.TaskGroups[job.TaskGroups[0].Name] = s
  2629  
  2630  	// Create 10 allocations from the old job
  2631  	var allocs []*structs.Allocation
  2632  	for i := 0; i < 10; i++ {
  2633  		alloc := mock.Alloc()
  2634  		alloc.Job = job
  2635  		alloc.JobID = job.ID
  2636  		alloc.NodeID = uuid.Generate()
  2637  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2638  		alloc.TaskGroup = job.TaskGroups[0].Name
  2639  		allocs = append(allocs, alloc)
  2640  	}
  2641  
  2642  	// Create two canaries for the new job
  2643  	handled := make(map[string]allocUpdateType)
  2644  	for i := 0; i < 2; i++ {
  2645  		// Create one canary
  2646  		canary := mock.Alloc()
  2647  		canary.Job = job
  2648  		canary.JobID = job.ID
  2649  		canary.NodeID = uuid.Generate()
  2650  		canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2651  		canary.TaskGroup = job.TaskGroups[0].Name
  2652  		s.PlacedCanaries = append(s.PlacedCanaries, canary.ID)
  2653  		canary.DeploymentID = d.ID
  2654  		allocs = append(allocs, canary)
  2655  		handled[canary.ID] = allocUpdateFnIgnore
  2656  	}
  2657  
  2658  	// Build a map of tainted nodes that contains the last canary
  2659  	tainted := make(map[string]*structs.Node, 1)
  2660  	n := mock.Node()
  2661  	n.ID = allocs[11].NodeID
  2662  	n.Status = structs.NodeStatusDown
  2663  	tainted[n.ID] = n
  2664  
  2665  	mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive)
  2666  	reconciler := NewAllocReconciler(testLogger(), mockUpdateFn, false, job.ID, job, d, allocs, tainted, "")
  2667  	r := reconciler.Compute()
  2668  
  2669  	// Assert the correct results
  2670  	assertResults(t, r, &resultExpectation{
  2671  		createDeployment:  nil,
  2672  		deploymentUpdates: nil,
  2673  		place:             1,
  2674  		inplace:           0,
  2675  		stop:              1,
  2676  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  2677  			job.TaskGroups[0].Name: {
  2678  				Canary: 1,
  2679  				Ignore: 11,
  2680  			},
  2681  		},
  2682  	})
  2683  
  2684  	assertNamesHaveIndexes(t, intRange(1, 1), stopResultsToNames(r.stop))
  2685  	assertNamesHaveIndexes(t, intRange(1, 1), placeResultsToNames(r.place))
  2686  }
  2687  
  2688  // Tests the reconciler handles stopping canaries from older deployments
  2689  func TestReconciler_StopOldCanaries(t *testing.T) {
  2690  	job := mock.Job()
  2691  	job.TaskGroups[0].Update = canaryUpdate
  2692  
  2693  	// Create an old deployment that has placed some canaries
  2694  	d := structs.NewDeployment(job)
  2695  	s := &structs.DeploymentState{
  2696  		Promoted:        false,
  2697  		DesiredTotal:    10,
  2698  		DesiredCanaries: 2,
  2699  		PlacedAllocs:    2,
  2700  	}
  2701  	d.TaskGroups[job.TaskGroups[0].Name] = s
  2702  
  2703  	// Update the job
  2704  	job.Version += 10
  2705  
  2706  	// Create 10 allocations from the old job
  2707  	var allocs []*structs.Allocation
  2708  	for i := 0; i < 10; i++ {
  2709  		alloc := mock.Alloc()
  2710  		alloc.Job = job
  2711  		alloc.JobID = job.ID
  2712  		alloc.NodeID = uuid.Generate()
  2713  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2714  		alloc.TaskGroup = job.TaskGroups[0].Name
  2715  		allocs = append(allocs, alloc)
  2716  	}
  2717  
  2718  	// Create canaries
  2719  	for i := 0; i < 2; i++ {
  2720  		// Create one canary
  2721  		canary := mock.Alloc()
  2722  		canary.Job = job
  2723  		canary.JobID = job.ID
  2724  		canary.NodeID = uuid.Generate()
  2725  		canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2726  		canary.TaskGroup = job.TaskGroups[0].Name
  2727  		s.PlacedCanaries = append(s.PlacedCanaries, canary.ID)
  2728  		canary.DeploymentID = d.ID
  2729  		allocs = append(allocs, canary)
  2730  	}
  2731  
  2732  	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, job, d, allocs, nil, "")
  2733  	r := reconciler.Compute()
  2734  
  2735  	newD := structs.NewDeployment(job)
  2736  	newD.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion
  2737  	newD.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  2738  		DesiredCanaries: 2,
  2739  		DesiredTotal:    10,
  2740  	}
  2741  
  2742  	// Assert the correct results
  2743  	assertResults(t, r, &resultExpectation{
  2744  		createDeployment: newD,
  2745  		deploymentUpdates: []*structs.DeploymentStatusUpdate{
  2746  			{
  2747  				DeploymentID:      d.ID,
  2748  				Status:            structs.DeploymentStatusCancelled,
  2749  				StatusDescription: structs.DeploymentStatusDescriptionNewerJob,
  2750  			},
  2751  		},
  2752  		place:   2,
  2753  		inplace: 0,
  2754  		stop:    2,
  2755  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  2756  			job.TaskGroups[0].Name: {
  2757  				Canary: 2,
  2758  				Stop:   2,
  2759  				Ignore: 10,
  2760  			},
  2761  		},
  2762  	})
  2763  
  2764  	assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop))
  2765  	assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place))
  2766  }
  2767  
  2768  // Tests the reconciler creates new canaries when the job changes
  2769  func TestReconciler_NewCanaries(t *testing.T) {
  2770  	job := mock.Job()
  2771  	job.TaskGroups[0].Update = canaryUpdate
  2772  
  2773  	// Create 10 allocations from the old job
  2774  	var allocs []*structs.Allocation
  2775  	for i := 0; i < 10; i++ {
  2776  		alloc := mock.Alloc()
  2777  		alloc.Job = job
  2778  		alloc.JobID = job.ID
  2779  		alloc.NodeID = uuid.Generate()
  2780  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2781  		alloc.TaskGroup = job.TaskGroups[0].Name
  2782  		allocs = append(allocs, alloc)
  2783  	}
  2784  
  2785  	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "")
  2786  	r := reconciler.Compute()
  2787  
  2788  	newD := structs.NewDeployment(job)
  2789  	newD.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion
  2790  	newD.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  2791  		DesiredCanaries: 2,
  2792  		DesiredTotal:    10,
  2793  	}
  2794  
  2795  	// Assert the correct results
  2796  	assertResults(t, r, &resultExpectation{
  2797  		createDeployment:  newD,
  2798  		deploymentUpdates: nil,
  2799  		place:             2,
  2800  		inplace:           0,
  2801  		stop:              0,
  2802  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  2803  			job.TaskGroups[0].Name: {
  2804  				Canary: 2,
  2805  				Ignore: 10,
  2806  			},
  2807  		},
  2808  	})
  2809  
  2810  	assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place))
  2811  }
  2812  
  2813  // Tests the reconciler creates new canaries when the job changes for multiple
  2814  // task groups
  2815  func TestReconciler_NewCanaries_MultiTG(t *testing.T) {
  2816  	job := mock.Job()
  2817  	job.TaskGroups[0].Update = canaryUpdate
  2818  	job.TaskGroups = append(job.TaskGroups, job.TaskGroups[0].Copy())
  2819  	job.TaskGroups[0].Name = "tg2"
  2820  
  2821  	// Create 10 allocations from the old job for each tg
  2822  	var allocs []*structs.Allocation
  2823  	for j := 0; j < 2; j++ {
  2824  		for i := 0; i < 10; i++ {
  2825  			alloc := mock.Alloc()
  2826  			alloc.Job = job
  2827  			alloc.JobID = job.ID
  2828  			alloc.NodeID = uuid.Generate()
  2829  			alloc.Name = structs.AllocName(job.ID, job.TaskGroups[j].Name, uint(i))
  2830  			alloc.TaskGroup = job.TaskGroups[j].Name
  2831  			allocs = append(allocs, alloc)
  2832  		}
  2833  	}
  2834  
  2835  	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "")
  2836  	r := reconciler.Compute()
  2837  
  2838  	newD := structs.NewDeployment(job)
  2839  	newD.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion
  2840  	state := &structs.DeploymentState{
  2841  		DesiredCanaries: 2,
  2842  		DesiredTotal:    10,
  2843  	}
  2844  	newD.TaskGroups[job.TaskGroups[0].Name] = state
  2845  	newD.TaskGroups[job.TaskGroups[1].Name] = state.Copy()
  2846  
  2847  	// Assert the correct results
  2848  	assertResults(t, r, &resultExpectation{
  2849  		createDeployment:  newD,
  2850  		deploymentUpdates: nil,
  2851  		place:             4,
  2852  		inplace:           0,
  2853  		stop:              0,
  2854  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  2855  			job.TaskGroups[0].Name: {
  2856  				Canary: 2,
  2857  				Ignore: 10,
  2858  			},
  2859  			job.TaskGroups[1].Name: {
  2860  				Canary: 2,
  2861  				Ignore: 10,
  2862  			},
  2863  		},
  2864  	})
  2865  
  2866  	assertNamesHaveIndexes(t, intRange(0, 1, 0, 1), placeResultsToNames(r.place))
  2867  }
  2868  
  2869  // Tests the reconciler creates new canaries when the job changes and scales up
  2870  func TestReconciler_NewCanaries_ScaleUp(t *testing.T) {
  2871  	// Scale the job up to 15
  2872  	job := mock.Job()
  2873  	job.TaskGroups[0].Update = canaryUpdate
  2874  	job.TaskGroups[0].Count = 15
  2875  
  2876  	// Create 10 allocations from the old job
  2877  	var allocs []*structs.Allocation
  2878  	for i := 0; i < 10; i++ {
  2879  		alloc := mock.Alloc()
  2880  		alloc.Job = job
  2881  		alloc.JobID = job.ID
  2882  		alloc.NodeID = uuid.Generate()
  2883  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2884  		alloc.TaskGroup = job.TaskGroups[0].Name
  2885  		allocs = append(allocs, alloc)
  2886  	}
  2887  
  2888  	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "")
  2889  	r := reconciler.Compute()
  2890  
  2891  	newD := structs.NewDeployment(job)
  2892  	newD.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion
  2893  	newD.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  2894  		DesiredCanaries: 2,
  2895  		DesiredTotal:    15,
  2896  	}
  2897  
  2898  	// Assert the correct results
  2899  	assertResults(t, r, &resultExpectation{
  2900  		createDeployment:  newD,
  2901  		deploymentUpdates: nil,
  2902  		place:             2,
  2903  		inplace:           0,
  2904  		stop:              0,
  2905  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  2906  			job.TaskGroups[0].Name: {
  2907  				Canary: 2,
  2908  				Ignore: 10,
  2909  			},
  2910  		},
  2911  	})
  2912  
  2913  	assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place))
  2914  }
  2915  
  2916  // Tests the reconciler creates new canaries when the job changes and scales
  2917  // down
  2918  func TestReconciler_NewCanaries_ScaleDown(t *testing.T) {
  2919  	// Scale the job down to 5
  2920  	job := mock.Job()
  2921  	job.TaskGroups[0].Update = canaryUpdate
  2922  	job.TaskGroups[0].Count = 5
  2923  
  2924  	// Create 10 allocations from the old job
  2925  	var allocs []*structs.Allocation
  2926  	for i := 0; i < 10; i++ {
  2927  		alloc := mock.Alloc()
  2928  		alloc.Job = job
  2929  		alloc.JobID = job.ID
  2930  		alloc.NodeID = uuid.Generate()
  2931  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2932  		alloc.TaskGroup = job.TaskGroups[0].Name
  2933  		allocs = append(allocs, alloc)
  2934  	}
  2935  
  2936  	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "")
  2937  	r := reconciler.Compute()
  2938  
  2939  	newD := structs.NewDeployment(job)
  2940  	newD.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion
  2941  	newD.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  2942  		DesiredCanaries: 2,
  2943  		DesiredTotal:    5,
  2944  	}
  2945  
  2946  	// Assert the correct results
  2947  	assertResults(t, r, &resultExpectation{
  2948  		createDeployment:  newD,
  2949  		deploymentUpdates: nil,
  2950  		place:             2,
  2951  		inplace:           0,
  2952  		stop:              5,
  2953  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  2954  			job.TaskGroups[0].Name: {
  2955  				Canary: 2,
  2956  				Stop:   5,
  2957  				Ignore: 5,
  2958  			},
  2959  		},
  2960  	})
  2961  
  2962  	assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place))
  2963  	assertNamesHaveIndexes(t, intRange(5, 9), stopResultsToNames(r.stop))
  2964  }
  2965  
  2966  // Tests the reconciler handles filling the names of partially placed canaries
  2967  func TestReconciler_NewCanaries_FillNames(t *testing.T) {
  2968  	job := mock.Job()
  2969  	job.TaskGroups[0].Update = &structs.UpdateStrategy{
  2970  		Canary:          4,
  2971  		MaxParallel:     2,
  2972  		HealthCheck:     structs.UpdateStrategyHealthCheck_Checks,
  2973  		MinHealthyTime:  10 * time.Second,
  2974  		HealthyDeadline: 10 * time.Minute,
  2975  	}
  2976  
  2977  	// Create an existing deployment that has placed some canaries
  2978  	d := structs.NewDeployment(job)
  2979  	s := &structs.DeploymentState{
  2980  		Promoted:        false,
  2981  		DesiredTotal:    10,
  2982  		DesiredCanaries: 4,
  2983  		PlacedAllocs:    2,
  2984  	}
  2985  	d.TaskGroups[job.TaskGroups[0].Name] = s
  2986  
  2987  	// Create 10 allocations from the old job
  2988  	var allocs []*structs.Allocation
  2989  	for i := 0; i < 10; i++ {
  2990  		alloc := mock.Alloc()
  2991  		alloc.Job = job
  2992  		alloc.JobID = job.ID
  2993  		alloc.NodeID = uuid.Generate()
  2994  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2995  		alloc.TaskGroup = job.TaskGroups[0].Name
  2996  		allocs = append(allocs, alloc)
  2997  	}
  2998  
  2999  	// Create canaries but pick names at the ends
  3000  	for i := 0; i < 4; i += 3 {
  3001  		// Create one canary
  3002  		canary := mock.Alloc()
  3003  		canary.Job = job
  3004  		canary.JobID = job.ID
  3005  		canary.NodeID = uuid.Generate()
  3006  		canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3007  		canary.TaskGroup = job.TaskGroups[0].Name
  3008  		s.PlacedCanaries = append(s.PlacedCanaries, canary.ID)
  3009  		canary.DeploymentID = d.ID
  3010  		allocs = append(allocs, canary)
  3011  	}
  3012  
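        	// The existing canaries hold name indexes 0 and 3, so filling the
        	// canary count of 4 should place the missing names at indexes 1 and 2.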
  3013  	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, job, d, allocs, nil, "")
  3014  	r := reconciler.Compute()
  3015  
  3016  	// Assert the correct results
  3017  	assertResults(t, r, &resultExpectation{
  3018  		createDeployment:  nil,
  3019  		deploymentUpdates: nil,
  3020  		place:             2,
  3021  		inplace:           0,
  3022  		stop:              0,
  3023  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  3024  			job.TaskGroups[0].Name: {
  3025  				Canary: 2,
  3026  				Ignore: 12,
  3027  			},
  3028  		},
  3029  	})
  3030  
  3031  	assertNamesHaveIndexes(t, intRange(1, 2), placeResultsToNames(r.place))
  3032  }
  3033  
  3034  // Tests the reconciler handles canary promotion by unblocking max_parallel
  3035  func TestReconciler_PromoteCanaries_Unblock(t *testing.T) {
  3036  	job := mock.Job()
  3037  	job.TaskGroups[0].Update = canaryUpdate
  3038  
  3039  	// Create an existing deployment that has placed some canaries and mark them
  3040  	// promoted
  3041  	d := structs.NewDeployment(job)
  3042  	s := &structs.DeploymentState{
  3043  		Promoted:        true,
  3044  		DesiredTotal:    10,
  3045  		DesiredCanaries: 2,
  3046  		PlacedAllocs:    2,
  3047  	}
  3048  	d.TaskGroups[job.TaskGroups[0].Name] = s
  3049  
  3050  	// Create 10 allocations from the old job
  3051  	var allocs []*structs.Allocation
  3052  	for i := 0; i < 10; i++ {
  3053  		alloc := mock.Alloc()
  3054  		alloc.Job = job
  3055  		alloc.JobID = job.ID
  3056  		alloc.NodeID = uuid.Generate()
  3057  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3058  		alloc.TaskGroup = job.TaskGroups[0].Name
  3059  		allocs = append(allocs, alloc)
  3060  	}
  3061  
  3062  	// Create the canaries
  3063  	handled := make(map[string]allocUpdateType)
  3064  	for i := 0; i < 2; i++ {
  3065  		// Create one canary
  3066  		canary := mock.Alloc()
  3067  		canary.Job = job
  3068  		canary.JobID = job.ID
  3069  		canary.NodeID = uuid.Generate()
  3070  		canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3071  		canary.TaskGroup = job.TaskGroups[0].Name
  3072  		s.PlacedCanaries = append(s.PlacedCanaries, canary.ID)
  3073  		canary.DeploymentID = d.ID
  3074  		canary.DeploymentStatus = &structs.AllocDeploymentStatus{
  3075  			Healthy: helper.BoolToPtr(true),
  3076  		}
  3077  		allocs = append(allocs, canary)
  3078  		handled[canary.ID] = allocUpdateFnIgnore
  3079  	}
  3080  
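        	// With the deployment promoted and both canaries healthy, the rolling
        	// update limit (max_parallel, presumably 2 in the canaryUpdate fixture
        	// defined earlier in this file) unblocks 2 destructive updates, and the
        	// 2 old allocs the canaries replaced are stopped.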
  3081  	mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive)
  3082  	reconciler := NewAllocReconciler(testLogger(), mockUpdateFn, false, job.ID, job, d, allocs, nil, "")
  3083  	r := reconciler.Compute()
  3084  
  3085  	// Assert the correct results
  3086  	assertResults(t, r, &resultExpectation{
  3087  		createDeployment:  nil,
  3088  		deploymentUpdates: nil,
  3089  		destructive:       2,
  3090  		stop:              2,
  3091  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  3092  			job.TaskGroups[0].Name: {
  3093  				Stop:              2,
  3094  				DestructiveUpdate: 2,
  3095  				Ignore:            8,
  3096  			},
  3097  		},
  3098  	})
  3099  
  3100  	assertNoCanariesStopped(t, d, r.stop)
  3101  	assertNamesHaveIndexes(t, intRange(2, 3), destructiveResultsToNames(r.destructiveUpdate))
  3102  	assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop))
  3103  }
  3104  
  3105  // Tests the reconciler correctly handles canary promotion when the canary
  3106  // count equals the group count
  3107  func TestReconciler_PromoteCanaries_CanariesEqualCount(t *testing.T) {
  3108  	job := mock.Job()
  3109  	job.TaskGroups[0].Update = canaryUpdate
  3110  	job.TaskGroups[0].Count = 2
  3111  
  3112  	// Create an existing deployment that has placed some canaries and mark them
  3113  	// promoted
  3114  	d := structs.NewDeployment(job)
  3115  	s := &structs.DeploymentState{
  3116  		Promoted:        true,
  3117  		DesiredTotal:    2,
  3118  		DesiredCanaries: 2,
  3119  		PlacedAllocs:    2,
  3120  	}
  3121  	d.TaskGroups[job.TaskGroups[0].Name] = s
  3122  
  3123  	// Create 2 allocations from the old job
  3124  	var allocs []*structs.Allocation
  3125  	for i := 0; i < 2; i++ {
  3126  		alloc := mock.Alloc()
  3127  		alloc.Job = job
  3128  		alloc.JobID = job.ID
  3129  		alloc.NodeID = uuid.Generate()
  3130  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3131  		alloc.TaskGroup = job.TaskGroups[0].Name
  3132  		allocs = append(allocs, alloc)
  3133  	}
  3134  
  3135  	// Create the canaries
  3136  	handled := make(map[string]allocUpdateType)
  3137  	for i := 0; i < 2; i++ {
  3138  		// Create one canary
  3139  		canary := mock.Alloc()
  3140  		canary.Job = job
  3141  		canary.JobID = job.ID
  3142  		canary.NodeID = uuid.Generate()
  3143  		canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3144  		canary.TaskGroup = job.TaskGroups[0].Name
  3145  		s.PlacedCanaries = append(s.PlacedCanaries, canary.ID)
  3146  		canary.DeploymentID = d.ID
  3147  		canary.DeploymentStatus = &structs.AllocDeploymentStatus{
  3148  			Healthy: helper.BoolToPtr(true),
  3149  		}
  3150  		allocs = append(allocs, canary)
  3151  		handled[canary.ID] = allocUpdateFnIgnore
  3152  	}
  3153  
  3154  	mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive)
  3155  	reconciler := NewAllocReconciler(testLogger(), mockUpdateFn, false, job.ID, job, d, allocs, nil, "")
  3156  	r := reconciler.Compute()
  3157  
  3158  	updates := []*structs.DeploymentStatusUpdate{
  3159  		{
  3160  			DeploymentID:      d.ID,
  3161  			Status:            structs.DeploymentStatusSuccessful,
  3162  			StatusDescription: structs.DeploymentStatusDescriptionSuccessful,
  3163  		},
  3164  	}
  3165  
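        	// Because the canary count equals the group count and all canaries are
        	// healthy, the deployment completes immediately: the old allocs are
        	// stopped and the deployment is marked successful.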
  3166  	// Assert the correct results
  3167  	assertResults(t, r, &resultExpectation{
  3168  		createDeployment:  nil,
  3169  		deploymentUpdates: updates,
  3170  		place:             0,
  3171  		inplace:           0,
  3172  		stop:              2,
  3173  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  3174  			job.TaskGroups[0].Name: {
  3175  				Stop:   2,
  3176  				Ignore: 2,
  3177  			},
  3178  		},
  3179  	})
  3180  
  3181  	assertNoCanariesStopped(t, d, r.stop)
  3182  	assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop))
  3183  }
  3184  
  3185  // Tests the reconciler checks the health of placed allocs to determine the
  3186  // limit
  3187  func TestReconciler_DeploymentLimit_HealthAccounting(t *testing.T) {
  3188  	job := mock.Job()
  3189  	job.TaskGroups[0].Update = noCanaryUpdate
  3190  
  3191  	cases := []struct {
  3192  		healthy int
  3193  	}{
  3194  		{
  3195  			healthy: 0,
  3196  		},
  3197  		{
  3198  			healthy: 1,
  3199  		},
  3200  		{
  3201  			healthy: 2,
  3202  		},
  3203  		{
  3204  			healthy: 3,
  3205  		},
  3206  		{
  3207  			healthy: 4,
  3208  		},
  3209  	}
  3210  
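        	// In each case the number of destructive updates should track the
        	// number of healthy placed allocs, since only healthy allocs free up
        	// slots under the rolling update limit.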
  3211  	for _, c := range cases {
  3212  		t.Run(fmt.Sprintf("%d healthy", c.healthy), func(t *testing.T) {
  3213  			// Create an existing deployment that has placed some canaries and mark them
  3214  			// promoted
  3215  			d := structs.NewDeployment(job)
  3216  			d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  3217  				Promoted:     true,
  3218  				DesiredTotal: 10,
  3219  				PlacedAllocs: 4,
  3220  			}
  3221  
  3222  			// Create 6 allocations from the old job
  3223  			var allocs []*structs.Allocation
  3224  			for i := 4; i < 10; i++ {
  3225  				alloc := mock.Alloc()
  3226  				alloc.Job = job
  3227  				alloc.JobID = job.ID
  3228  				alloc.NodeID = uuid.Generate()
  3229  				alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3230  				alloc.TaskGroup = job.TaskGroups[0].Name
  3231  				allocs = append(allocs, alloc)
  3232  			}
  3233  
  3234  			// Create the new allocs
  3235  			handled := make(map[string]allocUpdateType)
  3236  			for i := 0; i < 4; i++ {
  3237  				new := mock.Alloc()
  3238  				new.Job = job
  3239  				new.JobID = job.ID
  3240  				new.NodeID = uuid.Generate()
  3241  				new.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3242  				new.TaskGroup = job.TaskGroups[0].Name
  3243  				new.DeploymentID = d.ID
  3244  				if i < c.healthy {
  3245  					new.DeploymentStatus = &structs.AllocDeploymentStatus{
  3246  						Healthy: helper.BoolToPtr(true),
  3247  					}
  3248  				}
  3249  				allocs = append(allocs, new)
  3250  				handled[new.ID] = allocUpdateFnIgnore
  3251  			}
  3252  
  3253  			mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive)
  3254  			reconciler := NewAllocReconciler(testLogger(), mockUpdateFn, false, job.ID, job, d, allocs, nil, "")
  3255  			r := reconciler.Compute()
  3256  
  3257  			// Assert the correct results
  3258  			assertResults(t, r, &resultExpectation{
  3259  				createDeployment:  nil,
  3260  				deploymentUpdates: nil,
  3261  				destructive:       c.healthy,
  3262  				desiredTGUpdates: map[string]*structs.DesiredUpdates{
  3263  					job.TaskGroups[0].Name: {
  3264  						DestructiveUpdate: uint64(c.healthy),
  3265  						Ignore:            uint64(10 - c.healthy),
  3266  					},
  3267  				},
  3268  			})
  3269  
  3270  			if c.healthy != 0 {
  3271  				assertNamesHaveIndexes(t, intRange(4, 3+c.healthy), destructiveResultsToNames(r.destructiveUpdate))
  3272  			}
  3273  		})
  3274  	}
  3275  }
  3276  
  3277  // Tests the reconciler handles an alloc on a tainted node during a rolling
  3278  // update
  3279  func TestReconciler_TaintedNode_RollingUpgrade(t *testing.T) {
  3280  	job := mock.Job()
  3281  	job.TaskGroups[0].Update = noCanaryUpdate
  3282  
  3283  	// Create an existing deployment that has some placed allocs
  3284  	d := structs.NewDeployment(job)
  3285  	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  3286  		Promoted:     true,
  3287  		DesiredTotal: 10,
  3288  		PlacedAllocs: 7,
  3289  	}
  3290  
  3291  	// Create 2 allocations from the old job
  3292  	var allocs []*structs.Allocation
  3293  	for i := 8; i < 10; i++ {
  3294  		alloc := mock.Alloc()
  3295  		alloc.Job = job
  3296  		alloc.JobID = job.ID
  3297  		alloc.NodeID = uuid.Generate()
  3298  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3299  		alloc.TaskGroup = job.TaskGroups[0].Name
  3300  		allocs = append(allocs, alloc)
  3301  	}
  3302  
  3303  	// Create the healthy replacements
  3304  	handled := make(map[string]allocUpdateType)
  3305  	for i := 0; i < 8; i++ {
  3306  		new := mock.Alloc()
  3307  		new.Job = job
  3308  		new.JobID = job.ID
  3309  		new.NodeID = uuid.Generate()
  3310  		new.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3311  		new.TaskGroup = job.TaskGroups[0].Name
  3312  		new.DeploymentID = d.ID
  3313  		new.DeploymentStatus = &structs.AllocDeploymentStatus{
  3314  			Healthy: helper.BoolToPtr(true),
  3315  		}
  3316  		allocs = append(allocs, new)
  3317  		handled[new.ID] = allocUpdateFnIgnore
  3318  	}
  3319  
  3320  	// Build a map of tainted nodes
  3321  	tainted := make(map[string]*structs.Node, 3)
  3322  	for i := 0; i < 3; i++ {
  3323  		n := mock.Node()
  3324  		n.ID = allocs[2+i].NodeID
  3325  		if i == 0 {
  3326  			n.Status = structs.NodeStatusDown
  3327  		} else {
  3328  			n.Drain = true
  3329  			allocs[2+i].DesiredTransition.Migrate = helper.BoolToPtr(true)
  3330  		}
  3331  		tainted[n.ID] = n
  3332  	}
  3333  
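        	// The alloc on the down node is lost, so it is stopped and replaced;
        	// the two allocs on draining nodes are migrated; and the update limit
        	// still permits 2 destructive updates of old allocs.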
  3334  	mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive)
  3335  	reconciler := NewAllocReconciler(testLogger(), mockUpdateFn, false, job.ID, job, d, allocs, tainted, "")
  3336  	r := reconciler.Compute()
  3337  
  3338  	// Assert the correct results
  3339  	assertResults(t, r, &resultExpectation{
  3340  		createDeployment:  nil,
  3341  		deploymentUpdates: nil,
  3342  		place:             3,
  3343  		destructive:       2,
  3344  		stop:              3,
  3345  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  3346  			job.TaskGroups[0].Name: {
  3347  				Place:             1, // Place the lost
  3348  				Stop:              1, // Stop the lost
  3349  				Migrate:           2, // Migrate the tainted
  3350  				DestructiveUpdate: 2,
  3351  				Ignore:            5,
  3352  			},
  3353  		},
  3354  	})
  3355  
  3356  	assertNamesHaveIndexes(t, intRange(8, 9), destructiveResultsToNames(r.destructiveUpdate))
  3357  	assertNamesHaveIndexes(t, intRange(0, 2), placeResultsToNames(r.place))
  3358  	assertNamesHaveIndexes(t, intRange(0, 2), stopResultsToNames(r.stop))
  3359  }
  3360  
  3361  // Tests the reconciler handles a failed deployment and only replaces
  3362  // allocations that were lost
  3363  func TestReconciler_FailedDeployment_PlacementLost(t *testing.T) {
  3364  	job := mock.Job()
  3365  	job.TaskGroups[0].Update = noCanaryUpdate
  3366  
  3367  	// Create an existing failed deployment that has some placed allocs
  3368  	d := structs.NewDeployment(job)
  3369  	d.Status = structs.DeploymentStatusFailed
  3370  	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  3371  		Promoted:     true,
  3372  		DesiredTotal: 10,
  3373  		PlacedAllocs: 4,
  3374  	}
  3375  
  3376  	// Create 6 allocations from the old job
  3377  	var allocs []*structs.Allocation
  3378  	for i := 4; i < 10; i++ {
  3379  		alloc := mock.Alloc()
  3380  		alloc.Job = job
  3381  		alloc.JobID = job.ID
  3382  		alloc.NodeID = uuid.Generate()
  3383  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3384  		alloc.TaskGroup = job.TaskGroups[0].Name
  3385  		allocs = append(allocs, alloc)
  3386  	}
  3387  
  3388  	// Create the healthy replacements
  3389  	handled := make(map[string]allocUpdateType)
  3390  	for i := 0; i < 4; i++ {
  3391  		new := mock.Alloc()
  3392  		new.Job = job
  3393  		new.JobID = job.ID
  3394  		new.NodeID = uuid.Generate()
  3395  		new.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3396  		new.TaskGroup = job.TaskGroups[0].Name
  3397  		new.DeploymentID = d.ID
  3398  		new.DeploymentStatus = &structs.AllocDeploymentStatus{
  3399  			Healthy: helper.BoolToPtr(true),
  3400  		}
  3401  		allocs = append(allocs, new)
  3402  		handled[new.ID] = allocUpdateFnIgnore
  3403  	}
  3404  
  3405  	// Build a map of tainted nodes
  3406  	tainted := make(map[string]*structs.Node, 2)
  3407  	for i := 0; i < 2; i++ {
  3408  		n := mock.Node()
  3409  		n.ID = allocs[6+i].NodeID
  3410  		if i == 0 {
  3411  			n.Status = structs.NodeStatusDown
  3412  		} else {
  3413  			n.Drain = true
  3414  			allocs[6+i].DesiredTransition.Migrate = helper.BoolToPtr(true)
  3415  		}
  3416  		tainted[n.ID] = n
  3417  	}
  3418  
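        	// Because the deployment is failed, no destructive updates or
        	// migrations should occur; only the alloc on the down node is replaced,
        	// and both allocs on tainted nodes are stopped.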
  3419  	mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive)
  3420  	reconciler := NewAllocReconciler(testLogger(), mockUpdateFn, false, job.ID, job, d, allocs, tainted, "")
  3421  	r := reconciler.Compute()
  3422  
  3423  	// Assert the correct results
  3424  	assertResults(t, r, &resultExpectation{
  3425  		createDeployment:  nil,
  3426  		deploymentUpdates: nil,
  3427  		place:             1, // Only replace the lost node
  3428  		inplace:           0,
  3429  		stop:              2,
  3430  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  3431  			job.TaskGroups[0].Name: {
  3432  				Place:  1,
  3433  				Stop:   2,
  3434  				Ignore: 8,
  3435  			},
  3436  		},
  3437  	})
  3438  
  3439  	assertNamesHaveIndexes(t, intRange(0, 0), placeResultsToNames(r.place))
  3440  	assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop))
  3441  }
  3442  
  3443  // Tests the reconciler handles a run after a deployment has completed
  3444  // successfully.
  3445  func TestReconciler_CompleteDeployment(t *testing.T) {
  3446  	job := mock.Job()
  3447  	job.TaskGroups[0].Update = canaryUpdate
  3448  
  3449  	d := structs.NewDeployment(job)
  3450  	d.Status = structs.DeploymentStatusSuccessful
  3451  	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  3452  		Promoted:        true,
  3453  		DesiredTotal:    10,
  3454  		DesiredCanaries: 2,
  3455  		PlacedAllocs:    10,
  3456  		HealthyAllocs:   10,
  3457  	}
  3458  
  3459  	// Create allocations from the old job
  3460  	var allocs []*structs.Allocation
  3461  	for i := 0; i < 10; i++ {
  3462  		alloc := mock.Alloc()
  3463  		alloc.Job = job
  3464  		alloc.JobID = job.ID
  3465  		alloc.NodeID = uuid.Generate()
  3466  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3467  		alloc.TaskGroup = job.TaskGroups[0].Name
  3468  		alloc.DeploymentID = d.ID
  3469  		alloc.DeploymentStatus = &structs.AllocDeploymentStatus{
  3470  			Healthy: helper.BoolToPtr(true),
  3471  		}
  3472  		allocs = append(allocs, alloc)
  3473  	}
  3474  
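        	// A successful deployment whose allocs are all healthy should be a
        	// no-op: no placements, stops, or deployment changes.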
  3475  	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, d, allocs, nil, "")
  3476  	r := reconciler.Compute()
  3477  
  3478  	// Assert the correct results
  3479  	assertResults(t, r, &resultExpectation{
  3480  		createDeployment:  nil,
  3481  		deploymentUpdates: nil,
  3482  		place:             0,
  3483  		inplace:           0,
  3484  		stop:              0,
  3485  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  3486  			job.TaskGroups[0].Name: {
  3487  				Ignore: 10,
  3488  			},
  3489  		},
  3490  	})
  3491  }
  3492  
  3493  // Test that a failed deployment cancels non-promoted canaries
  3494  func TestReconciler_FailedDeployment_CancelCanaries(t *testing.T) {
  3495  	// Create a job with two task groups
  3496  	job := mock.Job()
  3497  	job.TaskGroups[0].Update = canaryUpdate
  3498  	job.TaskGroups = append(job.TaskGroups, job.TaskGroups[0].Copy())
  3499  	job.TaskGroups[1].Name = "two"
  3500  
  3501  	// Create an existing failed deployment that has promoted one task group
  3502  	d := structs.NewDeployment(job)
  3503  	d.Status = structs.DeploymentStatusFailed
  3504  	s0 := &structs.DeploymentState{
  3505  		Promoted:        true,
  3506  		DesiredTotal:    10,
  3507  		DesiredCanaries: 2,
  3508  		PlacedAllocs:    4,
  3509  	}
  3510  	s1 := &structs.DeploymentState{
  3511  		Promoted:        false,
  3512  		DesiredTotal:    10,
  3513  		DesiredCanaries: 2,
  3514  		PlacedAllocs:    2,
  3515  	}
  3516  	d.TaskGroups[job.TaskGroups[0].Name] = s0
  3517  	d.TaskGroups[job.TaskGroups[1].Name] = s1
  3518  
  3519  	// For each group, create the healthy replacements plus allocs from the old job
  3520  	var allocs []*structs.Allocation
  3521  	handled := make(map[string]allocUpdateType)
  3522  	for _, group := range []int{0, 1} {
  3523  		replacements := 4
  3524  		state := s0
  3525  		if group == 1 {
  3526  			replacements = 2
  3527  			state = s1
  3528  		}
  3529  
  3530  		// Create the healthy replacements
  3531  		for i := 0; i < replacements; i++ {
  3532  			new := mock.Alloc()
  3533  			new.Job = job
  3534  			new.JobID = job.ID
  3535  			new.NodeID = uuid.Generate()
  3536  			new.Name = structs.AllocName(job.ID, job.TaskGroups[group].Name, uint(i))
  3537  			new.TaskGroup = job.TaskGroups[group].Name
  3538  			new.DeploymentID = d.ID
  3539  			new.DeploymentStatus = &structs.AllocDeploymentStatus{
  3540  				Healthy: helper.BoolToPtr(true),
  3541  			}
  3542  			allocs = append(allocs, new)
  3543  			handled[new.ID] = allocUpdateFnIgnore
  3544  
  3545  			// Add the alloc to the canary list
  3546  			if i < 2 {
  3547  				state.PlacedCanaries = append(state.PlacedCanaries, new.ID)
  3548  			}
  3549  		}
  3550  		for i := replacements; i < 10; i++ {
  3551  			alloc := mock.Alloc()
  3552  			alloc.Job = job
  3553  			alloc.JobID = job.ID
  3554  			alloc.NodeID = uuid.Generate()
  3555  			alloc.Name = structs.AllocName(job.ID, job.TaskGroups[group].Name, uint(i))
  3556  			alloc.TaskGroup = job.TaskGroups[group].Name
  3557  			allocs = append(allocs, alloc)
  3558  		}
  3559  	}
  3560  
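        	// The promoted group should be untouched, while the 2 non-promoted
        	// canaries in group "two" are stopped.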
  3561  	mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive)
  3562  	reconciler := NewAllocReconciler(testLogger(), mockUpdateFn, false, job.ID, job, d, allocs, nil, "")
  3563  	r := reconciler.Compute()
  3564  
  3565  	// Assert the correct results
  3566  	assertResults(t, r, &resultExpectation{
  3567  		createDeployment:  nil,
  3568  		deploymentUpdates: nil,
  3569  		place:             0,
  3570  		inplace:           0,
  3571  		stop:              2,
  3572  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  3573  			job.TaskGroups[0].Name: {
  3574  				Ignore: 10,
  3575  			},
  3576  			job.TaskGroups[1].Name: {
  3577  				Stop:   2,
  3578  				Ignore: 8,
  3579  			},
  3580  		},
  3581  	})
  3582  
  3583  	assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop))
  3584  }
  3585  
  3586  // Test that a failed deployment and updated job works
  3587  func TestReconciler_FailedDeployment_NewJob(t *testing.T) {
  3588  	job := mock.Job()
  3589  	job.TaskGroups[0].Update = noCanaryUpdate
  3590  
  3591  	// Create an existing failed deployment that has some placed allocs
  3592  	d := structs.NewDeployment(job)
  3593  	d.Status = structs.DeploymentStatusFailed
  3594  	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  3595  		Promoted:     true,
  3596  		DesiredTotal: 10,
  3597  		PlacedAllocs: 4,
  3598  	}
  3599  
  3600  	// Create 6 allocations from the old job
  3601  	var allocs []*structs.Allocation
  3602  	for i := 4; i < 10; i++ {
  3603  		alloc := mock.Alloc()
  3604  		alloc.Job = job
  3605  		alloc.JobID = job.ID
  3606  		alloc.NodeID = uuid.Generate()
  3607  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3608  		alloc.TaskGroup = job.TaskGroups[0].Name
  3609  		allocs = append(allocs, alloc)
  3610  	}
  3611  
  3612  	// Create the healthy replacements
  3613  	for i := 0; i < 4; i++ {
  3614  		new := mock.Alloc()
  3615  		new.Job = job
  3616  		new.JobID = job.ID
  3617  		new.NodeID = uuid.Generate()
  3618  		new.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3619  		new.TaskGroup = job.TaskGroups[0].Name
  3620  		new.DeploymentID = d.ID
  3621  		new.DeploymentStatus = &structs.AllocDeploymentStatus{
  3622  			Healthy: helper.BoolToPtr(true),
  3623  		}
  3624  		allocs = append(allocs, new)
  3625  	}
  3626  
  3627  	// Up the job version
  3628  	jobNew := job.Copy()
  3629  	jobNew.Version += 100
  3630  
  3631  	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, jobNew, d, allocs, nil, "")
  3632  	r := reconciler.Compute()
  3633  
  3634  	dnew := structs.NewDeployment(jobNew)
  3635  	dnew.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  3636  		DesiredTotal: 10,
  3637  	}
  3638  
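        	// The updated job should get a fresh deployment despite the failed
        	// one, and the 4 allocs placed by the failed deployment are
        	// destructively updated first, up to the rolling update limit.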
  3639  	// Assert the correct results
  3640  	assertResults(t, r, &resultExpectation{
  3641  		createDeployment:  dnew,
  3642  		deploymentUpdates: nil,
  3643  		destructive:       4,
  3644  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  3645  			job.TaskGroups[0].Name: {
  3646  				DestructiveUpdate: 4,
  3647  				Ignore:            6,
  3648  			},
  3649  		},
  3650  	})
  3651  
  3652  	assertNamesHaveIndexes(t, intRange(0, 3), destructiveResultsToNames(r.destructiveUpdate))
  3653  }
  3654  
  3655  // Tests the reconciler marks a deployment as complete
  3656  func TestReconciler_MarkDeploymentComplete(t *testing.T) {
  3657  	job := mock.Job()
  3658  	job.TaskGroups[0].Update = noCanaryUpdate
  3659  
  3660  	d := structs.NewDeployment(job)
  3661  	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  3662  		Promoted:      true,
  3663  		DesiredTotal:  10,
  3664  		PlacedAllocs:  10,
  3665  		HealthyAllocs: 10,
  3666  	}
  3667  
  3668  	// Create allocations from the old job
  3669  	var allocs []*structs.Allocation
  3670  	for i := 0; i < 10; i++ {
  3671  		alloc := mock.Alloc()
  3672  		alloc.Job = job
  3673  		alloc.JobID = job.ID
  3674  		alloc.NodeID = uuid.Generate()
  3675  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3676  		alloc.TaskGroup = job.TaskGroups[0].Name
  3677  		alloc.DeploymentID = d.ID
  3678  		alloc.DeploymentStatus = &structs.AllocDeploymentStatus{
  3679  			Healthy: helper.BoolToPtr(true),
  3680  		}
  3681  		allocs = append(allocs, alloc)
  3682  	}
  3683  
  3684  	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, d, allocs, nil, "")
  3685  	r := reconciler.Compute()
  3686  
  3687  	updates := []*structs.DeploymentStatusUpdate{
  3688  		{
  3689  			DeploymentID:      d.ID,
  3690  			Status:            structs.DeploymentStatusSuccessful,
  3691  			StatusDescription: structs.DeploymentStatusDescriptionSuccessful,
  3692  		},
  3693  	}
  3694  
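        	// With all 10 allocs placed and healthy, the only expected change is
        	// the status update marking the deployment successful.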
  3695  	// Assert the correct results
  3696  	assertResults(t, r, &resultExpectation{
  3697  		createDeployment:  nil,
  3698  		deploymentUpdates: updates,
  3699  		place:             0,
  3700  		inplace:           0,
  3701  		stop:              0,
  3702  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  3703  			job.TaskGroups[0].Name: {
  3704  				Ignore: 10,
  3705  			},
  3706  		},
  3707  	})
  3708  }
  3709  
  3710  // Tests the reconciler handles a job change that creates a deployment while
  3711  // scaling up, where this is the second eval for the change.
  3712  func TestReconciler_JobChange_ScaleUp_SecondEval(t *testing.T) {
  3713  	// Scale the job up to 30
  3714  	job := mock.Job()
  3715  	job.TaskGroups[0].Update = noCanaryUpdate
  3716  	job.TaskGroups[0].Count = 30
  3717  
  3718  	// Create a deployment that has placed some allocs from the new job
  3719  	d := structs.NewDeployment(job)
  3720  	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  3721  		Promoted:     false,
  3722  		DesiredTotal: 30,
  3723  		PlacedAllocs: 20,
  3724  	}
  3725  
  3726  	// Create 10 allocations from the old job
  3727  	var allocs []*structs.Allocation
  3728  	for i := 0; i < 10; i++ {
  3729  		alloc := mock.Alloc()
  3730  		alloc.Job = job
  3731  		alloc.JobID = job.ID
  3732  		alloc.NodeID = uuid.Generate()
  3733  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3734  		alloc.TaskGroup = job.TaskGroups[0].Name
  3735  		allocs = append(allocs, alloc)
  3736  	}
  3737  
  3738  	// Create 20 allocations from the new job
  3739  	handled := make(map[string]allocUpdateType)
  3740  	for i := 10; i < 30; i++ {
  3741  		alloc := mock.Alloc()
  3742  		alloc.Job = job
  3743  		alloc.JobID = job.ID
  3744  		alloc.DeploymentID = d.ID
  3745  		alloc.NodeID = uuid.Generate()
  3746  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3747  		alloc.TaskGroup = job.TaskGroups[0].Name
  3748  		allocs = append(allocs, alloc)
  3749  		handled[alloc.ID] = allocUpdateFnIgnore
  3750  	}
  3751  
  3752  	mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive)
  3753  	reconciler := NewAllocReconciler(testLogger(), mockUpdateFn, false, job.ID, job, d, allocs, nil, "")
  3754  	r := reconciler.Compute()
  3755  
  3756  	// Assert the correct results
  3757  	assertResults(t, r, &resultExpectation{
  3758  		createDeployment:  nil,
  3759  		deploymentUpdates: nil,
  3760  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  3761  			job.TaskGroups[0].Name: {
  3762  				// All should be ignored because nothing has been marked as
  3763  				// healthy.
  3764  				Ignore: 30,
  3765  			},
  3766  		},
  3767  	})
  3768  }
  3769  
  3770  // Tests the reconciler doesn't stop allocations when doing a rolling upgrade
  3771  // where the count of the old job allocs is < desired count.
  3772  func TestReconciler_RollingUpgrade_MissingAllocs(t *testing.T) {
  3773  	job := mock.Job()
  3774  	job.TaskGroups[0].Update = noCanaryUpdate
  3775  
  3776  	// Create 7 allocations from the old job
  3777  	var allocs []*structs.Allocation
  3778  	for i := 0; i < 7; i++ {
  3779  		alloc := mock.Alloc()
  3780  		alloc.Job = job
  3781  		alloc.JobID = job.ID
  3782  		alloc.NodeID = uuid.Generate()
  3783  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3784  		alloc.TaskGroup = job.TaskGroups[0].Name
  3785  		allocs = append(allocs, alloc)
  3786  	}
  3787  
  3788  	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "")
  3789  	r := reconciler.Compute()
  3790  
  3791  	d := structs.NewDeployment(job)
  3792  	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  3793  		DesiredTotal: 10,
  3794  	}
  3795  
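        	// The reconciler should place the 3 missing allocs rather than stop
        	// anything, leaving room under the rolling update limit for a single
        	// destructive update.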
  3796  	// Assert the correct results
  3797  	assertResults(t, r, &resultExpectation{
  3798  		createDeployment:  d,
  3799  		deploymentUpdates: nil,
  3800  		place:             3,
  3801  		destructive:       1,
  3802  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  3803  			job.TaskGroups[0].Name: {
  3804  				Place:             3,
  3805  				DestructiveUpdate: 1,
  3806  				Ignore:            6,
  3807  			},
  3808  		},
  3809  	})
  3810  
  3811  	assertNamesHaveIndexes(t, intRange(7, 9), placeResultsToNames(r.place))
  3812  	assertNamesHaveIndexes(t, intRange(0, 0), destructiveResultsToNames(r.destructiveUpdate))
  3813  }
  3814  
  3815  // Tests that the reconciler handles rerunning a batch job in the case that the
  3816  // allocations are from an older instance of the job.
  3817  func TestReconciler_Batch_Rerun(t *testing.T) {
  3818  	job := mock.Job()
  3819  	job.Type = structs.JobTypeBatch
  3820  	job.TaskGroups[0].Update = nil
  3821  
  3822  	// Create 10 allocations from the old job and mark them complete
  3823  	var allocs []*structs.Allocation
  3824  	for i := 0; i < 10; i++ {
  3825  		alloc := mock.Alloc()
  3826  		alloc.Job = job
  3827  		alloc.JobID = job.ID
  3828  		alloc.NodeID = uuid.Generate()
  3829  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3830  		alloc.TaskGroup = job.TaskGroups[0].Name
  3831  		alloc.ClientStatus = structs.AllocClientStatusComplete
  3832  		alloc.DesiredStatus = structs.AllocDesiredStatusStop
  3833  		allocs = append(allocs, alloc)
  3834  	}
  3835  
  3836  	// Create a copy of the job that is "new"
  3837  	job2 := job.Copy()
  3838  	job2.CreateIndex++
  3839  
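        	// The bumped CreateIndex marks this as a new instance of the batch
        	// job, so all 10 names should be placed again while the old terminal
        	// allocs are ignored.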
  3840  	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, true, job2.ID, job2, nil, allocs, nil, "")
  3841  	r := reconciler.Compute()
  3842  
  3843  	// Assert the correct results
  3844  	assertResults(t, r, &resultExpectation{
  3845  		createDeployment:  nil,
  3846  		deploymentUpdates: nil,
  3847  		place:             10,
  3848  		destructive:       0,
  3849  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  3850  			job.TaskGroups[0].Name: {
  3851  				Place:             10,
  3852  				DestructiveUpdate: 0,
  3853  				Ignore:            10,
  3854  			},
  3855  		},
  3856  	})
  3857  
  3858  	assertNamesHaveIndexes(t, intRange(0, 9), placeResultsToNames(r.place))
  3859  }
  3860  
  3861  // Test that a failed deployment will not result in rescheduling failed allocations
  3862  func TestReconciler_FailedDeployment_DontReschedule(t *testing.T) {
  3863  	job := mock.Job()
  3864  	job.TaskGroups[0].Update = noCanaryUpdate
  3865  
  3866  	tgName := job.TaskGroups[0].Name
  3867  	now := time.Now()
  3868  	// Create an existing failed deployment that has some placed allocs
  3869  	d := structs.NewDeployment(job)
  3870  	d.Status = structs.DeploymentStatusFailed
  3871  	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  3872  		Promoted:     true,
  3873  		DesiredTotal: 5,
  3874  		PlacedAllocs: 4,
  3875  	}
  3876  
  3877  	// Create 4 allocations and mark two as failed
  3878  	var allocs []*structs.Allocation
  3879  	for i := 0; i < 4; i++ {
  3880  		alloc := mock.Alloc()
  3881  		alloc.Job = job
  3882  		alloc.JobID = job.ID
  3883  		alloc.NodeID = uuid.Generate()
  3884  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3885  		alloc.TaskGroup = job.TaskGroups[0].Name
  3886  		allocs = append(allocs, alloc)
  3887  	}
  3888  
  3889  	// Mark two allocations as failed and reschedulable now
  3890  	allocs[2].ClientStatus = structs.AllocClientStatusFailed
  3891  	allocs[2].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
  3892  		StartedAt:  now.Add(-1 * time.Hour),
  3893  		FinishedAt: now.Add(-10 * time.Second)}}
  3894  
  3895  	allocs[3].ClientStatus = structs.AllocClientStatusFailed
  3896  	allocs[3].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
  3897  		StartedAt:  now.Add(-1 * time.Hour),
  3898  		FinishedAt: now.Add(-10 * time.Second)}}
  3899  
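        	// Although both failed allocs are past their reschedule delay, the
        	// failed deployment should suppress any rescheduling placements.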
  3900  	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, job, d, allocs, nil, "")
  3901  	r := reconciler.Compute()
  3902  
  3903  	// Assert that no rescheduled placements were created
  3904  	assertResults(t, r, &resultExpectation{
  3905  		place:             0,
  3906  		createDeployment:  nil,
  3907  		deploymentUpdates: nil,
  3908  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  3909  			job.TaskGroups[0].Name: {
  3910  				Ignore: 2,
  3911  			},
  3912  		},
  3913  	})
  3914  }
  3915  
  3916  // Test that a running deployment with failed allocs will not result in rescheduling failed allocations
  3917  func TestReconciler_DeploymentWithFailedAllocs_DontReschedule(t *testing.T) {
  3918  	job := mock.Job()
  3919  	job.TaskGroups[0].Update = noCanaryUpdate
  3920  	tgName := job.TaskGroups[0].Name
  3921  	now := time.Now()
  3922  
  3923  	// Mock deployment with failed allocs, but deployment watcher hasn't marked it as failed yet
  3924  	d := structs.NewDeployment(job)
  3925  	d.Status = structs.DeploymentStatusRunning
  3926  	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  3927  		Promoted:     false,
  3928  		DesiredTotal: 5,
  3929  		PlacedAllocs: 4,
  3930  	}
  3931  
  3932  	// Create 4 allocations and mark two as failed
  3933  	var allocs []*structs.Allocation
  3934  	for i := 0; i < 4; i++ {
  3935  		alloc := mock.Alloc()
  3936  		alloc.Job = job
  3937  		alloc.JobID = job.ID
  3938  		alloc.NodeID = uuid.Generate()
  3939  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3940  		alloc.TaskGroup = job.TaskGroups[0].Name
  3941  		alloc.DeploymentID = d.ID
  3942  		allocs = append(allocs, alloc)
  3943  	}
  3944  
  3945  	// Mark two allocs as failed and reschedulable now
  3946  	allocs[2].ClientStatus = structs.AllocClientStatusFailed
  3947  	allocs[2].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
  3948  		StartedAt:  now.Add(-1 * time.Hour),
  3949  		FinishedAt: now.Add(-10 * time.Second)}}
  3950  
  3951  	allocs[3].ClientStatus = structs.AllocClientStatusFailed
  3952  	allocs[3].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
  3953  		StartedAt:  now.Add(-1 * time.Hour),
  3954  		FinishedAt: now.Add(-10 * time.Second)}}
  3955  
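        	// While the deployment is still running, failed allocs are left to the
        	// deployment watcher rather than rescheduled by the reconciler.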
  3956  	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, job, d, allocs, nil, "")
  3957  	r := reconciler.Compute()
  3958  
  3959  	// Assert that no rescheduled placements were created
  3960  	assertResults(t, r, &resultExpectation{
  3961  		place:             0,
  3962  		createDeployment:  nil,
  3963  		deploymentUpdates: nil,
  3964  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  3965  			job.TaskGroups[0].Name: {
  3966  				Ignore: 2,
  3967  			},
  3968  		},
  3969  	})
  3970  }
  3971  
  3972  // Test that a deployment over an auto-reverted job completes successfully
  3973  func TestReconciler_FailedDeployment_AutoRevert_CancelCanaries(t *testing.T) {
  3974  	// Create a job
  3975  	job := mock.Job()
  3976  	job.TaskGroups[0].Count = 3
  3977  	job.TaskGroups[0].Update = &structs.UpdateStrategy{
  3978  		Canary:          3,
  3979  		MaxParallel:     2,
  3980  		HealthCheck:     structs.UpdateStrategyHealthCheck_Checks,
  3981  		MinHealthyTime:  10 * time.Second,
  3982  		HealthyDeadline: 10 * time.Minute,
  3983  		Stagger:         31 * time.Second,
  3984  	}
  3985  
  3986  	// Create v1 of the job
  3987  	jobv1 := job.Copy()
  3988  	jobv1.Version = 1
  3989  	jobv1.TaskGroups[0].Meta = map[string]string{"version": "1"}
  3990  
  3991  	// Create v2 of the job
  3992  	jobv2 := job.Copy()
  3993  	jobv2.Version = 2
  3994  	jobv2.TaskGroups[0].Meta = map[string]string{"version": "2"}
  3995  
  3996  	// Create an existing failed deployment that has promoted one task group
  3997  	d := structs.NewDeployment(jobv2)
  3998  	state := &structs.DeploymentState{
  3999  		Promoted:     false,
  4000  		DesiredTotal: 3,
  4001  		PlacedAllocs: 3,
  4002  	}
  4003  	d.TaskGroups[job.TaskGroups[0].Name] = state
  4004  
  4005  	// Create the running, healthy allocs from v2 of the job
  4006  	var allocs []*structs.Allocation
  4007  	for i := 0; i < 3; i++ {
  4008  		new := mock.Alloc()
  4009  		new.Job = jobv2
  4010  		new.JobID = job.ID
  4011  		new.NodeID = uuid.Generate()
  4012  		new.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  4013  		new.TaskGroup = job.TaskGroups[0].Name
  4014  		new.DeploymentID = d.ID
  4015  		new.DeploymentStatus = &structs.AllocDeploymentStatus{
  4016  			Healthy: helper.BoolToPtr(true),
  4017  		}
  4018  		new.ClientStatus = structs.AllocClientStatusRunning
  4019  		allocs = append(allocs, new)
  4020  
  4021  	}
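        	// Create the stopped, failed allocs from v1 of the job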
  4022  	for i := 0; i < 3; i++ {
  4023  		new := mock.Alloc()
  4024  		new.Job = jobv1
  4025  		new.JobID = jobv1.ID
  4026  		new.NodeID = uuid.Generate()
  4027  		new.Name = structs.AllocName(jobv1.ID, jobv1.TaskGroups[0].Name, uint(i))
  4028  		new.TaskGroup = job.TaskGroups[0].Name
  4029  		new.DeploymentID = uuid.Generate()
  4030  		new.DeploymentStatus = &structs.AllocDeploymentStatus{
  4031  			Healthy: helper.BoolToPtr(false),
  4032  		}
  4033  		new.DesiredStatus = structs.AllocDesiredStatusStop
  4034  		new.ClientStatus = structs.AllocClientStatusFailed
  4035  		allocs = append(allocs, new)
  4036  	}
  4037  
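        	// The deployment's 3 placed allocs are all healthy, so it should be
        	// marked successful; the stopped v1 allocs require no further action.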
  4038  	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, jobv2, d, allocs, nil, "")
  4039  	r := reconciler.Compute()
  4040  
  4041  	updates := []*structs.DeploymentStatusUpdate{
  4042  		{
  4043  			DeploymentID:      d.ID,
  4044  			Status:            structs.DeploymentStatusSuccessful,
  4045  			StatusDescription: structs.DeploymentStatusDescriptionSuccessful,
  4046  		},
  4047  	}
  4048  
  4049  	// Assert the correct results
  4050  	assertResults(t, r, &resultExpectation{
  4051  		createDeployment:  nil,
  4052  		deploymentUpdates: updates,
  4053  		place:             0,
  4054  		inplace:           0,
  4055  		stop:              0,
  4056  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  4057  			job.TaskGroups[0].Name: {
  4058  				Stop:          0,
  4059  				InPlaceUpdate: 0,
  4060  				Ignore:        3,
  4061  			},
  4062  		},
  4063  	})
  4064  }