github.com/anuvu/nomad@v0.8.7-atom1/scheduler/reconcile_test.go (about)

     1  package scheduler
     2  
     3  import (
     4  	"fmt"
     5  	"log"
     6  	"os"
     7  	"reflect"
     8  	"regexp"
     9  	"strconv"
    10  	"testing"
    11  	"time"
    12  
    13  	"github.com/hashicorp/nomad/helper"
    14  	"github.com/hashicorp/nomad/helper/uuid"
    15  	"github.com/hashicorp/nomad/nomad/mock"
    16  	"github.com/hashicorp/nomad/nomad/structs"
    17  	"github.com/kr/pretty"
    18  	"github.com/stretchr/testify/assert"
    19  	"github.com/stretchr/testify/require"
    20  )
    21  
    22  var (
    23  	canaryUpdate = &structs.UpdateStrategy{
    24  		Canary:          2,
    25  		MaxParallel:     2,
    26  		HealthCheck:     structs.UpdateStrategyHealthCheck_Checks,
    27  		MinHealthyTime:  10 * time.Second,
    28  		HealthyDeadline: 10 * time.Minute,
    29  		Stagger:         31 * time.Second,
    30  	}
    31  
    32  	noCanaryUpdate = &structs.UpdateStrategy{
    33  		MaxParallel:     4,
    34  		HealthCheck:     structs.UpdateStrategyHealthCheck_Checks,
    35  		MinHealthyTime:  10 * time.Second,
    36  		HealthyDeadline: 10 * time.Minute,
    37  		Stagger:         31 * time.Second,
    38  	}
    39  )
    40  
    41  func testLogger() *log.Logger {
    42  	return log.New(os.Stderr, "", log.LstdFlags)
    43  }
    44  
    45  func allocUpdateFnIgnore(*structs.Allocation, *structs.Job, *structs.TaskGroup) (bool, bool, *structs.Allocation) {
    46  	return true, false, nil
    47  }
    48  
    49  func allocUpdateFnDestructive(*structs.Allocation, *structs.Job, *structs.TaskGroup) (bool, bool, *structs.Allocation) {
    50  	return false, true, nil
    51  }
    52  
    53  func allocUpdateFnInplace(existing *structs.Allocation, _ *structs.Job, newTG *structs.TaskGroup) (bool, bool, *structs.Allocation) {
    54  	// Create a shallow copy
    55  	newAlloc := existing.CopySkipJob()
    56  	newAlloc.TaskResources = make(map[string]*structs.Resources)
    57  
    58  	// Use the new task resources but keep the network from the old
    59  	for _, task := range newTG.Tasks {
    60  		r := task.Resources.Copy()
    61  		r.Networks = existing.TaskResources[task.Name].Networks
    62  		newAlloc.TaskResources[task.Name] = r
    63  	}
    64  
    65  	return false, false, newAlloc
    66  }
    67  
    68  func allocUpdateFnMock(handled map[string]allocUpdateType, unhandled allocUpdateType) allocUpdateType {
    69  	return func(existing *structs.Allocation, newJob *structs.Job, newTG *structs.TaskGroup) (bool, bool, *structs.Allocation) {
    70  		if fn, ok := handled[existing.ID]; ok {
    71  			return fn(existing, newJob, newTG)
    72  		}
    73  
    74  		return unhandled(existing, newJob, newTG)
    75  	}
    76  }
    77  
    78  var (
    79  	// AllocationIndexRegex is a regular expression to find the allocation index.
    80  	allocationIndexRegex = regexp.MustCompile(".+\\[(\\d+)\\]$")
    81  )
    82  
    83  // allocNameToIndex returns the index of the allocation.
    84  func allocNameToIndex(name string) uint {
    85  	matches := allocationIndexRegex.FindStringSubmatch(name)
    86  	if len(matches) != 2 {
    87  		return 0
    88  	}
    89  
    90  	index, err := strconv.Atoi(matches[1])
    91  	if err != nil {
    92  		return 0
    93  	}
    94  
    95  	return uint(index)
    96  }
    97  
    98  func assertNamesHaveIndexes(t *testing.T, indexes []int, names []string) {
    99  	t.Helper()
   100  	m := make(map[uint]int)
   101  	for _, i := range indexes {
   102  		m[uint(i)] += 1
   103  	}
   104  
   105  	for _, n := range names {
   106  		index := allocNameToIndex(n)
   107  		val, contained := m[index]
   108  		if !contained {
   109  			t.Fatalf("Unexpected index %d from name %s\nAll names: %v", index, n, names)
   110  		}
   111  
   112  		val--
   113  		if val < 0 {
   114  			t.Fatalf("Index %d repeated too many times\nAll names: %v", index, names)
   115  		}
   116  		m[index] = val
   117  	}
   118  
   119  	for k, remainder := range m {
   120  		if remainder != 0 {
   121  			t.Fatalf("Index %d has %d remaining uses expected\nAll names: %v", k, remainder, names)
   122  		}
   123  	}
   124  }
   125  
   126  func assertNoCanariesStopped(t *testing.T, d *structs.Deployment, stop []allocStopResult) {
   127  	t.Helper()
   128  	canaryIndex := make(map[string]struct{})
   129  	for _, state := range d.TaskGroups {
   130  		for _, c := range state.PlacedCanaries {
   131  			canaryIndex[c] = struct{}{}
   132  		}
   133  	}
   134  
   135  	for _, s := range stop {
   136  		if _, ok := canaryIndex[s.alloc.ID]; ok {
   137  			t.Fatalf("Stopping canary alloc %q %q", s.alloc.ID, s.alloc.Name)
   138  		}
   139  	}
   140  }
   141  
   142  func assertPlaceResultsHavePreviousAllocs(t *testing.T, numPrevious int, place []allocPlaceResult) {
   143  	t.Helper()
   144  	names := make(map[string]struct{}, numPrevious)
   145  
   146  	found := 0
   147  	for _, p := range place {
   148  		if _, ok := names[p.name]; ok {
   149  			t.Fatalf("Name %q already placed", p.name)
   150  		}
   151  		names[p.name] = struct{}{}
   152  
   153  		if p.previousAlloc == nil {
   154  			continue
   155  		}
   156  
   157  		if act := p.previousAlloc.Name; p.name != act {
   158  			t.Fatalf("Name mismatch on previous alloc; got %q; want %q", act, p.name)
   159  		}
   160  		found++
   161  	}
   162  	if numPrevious != found {
   163  		t.Fatalf("wanted %d; got %d placements with previous allocs", numPrevious, found)
   164  	}
   165  }
   166  
   167  func assertPlacementsAreRescheduled(t *testing.T, numRescheduled int, place []allocPlaceResult) {
   168  	t.Helper()
   169  	names := make(map[string]struct{}, numRescheduled)
   170  
   171  	found := 0
   172  	for _, p := range place {
   173  		if _, ok := names[p.name]; ok {
   174  			t.Fatalf("Name %q already placed", p.name)
   175  		}
   176  		names[p.name] = struct{}{}
   177  
   178  		if p.previousAlloc == nil {
   179  			continue
   180  		}
   181  		if p.reschedule {
   182  			found++
   183  		}
   184  
   185  	}
   186  	if numRescheduled != found {
   187  		t.Fatalf("wanted %d; got %d placements that are rescheduled", numRescheduled, found)
   188  	}
   189  }
   190  
   191  func intRange(pairs ...int) []int {
   192  	if len(pairs)%2 != 0 {
   193  		return nil
   194  	}
   195  
   196  	var r []int
   197  	for i := 0; i < len(pairs); i += 2 {
   198  		for j := pairs[i]; j <= pairs[i+1]; j++ {
   199  			r = append(r, j)
   200  		}
   201  	}
   202  	return r
   203  }
   204  
   205  func placeResultsToNames(place []allocPlaceResult) []string {
   206  	names := make([]string, 0, len(place))
   207  	for _, p := range place {
   208  		names = append(names, p.name)
   209  	}
   210  	return names
   211  }
   212  
   213  func destructiveResultsToNames(destructive []allocDestructiveResult) []string {
   214  	names := make([]string, 0, len(destructive))
   215  	for _, d := range destructive {
   216  		names = append(names, d.placeName)
   217  	}
   218  	return names
   219  }
   220  
   221  func stopResultsToNames(stop []allocStopResult) []string {
   222  	names := make([]string, 0, len(stop))
   223  	for _, s := range stop {
   224  		names = append(names, s.alloc.Name)
   225  	}
   226  	return names
   227  }
   228  
   229  func attributeUpdatesToNames(attributeUpdates map[string]*structs.Allocation) []string {
   230  	names := make([]string, 0, len(attributeUpdates))
   231  	for _, a := range attributeUpdates {
   232  		names = append(names, a.Name)
   233  	}
   234  	return names
   235  }
   236  
   237  func allocsToNames(allocs []*structs.Allocation) []string {
   238  	names := make([]string, 0, len(allocs))
   239  	for _, a := range allocs {
   240  		names = append(names, a.Name)
   241  	}
   242  	return names
   243  }
   244  
   245  type resultExpectation struct {
   246  	createDeployment  *structs.Deployment
   247  	deploymentUpdates []*structs.DeploymentStatusUpdate
   248  	place             int
   249  	destructive       int
   250  	inplace           int
   251  	attributeUpdates  int
   252  	stop              int
   253  	desiredTGUpdates  map[string]*structs.DesiredUpdates
   254  }
   255  
   256  func assertResults(t *testing.T, r *reconcileResults, exp *resultExpectation) {
   257  	t.Helper()
   258  	assert := assert.New(t)
   259  
   260  	if exp.createDeployment != nil && r.deployment == nil {
   261  		t.Errorf("Expect a created deployment got none")
   262  	} else if exp.createDeployment == nil && r.deployment != nil {
   263  		t.Errorf("Expect no created deployment; got %#v", r.deployment)
   264  	} else if exp.createDeployment != nil && r.deployment != nil {
   265  		// Clear the deployment ID
   266  		r.deployment.ID, exp.createDeployment.ID = "", ""
   267  		if !reflect.DeepEqual(r.deployment, exp.createDeployment) {
   268  			t.Errorf("Unexpected createdDeployment; got\n %#v\nwant\n%#v\nDiff: %v",
   269  				r.deployment, exp.createDeployment, pretty.Diff(r.deployment, exp.createDeployment))
   270  		}
   271  	}
   272  
   273  	assert.EqualValues(exp.deploymentUpdates, r.deploymentUpdates, "Expected Deployment Updates")
   274  	assert.Len(r.place, exp.place, "Expected Placements")
   275  	assert.Len(r.destructiveUpdate, exp.destructive, "Expected Destructive")
   276  	assert.Len(r.inplaceUpdate, exp.inplace, "Expected Inplace Updates")
   277  	assert.Len(r.attributeUpdates, exp.attributeUpdates, "Expected Attribute Updates")
   278  	assert.Len(r.stop, exp.stop, "Expected Stops")
   279  	assert.EqualValues(exp.desiredTGUpdates, r.desiredTGUpdates, "Expected Desired TG Update Annotations")
   280  }
   281  
   282  // Tests the reconciler properly handles placements for a job that has no
   283  // existing allocations
   284  func TestReconciler_Place_NoExisting(t *testing.T) {
   285  	job := mock.Job()
   286  	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, nil, nil, "")
   287  	r := reconciler.Compute()
   288  
   289  	// Assert the correct results
   290  	assertResults(t, r, &resultExpectation{
   291  		createDeployment:  nil,
   292  		deploymentUpdates: nil,
   293  		place:             10,
   294  		inplace:           0,
   295  		stop:              0,
   296  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
   297  			job.TaskGroups[0].Name: {
   298  				Place: 10,
   299  			},
   300  		},
   301  	})
   302  
   303  	assertNamesHaveIndexes(t, intRange(0, 9), placeResultsToNames(r.place))
   304  }
   305  
   306  // Tests the reconciler properly handles placements for a job that has some
   307  // existing allocations
   308  func TestReconciler_Place_Existing(t *testing.T) {
   309  	job := mock.Job()
   310  
   311  	// Create 3 existing allocations
   312  	var allocs []*structs.Allocation
   313  	for i := 0; i < 5; i++ {
   314  		alloc := mock.Alloc()
   315  		alloc.Job = job
   316  		alloc.JobID = job.ID
   317  		alloc.NodeID = uuid.Generate()
   318  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
   319  		allocs = append(allocs, alloc)
   320  	}
   321  
   322  	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
   323  	r := reconciler.Compute()
   324  
   325  	// Assert the correct results
   326  	assertResults(t, r, &resultExpectation{
   327  		createDeployment:  nil,
   328  		deploymentUpdates: nil,
   329  		place:             5,
   330  		inplace:           0,
   331  		stop:              0,
   332  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
   333  			job.TaskGroups[0].Name: {
   334  				Place:  5,
   335  				Ignore: 5,
   336  			},
   337  		},
   338  	})
   339  
   340  	assertNamesHaveIndexes(t, intRange(5, 9), placeResultsToNames(r.place))
   341  }
   342  
   343  // Tests the reconciler properly handles stopping allocations for a job that has
   344  // scaled down
   345  func TestReconciler_ScaleDown_Partial(t *testing.T) {
   346  	// Has desired 10
   347  	job := mock.Job()
   348  
   349  	// Create 20 existing allocations
   350  	var allocs []*structs.Allocation
   351  	for i := 0; i < 20; i++ {
   352  		alloc := mock.Alloc()
   353  		alloc.Job = job
   354  		alloc.JobID = job.ID
   355  		alloc.NodeID = uuid.Generate()
   356  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
   357  		allocs = append(allocs, alloc)
   358  	}
   359  
   360  	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
   361  	r := reconciler.Compute()
   362  
   363  	// Assert the correct results
   364  	assertResults(t, r, &resultExpectation{
   365  		createDeployment:  nil,
   366  		deploymentUpdates: nil,
   367  		place:             0,
   368  		inplace:           0,
   369  		stop:              10,
   370  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
   371  			job.TaskGroups[0].Name: {
   372  				Ignore: 10,
   373  				Stop:   10,
   374  			},
   375  		},
   376  	})
   377  
   378  	assertNamesHaveIndexes(t, intRange(10, 19), stopResultsToNames(r.stop))
   379  }
   380  
   381  // Tests the reconciler properly handles stopping allocations for a job that has
   382  // scaled down to zero desired
   383  func TestReconciler_ScaleDown_Zero(t *testing.T) {
   384  	// Set desired 0
   385  	job := mock.Job()
   386  	job.TaskGroups[0].Count = 0
   387  
   388  	// Create 20 existing allocations
   389  	var allocs []*structs.Allocation
   390  	for i := 0; i < 20; i++ {
   391  		alloc := mock.Alloc()
   392  		alloc.Job = job
   393  		alloc.JobID = job.ID
   394  		alloc.NodeID = uuid.Generate()
   395  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
   396  		allocs = append(allocs, alloc)
   397  	}
   398  
   399  	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
   400  	r := reconciler.Compute()
   401  
   402  	// Assert the correct results
   403  	assertResults(t, r, &resultExpectation{
   404  		createDeployment:  nil,
   405  		deploymentUpdates: nil,
   406  		place:             0,
   407  		inplace:           0,
   408  		stop:              20,
   409  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
   410  			job.TaskGroups[0].Name: {
   411  				Stop: 20,
   412  			},
   413  		},
   414  	})
   415  
   416  	assertNamesHaveIndexes(t, intRange(0, 19), stopResultsToNames(r.stop))
   417  }
   418  
   419  // Tests the reconciler properly handles stopping allocations for a job that has
   420  // scaled down to zero desired where allocs have duplicate names
   421  func TestReconciler_ScaleDown_Zero_DuplicateNames(t *testing.T) {
   422  	// Set desired 0
   423  	job := mock.Job()
   424  	job.TaskGroups[0].Count = 0
   425  
   426  	// Create 20 existing allocations
   427  	var allocs []*structs.Allocation
   428  	var expectedStopped []int
   429  	for i := 0; i < 20; i++ {
   430  		alloc := mock.Alloc()
   431  		alloc.Job = job
   432  		alloc.JobID = job.ID
   433  		alloc.NodeID = uuid.Generate()
   434  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i%2))
   435  		allocs = append(allocs, alloc)
   436  		expectedStopped = append(expectedStopped, i%2)
   437  	}
   438  
   439  	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
   440  	r := reconciler.Compute()
   441  
   442  	// Assert the correct results
   443  	assertResults(t, r, &resultExpectation{
   444  		createDeployment:  nil,
   445  		deploymentUpdates: nil,
   446  		place:             0,
   447  		inplace:           0,
   448  		stop:              20,
   449  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
   450  			job.TaskGroups[0].Name: {
   451  				Stop: 20,
   452  			},
   453  		},
   454  	})
   455  
   456  	assertNamesHaveIndexes(t, expectedStopped, stopResultsToNames(r.stop))
   457  }
   458  
   459  // Tests the reconciler properly handles inplace upgrading allocations
   460  func TestReconciler_Inplace(t *testing.T) {
   461  	job := mock.Job()
   462  
   463  	// Create 10 existing allocations
   464  	var allocs []*structs.Allocation
   465  	for i := 0; i < 10; i++ {
   466  		alloc := mock.Alloc()
   467  		alloc.Job = job
   468  		alloc.JobID = job.ID
   469  		alloc.NodeID = uuid.Generate()
   470  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
   471  		allocs = append(allocs, alloc)
   472  	}
   473  
   474  	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnInplace, false, job.ID, job, nil, allocs, nil, "")
   475  	r := reconciler.Compute()
   476  
   477  	// Assert the correct results
   478  	assertResults(t, r, &resultExpectation{
   479  		createDeployment:  nil,
   480  		deploymentUpdates: nil,
   481  		place:             0,
   482  		inplace:           10,
   483  		stop:              0,
   484  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
   485  			job.TaskGroups[0].Name: {
   486  				InPlaceUpdate: 10,
   487  			},
   488  		},
   489  	})
   490  
   491  	assertNamesHaveIndexes(t, intRange(0, 9), allocsToNames(r.inplaceUpdate))
   492  }
   493  
   494  // Tests the reconciler properly handles inplace upgrading allocations while
   495  // scaling up
   496  func TestReconciler_Inplace_ScaleUp(t *testing.T) {
   497  	// Set desired 15
   498  	job := mock.Job()
   499  	job.TaskGroups[0].Count = 15
   500  
   501  	// Create 10 existing allocations
   502  	var allocs []*structs.Allocation
   503  	for i := 0; i < 10; i++ {
   504  		alloc := mock.Alloc()
   505  		alloc.Job = job
   506  		alloc.JobID = job.ID
   507  		alloc.NodeID = uuid.Generate()
   508  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
   509  		allocs = append(allocs, alloc)
   510  	}
   511  
   512  	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnInplace, false, job.ID, job, nil, allocs, nil, "")
   513  	r := reconciler.Compute()
   514  
   515  	// Assert the correct results
   516  	assertResults(t, r, &resultExpectation{
   517  		createDeployment:  nil,
   518  		deploymentUpdates: nil,
   519  		place:             5,
   520  		inplace:           10,
   521  		stop:              0,
   522  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
   523  			job.TaskGroups[0].Name: {
   524  				Place:         5,
   525  				InPlaceUpdate: 10,
   526  			},
   527  		},
   528  	})
   529  
   530  	assertNamesHaveIndexes(t, intRange(0, 9), allocsToNames(r.inplaceUpdate))
   531  	assertNamesHaveIndexes(t, intRange(10, 14), placeResultsToNames(r.place))
   532  }
   533  
   534  // Tests the reconciler properly handles inplace upgrading allocations while
   535  // scaling down
   536  func TestReconciler_Inplace_ScaleDown(t *testing.T) {
   537  	// Set desired 5
   538  	job := mock.Job()
   539  	job.TaskGroups[0].Count = 5
   540  
   541  	// Create 10 existing allocations
   542  	var allocs []*structs.Allocation
   543  	for i := 0; i < 10; i++ {
   544  		alloc := mock.Alloc()
   545  		alloc.Job = job
   546  		alloc.JobID = job.ID
   547  		alloc.NodeID = uuid.Generate()
   548  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
   549  		allocs = append(allocs, alloc)
   550  	}
   551  
   552  	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnInplace, false, job.ID, job, nil, allocs, nil, "")
   553  	r := reconciler.Compute()
   554  
   555  	// Assert the correct results
   556  	assertResults(t, r, &resultExpectation{
   557  		createDeployment:  nil,
   558  		deploymentUpdates: nil,
   559  		place:             0,
   560  		inplace:           5,
   561  		stop:              5,
   562  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
   563  			job.TaskGroups[0].Name: {
   564  				Stop:          5,
   565  				InPlaceUpdate: 5,
   566  			},
   567  		},
   568  	})
   569  
   570  	assertNamesHaveIndexes(t, intRange(0, 4), allocsToNames(r.inplaceUpdate))
   571  	assertNamesHaveIndexes(t, intRange(5, 9), stopResultsToNames(r.stop))
   572  }
   573  
   574  // Tests the reconciler properly handles destructive upgrading allocations
   575  func TestReconciler_Destructive(t *testing.T) {
   576  	job := mock.Job()
   577  
   578  	// Create 10 existing allocations
   579  	var allocs []*structs.Allocation
   580  	for i := 0; i < 10; i++ {
   581  		alloc := mock.Alloc()
   582  		alloc.Job = job
   583  		alloc.JobID = job.ID
   584  		alloc.NodeID = uuid.Generate()
   585  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
   586  		allocs = append(allocs, alloc)
   587  	}
   588  
   589  	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "")
   590  	r := reconciler.Compute()
   591  
   592  	// Assert the correct results
   593  	assertResults(t, r, &resultExpectation{
   594  		createDeployment:  nil,
   595  		deploymentUpdates: nil,
   596  		destructive:       10,
   597  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
   598  			job.TaskGroups[0].Name: {
   599  				DestructiveUpdate: 10,
   600  			},
   601  		},
   602  	})
   603  
   604  	assertNamesHaveIndexes(t, intRange(0, 9), destructiveResultsToNames(r.destructiveUpdate))
   605  }
   606  
   607  // Tests the reconciler properly handles destructive upgrading allocations while
   608  // scaling up
   609  func TestReconciler_Destructive_ScaleUp(t *testing.T) {
   610  	// Set desired 15
   611  	job := mock.Job()
   612  	job.TaskGroups[0].Count = 15
   613  
   614  	// Create 10 existing allocations
   615  	var allocs []*structs.Allocation
   616  	for i := 0; i < 10; i++ {
   617  		alloc := mock.Alloc()
   618  		alloc.Job = job
   619  		alloc.JobID = job.ID
   620  		alloc.NodeID = uuid.Generate()
   621  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
   622  		allocs = append(allocs, alloc)
   623  	}
   624  
   625  	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "")
   626  	r := reconciler.Compute()
   627  
   628  	// Assert the correct results
   629  	assertResults(t, r, &resultExpectation{
   630  		createDeployment:  nil,
   631  		deploymentUpdates: nil,
   632  		place:             5,
   633  		destructive:       10,
   634  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
   635  			job.TaskGroups[0].Name: {
   636  				Place:             5,
   637  				DestructiveUpdate: 10,
   638  			},
   639  		},
   640  	})
   641  
   642  	assertNamesHaveIndexes(t, intRange(0, 9), destructiveResultsToNames(r.destructiveUpdate))
   643  	assertNamesHaveIndexes(t, intRange(10, 14), placeResultsToNames(r.place))
   644  }
   645  
   646  // Tests the reconciler properly handles destructive upgrading allocations while
   647  // scaling down
   648  func TestReconciler_Destructive_ScaleDown(t *testing.T) {
   649  	// Set desired 5
   650  	job := mock.Job()
   651  	job.TaskGroups[0].Count = 5
   652  
   653  	// Create 10 existing allocations
   654  	var allocs []*structs.Allocation
   655  	for i := 0; i < 10; i++ {
   656  		alloc := mock.Alloc()
   657  		alloc.Job = job
   658  		alloc.JobID = job.ID
   659  		alloc.NodeID = uuid.Generate()
   660  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
   661  		allocs = append(allocs, alloc)
   662  	}
   663  
   664  	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "")
   665  	r := reconciler.Compute()
   666  
   667  	// Assert the correct results
   668  	assertResults(t, r, &resultExpectation{
   669  		createDeployment:  nil,
   670  		deploymentUpdates: nil,
   671  		destructive:       5,
   672  		stop:              5,
   673  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
   674  			job.TaskGroups[0].Name: {
   675  				Stop:              5,
   676  				DestructiveUpdate: 5,
   677  			},
   678  		},
   679  	})
   680  
   681  	assertNamesHaveIndexes(t, intRange(5, 9), stopResultsToNames(r.stop))
   682  	assertNamesHaveIndexes(t, intRange(0, 4), destructiveResultsToNames(r.destructiveUpdate))
   683  }
   684  
   685  // Tests the reconciler properly handles lost nodes with allocations
   686  func TestReconciler_LostNode(t *testing.T) {
   687  	job := mock.Job()
   688  
   689  	// Create 10 existing allocations
   690  	var allocs []*structs.Allocation
   691  	for i := 0; i < 10; i++ {
   692  		alloc := mock.Alloc()
   693  		alloc.Job = job
   694  		alloc.JobID = job.ID
   695  		alloc.NodeID = uuid.Generate()
   696  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
   697  		allocs = append(allocs, alloc)
   698  	}
   699  
   700  	// Build a map of tainted nodes
   701  	tainted := make(map[string]*structs.Node, 2)
   702  	for i := 0; i < 2; i++ {
   703  		n := mock.Node()
   704  		n.ID = allocs[i].NodeID
   705  		n.Status = structs.NodeStatusDown
   706  		tainted[n.ID] = n
   707  	}
   708  
   709  	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, tainted, "")
   710  	r := reconciler.Compute()
   711  
   712  	// Assert the correct results
   713  	assertResults(t, r, &resultExpectation{
   714  		createDeployment:  nil,
   715  		deploymentUpdates: nil,
   716  		place:             2,
   717  		inplace:           0,
   718  		stop:              2,
   719  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
   720  			job.TaskGroups[0].Name: {
   721  				Place:  2,
   722  				Stop:   2,
   723  				Ignore: 8,
   724  			},
   725  		},
   726  	})
   727  
   728  	assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop))
   729  	assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place))
   730  }
   731  
   732  // Tests the reconciler properly handles lost nodes with allocations while
   733  // scaling up
   734  func TestReconciler_LostNode_ScaleUp(t *testing.T) {
   735  	// Set desired 15
   736  	job := mock.Job()
   737  	job.TaskGroups[0].Count = 15
   738  
   739  	// Create 10 existing allocations
   740  	var allocs []*structs.Allocation
   741  	for i := 0; i < 10; i++ {
   742  		alloc := mock.Alloc()
   743  		alloc.Job = job
   744  		alloc.JobID = job.ID
   745  		alloc.NodeID = uuid.Generate()
   746  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
   747  		allocs = append(allocs, alloc)
   748  	}
   749  
   750  	// Build a map of tainted nodes
   751  	tainted := make(map[string]*structs.Node, 2)
   752  	for i := 0; i < 2; i++ {
   753  		n := mock.Node()
   754  		n.ID = allocs[i].NodeID
   755  		n.Status = structs.NodeStatusDown
   756  		tainted[n.ID] = n
   757  	}
   758  
   759  	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, tainted, "")
   760  	r := reconciler.Compute()
   761  
   762  	// Assert the correct results
   763  	assertResults(t, r, &resultExpectation{
   764  		createDeployment:  nil,
   765  		deploymentUpdates: nil,
   766  		place:             7,
   767  		inplace:           0,
   768  		stop:              2,
   769  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
   770  			job.TaskGroups[0].Name: {
   771  				Place:  7,
   772  				Stop:   2,
   773  				Ignore: 8,
   774  			},
   775  		},
   776  	})
   777  
   778  	assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop))
   779  	assertNamesHaveIndexes(t, intRange(0, 1, 10, 14), placeResultsToNames(r.place))
   780  }
   781  
   782  // Tests the reconciler properly handles lost nodes with allocations while
   783  // scaling down
   784  func TestReconciler_LostNode_ScaleDown(t *testing.T) {
   785  	// Set desired 5
   786  	job := mock.Job()
   787  	job.TaskGroups[0].Count = 5
   788  
   789  	// Create 10 existing allocations
   790  	var allocs []*structs.Allocation
   791  	for i := 0; i < 10; i++ {
   792  		alloc := mock.Alloc()
   793  		alloc.Job = job
   794  		alloc.JobID = job.ID
   795  		alloc.NodeID = uuid.Generate()
   796  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
   797  		allocs = append(allocs, alloc)
   798  	}
   799  
   800  	// Build a map of tainted nodes
   801  	tainted := make(map[string]*structs.Node, 2)
   802  	for i := 0; i < 2; i++ {
   803  		n := mock.Node()
   804  		n.ID = allocs[i].NodeID
   805  		n.Status = structs.NodeStatusDown
   806  		tainted[n.ID] = n
   807  	}
   808  
   809  	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, tainted, "")
   810  	r := reconciler.Compute()
   811  
   812  	// Assert the correct results
   813  	assertResults(t, r, &resultExpectation{
   814  		createDeployment:  nil,
   815  		deploymentUpdates: nil,
   816  		place:             0,
   817  		inplace:           0,
   818  		stop:              5,
   819  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
   820  			job.TaskGroups[0].Name: {
   821  				Stop:   5,
   822  				Ignore: 5,
   823  			},
   824  		},
   825  	})
   826  
   827  	assertNamesHaveIndexes(t, intRange(0, 1, 7, 9), stopResultsToNames(r.stop))
   828  }
   829  
   830  // Tests the reconciler properly handles draining nodes with allocations
   831  func TestReconciler_DrainNode(t *testing.T) {
   832  	job := mock.Job()
   833  
   834  	// Create 10 existing allocations
   835  	var allocs []*structs.Allocation
   836  	for i := 0; i < 10; i++ {
   837  		alloc := mock.Alloc()
   838  		alloc.Job = job
   839  		alloc.JobID = job.ID
   840  		alloc.NodeID = uuid.Generate()
   841  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
   842  		allocs = append(allocs, alloc)
   843  	}
   844  
   845  	// Build a map of tainted nodes
   846  	tainted := make(map[string]*structs.Node, 2)
   847  	for i := 0; i < 2; i++ {
   848  		n := mock.Node()
   849  		n.ID = allocs[i].NodeID
   850  		allocs[i].DesiredTransition.Migrate = helper.BoolToPtr(true)
   851  		n.Drain = true
   852  		tainted[n.ID] = n
   853  	}
   854  
   855  	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, tainted, "")
   856  	r := reconciler.Compute()
   857  
   858  	// Assert the correct results
   859  	assertResults(t, r, &resultExpectation{
   860  		createDeployment:  nil,
   861  		deploymentUpdates: nil,
   862  		place:             2,
   863  		inplace:           0,
   864  		stop:              2,
   865  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
   866  			job.TaskGroups[0].Name: {
   867  				Migrate: 2,
   868  				Ignore:  8,
   869  			},
   870  		},
   871  	})
   872  
   873  	assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop))
   874  	assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place))
   875  	assertPlaceResultsHavePreviousAllocs(t, 2, r.place)
   876  	// These should not have the reschedule field set
   877  	assertPlacementsAreRescheduled(t, 0, r.place)
   878  }
   879  
   880  // Tests the reconciler properly handles draining nodes with allocations while
   881  // scaling up
   882  func TestReconciler_DrainNode_ScaleUp(t *testing.T) {
   883  	// Set desired 15
   884  	job := mock.Job()
   885  	job.TaskGroups[0].Count = 15
   886  
   887  	// Create 10 existing allocations
   888  	var allocs []*structs.Allocation
   889  	for i := 0; i < 10; i++ {
   890  		alloc := mock.Alloc()
   891  		alloc.Job = job
   892  		alloc.JobID = job.ID
   893  		alloc.NodeID = uuid.Generate()
   894  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
   895  		allocs = append(allocs, alloc)
   896  	}
   897  
   898  	// Build a map of tainted nodes
   899  	tainted := make(map[string]*structs.Node, 2)
   900  	for i := 0; i < 2; i++ {
   901  		n := mock.Node()
   902  		n.ID = allocs[i].NodeID
   903  		allocs[i].DesiredTransition.Migrate = helper.BoolToPtr(true)
   904  		n.Drain = true
   905  		tainted[n.ID] = n
   906  	}
   907  
   908  	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, tainted, "")
   909  	r := reconciler.Compute()
   910  
   911  	// Assert the correct results
   912  	assertResults(t, r, &resultExpectation{
   913  		createDeployment:  nil,
   914  		deploymentUpdates: nil,
   915  		place:             7,
   916  		inplace:           0,
   917  		stop:              2,
   918  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
   919  			job.TaskGroups[0].Name: {
   920  				Place:   5,
   921  				Migrate: 2,
   922  				Ignore:  8,
   923  			},
   924  		},
   925  	})
   926  
   927  	assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop))
   928  	assertNamesHaveIndexes(t, intRange(0, 1, 10, 14), placeResultsToNames(r.place))
   929  	assertPlaceResultsHavePreviousAllocs(t, 2, r.place)
   930  	// These should not have the reschedule field set
   931  	assertPlacementsAreRescheduled(t, 0, r.place)
   932  }
   933  
   934  // Tests the reconciler properly handles draining nodes with allocations while
   935  // scaling down
   936  func TestReconciler_DrainNode_ScaleDown(t *testing.T) {
   937  	// Set desired 8
   938  	job := mock.Job()
   939  	job.TaskGroups[0].Count = 8
   940  
   941  	// Create 10 existing allocations
   942  	var allocs []*structs.Allocation
   943  	for i := 0; i < 10; i++ {
   944  		alloc := mock.Alloc()
   945  		alloc.Job = job
   946  		alloc.JobID = job.ID
   947  		alloc.NodeID = uuid.Generate()
   948  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
   949  		allocs = append(allocs, alloc)
   950  	}
   951  
   952  	// Build a map of tainted nodes
   953  	tainted := make(map[string]*structs.Node, 3)
   954  	for i := 0; i < 3; i++ {
   955  		n := mock.Node()
   956  		n.ID = allocs[i].NodeID
   957  		allocs[i].DesiredTransition.Migrate = helper.BoolToPtr(true)
   958  		n.Drain = true
   959  		tainted[n.ID] = n
   960  	}
   961  
   962  	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, tainted, "")
   963  	r := reconciler.Compute()
   964  
   965  	// Assert the correct results
   966  	assertResults(t, r, &resultExpectation{
   967  		createDeployment:  nil,
   968  		deploymentUpdates: nil,
   969  		place:             1,
   970  		inplace:           0,
   971  		stop:              3,
   972  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
   973  			job.TaskGroups[0].Name: {
   974  				Migrate: 1,
   975  				Stop:    2,
   976  				Ignore:  7,
   977  			},
   978  		},
   979  	})
   980  
   981  	assertNamesHaveIndexes(t, intRange(0, 2), stopResultsToNames(r.stop))
   982  	assertNamesHaveIndexes(t, intRange(0, 0), placeResultsToNames(r.place))
   983  	assertPlaceResultsHavePreviousAllocs(t, 1, r.place)
   984  	// These should not have the reschedule field set
   985  	assertPlacementsAreRescheduled(t, 0, r.place)
   986  }
   987  
   988  // Tests the reconciler properly handles a task group being removed
   989  func TestReconciler_RemovedTG(t *testing.T) {
   990  	job := mock.Job()
   991  
   992  	// Create 10 allocations for a tg that no longer exists
   993  	var allocs []*structs.Allocation
   994  	for i := 0; i < 10; i++ {
   995  		alloc := mock.Alloc()
   996  		alloc.Job = job
   997  		alloc.JobID = job.ID
   998  		alloc.NodeID = uuid.Generate()
   999  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  1000  		allocs = append(allocs, alloc)
  1001  	}
  1002  
  1003  	oldName := job.TaskGroups[0].Name
  1004  	newName := "different"
  1005  	job.TaskGroups[0].Name = newName
  1006  
  1007  	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
  1008  	r := reconciler.Compute()
  1009  
  1010  	// Assert the correct results
  1011  	assertResults(t, r, &resultExpectation{
  1012  		createDeployment:  nil,
  1013  		deploymentUpdates: nil,
  1014  		place:             10,
  1015  		inplace:           0,
  1016  		stop:              10,
  1017  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  1018  			oldName: {
  1019  				Stop: 10,
  1020  			},
  1021  			newName: {
  1022  				Place: 10,
  1023  			},
  1024  		},
  1025  	})
  1026  
  1027  	assertNamesHaveIndexes(t, intRange(0, 9), stopResultsToNames(r.stop))
  1028  	assertNamesHaveIndexes(t, intRange(0, 9), placeResultsToNames(r.place))
  1029  }
  1030  
  1031  // Tests the reconciler properly handles a job in stopped states
  1032  func TestReconciler_JobStopped(t *testing.T) {
  1033  	job := mock.Job()
  1034  	job.Stop = true
  1035  
  1036  	cases := []struct {
  1037  		name             string
  1038  		job              *structs.Job
  1039  		jobID, taskGroup string
  1040  	}{
  1041  		{
  1042  			name:      "stopped job",
  1043  			job:       job,
  1044  			jobID:     job.ID,
  1045  			taskGroup: job.TaskGroups[0].Name,
  1046  		},
  1047  		{
  1048  			name:      "nil job",
  1049  			job:       nil,
  1050  			jobID:     "foo",
  1051  			taskGroup: "bar",
  1052  		},
  1053  	}
  1054  
  1055  	for _, c := range cases {
  1056  		t.Run(c.name, func(t *testing.T) {
  1057  			// Create 10 allocations
  1058  			var allocs []*structs.Allocation
  1059  			for i := 0; i < 10; i++ {
  1060  				alloc := mock.Alloc()
  1061  				alloc.Job = c.job
  1062  				alloc.JobID = c.jobID
  1063  				alloc.NodeID = uuid.Generate()
  1064  				alloc.Name = structs.AllocName(c.jobID, c.taskGroup, uint(i))
  1065  				alloc.TaskGroup = c.taskGroup
  1066  				allocs = append(allocs, alloc)
  1067  			}
  1068  
  1069  			reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, c.jobID, c.job, nil, allocs, nil, "")
  1070  			r := reconciler.Compute()
  1071  
  1072  			// Assert the correct results
  1073  			assertResults(t, r, &resultExpectation{
  1074  				createDeployment:  nil,
  1075  				deploymentUpdates: nil,
  1076  				place:             0,
  1077  				inplace:           0,
  1078  				stop:              10,
  1079  				desiredTGUpdates: map[string]*structs.DesiredUpdates{
  1080  					c.taskGroup: {
  1081  						Stop: 10,
  1082  					},
  1083  				},
  1084  			})
  1085  
  1086  			assertNamesHaveIndexes(t, intRange(0, 9), stopResultsToNames(r.stop))
  1087  		})
  1088  	}
  1089  }
  1090  
  1091  // Tests the reconciler doesn't update allocs in terminal state
  1092  // when job is stopped or nil
  1093  func TestReconciler_JobStopped_TerminalAllocs(t *testing.T) {
  1094  	job := mock.Job()
  1095  	job.Stop = true
  1096  
  1097  	cases := []struct {
  1098  		name             string
  1099  		job              *structs.Job
  1100  		jobID, taskGroup string
  1101  	}{
  1102  		{
  1103  			name:      "stopped job",
  1104  			job:       job,
  1105  			jobID:     job.ID,
  1106  			taskGroup: job.TaskGroups[0].Name,
  1107  		},
  1108  		{
  1109  			name:      "nil job",
  1110  			job:       nil,
  1111  			jobID:     "foo",
  1112  			taskGroup: "bar",
  1113  		},
  1114  	}
  1115  
  1116  	for _, c := range cases {
  1117  		t.Run(c.name, func(t *testing.T) {
  1118  			// Create 10 terminal allocations
  1119  			var allocs []*structs.Allocation
  1120  			for i := 0; i < 10; i++ {
  1121  				alloc := mock.Alloc()
  1122  				alloc.Job = c.job
  1123  				alloc.JobID = c.jobID
  1124  				alloc.NodeID = uuid.Generate()
  1125  				alloc.Name = structs.AllocName(c.jobID, c.taskGroup, uint(i))
  1126  				alloc.TaskGroup = c.taskGroup
  1127  				if i%2 == 0 {
  1128  					alloc.DesiredStatus = structs.AllocDesiredStatusStop
  1129  				} else {
  1130  					alloc.ClientStatus = structs.AllocClientStatusFailed
  1131  				}
  1132  				allocs = append(allocs, alloc)
  1133  			}
  1134  
  1135  			reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, c.jobID, c.job, nil, allocs, nil, "")
  1136  			r := reconciler.Compute()
  1137  			require.Len(t, r.stop, 0)
  1138  			// Assert the correct results
  1139  			assertResults(t, r, &resultExpectation{
  1140  				createDeployment:  nil,
  1141  				deploymentUpdates: nil,
  1142  				place:             0,
  1143  				inplace:           0,
  1144  				stop:              0,
  1145  				desiredTGUpdates: map[string]*structs.DesiredUpdates{
  1146  					c.taskGroup: {},
  1147  				},
  1148  			})
  1149  		})
  1150  	}
  1151  }
  1152  
  1153  // Tests the reconciler properly handles jobs with multiple task groups
  1154  func TestReconciler_MultiTG(t *testing.T) {
  1155  	job := mock.Job()
  1156  	tg2 := job.TaskGroups[0].Copy()
  1157  	tg2.Name = "foo"
  1158  	job.TaskGroups = append(job.TaskGroups, tg2)
  1159  
  1160  	// Create 2 existing allocations for the first tg
  1161  	var allocs []*structs.Allocation
  1162  	for i := 0; i < 2; i++ {
  1163  		alloc := mock.Alloc()
  1164  		alloc.Job = job
  1165  		alloc.JobID = job.ID
  1166  		alloc.NodeID = uuid.Generate()
  1167  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  1168  		allocs = append(allocs, alloc)
  1169  	}
  1170  
  1171  	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
  1172  	r := reconciler.Compute()
  1173  
  1174  	// Assert the correct results
  1175  	assertResults(t, r, &resultExpectation{
  1176  		createDeployment:  nil,
  1177  		deploymentUpdates: nil,
  1178  		place:             18,
  1179  		inplace:           0,
  1180  		stop:              0,
  1181  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  1182  			job.TaskGroups[0].Name: {
  1183  				Place:  8,
  1184  				Ignore: 2,
  1185  			},
  1186  			tg2.Name: {
  1187  				Place: 10,
  1188  			},
  1189  		},
  1190  	})
  1191  
  1192  	assertNamesHaveIndexes(t, intRange(2, 9, 0, 9), placeResultsToNames(r.place))
  1193  }
  1194  
  1195  // Tests the reconciler properly handles jobs with multiple task groups with
  1196  // only one having an update stanza and a deployment already being created
  1197  func TestReconciler_MultiTG_SingleUpdateStanza(t *testing.T) {
  1198  	job := mock.Job()
  1199  	tg2 := job.TaskGroups[0].Copy()
  1200  	tg2.Name = "foo"
  1201  	job.TaskGroups = append(job.TaskGroups, tg2)
  1202  	job.TaskGroups[0].Update = noCanaryUpdate
  1203  
  1204  	// Create all the allocs
  1205  	var allocs []*structs.Allocation
  1206  	for i := 0; i < 2; i++ {
  1207  		for j := 0; j < 10; j++ {
  1208  			alloc := mock.Alloc()
  1209  			alloc.Job = job
  1210  			alloc.JobID = job.ID
  1211  			alloc.NodeID = uuid.Generate()
  1212  			alloc.Name = structs.AllocName(job.ID, job.TaskGroups[i].Name, uint(j))
  1213  			alloc.TaskGroup = job.TaskGroups[i].Name
  1214  			allocs = append(allocs, alloc)
  1215  		}
  1216  	}
  1217  
  1218  	d := structs.NewDeployment(job)
  1219  	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  1220  		DesiredTotal: 10,
  1221  	}
  1222  
  1223  	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, d, allocs, nil, "")
  1224  	r := reconciler.Compute()
  1225  
  1226  	// Assert the correct results
  1227  	assertResults(t, r, &resultExpectation{
  1228  		createDeployment:  nil,
  1229  		deploymentUpdates: nil,
  1230  		place:             0,
  1231  		inplace:           0,
  1232  		stop:              0,
  1233  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  1234  			job.TaskGroups[0].Name: {
  1235  				Ignore: 10,
  1236  			},
  1237  			tg2.Name: {
  1238  				Ignore: 10,
  1239  			},
  1240  		},
  1241  	})
  1242  }
  1243  
  1244  // Tests delayed rescheduling of failed batch allocations
  1245  func TestReconciler_RescheduleLater_Batch(t *testing.T) {
  1246  	require := require.New(t)
  1247  
  1248  	// Set desired 4
  1249  	job := mock.Job()
  1250  	job.TaskGroups[0].Count = 4
  1251  	now := time.Now()
  1252  
  1253  	// Set up reschedule policy
  1254  	delayDur := 15 * time.Second
  1255  	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{Attempts: 3, Interval: 24 * time.Hour, Delay: delayDur, DelayFunction: "constant"}
  1256  	tgName := job.TaskGroups[0].Name
  1257  
  1258  	// Create 6 existing allocations - 2 running, 1 complete and 3 failed
  1259  	var allocs []*structs.Allocation
  1260  	for i := 0; i < 6; i++ {
  1261  		alloc := mock.Alloc()
  1262  		alloc.Job = job
  1263  		alloc.JobID = job.ID
  1264  		alloc.NodeID = uuid.Generate()
  1265  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  1266  		allocs = append(allocs, alloc)
  1267  		alloc.ClientStatus = structs.AllocClientStatusRunning
  1268  	}
  1269  
  1270  	// Mark 3 as failed with restart tracking info
  1271  	allocs[0].ClientStatus = structs.AllocClientStatusFailed
  1272  	allocs[0].NextAllocation = allocs[1].ID
  1273  	allocs[1].ClientStatus = structs.AllocClientStatusFailed
  1274  	allocs[1].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
  1275  		{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
  1276  			PrevAllocID: allocs[0].ID,
  1277  			PrevNodeID:  uuid.Generate(),
  1278  		},
  1279  	}}
  1280  	allocs[1].NextAllocation = allocs[2].ID
  1281  	allocs[2].ClientStatus = structs.AllocClientStatusFailed
  1282  	allocs[2].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
  1283  		StartedAt:  now.Add(-1 * time.Hour),
  1284  		FinishedAt: now}}
  1285  	allocs[2].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
  1286  		{RescheduleTime: time.Now().Add(-2 * time.Hour).UTC().UnixNano(),
  1287  			PrevAllocID: allocs[0].ID,
  1288  			PrevNodeID:  uuid.Generate(),
  1289  		},
  1290  		{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
  1291  			PrevAllocID: allocs[1].ID,
  1292  			PrevNodeID:  uuid.Generate(),
  1293  		},
  1294  	}}
  1295  
  1296  	// Mark one as complete
  1297  	allocs[5].ClientStatus = structs.AllocClientStatusComplete
  1298  
  1299  	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, true, job.ID, job, nil, allocs, nil, uuid.Generate())
  1300  	r := reconciler.Compute()
  1301  
  1302  	// Two reschedule attempts were already made, one more can be made at a future time
  1303  	// Verify that the follow up eval has the expected waitUntil time
  1304  	evals := r.desiredFollowupEvals[tgName]
  1305  	require.NotNil(evals)
  1306  	require.Equal(1, len(evals))
  1307  	require.Equal(now.Add(delayDur), evals[0].WaitUntil)
  1308  
  1309  	// Alloc 5 should not be replaced because it is terminal
  1310  	assertResults(t, r, &resultExpectation{
  1311  		createDeployment:  nil,
  1312  		deploymentUpdates: nil,
  1313  		place:             0,
  1314  		inplace:           0,
  1315  		attributeUpdates:  1,
  1316  		stop:              0,
  1317  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  1318  			job.TaskGroups[0].Name: {
  1319  				Place:         0,
  1320  				InPlaceUpdate: 0,
  1321  				Ignore:        4,
  1322  			},
  1323  		},
  1324  	})
  1325  	assertNamesHaveIndexes(t, intRange(2, 2), attributeUpdatesToNames(r.attributeUpdates))
  1326  
  1327  	// Verify that the followup evalID field is set correctly
  1328  	var annotated *structs.Allocation
  1329  	for _, a := range r.attributeUpdates {
  1330  		annotated = a
  1331  	}
  1332  	require.Equal(evals[0].ID, annotated.FollowupEvalID)
  1333  }
  1334  
  1335  // Tests delayed rescheduling of failed batch allocations and batching of allocs
  1336  // with fail times that are close together
  1337  func TestReconciler_RescheduleLaterWithBatchedEvals_Batch(t *testing.T) {
  1338  	require := require.New(t)
  1339  
  1340  	// Set desired 4
  1341  	job := mock.Job()
  1342  	job.TaskGroups[0].Count = 10
  1343  	now := time.Now()
  1344  
  1345  	// Set up reschedule policy
  1346  	delayDur := 15 * time.Second
  1347  	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{Attempts: 3, Interval: 24 * time.Hour, Delay: delayDur, DelayFunction: "constant"}
  1348  	tgName := job.TaskGroups[0].Name
  1349  
  1350  	// Create 10 existing allocations
  1351  	var allocs []*structs.Allocation
  1352  	for i := 0; i < 10; i++ {
  1353  		alloc := mock.Alloc()
  1354  		alloc.Job = job
  1355  		alloc.JobID = job.ID
  1356  		alloc.NodeID = uuid.Generate()
  1357  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  1358  		allocs = append(allocs, alloc)
  1359  		alloc.ClientStatus = structs.AllocClientStatusRunning
  1360  	}
  1361  
  1362  	// Mark 5 as failed with fail times very close together
  1363  	for i := 0; i < 5; i++ {
  1364  		allocs[i].ClientStatus = structs.AllocClientStatusFailed
  1365  		allocs[i].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
  1366  			StartedAt:  now.Add(-1 * time.Hour),
  1367  			FinishedAt: now.Add(time.Duration(50*i) * time.Millisecond)}}
  1368  	}
  1369  
  1370  	// Mark two more as failed several seconds later
  1371  	for i := 5; i < 7; i++ {
  1372  		allocs[i].ClientStatus = structs.AllocClientStatusFailed
  1373  		allocs[i].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
  1374  			StartedAt:  now.Add(-1 * time.Hour),
  1375  			FinishedAt: now.Add(10 * time.Second)}}
  1376  	}
  1377  
  1378  	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, true, job.ID, job, nil, allocs, nil, uuid.Generate())
  1379  	r := reconciler.Compute()
  1380  
  1381  	// Verify that two follow up evals were created
  1382  	evals := r.desiredFollowupEvals[tgName]
  1383  	require.NotNil(evals)
  1384  	require.Equal(2, len(evals))
  1385  
  1386  	// Verify expected WaitUntil values for both batched evals
  1387  	require.Equal(now.Add(delayDur), evals[0].WaitUntil)
  1388  	secondBatchDuration := delayDur + 10*time.Second
  1389  	require.Equal(now.Add(secondBatchDuration), evals[1].WaitUntil)
  1390  
  1391  	// Alloc 5 should not be replaced because it is terminal
  1392  	assertResults(t, r, &resultExpectation{
  1393  		createDeployment:  nil,
  1394  		deploymentUpdates: nil,
  1395  		place:             0,
  1396  		inplace:           0,
  1397  		attributeUpdates:  7,
  1398  		stop:              0,
  1399  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  1400  			job.TaskGroups[0].Name: {
  1401  				Place:         0,
  1402  				InPlaceUpdate: 0,
  1403  				Ignore:        10,
  1404  			},
  1405  		},
  1406  	})
  1407  	assertNamesHaveIndexes(t, intRange(0, 6), attributeUpdatesToNames(r.attributeUpdates))
  1408  
  1409  	// Verify that the followup evalID field is set correctly
  1410  	for _, alloc := range r.attributeUpdates {
  1411  		if allocNameToIndex(alloc.Name) < 5 {
  1412  			require.Equal(evals[0].ID, alloc.FollowupEvalID)
  1413  		} else if allocNameToIndex(alloc.Name) < 7 {
  1414  			require.Equal(evals[1].ID, alloc.FollowupEvalID)
  1415  		} else {
  1416  			t.Fatalf("Unexpected alloc name in Inplace results %v", alloc.Name)
  1417  		}
  1418  	}
  1419  }
  1420  
  1421  // Tests rescheduling failed batch allocations
  1422  func TestReconciler_RescheduleNow_Batch(t *testing.T) {
  1423  	require := require.New(t)
  1424  	// Set desired 4
  1425  	job := mock.Job()
  1426  	job.TaskGroups[0].Count = 4
  1427  	now := time.Now()
  1428  	// Set up reschedule policy
  1429  	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{Attempts: 3, Interval: 24 * time.Hour, Delay: 5 * time.Second, DelayFunction: "constant"}
  1430  	tgName := job.TaskGroups[0].Name
  1431  	// Create 6 existing allocations - 2 running, 1 complete and 3 failed
  1432  	var allocs []*structs.Allocation
  1433  	for i := 0; i < 6; i++ {
  1434  		alloc := mock.Alloc()
  1435  		alloc.Job = job
  1436  		alloc.JobID = job.ID
  1437  		alloc.NodeID = uuid.Generate()
  1438  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  1439  		allocs = append(allocs, alloc)
  1440  		alloc.ClientStatus = structs.AllocClientStatusRunning
  1441  	}
  1442  	// Mark 3 as failed with restart tracking info
  1443  	allocs[0].ClientStatus = structs.AllocClientStatusFailed
  1444  	allocs[0].NextAllocation = allocs[1].ID
  1445  	allocs[1].ClientStatus = structs.AllocClientStatusFailed
  1446  	allocs[1].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
  1447  		{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
  1448  			PrevAllocID: allocs[0].ID,
  1449  			PrevNodeID:  uuid.Generate(),
  1450  		},
  1451  	}}
  1452  	allocs[1].NextAllocation = allocs[2].ID
  1453  	allocs[2].ClientStatus = structs.AllocClientStatusFailed
  1454  	allocs[2].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
  1455  		StartedAt:  now.Add(-1 * time.Hour),
  1456  		FinishedAt: now.Add(-5 * time.Second)}}
  1457  	allocs[2].FollowupEvalID = uuid.Generate()
  1458  	allocs[2].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
  1459  		{RescheduleTime: time.Now().Add(-2 * time.Hour).UTC().UnixNano(),
  1460  			PrevAllocID: allocs[0].ID,
  1461  			PrevNodeID:  uuid.Generate(),
  1462  		},
  1463  		{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
  1464  			PrevAllocID: allocs[1].ID,
  1465  			PrevNodeID:  uuid.Generate(),
  1466  		},
  1467  	}}
  1468  	// Mark one as complete
  1469  	allocs[5].ClientStatus = structs.AllocClientStatusComplete
  1470  
  1471  	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, true, job.ID, job, nil, allocs, nil, "")
  1472  	reconciler.now = now
  1473  	r := reconciler.Compute()
  1474  
  1475  	// Verify that no follow up evals were created
  1476  	evals := r.desiredFollowupEvals[tgName]
  1477  	require.Nil(evals)
  1478  
  1479  	// Two reschedule attempts were made, one more can be made now
  1480  	// Alloc 5 should not be replaced because it is terminal
  1481  	assertResults(t, r, &resultExpectation{
  1482  		createDeployment:  nil,
  1483  		deploymentUpdates: nil,
  1484  		place:             1,
  1485  		inplace:           0,
  1486  		stop:              0,
  1487  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  1488  			job.TaskGroups[0].Name: {
  1489  				Place:  1,
  1490  				Ignore: 3,
  1491  			},
  1492  		},
  1493  	})
  1494  
  1495  	assertNamesHaveIndexes(t, intRange(2, 2), placeResultsToNames(r.place))
  1496  	assertPlaceResultsHavePreviousAllocs(t, 1, r.place)
  1497  	assertPlacementsAreRescheduled(t, 1, r.place)
  1498  
  1499  }
  1500  
  1501  // Tests rescheduling failed service allocations with desired state stop
  1502  func TestReconciler_RescheduleLater_Service(t *testing.T) {
  1503  	require := require.New(t)
  1504  
  1505  	// Set desired 5
  1506  	job := mock.Job()
  1507  	job.TaskGroups[0].Count = 5
  1508  	tgName := job.TaskGroups[0].Name
  1509  	now := time.Now()
  1510  
  1511  	// Set up reschedule policy
  1512  	delayDur := 15 * time.Second
  1513  	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{Attempts: 1, Interval: 24 * time.Hour, Delay: delayDur, MaxDelay: 1 * time.Hour}
  1514  
  1515  	// Create 5 existing allocations
  1516  	var allocs []*structs.Allocation
  1517  	for i := 0; i < 5; i++ {
  1518  		alloc := mock.Alloc()
  1519  		alloc.Job = job
  1520  		alloc.JobID = job.ID
  1521  		alloc.NodeID = uuid.Generate()
  1522  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  1523  		allocs = append(allocs, alloc)
  1524  		alloc.ClientStatus = structs.AllocClientStatusRunning
  1525  	}
  1526  
  1527  	// Mark two as failed
  1528  	allocs[0].ClientStatus = structs.AllocClientStatusFailed
  1529  
  1530  	// Mark one of them as already rescheduled once
  1531  	allocs[0].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
  1532  		{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
  1533  			PrevAllocID: uuid.Generate(),
  1534  			PrevNodeID:  uuid.Generate(),
  1535  		},
  1536  	}}
  1537  	allocs[1].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
  1538  		StartedAt:  now.Add(-1 * time.Hour),
  1539  		FinishedAt: now}}
  1540  	allocs[1].ClientStatus = structs.AllocClientStatusFailed
  1541  
  1542  	// Mark one as desired state stop
  1543  	allocs[4].DesiredStatus = structs.AllocDesiredStatusStop
  1544  
  1545  	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, uuid.Generate())
  1546  	r := reconciler.Compute()
  1547  
  1548  	// Should place a new placement and create a follow up eval for the delayed reschedule
  1549  	// Verify that the follow up eval has the expected waitUntil time
  1550  	evals := r.desiredFollowupEvals[tgName]
  1551  	require.NotNil(evals)
  1552  	require.Equal(1, len(evals))
  1553  	require.Equal(now.Add(delayDur), evals[0].WaitUntil)
  1554  
  1555  	assertResults(t, r, &resultExpectation{
  1556  		createDeployment:  nil,
  1557  		deploymentUpdates: nil,
  1558  		place:             1,
  1559  		inplace:           0,
  1560  		attributeUpdates:  1,
  1561  		stop:              0,
  1562  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  1563  			job.TaskGroups[0].Name: {
  1564  				Place:         1,
  1565  				InPlaceUpdate: 0,
  1566  				Ignore:        4,
  1567  			},
  1568  		},
  1569  	})
  1570  
  1571  	assertNamesHaveIndexes(t, intRange(4, 4), placeResultsToNames(r.place))
  1572  	assertNamesHaveIndexes(t, intRange(1, 1), attributeUpdatesToNames(r.attributeUpdates))
  1573  
  1574  	// Verify that the followup evalID field is set correctly
  1575  	var annotated *structs.Allocation
  1576  	for _, a := range r.attributeUpdates {
  1577  		annotated = a
  1578  	}
  1579  	require.Equal(evals[0].ID, annotated.FollowupEvalID)
  1580  }
  1581  
  1582  // Tests service allocations with client status complete
  1583  func TestReconciler_Service_ClientStatusComplete(t *testing.T) {
  1584  	// Set desired 5
  1585  	job := mock.Job()
  1586  	job.TaskGroups[0].Count = 5
  1587  
  1588  	// Set up reschedule policy
  1589  	delayDur := 15 * time.Second
  1590  	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
  1591  		Attempts: 1,
  1592  		Interval: 24 * time.Hour,
  1593  		Delay:    delayDur,
  1594  		MaxDelay: 1 * time.Hour,
  1595  	}
  1596  
  1597  	// Create 5 existing allocations
  1598  	var allocs []*structs.Allocation
  1599  	for i := 0; i < 5; i++ {
  1600  		alloc := mock.Alloc()
  1601  		alloc.Job = job
  1602  		alloc.JobID = job.ID
  1603  		alloc.NodeID = uuid.Generate()
  1604  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  1605  		allocs = append(allocs, alloc)
  1606  		alloc.ClientStatus = structs.AllocClientStatusRunning
  1607  		alloc.DesiredStatus = structs.AllocDesiredStatusRun
  1608  	}
  1609  
  1610  	// Mark one as client status complete
  1611  	allocs[4].ClientStatus = structs.AllocClientStatusComplete
  1612  
  1613  	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
  1614  	r := reconciler.Compute()
  1615  
  1616  	// Should place a new placement for the alloc that was marked complete
  1617  	assertResults(t, r, &resultExpectation{
  1618  		createDeployment:  nil,
  1619  		deploymentUpdates: nil,
  1620  		place:             1,
  1621  		inplace:           0,
  1622  		stop:              0,
  1623  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  1624  			job.TaskGroups[0].Name: {
  1625  				Place:         1,
  1626  				InPlaceUpdate: 0,
  1627  				Ignore:        4,
  1628  			},
  1629  		},
  1630  	})
  1631  
  1632  	assertNamesHaveIndexes(t, intRange(4, 4), placeResultsToNames(r.place))
  1633  
  1634  }
  1635  
  1636  // Tests service job placement with desired stop and client status complete
  1637  func TestReconciler_Service_DesiredStop_ClientStatusComplete(t *testing.T) {
  1638  	// Set desired 5
  1639  	job := mock.Job()
  1640  	job.TaskGroups[0].Count = 5
  1641  
  1642  	// Set up reschedule policy
  1643  	delayDur := 15 * time.Second
  1644  	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
  1645  		Attempts: 1,
  1646  		Interval: 24 * time.Hour,
  1647  		Delay:    delayDur,
  1648  		MaxDelay: 1 * time.Hour,
  1649  	}
  1650  
  1651  	// Create 5 existing allocations
  1652  	var allocs []*structs.Allocation
  1653  	for i := 0; i < 5; i++ {
  1654  		alloc := mock.Alloc()
  1655  		alloc.Job = job
  1656  		alloc.JobID = job.ID
  1657  		alloc.NodeID = uuid.Generate()
  1658  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  1659  		allocs = append(allocs, alloc)
  1660  		alloc.ClientStatus = structs.AllocClientStatusRunning
  1661  		alloc.DesiredStatus = structs.AllocDesiredStatusRun
  1662  	}
  1663  
  1664  	// Mark one as failed but with desired status stop
  1665  	// Should not trigger rescheduling logic but should trigger a placement
  1666  	allocs[4].ClientStatus = structs.AllocClientStatusFailed
  1667  	allocs[4].DesiredStatus = structs.AllocDesiredStatusStop
  1668  
  1669  	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
  1670  	r := reconciler.Compute()
  1671  
  1672  	// Should place a new placement for the alloc that was marked stopped
  1673  	assertResults(t, r, &resultExpectation{
  1674  		createDeployment:  nil,
  1675  		deploymentUpdates: nil,
  1676  		place:             1,
  1677  		inplace:           0,
  1678  		stop:              0,
  1679  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  1680  			job.TaskGroups[0].Name: {
  1681  				Place:         1,
  1682  				InPlaceUpdate: 0,
  1683  				Ignore:        4,
  1684  			},
  1685  		},
  1686  	})
  1687  
  1688  	assertNamesHaveIndexes(t, intRange(4, 4), placeResultsToNames(r.place))
  1689  
  1690  	// Should not have any follow up evals created
  1691  	require := require.New(t)
  1692  	require.Equal(0, len(r.desiredFollowupEvals))
  1693  }
  1694  
  1695  // Tests rescheduling failed service allocations with desired state stop
  1696  func TestReconciler_RescheduleNow_Service(t *testing.T) {
  1697  	require := require.New(t)
  1698  
  1699  	// Set desired 5
  1700  	job := mock.Job()
  1701  	job.TaskGroups[0].Count = 5
  1702  	tgName := job.TaskGroups[0].Name
  1703  	now := time.Now()
  1704  
  1705  	// Set up reschedule policy and update stanza
  1706  	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
  1707  		Attempts:      1,
  1708  		Interval:      24 * time.Hour,
  1709  		Delay:         5 * time.Second,
  1710  		DelayFunction: "",
  1711  		MaxDelay:      1 * time.Hour,
  1712  		Unlimited:     false,
  1713  	}
  1714  	job.TaskGroups[0].Update = noCanaryUpdate
  1715  
  1716  	// Create 5 existing allocations
  1717  	var allocs []*structs.Allocation
  1718  	for i := 0; i < 5; i++ {
  1719  		alloc := mock.Alloc()
  1720  		alloc.Job = job
  1721  		alloc.JobID = job.ID
  1722  		alloc.NodeID = uuid.Generate()
  1723  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  1724  		allocs = append(allocs, alloc)
  1725  		alloc.ClientStatus = structs.AllocClientStatusRunning
  1726  	}
  1727  
  1728  	// Mark two as failed
  1729  	allocs[0].ClientStatus = structs.AllocClientStatusFailed
  1730  
  1731  	// Mark one of them as already rescheduled once
  1732  	allocs[0].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
  1733  		{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
  1734  			PrevAllocID: uuid.Generate(),
  1735  			PrevNodeID:  uuid.Generate(),
  1736  		},
  1737  	}}
  1738  	allocs[1].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
  1739  		StartedAt:  now.Add(-1 * time.Hour),
  1740  		FinishedAt: now.Add(-10 * time.Second)}}
  1741  	allocs[1].ClientStatus = structs.AllocClientStatusFailed
  1742  
  1743  	// Mark one as desired state stop
  1744  	allocs[4].DesiredStatus = structs.AllocDesiredStatusStop
  1745  
  1746  	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
  1747  	r := reconciler.Compute()
  1748  
  1749  	// Verify that no follow up evals were created
  1750  	evals := r.desiredFollowupEvals[tgName]
  1751  	require.Nil(evals)
  1752  
  1753  	// Verify that one rescheduled alloc and one replacement for terminal alloc were placed
  1754  	assertResults(t, r, &resultExpectation{
  1755  		createDeployment:  nil,
  1756  		deploymentUpdates: nil,
  1757  		place:             2,
  1758  		inplace:           0,
  1759  		stop:              0,
  1760  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  1761  			job.TaskGroups[0].Name: {
  1762  				Place:  2,
  1763  				Ignore: 3,
  1764  			},
  1765  		},
  1766  	})
  1767  
  1768  	// Rescheduled allocs should have previous allocs
  1769  	assertNamesHaveIndexes(t, intRange(1, 1, 4, 4), placeResultsToNames(r.place))
  1770  	assertPlaceResultsHavePreviousAllocs(t, 1, r.place)
  1771  	assertPlacementsAreRescheduled(t, 1, r.place)
  1772  }
  1773  
  1774  // Tests rescheduling failed service allocations when there's clock drift (upto a second)
  1775  func TestReconciler_RescheduleNow_WithinAllowedTimeWindow(t *testing.T) {
  1776  	require := require.New(t)
  1777  
  1778  	// Set desired 5
  1779  	job := mock.Job()
  1780  	job.TaskGroups[0].Count = 5
  1781  	tgName := job.TaskGroups[0].Name
  1782  	now := time.Now()
  1783  
  1784  	// Set up reschedule policy and update stanza
  1785  	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
  1786  		Attempts:      1,
  1787  		Interval:      24 * time.Hour,
  1788  		Delay:         5 * time.Second,
  1789  		DelayFunction: "",
  1790  		MaxDelay:      1 * time.Hour,
  1791  		Unlimited:     false,
  1792  	}
  1793  	job.TaskGroups[0].Update = noCanaryUpdate
  1794  
  1795  	// Create 5 existing allocations
  1796  	var allocs []*structs.Allocation
  1797  	for i := 0; i < 5; i++ {
  1798  		alloc := mock.Alloc()
  1799  		alloc.Job = job
  1800  		alloc.JobID = job.ID
  1801  		alloc.NodeID = uuid.Generate()
  1802  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  1803  		allocs = append(allocs, alloc)
  1804  		alloc.ClientStatus = structs.AllocClientStatusRunning
  1805  	}
  1806  
  1807  	// Mark one as failed
  1808  	allocs[0].ClientStatus = structs.AllocClientStatusFailed
  1809  
  1810  	// Mark one of them as already rescheduled once
  1811  	allocs[0].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
  1812  		{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
  1813  			PrevAllocID: uuid.Generate(),
  1814  			PrevNodeID:  uuid.Generate(),
  1815  		},
  1816  	}}
  1817  	// Set fail time to 4 seconds ago which falls within the reschedule window
  1818  	allocs[1].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
  1819  		StartedAt:  now.Add(-1 * time.Hour),
  1820  		FinishedAt: now.Add(-4 * time.Second)}}
  1821  	allocs[1].ClientStatus = structs.AllocClientStatusFailed
  1822  
  1823  	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
  1824  	reconciler.now = now
  1825  	r := reconciler.Compute()
  1826  
  1827  	// Verify that no follow up evals were created
  1828  	evals := r.desiredFollowupEvals[tgName]
  1829  	require.Nil(evals)
  1830  
  1831  	// Verify that one rescheduled alloc was placed
  1832  	assertResults(t, r, &resultExpectation{
  1833  		createDeployment:  nil,
  1834  		deploymentUpdates: nil,
  1835  		place:             1,
  1836  		inplace:           0,
  1837  		stop:              0,
  1838  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  1839  			job.TaskGroups[0].Name: {
  1840  				Place:  1,
  1841  				Ignore: 4,
  1842  			},
  1843  		},
  1844  	})
  1845  
  1846  	// Rescheduled allocs should have previous allocs
  1847  	assertNamesHaveIndexes(t, intRange(1, 1), placeResultsToNames(r.place))
  1848  	assertPlaceResultsHavePreviousAllocs(t, 1, r.place)
  1849  	assertPlacementsAreRescheduled(t, 1, r.place)
  1850  }
  1851  
  1852  // Tests rescheduling failed service allocations when the eval ID matches and there's a large clock drift
  1853  func TestReconciler_RescheduleNow_EvalIDMatch(t *testing.T) {
  1854  	require := require.New(t)
  1855  
  1856  	// Set desired 5
  1857  	job := mock.Job()
  1858  	job.TaskGroups[0].Count = 5
  1859  	tgName := job.TaskGroups[0].Name
  1860  	now := time.Now()
  1861  
  1862  	// Set up reschedule policy and update stanza
  1863  	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
  1864  		Attempts:      1,
  1865  		Interval:      24 * time.Hour,
  1866  		Delay:         5 * time.Second,
  1867  		DelayFunction: "",
  1868  		MaxDelay:      1 * time.Hour,
  1869  		Unlimited:     false,
  1870  	}
  1871  	job.TaskGroups[0].Update = noCanaryUpdate
  1872  
  1873  	// Create 5 existing allocations
  1874  	var allocs []*structs.Allocation
  1875  	for i := 0; i < 5; i++ {
  1876  		alloc := mock.Alloc()
  1877  		alloc.Job = job
  1878  		alloc.JobID = job.ID
  1879  		alloc.NodeID = uuid.Generate()
  1880  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  1881  		allocs = append(allocs, alloc)
  1882  		alloc.ClientStatus = structs.AllocClientStatusRunning
  1883  	}
  1884  
  1885  	// Mark one as failed
  1886  	allocs[0].ClientStatus = structs.AllocClientStatusFailed
  1887  
  1888  	// Mark one of them as already rescheduled once
  1889  	allocs[0].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
  1890  		{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
  1891  			PrevAllocID: uuid.Generate(),
  1892  			PrevNodeID:  uuid.Generate(),
  1893  		},
  1894  	}}
  1895  	// Set fail time to 5 seconds ago and eval ID
  1896  	evalID := uuid.Generate()
  1897  	allocs[1].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
  1898  		StartedAt:  now.Add(-1 * time.Hour),
  1899  		FinishedAt: now.Add(-5 * time.Second)}}
  1900  	allocs[1].ClientStatus = structs.AllocClientStatusFailed
  1901  	allocs[1].FollowupEvalID = evalID
  1902  
  1903  	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, evalID)
  1904  	reconciler.now = now.Add(-30 * time.Second)
  1905  	r := reconciler.Compute()
  1906  
  1907  	// Verify that no follow up evals were created
  1908  	evals := r.desiredFollowupEvals[tgName]
  1909  	require.Nil(evals)
  1910  
  1911  	// Verify that one rescheduled alloc was placed
  1912  	assertResults(t, r, &resultExpectation{
  1913  		createDeployment:  nil,
  1914  		deploymentUpdates: nil,
  1915  		place:             1,
  1916  		inplace:           0,
  1917  		stop:              0,
  1918  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  1919  			job.TaskGroups[0].Name: {
  1920  				Place:  1,
  1921  				Ignore: 4,
  1922  			},
  1923  		},
  1924  	})
  1925  
  1926  	// Rescheduled allocs should have previous allocs
  1927  	assertNamesHaveIndexes(t, intRange(1, 1), placeResultsToNames(r.place))
  1928  	assertPlaceResultsHavePreviousAllocs(t, 1, r.place)
  1929  	assertPlacementsAreRescheduled(t, 1, r.place)
  1930  }
  1931  
  1932  // Tests rescheduling failed service allocations when there are canaries
  1933  func TestReconciler_RescheduleNow_Service_WithCanaries(t *testing.T) {
  1934  	require := require.New(t)
  1935  
  1936  	// Set desired 5
  1937  	job := mock.Job()
  1938  	job.TaskGroups[0].Count = 5
  1939  	tgName := job.TaskGroups[0].Name
  1940  	now := time.Now()
  1941  
  1942  	// Set up reschedule policy and update stanza
  1943  	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
  1944  		Attempts:      1,
  1945  		Interval:      24 * time.Hour,
  1946  		Delay:         5 * time.Second,
  1947  		DelayFunction: "",
  1948  		MaxDelay:      1 * time.Hour,
  1949  		Unlimited:     false,
  1950  	}
  1951  	job.TaskGroups[0].Update = canaryUpdate
  1952  
  1953  	job2 := job.Copy()
  1954  	job2.Version++
  1955  
  1956  	d := structs.NewDeployment(job2)
  1957  	d.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion
  1958  	s := &structs.DeploymentState{
  1959  		DesiredCanaries: 2,
  1960  		DesiredTotal:    5,
  1961  	}
  1962  	d.TaskGroups[job.TaskGroups[0].Name] = s
  1963  
  1964  	// Create 5 existing allocations
  1965  	var allocs []*structs.Allocation
  1966  	for i := 0; i < 5; i++ {
  1967  		alloc := mock.Alloc()
  1968  		alloc.Job = job
  1969  		alloc.JobID = job.ID
  1970  		alloc.NodeID = uuid.Generate()
  1971  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  1972  		allocs = append(allocs, alloc)
  1973  		alloc.ClientStatus = structs.AllocClientStatusRunning
  1974  	}
  1975  
  1976  	// Mark three as failed
  1977  	allocs[0].ClientStatus = structs.AllocClientStatusFailed
  1978  
  1979  	// Mark one of them as already rescheduled once
  1980  	allocs[0].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
  1981  		{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
  1982  			PrevAllocID: uuid.Generate(),
  1983  			PrevNodeID:  uuid.Generate(),
  1984  		},
  1985  	}}
  1986  	allocs[1].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
  1987  		StartedAt:  now.Add(-1 * time.Hour),
  1988  		FinishedAt: now.Add(-10 * time.Second)}}
  1989  	allocs[1].ClientStatus = structs.AllocClientStatusFailed
  1990  
  1991  	// Mark one as desired state stop
  1992  	allocs[4].ClientStatus = structs.AllocClientStatusFailed
  1993  
  1994  	// Create 2 canary allocations
  1995  	for i := 0; i < 2; i++ {
  1996  		alloc := mock.Alloc()
  1997  		alloc.Job = job
  1998  		alloc.JobID = job.ID
  1999  		alloc.NodeID = uuid.Generate()
  2000  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2001  		alloc.ClientStatus = structs.AllocClientStatusRunning
  2002  		alloc.DeploymentID = d.ID
  2003  		alloc.DeploymentStatus = &structs.AllocDeploymentStatus{
  2004  			Canary:  true,
  2005  			Healthy: helper.BoolToPtr(false),
  2006  		}
  2007  		s.PlacedCanaries = append(s.PlacedCanaries, alloc.ID)
  2008  		allocs = append(allocs, alloc)
  2009  	}
  2010  
  2011  	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job2, d, allocs, nil, "")
  2012  	r := reconciler.Compute()
  2013  
  2014  	// Verify that no follow up evals were created
  2015  	evals := r.desiredFollowupEvals[tgName]
  2016  	require.Nil(evals)
  2017  
  2018  	// Verify that one rescheduled alloc and one replacement for terminal alloc were placed
  2019  	assertResults(t, r, &resultExpectation{
  2020  		createDeployment:  nil,
  2021  		deploymentUpdates: nil,
  2022  		place:             2,
  2023  		inplace:           0,
  2024  		stop:              0,
  2025  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  2026  			job.TaskGroups[0].Name: {
  2027  				Place:  2,
  2028  				Ignore: 5,
  2029  			},
  2030  		},
  2031  	})
  2032  
  2033  	// Rescheduled allocs should have previous allocs
  2034  	assertNamesHaveIndexes(t, intRange(1, 1, 4, 4), placeResultsToNames(r.place))
  2035  	assertPlaceResultsHavePreviousAllocs(t, 2, r.place)
  2036  	assertPlacementsAreRescheduled(t, 2, r.place)
  2037  }
  2038  
  2039  // Tests rescheduling failed canary service allocations
  2040  func TestReconciler_RescheduleNow_Service_Canaries(t *testing.T) {
  2041  	require := require.New(t)
  2042  
  2043  	// Set desired 5
  2044  	job := mock.Job()
  2045  	job.TaskGroups[0].Count = 5
  2046  	tgName := job.TaskGroups[0].Name
  2047  	now := time.Now()
  2048  
  2049  	// Set up reschedule policy and update stanza
  2050  	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
  2051  		Delay:         5 * time.Second,
  2052  		DelayFunction: "constant",
  2053  		MaxDelay:      1 * time.Hour,
  2054  		Unlimited:     true,
  2055  	}
  2056  	job.TaskGroups[0].Update = canaryUpdate
  2057  
  2058  	job2 := job.Copy()
  2059  	job2.Version++
  2060  
  2061  	d := structs.NewDeployment(job2)
  2062  	d.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion
  2063  	s := &structs.DeploymentState{
  2064  		DesiredCanaries: 2,
  2065  		DesiredTotal:    5,
  2066  	}
  2067  	d.TaskGroups[job.TaskGroups[0].Name] = s
  2068  
  2069  	// Create 5 existing allocations
  2070  	var allocs []*structs.Allocation
  2071  	for i := 0; i < 5; i++ {
  2072  		alloc := mock.Alloc()
  2073  		alloc.Job = job
  2074  		alloc.JobID = job.ID
  2075  		alloc.NodeID = uuid.Generate()
  2076  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2077  		allocs = append(allocs, alloc)
  2078  		alloc.ClientStatus = structs.AllocClientStatusRunning
  2079  	}
  2080  
  2081  	// Create 2 healthy canary allocations
  2082  	for i := 0; i < 2; i++ {
  2083  		alloc := mock.Alloc()
  2084  		alloc.Job = job
  2085  		alloc.JobID = job.ID
  2086  		alloc.NodeID = uuid.Generate()
  2087  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2088  		alloc.ClientStatus = structs.AllocClientStatusRunning
  2089  		alloc.DeploymentID = d.ID
  2090  		alloc.DeploymentStatus = &structs.AllocDeploymentStatus{
  2091  			Canary:  true,
  2092  			Healthy: helper.BoolToPtr(false),
  2093  		}
  2094  		s.PlacedCanaries = append(s.PlacedCanaries, alloc.ID)
  2095  		allocs = append(allocs, alloc)
  2096  	}
  2097  
  2098  	// Mark the canaries as failed
  2099  	allocs[5].ClientStatus = structs.AllocClientStatusFailed
  2100  	allocs[5].DesiredTransition.Reschedule = helper.BoolToPtr(true)
  2101  
  2102  	// Mark one of them as already rescheduled once
  2103  	allocs[5].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
  2104  		{RescheduleTime: now.Add(-1 * time.Hour).UTC().UnixNano(),
  2105  			PrevAllocID: uuid.Generate(),
  2106  			PrevNodeID:  uuid.Generate(),
  2107  		},
  2108  	}}
  2109  
  2110  	allocs[6].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
  2111  		StartedAt:  now.Add(-1 * time.Hour),
  2112  		FinishedAt: now.Add(-10 * time.Second)}}
  2113  	allocs[6].ClientStatus = structs.AllocClientStatusFailed
  2114  	allocs[6].DesiredTransition.Reschedule = helper.BoolToPtr(true)
  2115  
  2116  	// Create 4 unhealthy canary allocations that have already been replaced
  2117  	for i := 0; i < 4; i++ {
  2118  		alloc := mock.Alloc()
  2119  		alloc.Job = job
  2120  		alloc.JobID = job.ID
  2121  		alloc.NodeID = uuid.Generate()
  2122  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i%2))
  2123  		alloc.ClientStatus = structs.AllocClientStatusFailed
  2124  		alloc.DeploymentID = d.ID
  2125  		alloc.DeploymentStatus = &structs.AllocDeploymentStatus{
  2126  			Canary:  true,
  2127  			Healthy: helper.BoolToPtr(false),
  2128  		}
  2129  		s.PlacedCanaries = append(s.PlacedCanaries, alloc.ID)
  2130  		allocs = append(allocs, alloc)
  2131  	}
  2132  
  2133  	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job2, d, allocs, nil, "")
  2134  	reconciler.now = now
  2135  	r := reconciler.Compute()
  2136  
  2137  	// Verify that no follow up evals were created
  2138  	evals := r.desiredFollowupEvals[tgName]
  2139  	require.Nil(evals)
  2140  
  2141  	// Verify that one rescheduled alloc and one replacement for terminal alloc were placed
  2142  	assertResults(t, r, &resultExpectation{
  2143  		createDeployment:  nil,
  2144  		deploymentUpdates: nil,
  2145  		place:             2,
  2146  		inplace:           0,
  2147  		stop:              0,
  2148  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  2149  			job.TaskGroups[0].Name: {
  2150  				Place:  2,
  2151  				Ignore: 9,
  2152  			},
  2153  		},
  2154  	})
  2155  
  2156  	// Rescheduled allocs should have previous allocs
  2157  	assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place))
  2158  	assertPlaceResultsHavePreviousAllocs(t, 2, r.place)
  2159  	assertPlacementsAreRescheduled(t, 2, r.place)
  2160  }
  2161  
  2162  // Tests rescheduling failed canary service allocations when one has reached its
  2163  // reschedule limit
  2164  func TestReconciler_RescheduleNow_Service_Canaries_Limit(t *testing.T) {
  2165  	require := require.New(t)
  2166  
  2167  	// Set desired 5
  2168  	job := mock.Job()
  2169  	job.TaskGroups[0].Count = 5
  2170  	tgName := job.TaskGroups[0].Name
  2171  	now := time.Now()
  2172  
  2173  	// Set up reschedule policy and update stanza
  2174  	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
  2175  		Attempts:      1,
  2176  		Interval:      24 * time.Hour,
  2177  		Delay:         5 * time.Second,
  2178  		DelayFunction: "",
  2179  		MaxDelay:      1 * time.Hour,
  2180  		Unlimited:     false,
  2181  	}
  2182  	job.TaskGroups[0].Update = canaryUpdate
  2183  
  2184  	job2 := job.Copy()
  2185  	job2.Version++
  2186  
  2187  	d := structs.NewDeployment(job2)
  2188  	d.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion
  2189  	s := &structs.DeploymentState{
  2190  		DesiredCanaries: 2,
  2191  		DesiredTotal:    5,
  2192  	}
  2193  	d.TaskGroups[job.TaskGroups[0].Name] = s
  2194  
  2195  	// Create 5 existing allocations
  2196  	var allocs []*structs.Allocation
  2197  	for i := 0; i < 5; i++ {
  2198  		alloc := mock.Alloc()
  2199  		alloc.Job = job
  2200  		alloc.JobID = job.ID
  2201  		alloc.NodeID = uuid.Generate()
  2202  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2203  		allocs = append(allocs, alloc)
  2204  		alloc.ClientStatus = structs.AllocClientStatusRunning
  2205  	}
  2206  
  2207  	// Create 2 healthy canary allocations
  2208  	for i := 0; i < 2; i++ {
  2209  		alloc := mock.Alloc()
  2210  		alloc.Job = job
  2211  		alloc.JobID = job.ID
  2212  		alloc.NodeID = uuid.Generate()
  2213  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2214  		alloc.ClientStatus = structs.AllocClientStatusRunning
  2215  		alloc.DeploymentID = d.ID
  2216  		alloc.DeploymentStatus = &structs.AllocDeploymentStatus{
  2217  			Canary:  true,
  2218  			Healthy: helper.BoolToPtr(false),
  2219  		}
  2220  		s.PlacedCanaries = append(s.PlacedCanaries, alloc.ID)
  2221  		allocs = append(allocs, alloc)
  2222  	}
  2223  
  2224  	// Mark the canaries as failed
  2225  	allocs[5].ClientStatus = structs.AllocClientStatusFailed
  2226  	allocs[5].DesiredTransition.Reschedule = helper.BoolToPtr(true)
  2227  
  2228  	// Mark one of them as already rescheduled once
  2229  	allocs[5].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
  2230  		{RescheduleTime: now.Add(-1 * time.Hour).UTC().UnixNano(),
  2231  			PrevAllocID: uuid.Generate(),
  2232  			PrevNodeID:  uuid.Generate(),
  2233  		},
  2234  	}}
  2235  
  2236  	allocs[6].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
  2237  		StartedAt:  now.Add(-1 * time.Hour),
  2238  		FinishedAt: now.Add(-10 * time.Second)}}
  2239  	allocs[6].ClientStatus = structs.AllocClientStatusFailed
  2240  	allocs[6].DesiredTransition.Reschedule = helper.BoolToPtr(true)
  2241  
  2242  	// Create 4 unhealthy canary allocations that have already been replaced
  2243  	for i := 0; i < 4; i++ {
  2244  		alloc := mock.Alloc()
  2245  		alloc.Job = job
  2246  		alloc.JobID = job.ID
  2247  		alloc.NodeID = uuid.Generate()
  2248  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i%2))
  2249  		alloc.ClientStatus = structs.AllocClientStatusFailed
  2250  		alloc.DeploymentID = d.ID
  2251  		alloc.DeploymentStatus = &structs.AllocDeploymentStatus{
  2252  			Canary:  true,
  2253  			Healthy: helper.BoolToPtr(false),
  2254  		}
  2255  		s.PlacedCanaries = append(s.PlacedCanaries, alloc.ID)
  2256  		allocs = append(allocs, alloc)
  2257  	}
  2258  
  2259  	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job2, d, allocs, nil, "")
  2260  	reconciler.now = now
  2261  	r := reconciler.Compute()
  2262  
  2263  	// Verify that no follow up evals were created
  2264  	evals := r.desiredFollowupEvals[tgName]
  2265  	require.Nil(evals)
  2266  
  2267  	// Verify that one rescheduled alloc and one replacement for terminal alloc were placed
  2268  	assertResults(t, r, &resultExpectation{
  2269  		createDeployment:  nil,
  2270  		deploymentUpdates: nil,
  2271  		place:             1,
  2272  		inplace:           0,
  2273  		stop:              0,
  2274  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  2275  			job.TaskGroups[0].Name: {
  2276  				Place:  1,
  2277  				Ignore: 10,
  2278  			},
  2279  		},
  2280  	})
  2281  
  2282  	// Rescheduled allocs should have previous allocs
  2283  	assertNamesHaveIndexes(t, intRange(1, 1), placeResultsToNames(r.place))
  2284  	assertPlaceResultsHavePreviousAllocs(t, 1, r.place)
  2285  	assertPlacementsAreRescheduled(t, 1, r.place)
  2286  }
  2287  
  2288  // Tests failed service allocations that were already rescheduled won't be rescheduled again
  2289  func TestReconciler_DontReschedule_PreviouslyRescheduled(t *testing.T) {
  2290  	// Set desired 5
  2291  	job := mock.Job()
  2292  	job.TaskGroups[0].Count = 5
  2293  
  2294  	// Set up reschedule policy
  2295  	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{Attempts: 5, Interval: 24 * time.Hour}
  2296  
  2297  	// Create 7 existing allocations
  2298  	var allocs []*structs.Allocation
  2299  	for i := 0; i < 7; i++ {
  2300  		alloc := mock.Alloc()
  2301  		alloc.Job = job
  2302  		alloc.JobID = job.ID
  2303  		alloc.NodeID = uuid.Generate()
  2304  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2305  		allocs = append(allocs, alloc)
  2306  		alloc.ClientStatus = structs.AllocClientStatusRunning
  2307  	}
  2308  	// Mark two as failed and rescheduled
  2309  	allocs[0].ClientStatus = structs.AllocClientStatusFailed
  2310  	allocs[0].ID = allocs[1].ID
  2311  	allocs[1].ClientStatus = structs.AllocClientStatusFailed
  2312  	allocs[1].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
  2313  		{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
  2314  			PrevAllocID: uuid.Generate(),
  2315  			PrevNodeID:  uuid.Generate(),
  2316  		},
  2317  	}}
  2318  	allocs[1].NextAllocation = allocs[2].ID
  2319  
  2320  	// Mark one as desired state stop
  2321  	allocs[4].DesiredStatus = structs.AllocDesiredStatusStop
  2322  
  2323  	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
  2324  	r := reconciler.Compute()
  2325  
  2326  	// Should place 1 - one is a new placement to make up the desired count of 5
  2327  	// failing allocs are not rescheduled
  2328  	assertResults(t, r, &resultExpectation{
  2329  		createDeployment:  nil,
  2330  		deploymentUpdates: nil,
  2331  		place:             1,
  2332  		inplace:           0,
  2333  		stop:              0,
  2334  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  2335  			job.TaskGroups[0].Name: {
  2336  				Place:  1,
  2337  				Ignore: 4,
  2338  			},
  2339  		},
  2340  	})
  2341  
  2342  	// name index 0 is used for the replacement because its
  2343  	assertNamesHaveIndexes(t, intRange(0, 0), placeResultsToNames(r.place))
  2344  }
  2345  
  2346  // Tests the reconciler cancels an old deployment when the job is being stopped
  2347  func TestReconciler_CancelDeployment_JobStop(t *testing.T) {
  2348  	job := mock.Job()
  2349  	job.Stop = true
  2350  
  2351  	running := structs.NewDeployment(job)
  2352  	failed := structs.NewDeployment(job)
  2353  	failed.Status = structs.DeploymentStatusFailed
  2354  
  2355  	cases := []struct {
  2356  		name             string
  2357  		job              *structs.Job
  2358  		jobID, taskGroup string
  2359  		deployment       *structs.Deployment
  2360  		cancel           bool
  2361  	}{
  2362  		{
  2363  			name:       "stopped job, running deployment",
  2364  			job:        job,
  2365  			jobID:      job.ID,
  2366  			taskGroup:  job.TaskGroups[0].Name,
  2367  			deployment: running,
  2368  			cancel:     true,
  2369  		},
  2370  		{
  2371  			name:       "nil job, running deployment",
  2372  			job:        nil,
  2373  			jobID:      "foo",
  2374  			taskGroup:  "bar",
  2375  			deployment: running,
  2376  			cancel:     true,
  2377  		},
  2378  		{
  2379  			name:       "stopped job, failed deployment",
  2380  			job:        job,
  2381  			jobID:      job.ID,
  2382  			taskGroup:  job.TaskGroups[0].Name,
  2383  			deployment: failed,
  2384  			cancel:     false,
  2385  		},
  2386  		{
  2387  			name:       "nil job, failed deployment",
  2388  			job:        nil,
  2389  			jobID:      "foo",
  2390  			taskGroup:  "bar",
  2391  			deployment: failed,
  2392  			cancel:     false,
  2393  		},
  2394  	}
  2395  
  2396  	for _, c := range cases {
  2397  		t.Run(c.name, func(t *testing.T) {
  2398  			// Create 10 allocations
  2399  			var allocs []*structs.Allocation
  2400  			for i := 0; i < 10; i++ {
  2401  				alloc := mock.Alloc()
  2402  				alloc.Job = c.job
  2403  				alloc.JobID = c.jobID
  2404  				alloc.NodeID = uuid.Generate()
  2405  				alloc.Name = structs.AllocName(c.jobID, c.taskGroup, uint(i))
  2406  				alloc.TaskGroup = c.taskGroup
  2407  				allocs = append(allocs, alloc)
  2408  			}
  2409  
  2410  			reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, c.jobID, c.job, c.deployment, allocs, nil, "")
  2411  			r := reconciler.Compute()
  2412  
  2413  			var updates []*structs.DeploymentStatusUpdate
  2414  			if c.cancel {
  2415  				updates = []*structs.DeploymentStatusUpdate{
  2416  					{
  2417  						DeploymentID:      c.deployment.ID,
  2418  						Status:            structs.DeploymentStatusCancelled,
  2419  						StatusDescription: structs.DeploymentStatusDescriptionStoppedJob,
  2420  					},
  2421  				}
  2422  			}
  2423  
  2424  			// Assert the correct results
  2425  			assertResults(t, r, &resultExpectation{
  2426  				createDeployment:  nil,
  2427  				deploymentUpdates: updates,
  2428  				place:             0,
  2429  				inplace:           0,
  2430  				stop:              10,
  2431  				desiredTGUpdates: map[string]*structs.DesiredUpdates{
  2432  					c.taskGroup: {
  2433  						Stop: 10,
  2434  					},
  2435  				},
  2436  			})
  2437  
  2438  			assertNamesHaveIndexes(t, intRange(0, 9), stopResultsToNames(r.stop))
  2439  		})
  2440  	}
  2441  }
  2442  
  2443  // Tests the reconciler cancels an old deployment when the job is updated
  2444  func TestReconciler_CancelDeployment_JobUpdate(t *testing.T) {
  2445  	// Create a base job
  2446  	job := mock.Job()
  2447  
  2448  	// Create two deployments
  2449  	running := structs.NewDeployment(job)
  2450  	failed := structs.NewDeployment(job)
  2451  	failed.Status = structs.DeploymentStatusFailed
  2452  
  2453  	// Make the job newer than the deployment
  2454  	job.Version += 10
  2455  
  2456  	cases := []struct {
  2457  		name       string
  2458  		deployment *structs.Deployment
  2459  		cancel     bool
  2460  	}{
  2461  		{
  2462  			name:       "running deployment",
  2463  			deployment: running,
  2464  			cancel:     true,
  2465  		},
  2466  		{
  2467  			name:       "failed deployment",
  2468  			deployment: failed,
  2469  			cancel:     false,
  2470  		},
  2471  	}
  2472  
  2473  	for _, c := range cases {
  2474  		t.Run(c.name, func(t *testing.T) {
  2475  			// Create 10 allocations
  2476  			var allocs []*structs.Allocation
  2477  			for i := 0; i < 10; i++ {
  2478  				alloc := mock.Alloc()
  2479  				alloc.Job = job
  2480  				alloc.JobID = job.ID
  2481  				alloc.NodeID = uuid.Generate()
  2482  				alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2483  				alloc.TaskGroup = job.TaskGroups[0].Name
  2484  				allocs = append(allocs, alloc)
  2485  			}
  2486  
  2487  			reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, c.deployment, allocs, nil, "")
  2488  			r := reconciler.Compute()
  2489  
  2490  			var updates []*structs.DeploymentStatusUpdate
  2491  			if c.cancel {
  2492  				updates = []*structs.DeploymentStatusUpdate{
  2493  					{
  2494  						DeploymentID:      c.deployment.ID,
  2495  						Status:            structs.DeploymentStatusCancelled,
  2496  						StatusDescription: structs.DeploymentStatusDescriptionNewerJob,
  2497  					},
  2498  				}
  2499  			}
  2500  
  2501  			// Assert the correct results
  2502  			assertResults(t, r, &resultExpectation{
  2503  				createDeployment:  nil,
  2504  				deploymentUpdates: updates,
  2505  				place:             0,
  2506  				inplace:           0,
  2507  				stop:              0,
  2508  				desiredTGUpdates: map[string]*structs.DesiredUpdates{
  2509  					job.TaskGroups[0].Name: {
  2510  						Ignore: 10,
  2511  					},
  2512  				},
  2513  			})
  2514  		})
  2515  	}
  2516  }
  2517  
  2518  // Tests the reconciler creates a deployment and does a rolling upgrade with
  2519  // destructive changes
  2520  func TestReconciler_CreateDeployment_RollingUpgrade_Destructive(t *testing.T) {
  2521  	job := mock.Job()
  2522  	job.TaskGroups[0].Update = noCanaryUpdate
  2523  
  2524  	// Create 10 allocations from the old job
  2525  	var allocs []*structs.Allocation
  2526  	for i := 0; i < 10; i++ {
  2527  		alloc := mock.Alloc()
  2528  		alloc.Job = job
  2529  		alloc.JobID = job.ID
  2530  		alloc.NodeID = uuid.Generate()
  2531  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2532  		alloc.TaskGroup = job.TaskGroups[0].Name
  2533  		allocs = append(allocs, alloc)
  2534  	}
  2535  
  2536  	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "")
  2537  	r := reconciler.Compute()
  2538  
  2539  	d := structs.NewDeployment(job)
  2540  	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  2541  		DesiredTotal: 10,
  2542  	}
  2543  
  2544  	// Assert the correct results
  2545  	assertResults(t, r, &resultExpectation{
  2546  		createDeployment:  d,
  2547  		deploymentUpdates: nil,
  2548  		destructive:       4,
  2549  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  2550  			job.TaskGroups[0].Name: {
  2551  				DestructiveUpdate: 4,
  2552  				Ignore:            6,
  2553  			},
  2554  		},
  2555  	})
  2556  
  2557  	assertNamesHaveIndexes(t, intRange(0, 3), destructiveResultsToNames(r.destructiveUpdate))
  2558  }
  2559  
  2560  // Tests the reconciler creates a deployment for inplace updates
  2561  func TestReconciler_CreateDeployment_RollingUpgrade_Inplace(t *testing.T) {
  2562  	jobOld := mock.Job()
  2563  	job := jobOld.Copy()
  2564  	job.Version++
  2565  	job.TaskGroups[0].Update = noCanaryUpdate
  2566  
  2567  	// Create 10 allocations from the old job
  2568  	var allocs []*structs.Allocation
  2569  	for i := 0; i < 10; i++ {
  2570  		alloc := mock.Alloc()
  2571  		alloc.Job = jobOld
  2572  		alloc.JobID = job.ID
  2573  		alloc.NodeID = uuid.Generate()
  2574  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2575  		alloc.TaskGroup = job.TaskGroups[0].Name
  2576  		allocs = append(allocs, alloc)
  2577  	}
  2578  
  2579  	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnInplace, false, job.ID, job, nil, allocs, nil, "")
  2580  	r := reconciler.Compute()
  2581  
  2582  	d := structs.NewDeployment(job)
  2583  	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  2584  		DesiredTotal: 10,
  2585  	}
  2586  
  2587  	// Assert the correct results
  2588  	assertResults(t, r, &resultExpectation{
  2589  		createDeployment:  d,
  2590  		deploymentUpdates: nil,
  2591  		place:             0,
  2592  		inplace:           10,
  2593  		stop:              0,
  2594  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  2595  			job.TaskGroups[0].Name: {
  2596  				InPlaceUpdate: 10,
  2597  			},
  2598  		},
  2599  	})
  2600  }
  2601  
  2602  // Tests the reconciler creates a deployment when the job has a newer create index
  2603  func TestReconciler_CreateDeployment_NewerCreateIndex(t *testing.T) {
  2604  	jobOld := mock.Job()
  2605  	job := jobOld.Copy()
  2606  	job.TaskGroups[0].Update = noCanaryUpdate
  2607  	job.CreateIndex += 100
  2608  
  2609  	// Create 5 allocations from the old job
  2610  	var allocs []*structs.Allocation
  2611  	for i := 0; i < 5; i++ {
  2612  		alloc := mock.Alloc()
  2613  		alloc.Job = jobOld
  2614  		alloc.JobID = jobOld.ID
  2615  		alloc.NodeID = uuid.Generate()
  2616  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2617  		alloc.TaskGroup = job.TaskGroups[0].Name
  2618  		allocs = append(allocs, alloc)
  2619  	}
  2620  
  2621  	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
  2622  	r := reconciler.Compute()
  2623  
  2624  	d := structs.NewDeployment(job)
  2625  	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  2626  		DesiredTotal: 5,
  2627  	}
  2628  
  2629  	// Assert the correct results
  2630  	assertResults(t, r, &resultExpectation{
  2631  		createDeployment:  d,
  2632  		deploymentUpdates: nil,
  2633  		place:             5,
  2634  		destructive:       0,
  2635  		inplace:           0,
  2636  		stop:              0,
  2637  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  2638  			job.TaskGroups[0].Name: {
  2639  				InPlaceUpdate:     0,
  2640  				Ignore:            5,
  2641  				Place:             5,
  2642  				DestructiveUpdate: 0,
  2643  			},
  2644  		},
  2645  	})
  2646  }
  2647  
  2648  // Tests the reconciler doesn't creates a deployment if there are no changes
  2649  func TestReconciler_DontCreateDeployment_NoChanges(t *testing.T) {
  2650  	job := mock.Job()
  2651  	job.TaskGroups[0].Update = noCanaryUpdate
  2652  
  2653  	// Create 10 allocations from the job
  2654  	var allocs []*structs.Allocation
  2655  	for i := 0; i < 10; i++ {
  2656  		alloc := mock.Alloc()
  2657  		alloc.Job = job
  2658  		alloc.JobID = job.ID
  2659  		alloc.NodeID = uuid.Generate()
  2660  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2661  		alloc.TaskGroup = job.TaskGroups[0].Name
  2662  		allocs = append(allocs, alloc)
  2663  	}
  2664  
  2665  	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
  2666  	r := reconciler.Compute()
  2667  
  2668  	// Assert the correct results
  2669  	assertResults(t, r, &resultExpectation{
  2670  		createDeployment:  nil,
  2671  		deploymentUpdates: nil,
  2672  		place:             0,
  2673  		inplace:           0,
  2674  		stop:              0,
  2675  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  2676  			job.TaskGroups[0].Name: {
  2677  				DestructiveUpdate: 0,
  2678  				Ignore:            10,
  2679  			},
  2680  		},
  2681  	})
  2682  }
  2683  
  2684  // Tests the reconciler doesn't place any more canaries when the deployment is
  2685  // paused or failed
  2686  func TestReconciler_PausedOrFailedDeployment_NoMoreCanaries(t *testing.T) {
  2687  	job := mock.Job()
  2688  	job.TaskGroups[0].Update = canaryUpdate
  2689  
  2690  	cases := []struct {
  2691  		name             string
  2692  		deploymentStatus string
  2693  		stop             uint64
  2694  	}{
  2695  		{
  2696  			name:             "paused deployment",
  2697  			deploymentStatus: structs.DeploymentStatusPaused,
  2698  			stop:             0,
  2699  		},
  2700  		{
  2701  			name:             "failed deployment",
  2702  			deploymentStatus: structs.DeploymentStatusFailed,
  2703  			stop:             1,
  2704  		},
  2705  	}
  2706  
  2707  	for _, c := range cases {
  2708  		t.Run(c.name, func(t *testing.T) {
  2709  			// Create a deployment that is paused/failed and has placed some canaries
  2710  			d := structs.NewDeployment(job)
  2711  			d.Status = c.deploymentStatus
  2712  			d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  2713  				Promoted:        false,
  2714  				DesiredCanaries: 2,
  2715  				DesiredTotal:    10,
  2716  				PlacedAllocs:    1,
  2717  			}
  2718  
  2719  			// Create 10 allocations for the original job
  2720  			var allocs []*structs.Allocation
  2721  			for i := 0; i < 10; i++ {
  2722  				alloc := mock.Alloc()
  2723  				alloc.Job = job
  2724  				alloc.JobID = job.ID
  2725  				alloc.NodeID = uuid.Generate()
  2726  				alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2727  				alloc.TaskGroup = job.TaskGroups[0].Name
  2728  				allocs = append(allocs, alloc)
  2729  			}
  2730  
  2731  			// Create one canary
  2732  			canary := mock.Alloc()
  2733  			canary.Job = job
  2734  			canary.JobID = job.ID
  2735  			canary.NodeID = uuid.Generate()
  2736  			canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, 0)
  2737  			canary.TaskGroup = job.TaskGroups[0].Name
  2738  			canary.DeploymentID = d.ID
  2739  			allocs = append(allocs, canary)
  2740  			d.TaskGroups[canary.TaskGroup].PlacedCanaries = []string{canary.ID}
  2741  
  2742  			mockUpdateFn := allocUpdateFnMock(map[string]allocUpdateType{canary.ID: allocUpdateFnIgnore}, allocUpdateFnDestructive)
  2743  			reconciler := NewAllocReconciler(testLogger(), mockUpdateFn, false, job.ID, job, d, allocs, nil, "")
  2744  			r := reconciler.Compute()
  2745  
  2746  			// Assert the correct results
  2747  			assertResults(t, r, &resultExpectation{
  2748  				createDeployment:  nil,
  2749  				deploymentUpdates: nil,
  2750  				place:             0,
  2751  				inplace:           0,
  2752  				stop:              int(c.stop),
  2753  				desiredTGUpdates: map[string]*structs.DesiredUpdates{
  2754  					job.TaskGroups[0].Name: {
  2755  						Ignore: 11 - c.stop,
  2756  						Stop:   c.stop,
  2757  					},
  2758  				},
  2759  			})
  2760  		})
  2761  	}
  2762  }
  2763  
  2764  // Tests the reconciler doesn't place any more allocs when the deployment is
  2765  // paused or failed
  2766  func TestReconciler_PausedOrFailedDeployment_NoMorePlacements(t *testing.T) {
  2767  	job := mock.Job()
  2768  	job.TaskGroups[0].Update = noCanaryUpdate
  2769  	job.TaskGroups[0].Count = 15
  2770  
  2771  	cases := []struct {
  2772  		name             string
  2773  		deploymentStatus string
  2774  	}{
  2775  		{
  2776  			name:             "paused deployment",
  2777  			deploymentStatus: structs.DeploymentStatusPaused,
  2778  		},
  2779  		{
  2780  			name:             "failed deployment",
  2781  			deploymentStatus: structs.DeploymentStatusFailed,
  2782  		},
  2783  	}
  2784  
  2785  	for _, c := range cases {
  2786  		t.Run(c.name, func(t *testing.T) {
  2787  			// Create a deployment that is paused and has placed some canaries
  2788  			d := structs.NewDeployment(job)
  2789  			d.Status = c.deploymentStatus
  2790  			d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  2791  				Promoted:     false,
  2792  				DesiredTotal: 15,
  2793  				PlacedAllocs: 10,
  2794  			}
  2795  
  2796  			// Create 10 allocations for the new job
  2797  			var allocs []*structs.Allocation
  2798  			for i := 0; i < 10; i++ {
  2799  				alloc := mock.Alloc()
  2800  				alloc.Job = job
  2801  				alloc.JobID = job.ID
  2802  				alloc.NodeID = uuid.Generate()
  2803  				alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2804  				alloc.TaskGroup = job.TaskGroups[0].Name
  2805  				allocs = append(allocs, alloc)
  2806  			}
  2807  
  2808  			reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, d, allocs, nil, "")
  2809  			r := reconciler.Compute()
  2810  
  2811  			// Assert the correct results
  2812  			assertResults(t, r, &resultExpectation{
  2813  				createDeployment:  nil,
  2814  				deploymentUpdates: nil,
  2815  				place:             0,
  2816  				inplace:           0,
  2817  				stop:              0,
  2818  				desiredTGUpdates: map[string]*structs.DesiredUpdates{
  2819  					job.TaskGroups[0].Name: {
  2820  						Ignore: 10,
  2821  					},
  2822  				},
  2823  			})
  2824  		})
  2825  	}
  2826  }
  2827  
  2828  // Tests the reconciler doesn't do any more destructive updates when the
  2829  // deployment is paused or failed
  2830  func TestReconciler_PausedOrFailedDeployment_NoMoreDestructiveUpdates(t *testing.T) {
  2831  	job := mock.Job()
  2832  	job.TaskGroups[0].Update = noCanaryUpdate
  2833  
  2834  	cases := []struct {
  2835  		name             string
  2836  		deploymentStatus string
  2837  	}{
  2838  		{
  2839  			name:             "paused deployment",
  2840  			deploymentStatus: structs.DeploymentStatusPaused,
  2841  		},
  2842  		{
  2843  			name:             "failed deployment",
  2844  			deploymentStatus: structs.DeploymentStatusFailed,
  2845  		},
  2846  	}
  2847  
  2848  	for _, c := range cases {
  2849  		t.Run(c.name, func(t *testing.T) {
  2850  			// Create a deployment that is paused and has placed some canaries
  2851  			d := structs.NewDeployment(job)
  2852  			d.Status = c.deploymentStatus
  2853  			d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  2854  				Promoted:     false,
  2855  				DesiredTotal: 10,
  2856  				PlacedAllocs: 1,
  2857  			}
  2858  
  2859  			// Create 9 allocations for the original job
  2860  			var allocs []*structs.Allocation
  2861  			for i := 1; i < 10; i++ {
  2862  				alloc := mock.Alloc()
  2863  				alloc.Job = job
  2864  				alloc.JobID = job.ID
  2865  				alloc.NodeID = uuid.Generate()
  2866  				alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2867  				alloc.TaskGroup = job.TaskGroups[0].Name
  2868  				allocs = append(allocs, alloc)
  2869  			}
  2870  
  2871  			// Create one for the new job
  2872  			newAlloc := mock.Alloc()
  2873  			newAlloc.Job = job
  2874  			newAlloc.JobID = job.ID
  2875  			newAlloc.NodeID = uuid.Generate()
  2876  			newAlloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, 0)
  2877  			newAlloc.TaskGroup = job.TaskGroups[0].Name
  2878  			newAlloc.DeploymentID = d.ID
  2879  			allocs = append(allocs, newAlloc)
  2880  
  2881  			mockUpdateFn := allocUpdateFnMock(map[string]allocUpdateType{newAlloc.ID: allocUpdateFnIgnore}, allocUpdateFnDestructive)
  2882  			reconciler := NewAllocReconciler(testLogger(), mockUpdateFn, false, job.ID, job, d, allocs, nil, "")
  2883  			r := reconciler.Compute()
  2884  
  2885  			// Assert the correct results
  2886  			assertResults(t, r, &resultExpectation{
  2887  				createDeployment:  nil,
  2888  				deploymentUpdates: nil,
  2889  				place:             0,
  2890  				inplace:           0,
  2891  				stop:              0,
  2892  				desiredTGUpdates: map[string]*structs.DesiredUpdates{
  2893  					job.TaskGroups[0].Name: {
  2894  						Ignore: 10,
  2895  					},
  2896  				},
  2897  			})
  2898  		})
  2899  	}
  2900  }
  2901  
  2902  // Tests the reconciler handles migrating a canary correctly on a draining node
  2903  func TestReconciler_DrainNode_Canary(t *testing.T) {
  2904  	job := mock.Job()
  2905  	job.TaskGroups[0].Update = canaryUpdate
  2906  
  2907  	// Create a deployment that is paused and has placed some canaries
  2908  	d := structs.NewDeployment(job)
  2909  	s := &structs.DeploymentState{
  2910  		Promoted:        false,
  2911  		DesiredTotal:    10,
  2912  		DesiredCanaries: 2,
  2913  		PlacedAllocs:    2,
  2914  	}
  2915  	d.TaskGroups[job.TaskGroups[0].Name] = s
  2916  
  2917  	// Create 10 allocations from the old job
  2918  	var allocs []*structs.Allocation
  2919  	for i := 0; i < 10; i++ {
  2920  		alloc := mock.Alloc()
  2921  		alloc.Job = job
  2922  		alloc.JobID = job.ID
  2923  		alloc.NodeID = uuid.Generate()
  2924  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2925  		alloc.TaskGroup = job.TaskGroups[0].Name
  2926  		allocs = append(allocs, alloc)
  2927  	}
  2928  
  2929  	// Create two canaries for the new job
  2930  	handled := make(map[string]allocUpdateType)
  2931  	for i := 0; i < 2; i++ {
  2932  		// Create one canary
  2933  		canary := mock.Alloc()
  2934  		canary.Job = job
  2935  		canary.JobID = job.ID
  2936  		canary.NodeID = uuid.Generate()
  2937  		canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2938  		canary.TaskGroup = job.TaskGroups[0].Name
  2939  		canary.DeploymentID = d.ID
  2940  		s.PlacedCanaries = append(s.PlacedCanaries, canary.ID)
  2941  		allocs = append(allocs, canary)
  2942  		handled[canary.ID] = allocUpdateFnIgnore
  2943  	}
  2944  
  2945  	// Build a map of tainted nodes that contains the last canary
  2946  	tainted := make(map[string]*structs.Node, 1)
  2947  	n := mock.Node()
  2948  	n.ID = allocs[11].NodeID
  2949  	allocs[11].DesiredTransition.Migrate = helper.BoolToPtr(true)
  2950  	n.Drain = true
  2951  	tainted[n.ID] = n
  2952  
  2953  	mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive)
  2954  	reconciler := NewAllocReconciler(testLogger(), mockUpdateFn, false, job.ID, job, d, allocs, tainted, "")
  2955  	r := reconciler.Compute()
  2956  
  2957  	// Assert the correct results
  2958  	assertResults(t, r, &resultExpectation{
  2959  		createDeployment:  nil,
  2960  		deploymentUpdates: nil,
  2961  		place:             1,
  2962  		inplace:           0,
  2963  		stop:              1,
  2964  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  2965  			job.TaskGroups[0].Name: {
  2966  				Canary: 1,
  2967  				Ignore: 11,
  2968  			},
  2969  		},
  2970  	})
  2971  	assertNamesHaveIndexes(t, intRange(1, 1), stopResultsToNames(r.stop))
  2972  	assertNamesHaveIndexes(t, intRange(1, 1), placeResultsToNames(r.place))
  2973  }
  2974  
  2975  // Tests the reconciler handles migrating a canary correctly on a lost node
  2976  func TestReconciler_LostNode_Canary(t *testing.T) {
  2977  	job := mock.Job()
  2978  	job.TaskGroups[0].Update = canaryUpdate
  2979  
  2980  	// Create a deployment that is paused and has placed some canaries
  2981  	d := structs.NewDeployment(job)
  2982  	s := &structs.DeploymentState{
  2983  		Promoted:        false,
  2984  		DesiredTotal:    10,
  2985  		DesiredCanaries: 2,
  2986  		PlacedAllocs:    2,
  2987  	}
  2988  	d.TaskGroups[job.TaskGroups[0].Name] = s
  2989  
  2990  	// Create 10 allocations from the old job
  2991  	var allocs []*structs.Allocation
  2992  	for i := 0; i < 10; i++ {
  2993  		alloc := mock.Alloc()
  2994  		alloc.Job = job
  2995  		alloc.JobID = job.ID
  2996  		alloc.NodeID = uuid.Generate()
  2997  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2998  		alloc.TaskGroup = job.TaskGroups[0].Name
  2999  		allocs = append(allocs, alloc)
  3000  	}
  3001  
  3002  	// Create two canaries for the new job
  3003  	handled := make(map[string]allocUpdateType)
  3004  	for i := 0; i < 2; i++ {
  3005  		// Create one canary
  3006  		canary := mock.Alloc()
  3007  		canary.Job = job
  3008  		canary.JobID = job.ID
  3009  		canary.NodeID = uuid.Generate()
  3010  		canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3011  		canary.TaskGroup = job.TaskGroups[0].Name
  3012  		s.PlacedCanaries = append(s.PlacedCanaries, canary.ID)
  3013  		canary.DeploymentID = d.ID
  3014  		allocs = append(allocs, canary)
  3015  		handled[canary.ID] = allocUpdateFnIgnore
  3016  	}
  3017  
  3018  	// Build a map of tainted nodes that contains the last canary
  3019  	tainted := make(map[string]*structs.Node, 1)
  3020  	n := mock.Node()
  3021  	n.ID = allocs[11].NodeID
  3022  	n.Status = structs.NodeStatusDown
  3023  	tainted[n.ID] = n
  3024  
  3025  	mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive)
  3026  	reconciler := NewAllocReconciler(testLogger(), mockUpdateFn, false, job.ID, job, d, allocs, tainted, "")
  3027  	r := reconciler.Compute()
  3028  
  3029  	// Assert the correct results
  3030  	assertResults(t, r, &resultExpectation{
  3031  		createDeployment:  nil,
  3032  		deploymentUpdates: nil,
  3033  		place:             1,
  3034  		inplace:           0,
  3035  		stop:              1,
  3036  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  3037  			job.TaskGroups[0].Name: {
  3038  				Canary: 1,
  3039  				Ignore: 11,
  3040  			},
  3041  		},
  3042  	})
  3043  
  3044  	assertNamesHaveIndexes(t, intRange(1, 1), stopResultsToNames(r.stop))
  3045  	assertNamesHaveIndexes(t, intRange(1, 1), placeResultsToNames(r.place))
  3046  }
  3047  
  3048  // Tests the reconciler handles stopping canaries from older deployments
  3049  func TestReconciler_StopOldCanaries(t *testing.T) {
  3050  	job := mock.Job()
  3051  	job.TaskGroups[0].Update = canaryUpdate
  3052  
  3053  	// Create an old deployment that has placed some canaries
  3054  	d := structs.NewDeployment(job)
  3055  	s := &structs.DeploymentState{
  3056  		Promoted:        false,
  3057  		DesiredTotal:    10,
  3058  		DesiredCanaries: 2,
  3059  		PlacedAllocs:    2,
  3060  	}
  3061  	d.TaskGroups[job.TaskGroups[0].Name] = s
  3062  
  3063  	// Update the job
  3064  	job.Version += 10
  3065  
  3066  	// Create 10 allocations from the old job
  3067  	var allocs []*structs.Allocation
  3068  	for i := 0; i < 10; i++ {
  3069  		alloc := mock.Alloc()
  3070  		alloc.Job = job
  3071  		alloc.JobID = job.ID
  3072  		alloc.NodeID = uuid.Generate()
  3073  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3074  		alloc.TaskGroup = job.TaskGroups[0].Name
  3075  		allocs = append(allocs, alloc)
  3076  	}
  3077  
  3078  	// Create canaries
  3079  	for i := 0; i < 2; i++ {
  3080  		// Create one canary
  3081  		canary := mock.Alloc()
  3082  		canary.Job = job
  3083  		canary.JobID = job.ID
  3084  		canary.NodeID = uuid.Generate()
  3085  		canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3086  		canary.TaskGroup = job.TaskGroups[0].Name
  3087  		s.PlacedCanaries = append(s.PlacedCanaries, canary.ID)
  3088  		canary.DeploymentID = d.ID
  3089  		allocs = append(allocs, canary)
  3090  	}
  3091  
  3092  	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, job, d, allocs, nil, "")
  3093  	r := reconciler.Compute()
  3094  
  3095  	newD := structs.NewDeployment(job)
  3096  	newD.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion
  3097  	newD.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  3098  		DesiredCanaries: 2,
  3099  		DesiredTotal:    10,
  3100  	}
  3101  
  3102  	// Assert the correct results
  3103  	assertResults(t, r, &resultExpectation{
  3104  		createDeployment: newD,
  3105  		deploymentUpdates: []*structs.DeploymentStatusUpdate{
  3106  			{
  3107  				DeploymentID:      d.ID,
  3108  				Status:            structs.DeploymentStatusCancelled,
  3109  				StatusDescription: structs.DeploymentStatusDescriptionNewerJob,
  3110  			},
  3111  		},
  3112  		place:   2,
  3113  		inplace: 0,
  3114  		stop:    2,
  3115  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  3116  			job.TaskGroups[0].Name: {
  3117  				Canary: 2,
  3118  				Stop:   2,
  3119  				Ignore: 10,
  3120  			},
  3121  		},
  3122  	})
  3123  
  3124  	assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop))
  3125  	assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place))
  3126  }
  3127  
  3128  // Tests the reconciler creates new canaries when the job changes
  3129  func TestReconciler_NewCanaries(t *testing.T) {
  3130  	job := mock.Job()
  3131  	job.TaskGroups[0].Update = canaryUpdate
  3132  
  3133  	// Create 10 allocations from the old job
  3134  	var allocs []*structs.Allocation
  3135  	for i := 0; i < 10; i++ {
  3136  		alloc := mock.Alloc()
  3137  		alloc.Job = job
  3138  		alloc.JobID = job.ID
  3139  		alloc.NodeID = uuid.Generate()
  3140  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3141  		alloc.TaskGroup = job.TaskGroups[0].Name
  3142  		allocs = append(allocs, alloc)
  3143  	}
  3144  
  3145  	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "")
  3146  	r := reconciler.Compute()
  3147  
  3148  	newD := structs.NewDeployment(job)
  3149  	newD.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion
  3150  	newD.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  3151  		DesiredCanaries: 2,
  3152  		DesiredTotal:    10,
  3153  	}
  3154  
  3155  	// Assert the correct results
  3156  	assertResults(t, r, &resultExpectation{
  3157  		createDeployment:  newD,
  3158  		deploymentUpdates: nil,
  3159  		place:             2,
  3160  		inplace:           0,
  3161  		stop:              0,
  3162  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  3163  			job.TaskGroups[0].Name: {
  3164  				Canary: 2,
  3165  				Ignore: 10,
  3166  			},
  3167  		},
  3168  	})
  3169  
  3170  	assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place))
  3171  }
  3172  
  3173  // Tests the reconciler creates new canaries when the job changes and the
  3174  // canary count is greater than the task group count
  3175  func TestReconciler_NewCanaries_CountGreater(t *testing.T) {
  3176  	job := mock.Job()
  3177  	job.TaskGroups[0].Count = 3
  3178  	job.TaskGroups[0].Update = canaryUpdate.Copy()
  3179  	job.TaskGroups[0].Update.Canary = 7
  3180  
  3181  	// Create 3 allocations from the old job
  3182  	var allocs []*structs.Allocation
  3183  	for i := 0; i < 3; i++ {
  3184  		alloc := mock.Alloc()
  3185  		alloc.Job = job
  3186  		alloc.JobID = job.ID
  3187  		alloc.NodeID = uuid.Generate()
  3188  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3189  		alloc.TaskGroup = job.TaskGroups[0].Name
  3190  		allocs = append(allocs, alloc)
  3191  	}
  3192  
  3193  	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "")
  3194  	r := reconciler.Compute()
  3195  
  3196  	newD := structs.NewDeployment(job)
  3197  	newD.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion
  3198  	state := &structs.DeploymentState{
  3199  		DesiredCanaries: 7,
  3200  		DesiredTotal:    3,
  3201  	}
  3202  	newD.TaskGroups[job.TaskGroups[0].Name] = state
  3203  
  3204  	// Assert the correct results
  3205  	assertResults(t, r, &resultExpectation{
  3206  		createDeployment:  newD,
  3207  		deploymentUpdates: nil,
  3208  		place:             7,
  3209  		inplace:           0,
  3210  		stop:              0,
  3211  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  3212  			job.TaskGroups[0].Name: {
  3213  				Canary: 7,
  3214  				Ignore: 3,
  3215  			},
  3216  		},
  3217  	})
  3218  
  3219  	assertNamesHaveIndexes(t, intRange(0, 2, 3, 6), placeResultsToNames(r.place))
  3220  }
  3221  
  3222  // Tests the reconciler creates new canaries when the job changes for multiple
  3223  // task groups
  3224  func TestReconciler_NewCanaries_MultiTG(t *testing.T) {
  3225  	job := mock.Job()
  3226  	job.TaskGroups[0].Update = canaryUpdate
  3227  	job.TaskGroups = append(job.TaskGroups, job.TaskGroups[0].Copy())
  3228  	job.TaskGroups[0].Name = "tg2"
  3229  
  3230  	// Create 10 allocations from the old job for each tg
  3231  	var allocs []*structs.Allocation
  3232  	for j := 0; j < 2; j++ {
  3233  		for i := 0; i < 10; i++ {
  3234  			alloc := mock.Alloc()
  3235  			alloc.Job = job
  3236  			alloc.JobID = job.ID
  3237  			alloc.NodeID = uuid.Generate()
  3238  			alloc.Name = structs.AllocName(job.ID, job.TaskGroups[j].Name, uint(i))
  3239  			alloc.TaskGroup = job.TaskGroups[j].Name
  3240  			allocs = append(allocs, alloc)
  3241  		}
  3242  	}
  3243  
  3244  	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "")
  3245  	r := reconciler.Compute()
  3246  
  3247  	newD := structs.NewDeployment(job)
  3248  	newD.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion
  3249  	state := &structs.DeploymentState{
  3250  		DesiredCanaries: 2,
  3251  		DesiredTotal:    10,
  3252  	}
  3253  	newD.TaskGroups[job.TaskGroups[0].Name] = state
  3254  	newD.TaskGroups[job.TaskGroups[1].Name] = state.Copy()
  3255  
  3256  	// Assert the correct results
  3257  	assertResults(t, r, &resultExpectation{
  3258  		createDeployment:  newD,
  3259  		deploymentUpdates: nil,
  3260  		place:             4,
  3261  		inplace:           0,
  3262  		stop:              0,
  3263  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  3264  			job.TaskGroups[0].Name: {
  3265  				Canary: 2,
  3266  				Ignore: 10,
  3267  			},
  3268  			job.TaskGroups[1].Name: {
  3269  				Canary: 2,
  3270  				Ignore: 10,
  3271  			},
  3272  		},
  3273  	})
  3274  
  3275  	assertNamesHaveIndexes(t, intRange(0, 1, 0, 1), placeResultsToNames(r.place))
  3276  }
  3277  
  3278  // Tests the reconciler creates new canaries when the job changes and scales up
  3279  func TestReconciler_NewCanaries_ScaleUp(t *testing.T) {
  3280  	// Scale the job up to 15
  3281  	job := mock.Job()
  3282  	job.TaskGroups[0].Update = canaryUpdate
  3283  	job.TaskGroups[0].Count = 15
  3284  
  3285  	// Create 10 allocations from the old job
  3286  	var allocs []*structs.Allocation
  3287  	for i := 0; i < 10; i++ {
  3288  		alloc := mock.Alloc()
  3289  		alloc.Job = job
  3290  		alloc.JobID = job.ID
  3291  		alloc.NodeID = uuid.Generate()
  3292  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3293  		alloc.TaskGroup = job.TaskGroups[0].Name
  3294  		allocs = append(allocs, alloc)
  3295  	}
  3296  
  3297  	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "")
  3298  	r := reconciler.Compute()
  3299  
  3300  	newD := structs.NewDeployment(job)
  3301  	newD.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion
  3302  	newD.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  3303  		DesiredCanaries: 2,
  3304  		DesiredTotal:    15,
  3305  	}
  3306  
  3307  	// Assert the correct results
  3308  	assertResults(t, r, &resultExpectation{
  3309  		createDeployment:  newD,
  3310  		deploymentUpdates: nil,
  3311  		place:             2,
  3312  		inplace:           0,
  3313  		stop:              0,
  3314  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  3315  			job.TaskGroups[0].Name: {
  3316  				Canary: 2,
  3317  				Ignore: 10,
  3318  			},
  3319  		},
  3320  	})
  3321  
  3322  	assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place))
  3323  }
  3324  
  3325  // Tests the reconciler creates new canaries when the job changes and scales
  3326  // down
  3327  func TestReconciler_NewCanaries_ScaleDown(t *testing.T) {
  3328  	// Scale the job down to 5
  3329  	job := mock.Job()
  3330  	job.TaskGroups[0].Update = canaryUpdate
  3331  	job.TaskGroups[0].Count = 5
  3332  
  3333  	// Create 10 allocations from the old job
  3334  	var allocs []*structs.Allocation
  3335  	for i := 0; i < 10; i++ {
  3336  		alloc := mock.Alloc()
  3337  		alloc.Job = job
  3338  		alloc.JobID = job.ID
  3339  		alloc.NodeID = uuid.Generate()
  3340  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3341  		alloc.TaskGroup = job.TaskGroups[0].Name
  3342  		allocs = append(allocs, alloc)
  3343  	}
  3344  
  3345  	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "")
  3346  	r := reconciler.Compute()
  3347  
  3348  	newD := structs.NewDeployment(job)
  3349  	newD.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion
  3350  	newD.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  3351  		DesiredCanaries: 2,
  3352  		DesiredTotal:    5,
  3353  	}
  3354  
  3355  	// Assert the correct results
  3356  	assertResults(t, r, &resultExpectation{
  3357  		createDeployment:  newD,
  3358  		deploymentUpdates: nil,
  3359  		place:             2,
  3360  		inplace:           0,
  3361  		stop:              5,
  3362  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  3363  			job.TaskGroups[0].Name: {
  3364  				Canary: 2,
  3365  				Stop:   5,
  3366  				Ignore: 5,
  3367  			},
  3368  		},
  3369  	})
  3370  
  3371  	assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place))
  3372  	assertNamesHaveIndexes(t, intRange(5, 9), stopResultsToNames(r.stop))
  3373  }
  3374  
  3375  // Tests the reconciler handles filling the names of partially placed canaries
  3376  func TestReconciler_NewCanaries_FillNames(t *testing.T) {
  3377  	job := mock.Job()
  3378  	job.TaskGroups[0].Update = &structs.UpdateStrategy{
  3379  		Canary:          4,
  3380  		MaxParallel:     2,
  3381  		HealthCheck:     structs.UpdateStrategyHealthCheck_Checks,
  3382  		MinHealthyTime:  10 * time.Second,
  3383  		HealthyDeadline: 10 * time.Minute,
  3384  	}
  3385  
  3386  	// Create an existing deployment that has placed some canaries
  3387  	d := structs.NewDeployment(job)
  3388  	s := &structs.DeploymentState{
  3389  		Promoted:        false,
  3390  		DesiredTotal:    10,
  3391  		DesiredCanaries: 4,
  3392  		PlacedAllocs:    2,
  3393  	}
  3394  	d.TaskGroups[job.TaskGroups[0].Name] = s
  3395  
  3396  	// Create 10 allocations from the old job
  3397  	var allocs []*structs.Allocation
  3398  	for i := 0; i < 10; i++ {
  3399  		alloc := mock.Alloc()
  3400  		alloc.Job = job
  3401  		alloc.JobID = job.ID
  3402  		alloc.NodeID = uuid.Generate()
  3403  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3404  		alloc.TaskGroup = job.TaskGroups[0].Name
  3405  		allocs = append(allocs, alloc)
  3406  	}
  3407  
  3408  	// Create canaries but pick names at the ends
  3409  	for i := 0; i < 4; i += 3 {
  3410  		// Create one canary
  3411  		canary := mock.Alloc()
  3412  		canary.Job = job
  3413  		canary.JobID = job.ID
  3414  		canary.NodeID = uuid.Generate()
  3415  		canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3416  		canary.TaskGroup = job.TaskGroups[0].Name
  3417  		s.PlacedCanaries = append(s.PlacedCanaries, canary.ID)
  3418  		canary.DeploymentID = d.ID
  3419  		allocs = append(allocs, canary)
  3420  	}
  3421  
  3422  	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, job, d, allocs, nil, "")
  3423  	r := reconciler.Compute()
  3424  
  3425  	// Assert the correct results
  3426  	assertResults(t, r, &resultExpectation{
  3427  		createDeployment:  nil,
  3428  		deploymentUpdates: nil,
  3429  		place:             2,
  3430  		inplace:           0,
  3431  		stop:              0,
  3432  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  3433  			job.TaskGroups[0].Name: {
  3434  				Canary: 2,
  3435  				Ignore: 12,
  3436  			},
  3437  		},
  3438  	})
  3439  
  3440  	assertNamesHaveIndexes(t, intRange(1, 2), placeResultsToNames(r.place))
  3441  }
  3442  
  3443  // Tests the reconciler handles canary promotion by unblocking max_parallel
  3444  func TestReconciler_PromoteCanaries_Unblock(t *testing.T) {
  3445  	job := mock.Job()
  3446  	job.TaskGroups[0].Update = canaryUpdate
  3447  
  3448  	// Create an existing deployment that has placed some canaries and mark them
  3449  	// promoted
  3450  	d := structs.NewDeployment(job)
  3451  	s := &structs.DeploymentState{
  3452  		Promoted:        true,
  3453  		DesiredTotal:    10,
  3454  		DesiredCanaries: 2,
  3455  		PlacedAllocs:    2,
  3456  	}
  3457  	d.TaskGroups[job.TaskGroups[0].Name] = s
  3458  
  3459  	// Create 10 allocations from the old job
  3460  	var allocs []*structs.Allocation
  3461  	for i := 0; i < 10; i++ {
  3462  		alloc := mock.Alloc()
  3463  		alloc.Job = job
  3464  		alloc.JobID = job.ID
  3465  		alloc.NodeID = uuid.Generate()
  3466  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3467  		alloc.TaskGroup = job.TaskGroups[0].Name
  3468  		allocs = append(allocs, alloc)
  3469  	}
  3470  
  3471  	// Create the canaries
  3472  	handled := make(map[string]allocUpdateType)
  3473  	for i := 0; i < 2; i++ {
  3474  		// Create one canary
  3475  		canary := mock.Alloc()
  3476  		canary.Job = job
  3477  		canary.JobID = job.ID
  3478  		canary.NodeID = uuid.Generate()
  3479  		canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3480  		canary.TaskGroup = job.TaskGroups[0].Name
  3481  		s.PlacedCanaries = append(s.PlacedCanaries, canary.ID)
  3482  		canary.DeploymentID = d.ID
  3483  		canary.DeploymentStatus = &structs.AllocDeploymentStatus{
  3484  			Healthy: helper.BoolToPtr(true),
  3485  		}
  3486  		allocs = append(allocs, canary)
  3487  		handled[canary.ID] = allocUpdateFnIgnore
  3488  	}
  3489  
  3490  	mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive)
  3491  	reconciler := NewAllocReconciler(testLogger(), mockUpdateFn, false, job.ID, job, d, allocs, nil, "")
  3492  	r := reconciler.Compute()
  3493  
  3494  	// Assert the correct results
  3495  	assertResults(t, r, &resultExpectation{
  3496  		createDeployment:  nil,
  3497  		deploymentUpdates: nil,
  3498  		destructive:       2,
  3499  		stop:              2,
  3500  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  3501  			job.TaskGroups[0].Name: {
  3502  				Stop:              2,
  3503  				DestructiveUpdate: 2,
  3504  				Ignore:            8,
  3505  			},
  3506  		},
  3507  	})
  3508  
  3509  	assertNoCanariesStopped(t, d, r.stop)
  3510  	assertNamesHaveIndexes(t, intRange(2, 3), destructiveResultsToNames(r.destructiveUpdate))
  3511  	assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop))
  3512  }
  3513  
  3514  // Tests the reconciler handles canary promotion when the canary count equals
  3515  // the total correctly
  3516  func TestReconciler_PromoteCanaries_CanariesEqualCount(t *testing.T) {
  3517  	job := mock.Job()
  3518  	job.TaskGroups[0].Update = canaryUpdate
  3519  	job.TaskGroups[0].Count = 2
  3520  
  3521  	// Create an existing deployment that has placed some canaries and mark them
  3522  	// promoted
  3523  	d := structs.NewDeployment(job)
  3524  	s := &structs.DeploymentState{
  3525  		Promoted:        true,
  3526  		DesiredTotal:    2,
  3527  		DesiredCanaries: 2,
  3528  		PlacedAllocs:    2,
  3529  		HealthyAllocs:   2,
  3530  	}
  3531  	d.TaskGroups[job.TaskGroups[0].Name] = s
  3532  
  3533  	// Create 2 allocations from the old job
  3534  	var allocs []*structs.Allocation
  3535  	for i := 0; i < 2; i++ {
  3536  		alloc := mock.Alloc()
  3537  		alloc.Job = job
  3538  		alloc.JobID = job.ID
  3539  		alloc.NodeID = uuid.Generate()
  3540  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3541  		alloc.TaskGroup = job.TaskGroups[0].Name
  3542  		allocs = append(allocs, alloc)
  3543  	}
  3544  
  3545  	// Create the canaries
  3546  	handled := make(map[string]allocUpdateType)
  3547  	for i := 0; i < 2; i++ {
  3548  		// Create one canary
  3549  		canary := mock.Alloc()
  3550  		canary.Job = job
  3551  		canary.JobID = job.ID
  3552  		canary.NodeID = uuid.Generate()
  3553  		canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3554  		canary.TaskGroup = job.TaskGroups[0].Name
  3555  		s.PlacedCanaries = append(s.PlacedCanaries, canary.ID)
  3556  		canary.DeploymentID = d.ID
  3557  		canary.DeploymentStatus = &structs.AllocDeploymentStatus{
  3558  			Healthy: helper.BoolToPtr(true),
  3559  		}
  3560  		allocs = append(allocs, canary)
  3561  		handled[canary.ID] = allocUpdateFnIgnore
  3562  	}
  3563  
  3564  	mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive)
  3565  	reconciler := NewAllocReconciler(testLogger(), mockUpdateFn, false, job.ID, job, d, allocs, nil, "")
  3566  	r := reconciler.Compute()
  3567  
  3568  	updates := []*structs.DeploymentStatusUpdate{
  3569  		{
  3570  			DeploymentID:      d.ID,
  3571  			Status:            structs.DeploymentStatusSuccessful,
  3572  			StatusDescription: structs.DeploymentStatusDescriptionSuccessful,
  3573  		},
  3574  	}
  3575  
  3576  	// Assert the correct results
  3577  	assertResults(t, r, &resultExpectation{
  3578  		createDeployment:  nil,
  3579  		deploymentUpdates: updates,
  3580  		place:             0,
  3581  		inplace:           0,
  3582  		stop:              2,
  3583  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  3584  			job.TaskGroups[0].Name: {
  3585  				Stop:   2,
  3586  				Ignore: 2,
  3587  			},
  3588  		},
  3589  	})
  3590  
  3591  	assertNoCanariesStopped(t, d, r.stop)
  3592  	assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop))
  3593  }
  3594  
  3595  // Tests the reconciler checks the health of placed allocs to determine the
  3596  // limit
  3597  func TestReconciler_DeploymentLimit_HealthAccounting(t *testing.T) {
  3598  	job := mock.Job()
  3599  	job.TaskGroups[0].Update = noCanaryUpdate
  3600  
  3601  	cases := []struct {
  3602  		healthy int
  3603  	}{
  3604  		{
  3605  			healthy: 0,
  3606  		},
  3607  		{
  3608  			healthy: 1,
  3609  		},
  3610  		{
  3611  			healthy: 2,
  3612  		},
  3613  		{
  3614  			healthy: 3,
  3615  		},
  3616  		{
  3617  			healthy: 4,
  3618  		},
  3619  	}
  3620  
  3621  	for _, c := range cases {
  3622  		t.Run(fmt.Sprintf("%d healthy", c.healthy), func(t *testing.T) {
  3623  			// Create an existing deployment that has placed some canaries and mark them
  3624  			// promoted
  3625  			d := structs.NewDeployment(job)
  3626  			d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  3627  				Promoted:     true,
  3628  				DesiredTotal: 10,
  3629  				PlacedAllocs: 4,
  3630  			}
  3631  
  3632  			// Create 6 allocations from the old job
  3633  			var allocs []*structs.Allocation
  3634  			for i := 4; i < 10; i++ {
  3635  				alloc := mock.Alloc()
  3636  				alloc.Job = job
  3637  				alloc.JobID = job.ID
  3638  				alloc.NodeID = uuid.Generate()
  3639  				alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3640  				alloc.TaskGroup = job.TaskGroups[0].Name
  3641  				allocs = append(allocs, alloc)
  3642  			}
  3643  
  3644  			// Create the new allocs
  3645  			handled := make(map[string]allocUpdateType)
  3646  			for i := 0; i < 4; i++ {
  3647  				new := mock.Alloc()
  3648  				new.Job = job
  3649  				new.JobID = job.ID
  3650  				new.NodeID = uuid.Generate()
  3651  				new.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3652  				new.TaskGroup = job.TaskGroups[0].Name
  3653  				new.DeploymentID = d.ID
  3654  				if i < c.healthy {
  3655  					new.DeploymentStatus = &structs.AllocDeploymentStatus{
  3656  						Healthy: helper.BoolToPtr(true),
  3657  					}
  3658  				}
  3659  				allocs = append(allocs, new)
  3660  				handled[new.ID] = allocUpdateFnIgnore
  3661  			}
  3662  
  3663  			mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive)
  3664  			reconciler := NewAllocReconciler(testLogger(), mockUpdateFn, false, job.ID, job, d, allocs, nil, "")
  3665  			r := reconciler.Compute()
  3666  
  3667  			// Assert the correct results
  3668  			assertResults(t, r, &resultExpectation{
  3669  				createDeployment:  nil,
  3670  				deploymentUpdates: nil,
  3671  				destructive:       c.healthy,
  3672  				desiredTGUpdates: map[string]*structs.DesiredUpdates{
  3673  					job.TaskGroups[0].Name: {
  3674  						DestructiveUpdate: uint64(c.healthy),
  3675  						Ignore:            uint64(10 - c.healthy),
  3676  					},
  3677  				},
  3678  			})
  3679  
  3680  			if c.healthy != 0 {
  3681  				assertNamesHaveIndexes(t, intRange(4, 3+c.healthy), destructiveResultsToNames(r.destructiveUpdate))
  3682  			}
  3683  		})
  3684  	}
  3685  }
  3686  
  3687  // Tests the reconciler handles an alloc on a tainted node during a rolling
  3688  // update
  3689  func TestReconciler_TaintedNode_RollingUpgrade(t *testing.T) {
  3690  	job := mock.Job()
  3691  	job.TaskGroups[0].Update = noCanaryUpdate
  3692  
  3693  	// Create an existing deployment that has some placed allocs
  3694  	d := structs.NewDeployment(job)
  3695  	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  3696  		Promoted:     true,
  3697  		DesiredTotal: 10,
  3698  		PlacedAllocs: 7,
  3699  	}
  3700  
  3701  	// Create 2 allocations from the old job
  3702  	var allocs []*structs.Allocation
  3703  	for i := 8; i < 10; i++ {
  3704  		alloc := mock.Alloc()
  3705  		alloc.Job = job
  3706  		alloc.JobID = job.ID
  3707  		alloc.NodeID = uuid.Generate()
  3708  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3709  		alloc.TaskGroup = job.TaskGroups[0].Name
  3710  		allocs = append(allocs, alloc)
  3711  	}
  3712  
  3713  	// Create the healthy replacements
  3714  	handled := make(map[string]allocUpdateType)
  3715  	for i := 0; i < 8; i++ {
  3716  		new := mock.Alloc()
  3717  		new.Job = job
  3718  		new.JobID = job.ID
  3719  		new.NodeID = uuid.Generate()
  3720  		new.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3721  		new.TaskGroup = job.TaskGroups[0].Name
  3722  		new.DeploymentID = d.ID
  3723  		new.DeploymentStatus = &structs.AllocDeploymentStatus{
  3724  			Healthy: helper.BoolToPtr(true),
  3725  		}
  3726  		allocs = append(allocs, new)
  3727  		handled[new.ID] = allocUpdateFnIgnore
  3728  	}
  3729  
  3730  	// Build a map of tainted nodes
  3731  	tainted := make(map[string]*structs.Node, 3)
  3732  	for i := 0; i < 3; i++ {
  3733  		n := mock.Node()
  3734  		n.ID = allocs[2+i].NodeID
  3735  		if i == 0 {
  3736  			n.Status = structs.NodeStatusDown
  3737  		} else {
  3738  			n.Drain = true
  3739  			allocs[2+i].DesiredTransition.Migrate = helper.BoolToPtr(true)
  3740  		}
  3741  		tainted[n.ID] = n
  3742  	}
  3743  
  3744  	mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive)
  3745  	reconciler := NewAllocReconciler(testLogger(), mockUpdateFn, false, job.ID, job, d, allocs, tainted, "")
  3746  	r := reconciler.Compute()
  3747  
  3748  	// Assert the correct results
  3749  	assertResults(t, r, &resultExpectation{
  3750  		createDeployment:  nil,
  3751  		deploymentUpdates: nil,
  3752  		place:             3,
  3753  		destructive:       2,
  3754  		stop:              3,
  3755  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  3756  			job.TaskGroups[0].Name: {
  3757  				Place:             1, // Place the lost
  3758  				Stop:              1, // Stop the lost
  3759  				Migrate:           2, // Migrate the tainted
  3760  				DestructiveUpdate: 2,
  3761  				Ignore:            5,
  3762  			},
  3763  		},
  3764  	})
  3765  
  3766  	assertNamesHaveIndexes(t, intRange(8, 9), destructiveResultsToNames(r.destructiveUpdate))
  3767  	assertNamesHaveIndexes(t, intRange(0, 2), placeResultsToNames(r.place))
  3768  	assertNamesHaveIndexes(t, intRange(0, 2), stopResultsToNames(r.stop))
  3769  }
  3770  
  3771  // Tests the reconciler handles a failed deployment with allocs on tainted
  3772  // nodes
  3773  func TestReconciler_FailedDeployment_TaintedNodes(t *testing.T) {
  3774  	job := mock.Job()
  3775  	job.TaskGroups[0].Update = noCanaryUpdate
  3776  
  3777  	// Create an existing failed deployment that has some placed allocs
  3778  	d := structs.NewDeployment(job)
  3779  	d.Status = structs.DeploymentStatusFailed
  3780  	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  3781  		Promoted:     true,
  3782  		DesiredTotal: 10,
  3783  		PlacedAllocs: 4,
  3784  	}
  3785  
  3786  	// Create 6 allocations from the old job
  3787  	var allocs []*structs.Allocation
  3788  	for i := 4; i < 10; i++ {
  3789  		alloc := mock.Alloc()
  3790  		alloc.Job = job
  3791  		alloc.JobID = job.ID
  3792  		alloc.NodeID = uuid.Generate()
  3793  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3794  		alloc.TaskGroup = job.TaskGroups[0].Name
  3795  		allocs = append(allocs, alloc)
  3796  	}
  3797  
  3798  	// Create the healthy replacements
  3799  	handled := make(map[string]allocUpdateType)
  3800  	for i := 0; i < 4; i++ {
  3801  		new := mock.Alloc()
  3802  		new.Job = job
  3803  		new.JobID = job.ID
  3804  		new.NodeID = uuid.Generate()
  3805  		new.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3806  		new.TaskGroup = job.TaskGroups[0].Name
  3807  		new.DeploymentID = d.ID
  3808  		new.DeploymentStatus = &structs.AllocDeploymentStatus{
  3809  			Healthy: helper.BoolToPtr(true),
  3810  		}
  3811  		allocs = append(allocs, new)
  3812  		handled[new.ID] = allocUpdateFnIgnore
  3813  	}
  3814  
  3815  	// Build a map of tainted nodes
  3816  	tainted := make(map[string]*structs.Node, 2)
  3817  	for i := 0; i < 2; i++ {
  3818  		n := mock.Node()
  3819  		n.ID = allocs[6+i].NodeID
  3820  		if i == 0 {
  3821  			n.Status = structs.NodeStatusDown
  3822  		} else {
  3823  			n.Drain = true
  3824  			allocs[6+i].DesiredTransition.Migrate = helper.BoolToPtr(true)
  3825  		}
  3826  		tainted[n.ID] = n
  3827  	}
  3828  
  3829  	mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive)
  3830  	reconciler := NewAllocReconciler(testLogger(), mockUpdateFn, false, job.ID, job, d, allocs, tainted, "")
  3831  	r := reconciler.Compute()
  3832  
  3833  	// Assert the correct results
  3834  	assertResults(t, r, &resultExpectation{
  3835  		createDeployment:  nil,
  3836  		deploymentUpdates: nil,
  3837  		place:             2,
  3838  		inplace:           0,
  3839  		stop:              2,
  3840  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  3841  			job.TaskGroups[0].Name: {
  3842  				Place:   1,
  3843  				Migrate: 1,
  3844  				Stop:    1,
  3845  				Ignore:  8,
  3846  			},
  3847  		},
  3848  	})
  3849  
  3850  	assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place))
  3851  	assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop))
  3852  }
  3853  
  3854  // Tests the reconciler handles a run after a deployment is complete
  3855  // successfully.
  3856  func TestReconciler_CompleteDeployment(t *testing.T) {
  3857  	job := mock.Job()
  3858  	job.TaskGroups[0].Update = canaryUpdate
  3859  
  3860  	d := structs.NewDeployment(job)
  3861  	d.Status = structs.DeploymentStatusSuccessful
  3862  	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  3863  		Promoted:        true,
  3864  		DesiredTotal:    10,
  3865  		DesiredCanaries: 2,
  3866  		PlacedAllocs:    10,
  3867  		HealthyAllocs:   10,
  3868  	}
  3869  
  3870  	// Create allocations from the old job
  3871  	var allocs []*structs.Allocation
  3872  	for i := 0; i < 10; i++ {
  3873  		alloc := mock.Alloc()
  3874  		alloc.Job = job
  3875  		alloc.JobID = job.ID
  3876  		alloc.NodeID = uuid.Generate()
  3877  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3878  		alloc.TaskGroup = job.TaskGroups[0].Name
  3879  		alloc.DeploymentID = d.ID
  3880  		alloc.DeploymentStatus = &structs.AllocDeploymentStatus{
  3881  			Healthy: helper.BoolToPtr(true),
  3882  		}
  3883  		allocs = append(allocs, alloc)
  3884  	}
  3885  
  3886  	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, d, allocs, nil, "")
  3887  	r := reconciler.Compute()
  3888  
  3889  	// Assert the correct results
  3890  	assertResults(t, r, &resultExpectation{
  3891  		createDeployment:  nil,
  3892  		deploymentUpdates: nil,
  3893  		place:             0,
  3894  		inplace:           0,
  3895  		stop:              0,
  3896  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  3897  			job.TaskGroups[0].Name: {
  3898  				Ignore: 10,
  3899  			},
  3900  		},
  3901  	})
  3902  }
  3903  
  3904  // Tests that the reconciler marks a deployment as complete once there is
  3905  // nothing left to place even if there are failed allocations that are part of
  3906  // the deployment.
  3907  func TestReconciler_MarkDeploymentComplete_FailedAllocations(t *testing.T) {
  3908  	job := mock.Job()
  3909  	job.TaskGroups[0].Update = noCanaryUpdate
  3910  
  3911  	d := structs.NewDeployment(job)
  3912  	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  3913  		DesiredTotal:  10,
  3914  		PlacedAllocs:  20,
  3915  		HealthyAllocs: 10,
  3916  	}
  3917  
  3918  	// Create 10 healthy allocs and 10 allocs that are failed
  3919  	var allocs []*structs.Allocation
  3920  	for i := 0; i < 20; i++ {
  3921  		alloc := mock.Alloc()
  3922  		alloc.Job = job
  3923  		alloc.JobID = job.ID
  3924  		alloc.NodeID = uuid.Generate()
  3925  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i%10))
  3926  		alloc.TaskGroup = job.TaskGroups[0].Name
  3927  		alloc.DeploymentID = d.ID
  3928  		alloc.DeploymentStatus = &structs.AllocDeploymentStatus{}
  3929  		if i < 10 {
  3930  			alloc.ClientStatus = structs.AllocClientStatusRunning
  3931  			alloc.DeploymentStatus.Healthy = helper.BoolToPtr(true)
  3932  		} else {
  3933  			alloc.DesiredStatus = structs.AllocDesiredStatusStop
  3934  			alloc.ClientStatus = structs.AllocClientStatusFailed
  3935  			alloc.DeploymentStatus.Healthy = helper.BoolToPtr(false)
  3936  		}
  3937  
  3938  		allocs = append(allocs, alloc)
  3939  	}
  3940  
  3941  	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, d, allocs, nil, "")
  3942  	r := reconciler.Compute()
  3943  
  3944  	updates := []*structs.DeploymentStatusUpdate{
  3945  		{
  3946  			DeploymentID:      d.ID,
  3947  			Status:            structs.DeploymentStatusSuccessful,
  3948  			StatusDescription: structs.DeploymentStatusDescriptionSuccessful,
  3949  		},
  3950  	}
  3951  
  3952  	// Assert the correct results
  3953  	assertResults(t, r, &resultExpectation{
  3954  		createDeployment:  nil,
  3955  		deploymentUpdates: updates,
  3956  		place:             0,
  3957  		inplace:           0,
  3958  		stop:              0,
  3959  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  3960  			job.TaskGroups[0].Name: {
  3961  				Ignore: 10,
  3962  			},
  3963  		},
  3964  	})
  3965  }
  3966  
  3967  // Test that a failed deployment cancels non-promoted canaries
  3968  func TestReconciler_FailedDeployment_CancelCanaries(t *testing.T) {
  3969  	// Create a job with two task groups
  3970  	job := mock.Job()
  3971  	job.TaskGroups[0].Update = canaryUpdate
  3972  	job.TaskGroups = append(job.TaskGroups, job.TaskGroups[0].Copy())
  3973  	job.TaskGroups[1].Name = "two"
  3974  
  3975  	// Create an existing failed deployment that has promoted one task group
  3976  	d := structs.NewDeployment(job)
  3977  	d.Status = structs.DeploymentStatusFailed
  3978  	s0 := &structs.DeploymentState{
  3979  		Promoted:        true,
  3980  		DesiredTotal:    10,
  3981  		DesiredCanaries: 2,
  3982  		PlacedAllocs:    4,
  3983  	}
  3984  	s1 := &structs.DeploymentState{
  3985  		Promoted:        false,
  3986  		DesiredTotal:    10,
  3987  		DesiredCanaries: 2,
  3988  		PlacedAllocs:    2,
  3989  	}
  3990  	d.TaskGroups[job.TaskGroups[0].Name] = s0
  3991  	d.TaskGroups[job.TaskGroups[1].Name] = s1
  3992  
  3993  	// Create 6 allocations from the old job
  3994  	var allocs []*structs.Allocation
  3995  	handled := make(map[string]allocUpdateType)
  3996  	for _, group := range []int{0, 1} {
  3997  		replacements := 4
  3998  		state := s0
  3999  		if group == 1 {
  4000  			replacements = 2
  4001  			state = s1
  4002  		}
  4003  
  4004  		// Create the healthy replacements
  4005  		for i := 0; i < replacements; i++ {
  4006  			new := mock.Alloc()
  4007  			new.Job = job
  4008  			new.JobID = job.ID
  4009  			new.NodeID = uuid.Generate()
  4010  			new.Name = structs.AllocName(job.ID, job.TaskGroups[group].Name, uint(i))
  4011  			new.TaskGroup = job.TaskGroups[group].Name
  4012  			new.DeploymentID = d.ID
  4013  			new.DeploymentStatus = &structs.AllocDeploymentStatus{
  4014  				Healthy: helper.BoolToPtr(true),
  4015  			}
  4016  			allocs = append(allocs, new)
  4017  			handled[new.ID] = allocUpdateFnIgnore
  4018  
  4019  			// Add the alloc to the canary list
  4020  			if i < 2 {
  4021  				state.PlacedCanaries = append(state.PlacedCanaries, new.ID)
  4022  			}
  4023  		}
  4024  		for i := replacements; i < 10; i++ {
  4025  			alloc := mock.Alloc()
  4026  			alloc.Job = job
  4027  			alloc.JobID = job.ID
  4028  			alloc.NodeID = uuid.Generate()
  4029  			alloc.Name = structs.AllocName(job.ID, job.TaskGroups[group].Name, uint(i))
  4030  			alloc.TaskGroup = job.TaskGroups[group].Name
  4031  			allocs = append(allocs, alloc)
  4032  		}
  4033  	}
  4034  
  4035  	mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive)
  4036  	reconciler := NewAllocReconciler(testLogger(), mockUpdateFn, false, job.ID, job, d, allocs, nil, "")
  4037  	r := reconciler.Compute()
  4038  
  4039  	// Assert the correct results
  4040  	assertResults(t, r, &resultExpectation{
  4041  		createDeployment:  nil,
  4042  		deploymentUpdates: nil,
  4043  		place:             0,
  4044  		inplace:           0,
  4045  		stop:              2,
  4046  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  4047  			job.TaskGroups[0].Name: {
  4048  				Ignore: 10,
  4049  			},
  4050  			job.TaskGroups[1].Name: {
  4051  				Stop:   2,
  4052  				Ignore: 8,
  4053  			},
  4054  		},
  4055  	})
  4056  
  4057  	assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop))
  4058  }
  4059  
  4060  // Test that a failed deployment and updated job works
  4061  func TestReconciler_FailedDeployment_NewJob(t *testing.T) {
  4062  	job := mock.Job()
  4063  	job.TaskGroups[0].Update = noCanaryUpdate
  4064  
  4065  	// Create an existing failed deployment that has some placed allocs
  4066  	d := structs.NewDeployment(job)
  4067  	d.Status = structs.DeploymentStatusFailed
  4068  	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  4069  		Promoted:     true,
  4070  		DesiredTotal: 10,
  4071  		PlacedAllocs: 4,
  4072  	}
  4073  
  4074  	// Create 6 allocations from the old job
  4075  	var allocs []*structs.Allocation
  4076  	for i := 4; i < 10; i++ {
  4077  		alloc := mock.Alloc()
  4078  		alloc.Job = job
  4079  		alloc.JobID = job.ID
  4080  		alloc.NodeID = uuid.Generate()
  4081  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  4082  		alloc.TaskGroup = job.TaskGroups[0].Name
  4083  		allocs = append(allocs, alloc)
  4084  	}
  4085  
  4086  	// Create the healthy replacements
  4087  	for i := 0; i < 4; i++ {
  4088  		new := mock.Alloc()
  4089  		new.Job = job
  4090  		new.JobID = job.ID
  4091  		new.NodeID = uuid.Generate()
  4092  		new.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  4093  		new.TaskGroup = job.TaskGroups[0].Name
  4094  		new.DeploymentID = d.ID
  4095  		new.DeploymentStatus = &structs.AllocDeploymentStatus{
  4096  			Healthy: helper.BoolToPtr(true),
  4097  		}
  4098  		allocs = append(allocs, new)
  4099  	}
  4100  
  4101  	// Up the job version
  4102  	jobNew := job.Copy()
  4103  	jobNew.Version += 100
  4104  
  4105  	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, jobNew, d, allocs, nil, "")
  4106  	r := reconciler.Compute()
  4107  
  4108  	dnew := structs.NewDeployment(jobNew)
  4109  	dnew.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  4110  		DesiredTotal: 10,
  4111  	}
  4112  
  4113  	// Assert the correct results
  4114  	assertResults(t, r, &resultExpectation{
  4115  		createDeployment:  dnew,
  4116  		deploymentUpdates: nil,
  4117  		destructive:       4,
  4118  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  4119  			job.TaskGroups[0].Name: {
  4120  				DestructiveUpdate: 4,
  4121  				Ignore:            6,
  4122  			},
  4123  		},
  4124  	})
  4125  
  4126  	assertNamesHaveIndexes(t, intRange(0, 3), destructiveResultsToNames(r.destructiveUpdate))
  4127  }
  4128  
  4129  // Tests the reconciler marks a deployment as complete
  4130  func TestReconciler_MarkDeploymentComplete(t *testing.T) {
  4131  	job := mock.Job()
  4132  	job.TaskGroups[0].Update = noCanaryUpdate
  4133  
  4134  	d := structs.NewDeployment(job)
  4135  	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  4136  		Promoted:      true,
  4137  		DesiredTotal:  10,
  4138  		PlacedAllocs:  10,
  4139  		HealthyAllocs: 10,
  4140  	}
  4141  
  4142  	// Create allocations from the old job
  4143  	var allocs []*structs.Allocation
  4144  	for i := 0; i < 10; i++ {
  4145  		alloc := mock.Alloc()
  4146  		alloc.Job = job
  4147  		alloc.JobID = job.ID
  4148  		alloc.NodeID = uuid.Generate()
  4149  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  4150  		alloc.TaskGroup = job.TaskGroups[0].Name
  4151  		alloc.DeploymentID = d.ID
  4152  		alloc.DeploymentStatus = &structs.AllocDeploymentStatus{
  4153  			Healthy: helper.BoolToPtr(true),
  4154  		}
  4155  		allocs = append(allocs, alloc)
  4156  	}
  4157  
  4158  	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, d, allocs, nil, "")
  4159  	r := reconciler.Compute()
  4160  
  4161  	updates := []*structs.DeploymentStatusUpdate{
  4162  		{
  4163  			DeploymentID:      d.ID,
  4164  			Status:            structs.DeploymentStatusSuccessful,
  4165  			StatusDescription: structs.DeploymentStatusDescriptionSuccessful,
  4166  		},
  4167  	}
  4168  
  4169  	// Assert the correct results
  4170  	assertResults(t, r, &resultExpectation{
  4171  		createDeployment:  nil,
  4172  		deploymentUpdates: updates,
  4173  		place:             0,
  4174  		inplace:           0,
  4175  		stop:              0,
  4176  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  4177  			job.TaskGroups[0].Name: {
  4178  				Ignore: 10,
  4179  			},
  4180  		},
  4181  	})
  4182  }
  4183  
  4184  // Tests the reconciler handles changing a job such that a deployment is created
  4185  // while doing a scale up but as the second eval.
  4186  func TestReconciler_JobChange_ScaleUp_SecondEval(t *testing.T) {
  4187  	// Scale the job up to 15
  4188  	job := mock.Job()
  4189  	job.TaskGroups[0].Update = noCanaryUpdate
  4190  	job.TaskGroups[0].Count = 30
  4191  
  4192  	// Create a deployment that is paused and has placed some canaries
  4193  	d := structs.NewDeployment(job)
  4194  	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  4195  		Promoted:     false,
  4196  		DesiredTotal: 30,
  4197  		PlacedAllocs: 20,
  4198  	}
  4199  
  4200  	// Create 10 allocations from the old job
  4201  	var allocs []*structs.Allocation
  4202  	for i := 0; i < 10; i++ {
  4203  		alloc := mock.Alloc()
  4204  		alloc.Job = job
  4205  		alloc.JobID = job.ID
  4206  		alloc.NodeID = uuid.Generate()
  4207  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  4208  		alloc.TaskGroup = job.TaskGroups[0].Name
  4209  		allocs = append(allocs, alloc)
  4210  	}
  4211  
  4212  	// Create 20 from new job
  4213  	handled := make(map[string]allocUpdateType)
  4214  	for i := 10; i < 30; i++ {
  4215  		alloc := mock.Alloc()
  4216  		alloc.Job = job
  4217  		alloc.JobID = job.ID
  4218  		alloc.DeploymentID = d.ID
  4219  		alloc.NodeID = uuid.Generate()
  4220  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  4221  		alloc.TaskGroup = job.TaskGroups[0].Name
  4222  		allocs = append(allocs, alloc)
  4223  		handled[alloc.ID] = allocUpdateFnIgnore
  4224  	}
  4225  
  4226  	mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive)
  4227  	reconciler := NewAllocReconciler(testLogger(), mockUpdateFn, false, job.ID, job, d, allocs, nil, "")
  4228  	r := reconciler.Compute()
  4229  
  4230  	// Assert the correct results
  4231  	assertResults(t, r, &resultExpectation{
  4232  		createDeployment:  nil,
  4233  		deploymentUpdates: nil,
  4234  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  4235  			job.TaskGroups[0].Name: {
  4236  				// All should be ignored because nothing has been marked as
  4237  				// healthy.
  4238  				Ignore: 30,
  4239  			},
  4240  		},
  4241  	})
  4242  }
  4243  
  4244  // Tests the reconciler doesn't stop allocations when doing a rolling upgrade
  4245  // where the count of the old job allocs is < desired count.
  4246  func TestReconciler_RollingUpgrade_MissingAllocs(t *testing.T) {
  4247  	job := mock.Job()
  4248  	job.TaskGroups[0].Update = noCanaryUpdate
  4249  
  4250  	// Create 7 allocations from the old job
  4251  	var allocs []*structs.Allocation
  4252  	for i := 0; i < 7; i++ {
  4253  		alloc := mock.Alloc()
  4254  		alloc.Job = job
  4255  		alloc.JobID = job.ID
  4256  		alloc.NodeID = uuid.Generate()
  4257  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  4258  		alloc.TaskGroup = job.TaskGroups[0].Name
  4259  		allocs = append(allocs, alloc)
  4260  	}
  4261  
  4262  	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "")
  4263  	r := reconciler.Compute()
  4264  
  4265  	d := structs.NewDeployment(job)
  4266  	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  4267  		DesiredTotal: 10,
  4268  	}
  4269  
  4270  	// Assert the correct results
  4271  	assertResults(t, r, &resultExpectation{
  4272  		createDeployment:  d,
  4273  		deploymentUpdates: nil,
  4274  		place:             3,
  4275  		destructive:       1,
  4276  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  4277  			job.TaskGroups[0].Name: {
  4278  				Place:             3,
  4279  				DestructiveUpdate: 1,
  4280  				Ignore:            6,
  4281  			},
  4282  		},
  4283  	})
  4284  
  4285  	assertNamesHaveIndexes(t, intRange(7, 9), placeResultsToNames(r.place))
  4286  	assertNamesHaveIndexes(t, intRange(0, 0), destructiveResultsToNames(r.destructiveUpdate))
  4287  }
  4288  
  4289  // Tests that the reconciler handles rerunning a batch job in the case that the
  4290  // allocations are from an older instance of the job.
  4291  func TestReconciler_Batch_Rerun(t *testing.T) {
  4292  	job := mock.Job()
  4293  	job.Type = structs.JobTypeBatch
  4294  	job.TaskGroups[0].Update = nil
  4295  
  4296  	// Create 10 allocations from the old job and have them be complete
  4297  	var allocs []*structs.Allocation
  4298  	for i := 0; i < 10; i++ {
  4299  		alloc := mock.Alloc()
  4300  		alloc.Job = job
  4301  		alloc.JobID = job.ID
  4302  		alloc.NodeID = uuid.Generate()
  4303  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  4304  		alloc.TaskGroup = job.TaskGroups[0].Name
  4305  		alloc.ClientStatus = structs.AllocClientStatusComplete
  4306  		alloc.DesiredStatus = structs.AllocDesiredStatusStop
  4307  		allocs = append(allocs, alloc)
  4308  	}
  4309  
  4310  	// Create a copy of the job that is "new"
  4311  	job2 := job.Copy()
  4312  	job2.CreateIndex++
  4313  
  4314  	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, true, job2.ID, job2, nil, allocs, nil, "")
  4315  	r := reconciler.Compute()
  4316  
  4317  	// Assert the correct results
  4318  	assertResults(t, r, &resultExpectation{
  4319  		createDeployment:  nil,
  4320  		deploymentUpdates: nil,
  4321  		place:             10,
  4322  		destructive:       0,
  4323  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  4324  			job.TaskGroups[0].Name: {
  4325  				Place:             10,
  4326  				DestructiveUpdate: 0,
  4327  				Ignore:            10,
  4328  			},
  4329  		},
  4330  	})
  4331  
  4332  	assertNamesHaveIndexes(t, intRange(0, 9), placeResultsToNames(r.place))
  4333  }
  4334  
  4335  // Test that a failed deployment will not result in rescheduling failed allocations
  4336  func TestReconciler_FailedDeployment_DontReschedule(t *testing.T) {
  4337  	job := mock.Job()
  4338  	job.TaskGroups[0].Update = noCanaryUpdate
  4339  
  4340  	tgName := job.TaskGroups[0].Name
  4341  	now := time.Now()
  4342  	// Create an existing failed deployment that has some placed allocs
  4343  	d := structs.NewDeployment(job)
  4344  	d.Status = structs.DeploymentStatusFailed
  4345  	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  4346  		Promoted:     true,
  4347  		DesiredTotal: 5,
  4348  		PlacedAllocs: 4,
  4349  	}
  4350  
  4351  	// Create 4 allocations and mark two as failed
  4352  	var allocs []*structs.Allocation
  4353  	for i := 0; i < 4; i++ {
  4354  		alloc := mock.Alloc()
  4355  		alloc.Job = job
  4356  		alloc.JobID = job.ID
  4357  		alloc.NodeID = uuid.Generate()
  4358  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  4359  		alloc.TaskGroup = job.TaskGroups[0].Name
  4360  		alloc.DeploymentID = d.ID
  4361  		allocs = append(allocs, alloc)
  4362  	}
  4363  
  4364  	//create some allocations that are reschedulable now
  4365  	allocs[2].ClientStatus = structs.AllocClientStatusFailed
  4366  	allocs[2].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
  4367  		StartedAt:  now.Add(-1 * time.Hour),
  4368  		FinishedAt: now.Add(-10 * time.Second)}}
  4369  
  4370  	allocs[3].ClientStatus = structs.AllocClientStatusFailed
  4371  	allocs[3].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
  4372  		StartedAt:  now.Add(-1 * time.Hour),
  4373  		FinishedAt: now.Add(-10 * time.Second)}}
  4374  
  4375  	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, job, d, allocs, nil, "")
  4376  	r := reconciler.Compute()
  4377  
  4378  	// Assert that no rescheduled placements were created
  4379  	assertResults(t, r, &resultExpectation{
  4380  		place:             0,
  4381  		createDeployment:  nil,
  4382  		deploymentUpdates: nil,
  4383  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  4384  			job.TaskGroups[0].Name: {
  4385  				Ignore: 2,
  4386  			},
  4387  		},
  4388  	})
  4389  }
  4390  
  4391  // Test that a running deployment with failed allocs will not result in
  4392  // rescheduling failed allocations unless they are marked as reschedulable.
  4393  func TestReconciler_DeploymentWithFailedAllocs_DontReschedule(t *testing.T) {
  4394  	job := mock.Job()
  4395  	job.TaskGroups[0].Update = noCanaryUpdate
  4396  	tgName := job.TaskGroups[0].Name
  4397  	now := time.Now()
  4398  
  4399  	// Mock deployment with failed allocs, but deployment watcher hasn't marked it as failed yet
  4400  	d := structs.NewDeployment(job)
  4401  	d.Status = structs.DeploymentStatusRunning
  4402  	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  4403  		Promoted:     false,
  4404  		DesiredTotal: 10,
  4405  		PlacedAllocs: 10,
  4406  	}
  4407  
  4408  	// Create 10 allocations
  4409  	var allocs []*structs.Allocation
  4410  	for i := 0; i < 10; i++ {
  4411  		alloc := mock.Alloc()
  4412  		alloc.Job = job
  4413  		alloc.JobID = job.ID
  4414  		alloc.NodeID = uuid.Generate()
  4415  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  4416  		alloc.TaskGroup = job.TaskGroups[0].Name
  4417  		alloc.DeploymentID = d.ID
  4418  		alloc.ClientStatus = structs.AllocClientStatusFailed
  4419  		alloc.TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
  4420  			StartedAt:  now.Add(-1 * time.Hour),
  4421  			FinishedAt: now.Add(-10 * time.Second)}}
  4422  		allocs = append(allocs, alloc)
  4423  	}
  4424  
  4425  	// Mark half of them as reschedulable
  4426  	for i := 0; i < 5; i++ {
  4427  		allocs[i].DesiredTransition.Reschedule = helper.BoolToPtr(true)
  4428  	}
  4429  
  4430  	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, job, d, allocs, nil, "")
  4431  	r := reconciler.Compute()
  4432  
  4433  	// Assert that no rescheduled placements were created
  4434  	assertResults(t, r, &resultExpectation{
  4435  		place:             5,
  4436  		createDeployment:  nil,
  4437  		deploymentUpdates: nil,
  4438  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  4439  			job.TaskGroups[0].Name: {
  4440  				Place:  5,
  4441  				Ignore: 5,
  4442  			},
  4443  		},
  4444  	})
  4445  }
  4446  
  4447  // Test that a failed deployment cancels non-promoted canaries
  4448  func TestReconciler_FailedDeployment_AutoRevert_CancelCanaries(t *testing.T) {
  4449  	// Create a job
  4450  	job := mock.Job()
  4451  	job.TaskGroups[0].Count = 3
  4452  	job.TaskGroups[0].Update = &structs.UpdateStrategy{
  4453  		Canary:          3,
  4454  		MaxParallel:     2,
  4455  		HealthCheck:     structs.UpdateStrategyHealthCheck_Checks,
  4456  		MinHealthyTime:  10 * time.Second,
  4457  		HealthyDeadline: 10 * time.Minute,
  4458  		Stagger:         31 * time.Second,
  4459  	}
  4460  
  4461  	// Create v1 of the job
  4462  	jobv1 := job.Copy()
  4463  	jobv1.Version = 1
  4464  	jobv1.TaskGroups[0].Meta = map[string]string{"version": "1"}
  4465  
  4466  	// Create v2 of the job
  4467  	jobv2 := job.Copy()
  4468  	jobv2.Version = 2
  4469  	jobv2.TaskGroups[0].Meta = map[string]string{"version": "2"}
  4470  
  4471  	d := structs.NewDeployment(jobv2)
  4472  	state := &structs.DeploymentState{
  4473  		Promoted:      true,
  4474  		DesiredTotal:  3,
  4475  		PlacedAllocs:  3,
  4476  		HealthyAllocs: 3,
  4477  	}
  4478  	d.TaskGroups[job.TaskGroups[0].Name] = state
  4479  
  4480  	// Create the original
  4481  	var allocs []*structs.Allocation
  4482  	for i := 0; i < 3; i++ {
  4483  		new := mock.Alloc()
  4484  		new.Job = jobv2
  4485  		new.JobID = job.ID
  4486  		new.NodeID = uuid.Generate()
  4487  		new.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  4488  		new.TaskGroup = job.TaskGroups[0].Name
  4489  		new.DeploymentID = d.ID
  4490  		new.DeploymentStatus = &structs.AllocDeploymentStatus{
  4491  			Healthy: helper.BoolToPtr(true),
  4492  		}
  4493  		new.ClientStatus = structs.AllocClientStatusRunning
  4494  		allocs = append(allocs, new)
  4495  
  4496  	}
  4497  	for i := 0; i < 3; i++ {
  4498  		new := mock.Alloc()
  4499  		new.Job = jobv1
  4500  		new.JobID = jobv1.ID
  4501  		new.NodeID = uuid.Generate()
  4502  		new.Name = structs.AllocName(jobv1.ID, jobv1.TaskGroups[0].Name, uint(i))
  4503  		new.TaskGroup = job.TaskGroups[0].Name
  4504  		new.DeploymentID = uuid.Generate()
  4505  		new.DeploymentStatus = &structs.AllocDeploymentStatus{
  4506  			Healthy: helper.BoolToPtr(false),
  4507  		}
  4508  		new.DesiredStatus = structs.AllocDesiredStatusStop
  4509  		new.ClientStatus = structs.AllocClientStatusFailed
  4510  		allocs = append(allocs, new)
  4511  	}
  4512  
  4513  	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, jobv2, d, allocs, nil, "")
  4514  	r := reconciler.Compute()
  4515  
  4516  	updates := []*structs.DeploymentStatusUpdate{
  4517  		{
  4518  			DeploymentID:      d.ID,
  4519  			Status:            structs.DeploymentStatusSuccessful,
  4520  			StatusDescription: structs.DeploymentStatusDescriptionSuccessful,
  4521  		},
  4522  	}
  4523  
  4524  	// Assert the correct results
  4525  	assertResults(t, r, &resultExpectation{
  4526  		createDeployment:  nil,
  4527  		deploymentUpdates: updates,
  4528  		place:             0,
  4529  		inplace:           0,
  4530  		stop:              0,
  4531  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  4532  			job.TaskGroups[0].Name: {
  4533  				Stop:          0,
  4534  				InPlaceUpdate: 0,
  4535  				Ignore:        3,
  4536  			},
  4537  		},
  4538  	})
  4539  }
  4540  
  4541  // Test that a successful deployment with failed allocs will result in
  4542  // rescheduling failed allocations
  4543  func TestReconciler_SuccessfulDeploymentWithFailedAllocs_Reschedule(t *testing.T) {
  4544  	job := mock.Job()
  4545  	job.TaskGroups[0].Update = noCanaryUpdate
  4546  	tgName := job.TaskGroups[0].Name
  4547  	now := time.Now()
  4548  
  4549  	// Mock deployment with failed allocs, but deployment watcher hasn't marked it as failed yet
  4550  	d := structs.NewDeployment(job)
  4551  	d.Status = structs.DeploymentStatusSuccessful
  4552  	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  4553  		Promoted:     false,
  4554  		DesiredTotal: 10,
  4555  		PlacedAllocs: 10,
  4556  	}
  4557  
  4558  	// Create 10 allocations
  4559  	var allocs []*structs.Allocation
  4560  	for i := 0; i < 10; i++ {
  4561  		alloc := mock.Alloc()
  4562  		alloc.Job = job
  4563  		alloc.JobID = job.ID
  4564  		alloc.NodeID = uuid.Generate()
  4565  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  4566  		alloc.TaskGroup = job.TaskGroups[0].Name
  4567  		alloc.DeploymentID = d.ID
  4568  		alloc.ClientStatus = structs.AllocClientStatusFailed
  4569  		alloc.TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
  4570  			StartedAt:  now.Add(-1 * time.Hour),
  4571  			FinishedAt: now.Add(-10 * time.Second)}}
  4572  		allocs = append(allocs, alloc)
  4573  	}
  4574  
  4575  	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, job, d, allocs, nil, "")
  4576  	r := reconciler.Compute()
  4577  
  4578  	// Assert that rescheduled placements were created
  4579  	assertResults(t, r, &resultExpectation{
  4580  		place:             10,
  4581  		createDeployment:  nil,
  4582  		deploymentUpdates: nil,
  4583  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  4584  			job.TaskGroups[0].Name: {
  4585  				Place:  10,
  4586  				Ignore: 0,
  4587  			},
  4588  		},
  4589  	})
  4590  	assertPlaceResultsHavePreviousAllocs(t, 10, r.place)
  4591  }
  4592  
  4593  // Tests force rescheduling a failed alloc that is past its reschedule limit
  4594  func TestReconciler_ForceReschedule_Service(t *testing.T) {
  4595  	require := require.New(t)
  4596  
  4597  	// Set desired 5
  4598  	job := mock.Job()
  4599  	job.TaskGroups[0].Count = 5
  4600  	tgName := job.TaskGroups[0].Name
  4601  
  4602  	// Set up reschedule policy and update stanza
  4603  	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
  4604  		Attempts:      1,
  4605  		Interval:      24 * time.Hour,
  4606  		Delay:         5 * time.Second,
  4607  		DelayFunction: "",
  4608  		MaxDelay:      1 * time.Hour,
  4609  		Unlimited:     false,
  4610  	}
  4611  	job.TaskGroups[0].Update = noCanaryUpdate
  4612  
  4613  	// Create 5 existing allocations
  4614  	var allocs []*structs.Allocation
  4615  	for i := 0; i < 5; i++ {
  4616  		alloc := mock.Alloc()
  4617  		alloc.Job = job
  4618  		alloc.JobID = job.ID
  4619  		alloc.NodeID = uuid.Generate()
  4620  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  4621  		allocs = append(allocs, alloc)
  4622  		alloc.ClientStatus = structs.AllocClientStatusRunning
  4623  	}
  4624  
  4625  	// Mark one as failed and past its reschedule limit so not eligible to reschedule
  4626  	allocs[0].ClientStatus = structs.AllocClientStatusFailed
  4627  	allocs[0].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
  4628  		{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
  4629  			PrevAllocID: uuid.Generate(),
  4630  			PrevNodeID:  uuid.Generate(),
  4631  		},
  4632  	}}
  4633  
  4634  	// Mark DesiredTransition ForceReschedule
  4635  	allocs[0].DesiredTransition = structs.DesiredTransition{ForceReschedule: helper.BoolToPtr(true)}
  4636  
  4637  	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
  4638  	r := reconciler.Compute()
  4639  
  4640  	// Verify that no follow up evals were created
  4641  	evals := r.desiredFollowupEvals[tgName]
  4642  	require.Nil(evals)
  4643  
  4644  	// Verify that one rescheduled alloc was created because of the forced reschedule
  4645  	assertResults(t, r, &resultExpectation{
  4646  		createDeployment:  nil,
  4647  		deploymentUpdates: nil,
  4648  		place:             1,
  4649  		inplace:           0,
  4650  		stop:              0,
  4651  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  4652  			job.TaskGroups[0].Name: {
  4653  				Place:  1,
  4654  				Ignore: 4,
  4655  			},
  4656  		},
  4657  	})
  4658  
  4659  	// Rescheduled allocs should have previous allocs
  4660  	assertNamesHaveIndexes(t, intRange(0, 0), placeResultsToNames(r.place))
  4661  	assertPlaceResultsHavePreviousAllocs(t, 1, r.place)
  4662  	assertPlacementsAreRescheduled(t, 1, r.place)
  4663  }