github.com/banmanh482/nomad@v0.11.8/scheduler/reconcile_test.go

package scheduler

import (
	"fmt"
	"reflect"
	"regexp"
	"strconv"
	"testing"
	"time"

	"github.com/hashicorp/nomad/helper"
	"github.com/hashicorp/nomad/helper/testlog"
	"github.com/hashicorp/nomad/helper/uuid"
	"github.com/hashicorp/nomad/nomad/mock"
	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/kr/pretty"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
)

var (
	canaryUpdate = &structs.UpdateStrategy{
		Canary:          2,
		MaxParallel:     2,
		HealthCheck:     structs.UpdateStrategyHealthCheck_Checks,
		MinHealthyTime:  10 * time.Second,
		HealthyDeadline: 10 * time.Minute,
		Stagger:         31 * time.Second,
	}

	noCanaryUpdate = &structs.UpdateStrategy{
		MaxParallel:     4,
		HealthCheck:     structs.UpdateStrategyHealthCheck_Checks,
		MinHealthyTime:  10 * time.Second,
		HealthyDeadline: 10 * time.Minute,
		Stagger:         31 * time.Second,
	}
)
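// The allocUpdateFn* helpers below implement the reconciler's allocUpdateType
// callback. Per that signature, the returned tuple is (ignore, destructive,
// updated alloc): a true ignore leaves the existing alloc as is, a true
// destructive stops and replaces it, and a non-nil alloc applies an in-place
// update.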
func allocUpdateFnIgnore(*structs.Allocation, *structs.Job, *structs.TaskGroup) (bool, bool, *structs.Allocation) {
	return true, false, nil
}

func allocUpdateFnDestructive(*structs.Allocation, *structs.Job, *structs.TaskGroup) (bool, bool, *structs.Allocation) {
	return false, true, nil
}

func allocUpdateFnInplace(existing *structs.Allocation, _ *structs.Job, newTG *structs.TaskGroup) (bool, bool, *structs.Allocation) {
	// Create a shallow copy
	newAlloc := existing.CopySkipJob()
	newAlloc.AllocatedResources = &structs.AllocatedResources{
		Tasks: map[string]*structs.AllocatedTaskResources{},
		Shared: structs.AllocatedSharedResources{
			DiskMB: int64(newTG.EphemeralDisk.SizeMB),
		},
	}

	// Use the new task resources but keep the network from the old
	for _, task := range newTG.Tasks {
		networks := existing.AllocatedResources.Tasks[task.Name].Copy().Networks
		newAlloc.AllocatedResources.Tasks[task.Name] = &structs.AllocatedTaskResources{
			Cpu: structs.AllocatedCpuResources{
				CpuShares: int64(task.Resources.CPU),
			},
			Memory: structs.AllocatedMemoryResources{
				MemoryMB: int64(task.Resources.MemoryMB),
			},
			Networks: networks,
		}
	}

	return false, false, newAlloc
}

func allocUpdateFnMock(handled map[string]allocUpdateType, unhandled allocUpdateType) allocUpdateType {
	return func(existing *structs.Allocation, newJob *structs.Job, newTG *structs.TaskGroup) (bool, bool, *structs.Allocation) {
		if fn, ok := handled[existing.ID]; ok {
			return fn(existing, newJob, newTG)
		}

		return unhandled(existing, newJob, newTG)
	}
}
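
// A hypothetical composition sketch for allocUpdateFnMock (someAlloc is an
// assumed existing allocation): route one known alloc ID through an in-place
// update and treat every other alloc as destructive.
//
//	handled := map[string]allocUpdateType{someAlloc.ID: allocUpdateFnInplace}
//	mockFn := allocUpdateFnMock(handled, allocUpdateFnDestructive)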

var (
	// allocationIndexRegex is a regular expression to find the allocation index.
	allocationIndexRegex = regexp.MustCompile(".+\\[(\\d+)\\]$")
)

// allocNameToIndex returns the index of the allocation.
func allocNameToIndex(name string) uint {
	matches := allocationIndexRegex.FindStringSubmatch(name)
	if len(matches) != 2 {
		return 0
	}

	index, err := strconv.Atoi(matches[1])
	if err != nil {
		return 0
	}

	return uint(index)
}
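
// An illustrative sketch of allocNameToIndex, assuming Nomad's standard
// "<job>.<group>[<index>]" allocation naming scheme:
//
//	allocNameToIndex("example.cache[3]") // => 3
//	allocNameToIndex("malformed-name")   // => 0 (no index could be parsed)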

func assertNamesHaveIndexes(t *testing.T, indexes []int, names []string) {
	t.Helper()
	m := make(map[uint]int)
	for _, i := range indexes {
		m[uint(i)] += 1
	}

	for _, n := range names {
		index := allocNameToIndex(n)
		val, contained := m[index]
		if !contained {
			t.Fatalf("Unexpected index %d from name %s\nAll names: %v", index, n, names)
		}

		val--
		if val < 0 {
			t.Fatalf("Index %d repeated too many times\nAll names: %v", index, names)
		}
		m[index] = val
	}

	for k, remainder := range m {
		if remainder != 0 {
			t.Fatalf("Index %d has %d expected uses remaining\nAll names: %v", k, remainder, names)
		}
	}
}
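
// Indexes are matched with multiplicity, so a minimal passing call (with
// hypothetical names) looks like:
//
//	assertNamesHaveIndexes(t,
//		[]int{0, 0, 1},
//		[]string{"web.cache[0]", "web.cache[0]", "web.cache[1]"})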

func assertNoCanariesStopped(t *testing.T, d *structs.Deployment, stop []allocStopResult) {
	t.Helper()
	canaryIndex := make(map[string]struct{})
	for _, state := range d.TaskGroups {
		for _, c := range state.PlacedCanaries {
			canaryIndex[c] = struct{}{}
		}
	}

	for _, s := range stop {
		if _, ok := canaryIndex[s.alloc.ID]; ok {
			t.Fatalf("Stopping canary alloc %q %q", s.alloc.ID, s.alloc.Name)
		}
	}
}

func assertPlaceResultsHavePreviousAllocs(t *testing.T, numPrevious int, place []allocPlaceResult) {
	t.Helper()
	names := make(map[string]struct{}, numPrevious)

	found := 0
	for _, p := range place {
		if _, ok := names[p.name]; ok {
			t.Fatalf("Name %q already placed", p.name)
		}
		names[p.name] = struct{}{}

		if p.previousAlloc == nil {
			continue
		}

		if act := p.previousAlloc.Name; p.name != act {
			t.Fatalf("Name mismatch on previous alloc; got %q; want %q", act, p.name)
		}
		found++
	}
	if numPrevious != found {
		t.Fatalf("wanted %d; got %d placements with previous allocs", numPrevious, found)
	}
}

func assertPlacementsAreRescheduled(t *testing.T, numRescheduled int, place []allocPlaceResult) {
	t.Helper()
	names := make(map[string]struct{}, numRescheduled)

	found := 0
	for _, p := range place {
		if _, ok := names[p.name]; ok {
			t.Fatalf("Name %q already placed", p.name)
		}
		names[p.name] = struct{}{}

		if p.previousAlloc == nil {
			continue
		}
		if p.reschedule {
			found++
		}
	}
	if numRescheduled != found {
		t.Fatalf("wanted %d; got %d placements that are rescheduled", numRescheduled, found)
	}
}

func intRange(pairs ...int) []int {
	if len(pairs)%2 != 0 {
		return nil
	}

	var r []int
	for i := 0; i < len(pairs); i += 2 {
		for j := pairs[i]; j <= pairs[i+1]; j++ {
			r = append(r, j)
		}
	}
	return r
}
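
// intRange expands (start, end) pairs into an inclusive list of indexes, e.g.
//
//	intRange(0, 2)       // => []int{0, 1, 2}
//	intRange(0, 1, 7, 9) // => []int{0, 1, 7, 8, 9}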

func placeResultsToNames(place []allocPlaceResult) []string {
	names := make([]string, 0, len(place))
	for _, p := range place {
		names = append(names, p.name)
	}
	return names
}

func destructiveResultsToNames(destructive []allocDestructiveResult) []string {
	names := make([]string, 0, len(destructive))
	for _, d := range destructive {
		names = append(names, d.placeName)
	}
	return names
}

func stopResultsToNames(stop []allocStopResult) []string {
	names := make([]string, 0, len(stop))
	for _, s := range stop {
		names = append(names, s.alloc.Name)
	}
	return names
}

func attributeUpdatesToNames(attributeUpdates map[string]*structs.Allocation) []string {
	names := make([]string, 0, len(attributeUpdates))
	for _, a := range attributeUpdates {
		names = append(names, a.Name)
	}
	return names
}

func allocsToNames(allocs []*structs.Allocation) []string {
	names := make([]string, 0, len(allocs))
	for _, a := range allocs {
		names = append(names, a.Name)
	}
	return names
}

type resultExpectation struct {
	createDeployment  *structs.Deployment
	deploymentUpdates []*structs.DeploymentStatusUpdate
	place             int
	destructive       int
	inplace           int
	attributeUpdates  int
	stop              int
	desiredTGUpdates  map[string]*structs.DesiredUpdates
}

func assertResults(t *testing.T, r *reconcileResults, exp *resultExpectation) {
	t.Helper()
	assert := assert.New(t)

	if exp.createDeployment != nil && r.deployment == nil {
		t.Errorf("Expect a created deployment; got none")
	} else if exp.createDeployment == nil && r.deployment != nil {
		t.Errorf("Expect no created deployment; got %#v", r.deployment)
	} else if exp.createDeployment != nil && r.deployment != nil {
		// Clear the deployment ID
		r.deployment.ID, exp.createDeployment.ID = "", ""
		if !reflect.DeepEqual(r.deployment, exp.createDeployment) {
			t.Errorf("Unexpected createdDeployment; got\n %#v\nwant\n%#v\nDiff: %v",
				r.deployment, exp.createDeployment, pretty.Diff(r.deployment, exp.createDeployment))
		}
	}

	assert.EqualValues(exp.deploymentUpdates, r.deploymentUpdates, "Expected Deployment Updates")
	assert.Len(r.place, exp.place, "Expected Placements")
	assert.Len(r.destructiveUpdate, exp.destructive, "Expected Destructive")
	assert.Len(r.inplaceUpdate, exp.inplace, "Expected Inplace Updates")
	assert.Len(r.attributeUpdates, exp.attributeUpdates, "Expected Attribute Updates")
	assert.Len(r.stop, exp.stop, "Expected Stops")
	assert.EqualValues(exp.desiredTGUpdates, r.desiredTGUpdates, "Expected Desired TG Update Annotations")
}
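
// Note on the NewAllocReconciler calls used throughout these tests: after the
// logger, the arguments are the alloc update callback, a flag marking the job
// as a batch job, the job ID, the job itself, the current deployment (if any),
// the existing allocations, the map of tainted nodes, and the triggering eval
// ID.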

// Tests the reconciler properly handles placements for a job that has no
// existing allocations
func TestReconciler_Place_NoExisting(t *testing.T) {
	job := mock.Job()
	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, nil, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             10,
		inplace:           0,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place: 10,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 9), placeResultsToNames(r.place))
}

// Tests the reconciler properly handles placements for a job that has some
// existing allocations
func TestReconciler_Place_Existing(t *testing.T) {
	job := mock.Job()

	// Create 5 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 5; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             5,
		inplace:           0,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:  5,
				Ignore: 5,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(5, 9), placeResultsToNames(r.place))
}

// Tests the reconciler properly handles stopping allocations for a job that has
// scaled down
func TestReconciler_ScaleDown_Partial(t *testing.T) {
	// Has desired 10
	job := mock.Job()

	// Create 20 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 20; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             0,
		inplace:           0,
		stop:              10,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Ignore: 10,
				Stop:   10,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(10, 19), stopResultsToNames(r.stop))
}

// Tests the reconciler properly handles stopping allocations for a job that has
// scaled down to zero desired
func TestReconciler_ScaleDown_Zero(t *testing.T) {
	// Set desired 0
	job := mock.Job()
	job.TaskGroups[0].Count = 0

	// Create 20 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 20; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             0,
		inplace:           0,
		stop:              20,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Stop: 20,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 19), stopResultsToNames(r.stop))
}

// Tests the reconciler properly handles stopping allocations for a job that has
// scaled down to zero desired where allocs have duplicate names
func TestReconciler_ScaleDown_Zero_DuplicateNames(t *testing.T) {
	// Set desired 0
	job := mock.Job()
	job.TaskGroups[0].Count = 0

	// Create 20 existing allocations
	var allocs []*structs.Allocation
	var expectedStopped []int
	for i := 0; i < 20; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i%2))
		allocs = append(allocs, alloc)
		expectedStopped = append(expectedStopped, i%2)
	}

	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             0,
		inplace:           0,
		stop:              20,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Stop: 20,
			},
		},
	})

	assertNamesHaveIndexes(t, expectedStopped, stopResultsToNames(r.stop))
}

// Tests the reconciler properly handles inplace upgrading allocations
func TestReconciler_Inplace(t *testing.T) {
	job := mock.Job()

	// Create 10 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnInplace, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             0,
		inplace:           10,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				InPlaceUpdate: 10,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 9), allocsToNames(r.inplaceUpdate))
}

// Tests the reconciler properly handles inplace upgrading allocations while
// scaling up
func TestReconciler_Inplace_ScaleUp(t *testing.T) {
	// Set desired 15
	job := mock.Job()
	job.TaskGroups[0].Count = 15

	// Create 10 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnInplace, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             5,
		inplace:           10,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:         5,
				InPlaceUpdate: 10,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 9), allocsToNames(r.inplaceUpdate))
	assertNamesHaveIndexes(t, intRange(10, 14), placeResultsToNames(r.place))
}

// Tests the reconciler properly handles inplace upgrading allocations while
// scaling down
func TestReconciler_Inplace_ScaleDown(t *testing.T) {
	// Set desired 5
	job := mock.Job()
	job.TaskGroups[0].Count = 5

	// Create 10 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnInplace, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             0,
		inplace:           5,
		stop:              5,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Stop:          5,
				InPlaceUpdate: 5,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 4), allocsToNames(r.inplaceUpdate))
	assertNamesHaveIndexes(t, intRange(5, 9), stopResultsToNames(r.stop))
}

// Tests the reconciler properly handles destructive upgrading allocations
func TestReconciler_Destructive(t *testing.T) {
	job := mock.Job()

	// Create 10 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		destructive:       10,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				DestructiveUpdate: 10,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 9), destructiveResultsToNames(r.destructiveUpdate))
}

// Tests the reconciler properly handles destructive upgrading allocations when max_parallel=0
func TestReconciler_DestructiveMaxParallel(t *testing.T) {
	job := mock.MaxParallelJob()

	// Create 10 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		destructive:       10,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				DestructiveUpdate: 10,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 9), destructiveResultsToNames(r.destructiveUpdate))
}

// Tests the reconciler properly handles destructive upgrading allocations while
// scaling up
func TestReconciler_Destructive_ScaleUp(t *testing.T) {
	// Set desired 15
	job := mock.Job()
	job.TaskGroups[0].Count = 15

	// Create 10 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             5,
		destructive:       10,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:             5,
				DestructiveUpdate: 10,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 9), destructiveResultsToNames(r.destructiveUpdate))
	assertNamesHaveIndexes(t, intRange(10, 14), placeResultsToNames(r.place))
}

// Tests the reconciler properly handles destructive upgrading allocations while
// scaling down
func TestReconciler_Destructive_ScaleDown(t *testing.T) {
	// Set desired 5
	job := mock.Job()
	job.TaskGroups[0].Count = 5

	// Create 10 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		destructive:       5,
		stop:              5,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Stop:              5,
				DestructiveUpdate: 5,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(5, 9), stopResultsToNames(r.stop))
	assertNamesHaveIndexes(t, intRange(0, 4), destructiveResultsToNames(r.destructiveUpdate))
}

// Tests the reconciler properly handles lost nodes with allocations
func TestReconciler_LostNode(t *testing.T) {
	job := mock.Job()

	// Create 10 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	// Build a map of tainted nodes
	tainted := make(map[string]*structs.Node, 2)
	for i := 0; i < 2; i++ {
		n := mock.Node()
		n.ID = allocs[i].NodeID
		n.Status = structs.NodeStatusDown
		tainted[n.ID] = n
	}

	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, tainted, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             2,
		inplace:           0,
		stop:              2,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:  2,
				Stop:   2,
				Ignore: 8,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop))
	assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place))
}

// Tests the reconciler properly handles lost nodes with allocations while
// scaling up
func TestReconciler_LostNode_ScaleUp(t *testing.T) {
	// Set desired 15
	job := mock.Job()
	job.TaskGroups[0].Count = 15

	// Create 10 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	// Build a map of tainted nodes
	tainted := make(map[string]*structs.Node, 2)
	for i := 0; i < 2; i++ {
		n := mock.Node()
		n.ID = allocs[i].NodeID
		n.Status = structs.NodeStatusDown
		tainted[n.ID] = n
	}

	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, tainted, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             7,
		inplace:           0,
		stop:              2,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:  7,
				Stop:   2,
				Ignore: 8,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop))
	assertNamesHaveIndexes(t, intRange(0, 1, 10, 14), placeResultsToNames(r.place))
}

// Tests the reconciler properly handles lost nodes with allocations while
// scaling down
func TestReconciler_LostNode_ScaleDown(t *testing.T) {
	// Set desired 5
	job := mock.Job()
	job.TaskGroups[0].Count = 5

	// Create 10 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	// Build a map of tainted nodes
	tainted := make(map[string]*structs.Node, 2)
	for i := 0; i < 2; i++ {
		n := mock.Node()
		n.ID = allocs[i].NodeID
		n.Status = structs.NodeStatusDown
		tainted[n.ID] = n
	}

	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, tainted, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             0,
		inplace:           0,
		stop:              5,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Stop:   5,
				Ignore: 5,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 1, 7, 9), stopResultsToNames(r.stop))
}

// Tests the reconciler properly handles draining nodes with allocations
func TestReconciler_DrainNode(t *testing.T) {
	job := mock.Job()

	// Create 10 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	// Build a map of tainted nodes
	tainted := make(map[string]*structs.Node, 2)
	for i := 0; i < 2; i++ {
		n := mock.Node()
		n.ID = allocs[i].NodeID
		allocs[i].DesiredTransition.Migrate = helper.BoolToPtr(true)
		n.Drain = true
		tainted[n.ID] = n
	}

	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, tainted, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             2,
		inplace:           0,
		stop:              2,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Migrate: 2,
				Ignore:  8,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop))
	assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place))
	assertPlaceResultsHavePreviousAllocs(t, 2, r.place)
	// These should not have the reschedule field set
	assertPlacementsAreRescheduled(t, 0, r.place)
}

// Tests the reconciler properly handles draining nodes with allocations while
// scaling up
func TestReconciler_DrainNode_ScaleUp(t *testing.T) {
	// Set desired 15
	job := mock.Job()
	job.TaskGroups[0].Count = 15

	// Create 10 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	// Build a map of tainted nodes
	tainted := make(map[string]*structs.Node, 2)
	for i := 0; i < 2; i++ {
		n := mock.Node()
		n.ID = allocs[i].NodeID
		allocs[i].DesiredTransition.Migrate = helper.BoolToPtr(true)
		n.Drain = true
		tainted[n.ID] = n
	}

	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, tainted, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             7,
		inplace:           0,
		stop:              2,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:   5,
				Migrate: 2,
				Ignore:  8,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop))
	assertNamesHaveIndexes(t, intRange(0, 1, 10, 14), placeResultsToNames(r.place))
	assertPlaceResultsHavePreviousAllocs(t, 2, r.place)
	// These should not have the reschedule field set
	assertPlacementsAreRescheduled(t, 0, r.place)
}

// Tests the reconciler properly handles draining nodes with allocations while
// scaling down
func TestReconciler_DrainNode_ScaleDown(t *testing.T) {
	// Set desired 8
	job := mock.Job()
	job.TaskGroups[0].Count = 8

	// Create 10 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	// Build a map of tainted nodes
	tainted := make(map[string]*structs.Node, 3)
	for i := 0; i < 3; i++ {
		n := mock.Node()
		n.ID = allocs[i].NodeID
		allocs[i].DesiredTransition.Migrate = helper.BoolToPtr(true)
		n.Drain = true
		tainted[n.ID] = n
	}

	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, tainted, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             1,
		inplace:           0,
		stop:              3,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Migrate: 1,
				Stop:    2,
				Ignore:  7,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 2), stopResultsToNames(r.stop))
	assertNamesHaveIndexes(t, intRange(0, 0), placeResultsToNames(r.place))
	assertPlaceResultsHavePreviousAllocs(t, 1, r.place)
	// These should not have the reschedule field set
	assertPlacementsAreRescheduled(t, 0, r.place)
}

// Tests the reconciler properly handles a task group being removed
func TestReconciler_RemovedTG(t *testing.T) {
	job := mock.Job()

	// Create 10 allocations for a tg that no longer exists
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	oldName := job.TaskGroups[0].Name
	newName := "different"
	job.TaskGroups[0].Name = newName

	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             10,
		inplace:           0,
		stop:              10,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			oldName: {
				Stop: 10,
			},
			newName: {
				Place: 10,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 9), stopResultsToNames(r.stop))
	assertNamesHaveIndexes(t, intRange(0, 9), placeResultsToNames(r.place))
}

// Tests the reconciler properly handles a job that is stopped or nil
func TestReconciler_JobStopped(t *testing.T) {
	job := mock.Job()
	job.Stop = true

	cases := []struct {
		name             string
		job              *structs.Job
		jobID, taskGroup string
	}{
		{
			name:      "stopped job",
			job:       job,
			jobID:     job.ID,
			taskGroup: job.TaskGroups[0].Name,
		},
		{
			name:      "nil job",
			job:       nil,
			jobID:     "foo",
			taskGroup: "bar",
		},
	}

	for _, c := range cases {
		t.Run(c.name, func(t *testing.T) {
			// Create 10 allocations
			var allocs []*structs.Allocation
			for i := 0; i < 10; i++ {
				alloc := mock.Alloc()
				alloc.Job = c.job
				alloc.JobID = c.jobID
				alloc.NodeID = uuid.Generate()
				alloc.Name = structs.AllocName(c.jobID, c.taskGroup, uint(i))
				alloc.TaskGroup = c.taskGroup
				allocs = append(allocs, alloc)
			}

			reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, c.jobID, c.job, nil, allocs, nil, "")
			r := reconciler.Compute()

			// Assert the correct results
			assertResults(t, r, &resultExpectation{
				createDeployment:  nil,
				deploymentUpdates: nil,
				place:             0,
				inplace:           0,
				stop:              10,
				desiredTGUpdates: map[string]*structs.DesiredUpdates{
					c.taskGroup: {
						Stop: 10,
					},
				},
			})

			assertNamesHaveIndexes(t, intRange(0, 9), stopResultsToNames(r.stop))
		})
	}
}

// Tests the reconciler doesn't update allocs in terminal state
// when job is stopped or nil
func TestReconciler_JobStopped_TerminalAllocs(t *testing.T) {
	job := mock.Job()
	job.Stop = true

	cases := []struct {
		name             string
		job              *structs.Job
		jobID, taskGroup string
	}{
		{
			name:      "stopped job",
			job:       job,
			jobID:     job.ID,
			taskGroup: job.TaskGroups[0].Name,
		},
		{
			name:      "nil job",
			job:       nil,
			jobID:     "foo",
			taskGroup: "bar",
		},
	}

	for _, c := range cases {
		t.Run(c.name, func(t *testing.T) {
			// Create 10 terminal allocations
			var allocs []*structs.Allocation
			for i := 0; i < 10; i++ {
				alloc := mock.Alloc()
				alloc.Job = c.job
				alloc.JobID = c.jobID
				alloc.NodeID = uuid.Generate()
				alloc.Name = structs.AllocName(c.jobID, c.taskGroup, uint(i))
				alloc.TaskGroup = c.taskGroup
				if i%2 == 0 {
					alloc.DesiredStatus = structs.AllocDesiredStatusStop
				} else {
					alloc.ClientStatus = structs.AllocClientStatusFailed
				}
				allocs = append(allocs, alloc)
			}

			reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, c.jobID, c.job, nil, allocs, nil, "")
			r := reconciler.Compute()
			require.Len(t, r.stop, 0)
			// Assert the correct results
			assertResults(t, r, &resultExpectation{
				createDeployment:  nil,
				deploymentUpdates: nil,
				place:             0,
				inplace:           0,
				stop:              0,
				desiredTGUpdates: map[string]*structs.DesiredUpdates{
					c.taskGroup: {},
				},
			})
		})
	}
}

// Tests the reconciler properly handles jobs with multiple task groups
func TestReconciler_MultiTG(t *testing.T) {
	job := mock.Job()
	tg2 := job.TaskGroups[0].Copy()
	tg2.Name = "foo"
	job.TaskGroups = append(job.TaskGroups, tg2)

	// Create 2 existing allocations for the first tg
	var allocs []*structs.Allocation
	for i := 0; i < 2; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             18,
		inplace:           0,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:  8,
				Ignore: 2,
			},
			tg2.Name: {
				Place: 10,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(2, 9, 0, 9), placeResultsToNames(r.place))
}

// Tests the reconciler properly handles jobs with multiple task groups with
// only one having an update stanza and a deployment already being created
func TestReconciler_MultiTG_SingleUpdateStanza(t *testing.T) {
	job := mock.Job()
	tg2 := job.TaskGroups[0].Copy()
	tg2.Name = "foo"
	job.TaskGroups = append(job.TaskGroups, tg2)
	job.TaskGroups[0].Update = noCanaryUpdate

	// Create all the allocs
	var allocs []*structs.Allocation
	for i := 0; i < 2; i++ {
		for j := 0; j < 10; j++ {
			alloc := mock.Alloc()
			alloc.Job = job
			alloc.JobID = job.ID
			alloc.NodeID = uuid.Generate()
			alloc.Name = structs.AllocName(job.ID, job.TaskGroups[i].Name, uint(j))
			alloc.TaskGroup = job.TaskGroups[i].Name
			allocs = append(allocs, alloc)
		}
	}

	d := structs.NewDeployment(job)
	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
		DesiredTotal: 10,
	}

	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, d, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             0,
		inplace:           0,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Ignore: 10,
			},
			tg2.Name: {
				Ignore: 10,
			},
		},
	})
}

// Tests delayed rescheduling of failed batch allocations
func TestReconciler_RescheduleLater_Batch(t *testing.T) {
	require := require.New(t)

	// Set desired 4
	job := mock.Job()
	job.TaskGroups[0].Count = 4
	now := time.Now()

	// Set up reschedule policy
	delayDur := 15 * time.Second
	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{Attempts: 3, Interval: 24 * time.Hour, Delay: delayDur, DelayFunction: "constant"}
	tgName := job.TaskGroups[0].Name

	// Create 6 existing allocations - 2 running, 1 complete and 3 failed
	var allocs []*structs.Allocation
	for i := 0; i < 6; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
		alloc.ClientStatus = structs.AllocClientStatusRunning
	}

	// Mark 3 as failed with restart tracking info
	allocs[0].ClientStatus = structs.AllocClientStatusFailed
	allocs[0].NextAllocation = allocs[1].ID
	allocs[1].ClientStatus = structs.AllocClientStatusFailed
	allocs[1].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
		{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
			PrevAllocID: allocs[0].ID,
			PrevNodeID:  uuid.Generate(),
		},
	}}
	allocs[1].NextAllocation = allocs[2].ID
	allocs[2].ClientStatus = structs.AllocClientStatusFailed
	allocs[2].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
		StartedAt:  now.Add(-1 * time.Hour),
		FinishedAt: now}}
	allocs[2].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
		{RescheduleTime: time.Now().Add(-2 * time.Hour).UTC().UnixNano(),
			PrevAllocID: allocs[0].ID,
			PrevNodeID:  uuid.Generate(),
		},
		{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
			PrevAllocID: allocs[1].ID,
			PrevNodeID:  uuid.Generate(),
		},
	}}

	// Mark one as complete
	allocs[5].ClientStatus = structs.AllocClientStatusComplete

	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, true, job.ID, job, nil, allocs, nil, uuid.Generate())
	r := reconciler.Compute()

	// Two reschedule attempts were already made, one more can be made at a future time
	// Verify that the follow up eval has the expected waitUntil time
	evals := r.desiredFollowupEvals[tgName]
	require.NotNil(evals)
	require.Equal(1, len(evals))
	require.Equal(now.Add(delayDur), evals[0].WaitUntil)

	// Alloc 5 should not be replaced because it is terminal
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             0,
		inplace:           0,
		attributeUpdates:  1,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:         0,
				InPlaceUpdate: 0,
				Ignore:        4,
				Stop:          0,
			},
		},
	})
	assertNamesHaveIndexes(t, intRange(2, 2), attributeUpdatesToNames(r.attributeUpdates))

	// Verify that the followup evalID field is set correctly
	var annotated *structs.Allocation
	for _, a := range r.attributeUpdates {
		annotated = a
	}
	require.Equal(evals[0].ID, annotated.FollowupEvalID)
}

// Tests delayed rescheduling of failed batch allocations and batching of allocs
// with fail times that are close together
func TestReconciler_RescheduleLaterWithBatchedEvals_Batch(t *testing.T) {
	require := require.New(t)

	// Set desired 10
	job := mock.Job()
	job.TaskGroups[0].Count = 10
	now := time.Now()

	// Set up reschedule policy
	delayDur := 15 * time.Second
	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{Attempts: 3, Interval: 24 * time.Hour, Delay: delayDur, DelayFunction: "constant"}
	tgName := job.TaskGroups[0].Name

	// Create 10 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
		alloc.ClientStatus = structs.AllocClientStatusRunning
	}

	// Mark 5 as failed with fail times very close together
	for i := 0; i < 5; i++ {
		allocs[i].ClientStatus = structs.AllocClientStatusFailed
		allocs[i].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
			StartedAt:  now.Add(-1 * time.Hour),
			FinishedAt: now.Add(time.Duration(50*i) * time.Millisecond)}}
	}

	// Mark two more as failed several seconds later
	for i := 5; i < 7; i++ {
		allocs[i].ClientStatus = structs.AllocClientStatusFailed
		allocs[i].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
			StartedAt:  now.Add(-1 * time.Hour),
			FinishedAt: now.Add(10 * time.Second)}}
	}

	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, true, job.ID, job, nil, allocs, nil, uuid.Generate())
	r := reconciler.Compute()

	// Verify that two follow up evals were created
	evals := r.desiredFollowupEvals[tgName]
	require.NotNil(evals)
	require.Equal(2, len(evals))

	// Verify expected WaitUntil values for both batched evals
	require.Equal(now.Add(delayDur), evals[0].WaitUntil)
	secondBatchDuration := delayDur + 10*time.Second
	require.Equal(now.Add(secondBatchDuration), evals[1].WaitUntil)

	// The failed allocs are not replaced yet; they are only annotated with follow-up evals
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             0,
		inplace:           0,
		attributeUpdates:  7,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:         0,
				InPlaceUpdate: 0,
				Ignore:        10,
				Stop:          0,
			},
		},
	})
	assertNamesHaveIndexes(t, intRange(0, 6), attributeUpdatesToNames(r.attributeUpdates))

	// Verify that the followup evalID field is set correctly
	for _, alloc := range r.attributeUpdates {
		if allocNameToIndex(alloc.Name) < 5 {
			require.Equal(evals[0].ID, alloc.FollowupEvalID)
		} else if allocNameToIndex(alloc.Name) < 7 {
			require.Equal(evals[1].ID, alloc.FollowupEvalID)
		} else {
			t.Fatalf("Unexpected alloc name in Inplace results %v", alloc.Name)
		}
	}
}
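
// Note: the reconciler batches follow-up evals for failed allocs whose
// reschedule times land close together (a small batching window; see
// batchedFailedAllocWindowSize in reconcile.go), which is why the five closely
// spaced failures above share one eval while the two later failures get a
// second one.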

// Tests rescheduling failed batch allocations
func TestReconciler_RescheduleNow_Batch(t *testing.T) {
	require := require.New(t)
	// Set desired 4
	job := mock.Job()
	job.TaskGroups[0].Count = 4
	now := time.Now()
	// Set up reschedule policy
	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{Attempts: 3, Interval: 24 * time.Hour, Delay: 5 * time.Second, DelayFunction: "constant"}
	tgName := job.TaskGroups[0].Name
	// Create 6 existing allocations - 2 running, 1 complete and 3 failed
	var allocs []*structs.Allocation
	for i := 0; i < 6; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
		alloc.ClientStatus = structs.AllocClientStatusRunning
	}
	// Mark 3 as failed with restart tracking info
	allocs[0].ClientStatus = structs.AllocClientStatusFailed
	allocs[0].NextAllocation = allocs[1].ID
	allocs[1].ClientStatus = structs.AllocClientStatusFailed
	allocs[1].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
		{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
			PrevAllocID: allocs[0].ID,
			PrevNodeID:  uuid.Generate(),
		},
	}}
	allocs[1].NextAllocation = allocs[2].ID
	allocs[2].ClientStatus = structs.AllocClientStatusFailed
	allocs[2].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
		StartedAt:  now.Add(-1 * time.Hour),
		FinishedAt: now.Add(-5 * time.Second)}}
	allocs[2].FollowupEvalID = uuid.Generate()
	allocs[2].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
		{RescheduleTime: time.Now().Add(-2 * time.Hour).UTC().UnixNano(),
			PrevAllocID: allocs[0].ID,
			PrevNodeID:  uuid.Generate(),
		},
		{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
			PrevAllocID: allocs[1].ID,
			PrevNodeID:  uuid.Generate(),
		},
	}}
	// Mark one as complete
	allocs[5].ClientStatus = structs.AllocClientStatusComplete

	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, true, job.ID, job, nil, allocs, nil, "")
	reconciler.now = now
	r := reconciler.Compute()

	// Verify that no follow up evals were created
	evals := r.desiredFollowupEvals[tgName]
	require.Nil(evals)

	// Two reschedule attempts were made, one more can be made now
	// Alloc 5 should not be replaced because it is terminal
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             1,
		stop:              1,
		inplace:           0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:  1,
				Stop:   1,
				Ignore: 3,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(2, 2), placeResultsToNames(r.place))
	assertPlaceResultsHavePreviousAllocs(t, 1, r.place)
	assertPlacementsAreRescheduled(t, 1, r.place)
}

  1544  // Tests delayed rescheduling of failed service allocations, one of which has desired state stop
  1545  func TestReconciler_RescheduleLater_Service(t *testing.T) {
  1546  	require := require.New(t)
  1547  
  1548  	// Set desired 5
  1549  	job := mock.Job()
  1550  	job.TaskGroups[0].Count = 5
  1551  	tgName := job.TaskGroups[0].Name
  1552  	now := time.Now()
  1553  
  1554  	// Set up reschedule policy
  1555  	delayDur := 15 * time.Second
  1556  	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{Attempts: 1, Interval: 24 * time.Hour, Delay: delayDur, MaxDelay: 1 * time.Hour}
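        	// The reconciler should emit a follow-up eval whose WaitUntil is the
        	// failure time plus delayDur; this is asserted below.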
  1557  
  1558  	// Create 5 existing allocations
  1559  	var allocs []*structs.Allocation
  1560  	for i := 0; i < 5; i++ {
  1561  		alloc := mock.Alloc()
  1562  		alloc.Job = job
  1563  		alloc.JobID = job.ID
  1564  		alloc.NodeID = uuid.Generate()
  1565  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  1566  		allocs = append(allocs, alloc)
  1567  		alloc.ClientStatus = structs.AllocClientStatusRunning
  1568  	}
  1569  
  1570  	// Mark two as failed
  1571  	allocs[0].ClientStatus = structs.AllocClientStatusFailed
  1572  
  1573  	// Mark one of them as already rescheduled once
  1574  	allocs[0].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
  1575  		{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
  1576  			PrevAllocID: uuid.Generate(),
  1577  			PrevNodeID:  uuid.Generate(),
  1578  		},
  1579  	}}
  1580  	allocs[1].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
  1581  		StartedAt:  now.Add(-1 * time.Hour),
  1582  		FinishedAt: now}}
  1583  	allocs[1].ClientStatus = structs.AllocClientStatusFailed
  1584  
  1585  	// Mark one as desired state stop
  1586  	allocs[4].DesiredStatus = structs.AllocDesiredStatusStop
  1587  
  1588  	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, uuid.Generate())
  1589  	r := reconciler.Compute()
  1590  
  1591  	// Should place a new placement and create a follow up eval for the delayed reschedule
  1592  	// Verify that the follow up eval has the expected waitUntil time
  1593  	evals := r.desiredFollowupEvals[tgName]
  1594  	require.NotNil(evals)
  1595  	require.Equal(1, len(evals))
  1596  	require.Equal(now.Add(delayDur), evals[0].WaitUntil)
  1597  
  1598  	assertResults(t, r, &resultExpectation{
  1599  		createDeployment:  nil,
  1600  		deploymentUpdates: nil,
  1601  		place:             1,
  1602  		inplace:           0,
  1603  		attributeUpdates:  1,
  1604  		stop:              0,
  1605  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  1606  			job.TaskGroups[0].Name: {
  1607  				Place:         1,
  1608  				InPlaceUpdate: 0,
  1609  				Ignore:        4,
  1610  				Stop:          0,
  1611  			},
  1612  		},
  1613  	})
  1614  
  1615  	assertNamesHaveIndexes(t, intRange(4, 4), placeResultsToNames(r.place))
  1616  	assertNamesHaveIndexes(t, intRange(1, 1), attributeUpdatesToNames(r.attributeUpdates))
  1617  
  1618  	// Verify that the followup evalID field is set correctly
  1619  	var annotated *structs.Allocation
  1620  	for _, a := range r.attributeUpdates {
  1621  		annotated = a
  1622  	}
  1623  	require.Equal(evals[0].ID, annotated.FollowupEvalID)
  1624  }
  1625  
  1626  // Tests service allocations with client status complete
  1627  func TestReconciler_Service_ClientStatusComplete(t *testing.T) {
  1628  	// Set desired 5
  1629  	job := mock.Job()
  1630  	job.TaskGroups[0].Count = 5
  1631  
  1632  	// Set up reschedule policy
  1633  	delayDur := 15 * time.Second
  1634  	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
  1635  		Attempts: 1,
  1636  		Interval: 24 * time.Hour,
  1637  		Delay:    delayDur,
  1638  		MaxDelay: 1 * time.Hour,
  1639  	}
  1640  
  1641  	// Create 5 existing allocations
  1642  	var allocs []*structs.Allocation
  1643  	for i := 0; i < 5; i++ {
  1644  		alloc := mock.Alloc()
  1645  		alloc.Job = job
  1646  		alloc.JobID = job.ID
  1647  		alloc.NodeID = uuid.Generate()
  1648  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  1649  		allocs = append(allocs, alloc)
  1650  		alloc.ClientStatus = structs.AllocClientStatusRunning
  1651  		alloc.DesiredStatus = structs.AllocDesiredStatusRun
  1652  	}
  1653  
  1654  	// Mark one as client status complete
  1655  	allocs[4].ClientStatus = structs.AllocClientStatusComplete
  1656  
  1657  	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
  1658  	r := reconciler.Compute()
  1659  
  1660  	// Should place a new placement for the alloc that was marked complete
  1661  	assertResults(t, r, &resultExpectation{
  1662  		createDeployment:  nil,
  1663  		deploymentUpdates: nil,
  1664  		place:             1,
  1665  		inplace:           0,
  1666  		stop:              0,
  1667  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  1668  			job.TaskGroups[0].Name: {
  1669  				Place:         1,
  1670  				InPlaceUpdate: 0,
  1671  				Ignore:        4,
  1672  			},
  1673  		},
  1674  	})
  1675  
  1676  	assertNamesHaveIndexes(t, intRange(4, 4), placeResultsToNames(r.place))
  1677  
  1678  }
  1679  
  1680  // Tests service job placement with desired stop and client status complete
  1681  func TestReconciler_Service_DesiredStop_ClientStatusComplete(t *testing.T) {
  1682  	// Set desired 5
  1683  	job := mock.Job()
  1684  	job.TaskGroups[0].Count = 5
  1685  
  1686  	// Set up reschedule policy
  1687  	delayDur := 15 * time.Second
  1688  	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
  1689  		Attempts: 1,
  1690  		Interval: 24 * time.Hour,
  1691  		Delay:    delayDur,
  1692  		MaxDelay: 1 * time.Hour,
  1693  	}
  1694  
  1695  	// Create 5 existing allocations
  1696  	var allocs []*structs.Allocation
  1697  	for i := 0; i < 5; i++ {
  1698  		alloc := mock.Alloc()
  1699  		alloc.Job = job
  1700  		alloc.JobID = job.ID
  1701  		alloc.NodeID = uuid.Generate()
  1702  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  1703  		allocs = append(allocs, alloc)
  1704  		alloc.ClientStatus = structs.AllocClientStatusRunning
  1705  		alloc.DesiredStatus = structs.AllocDesiredStatusRun
  1706  	}
  1707  
  1708  	// Mark one as failed but with desired status stop
  1709  	// Should not trigger rescheduling logic but should trigger a placement
  1710  	allocs[4].ClientStatus = structs.AllocClientStatusFailed
  1711  	allocs[4].DesiredStatus = structs.AllocDesiredStatusStop
  1712  
  1713  	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
  1714  	r := reconciler.Compute()
  1715  
  1716  	// Should place a new placement for the alloc that was marked stopped
  1717  	assertResults(t, r, &resultExpectation{
  1718  		createDeployment:  nil,
  1719  		deploymentUpdates: nil,
  1720  		place:             1,
  1721  		inplace:           0,
  1722  		stop:              0,
  1723  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  1724  			job.TaskGroups[0].Name: {
  1725  				Place:         1,
  1726  				InPlaceUpdate: 0,
  1727  				Ignore:        4,
  1728  			},
  1729  		},
  1730  	})
  1731  
  1732  	assertNamesHaveIndexes(t, intRange(4, 4), placeResultsToNames(r.place))
  1733  
  1734  	// Should not have any follow up evals created
  1735  	require := require.New(t)
  1736  	require.Equal(0, len(r.desiredFollowupEvals))
  1737  }
  1738  
  1739  // Tests immediate rescheduling of failed service allocations, one of which has desired state stop
  1740  func TestReconciler_RescheduleNow_Service(t *testing.T) {
  1741  	require := require.New(t)
  1742  
  1743  	// Set desired 5
  1744  	job := mock.Job()
  1745  	job.TaskGroups[0].Count = 5
  1746  	tgName := job.TaskGroups[0].Name
  1747  	now := time.Now()
  1748  
  1749  	// Set up reschedule policy and update stanza
  1750  	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
  1751  		Attempts:      1,
  1752  		Interval:      24 * time.Hour,
  1753  		Delay:         5 * time.Second,
  1754  		DelayFunction: "",
  1755  		MaxDelay:      1 * time.Hour,
  1756  		Unlimited:     false,
  1757  	}
  1758  	job.TaskGroups[0].Update = noCanaryUpdate
  1759  
  1760  	// Create 5 existing allocations
  1761  	var allocs []*structs.Allocation
  1762  	for i := 0; i < 5; i++ {
  1763  		alloc := mock.Alloc()
  1764  		alloc.Job = job
  1765  		alloc.JobID = job.ID
  1766  		alloc.NodeID = uuid.Generate()
  1767  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  1768  		allocs = append(allocs, alloc)
  1769  		alloc.ClientStatus = structs.AllocClientStatusRunning
  1770  	}
  1771  
  1772  	// Mark two as failed
  1773  	allocs[0].ClientStatus = structs.AllocClientStatusFailed
  1774  
  1775  	// Mark one of them as already rescheduled once
  1776  	allocs[0].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
  1777  		{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
  1778  			PrevAllocID: uuid.Generate(),
  1779  			PrevNodeID:  uuid.Generate(),
  1780  		},
  1781  	}}
  1782  	allocs[1].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
  1783  		StartedAt:  now.Add(-1 * time.Hour),
  1784  		FinishedAt: now.Add(-10 * time.Second)}}
  1785  	allocs[1].ClientStatus = structs.AllocClientStatusFailed
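        	// allocs[1] finished 10s ago, longer than the 5s delay, so it is
        	// eligible for immediate rescheduling rather than a follow-up eval.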
  1786  
  1787  	// Mark one as desired state stop
  1788  	allocs[4].DesiredStatus = structs.AllocDesiredStatusStop
  1789  
  1790  	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
  1791  	r := reconciler.Compute()
  1792  
  1793  	// Verify that no follow up evals were created
  1794  	evals := r.desiredFollowupEvals[tgName]
  1795  	require.Nil(evals)
  1796  
  1797  	// Verify that one rescheduled alloc and one replacement for terminal alloc were placed
  1798  	assertResults(t, r, &resultExpectation{
  1799  		createDeployment:  nil,
  1800  		deploymentUpdates: nil,
  1801  		place:             2,
  1802  		inplace:           0,
  1803  		stop:              1,
  1804  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  1805  			job.TaskGroups[0].Name: {
  1806  				Place:  2,
  1807  				Ignore: 3,
  1808  				Stop:   1,
  1809  			},
  1810  		},
  1811  	})
  1812  
  1813  	// Rescheduled allocs should have previous allocs
  1814  	assertNamesHaveIndexes(t, intRange(1, 1, 4, 4), placeResultsToNames(r.place))
  1815  	assertPlaceResultsHavePreviousAllocs(t, 1, r.place)
  1816  	assertPlacementsAreRescheduled(t, 1, r.place)
  1817  }
  1818  
  1819  // Tests rescheduling failed service allocations when there's clock drift (up to a second)
  1820  func TestReconciler_RescheduleNow_WithinAllowedTimeWindow(t *testing.T) {
  1821  	require := require.New(t)
  1822  
  1823  	// Set desired 5
  1824  	job := mock.Job()
  1825  	job.TaskGroups[0].Count = 5
  1826  	tgName := job.TaskGroups[0].Name
  1827  	now := time.Now()
  1828  
  1829  	// Set up reschedule policy and update stanza
  1830  	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
  1831  		Attempts:      1,
  1832  		Interval:      24 * time.Hour,
  1833  		Delay:         5 * time.Second,
  1834  		DelayFunction: "",
  1835  		MaxDelay:      1 * time.Hour,
  1836  		Unlimited:     false,
  1837  	}
  1838  	job.TaskGroups[0].Update = noCanaryUpdate
  1839  
  1840  	// Create 5 existing allocations
  1841  	var allocs []*structs.Allocation
  1842  	for i := 0; i < 5; i++ {
  1843  		alloc := mock.Alloc()
  1844  		alloc.Job = job
  1845  		alloc.JobID = job.ID
  1846  		alloc.NodeID = uuid.Generate()
  1847  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  1848  		allocs = append(allocs, alloc)
  1849  		alloc.ClientStatus = structs.AllocClientStatusRunning
  1850  	}
  1851  
  1852  	// Mark two as failed
  1853  	allocs[0].ClientStatus = structs.AllocClientStatusFailed
  1854  
  1855  	// Mark one of them as already rescheduled once
  1856  	allocs[0].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
  1857  		{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
  1858  			PrevAllocID: uuid.Generate(),
  1859  			PrevNodeID:  uuid.Generate(),
  1860  		},
  1861  	}}
  1862  	// Set fail time to 4 seconds ago which falls within the reschedule window
  1863  	allocs[1].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
  1864  		StartedAt:  now.Add(-1 * time.Hour),
  1865  		FinishedAt: now.Add(-4 * time.Second)}}
  1866  	allocs[1].ClientStatus = structs.AllocClientStatusFailed
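        	// Only 4s have elapsed, less than the 5s delay, but the reconciler
        	// tolerates up to a second of clock drift and reschedules now anyway.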
  1867  
  1868  	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
  1869  	reconciler.now = now
  1870  	r := reconciler.Compute()
  1871  
  1872  	// Verify that no follow up evals were created
  1873  	evals := r.desiredFollowupEvals[tgName]
  1874  	require.Nil(evals)
  1875  
  1876  	// Verify that one rescheduled alloc was placed
  1877  	assertResults(t, r, &resultExpectation{
  1878  		createDeployment:  nil,
  1879  		deploymentUpdates: nil,
  1880  		place:             1,
  1881  		inplace:           0,
  1882  		stop:              1,
  1883  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  1884  			job.TaskGroups[0].Name: {
  1885  				Place:  1,
  1886  				Stop:   1,
  1887  				Ignore: 4,
  1888  			},
  1889  		},
  1890  	})
  1891  
  1892  	// Rescheduled allocs should have previous allocs
  1893  	assertNamesHaveIndexes(t, intRange(1, 1), placeResultsToNames(r.place))
  1894  	assertPlaceResultsHavePreviousAllocs(t, 1, r.place)
  1895  	assertPlacementsAreRescheduled(t, 1, r.place)
  1896  }
  1897  
  1898  // Tests rescheduling failed service allocations when the eval ID matches and there's a large clock drift
  1899  func TestReconciler_RescheduleNow_EvalIDMatch(t *testing.T) {
  1900  	require := require.New(t)
  1901  
  1902  	// Set desired 5
  1903  	job := mock.Job()
  1904  	job.TaskGroups[0].Count = 5
  1905  	tgName := job.TaskGroups[0].Name
  1906  	now := time.Now()
  1907  
  1908  	// Set up reschedule policy and update stanza
  1909  	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
  1910  		Attempts:      1,
  1911  		Interval:      24 * time.Hour,
  1912  		Delay:         5 * time.Second,
  1913  		DelayFunction: "",
  1914  		MaxDelay:      1 * time.Hour,
  1915  		Unlimited:     false,
  1916  	}
  1917  	job.TaskGroups[0].Update = noCanaryUpdate
  1918  
  1919  	// Create 5 existing allocations
  1920  	var allocs []*structs.Allocation
  1921  	for i := 0; i < 5; i++ {
  1922  		alloc := mock.Alloc()
  1923  		alloc.Job = job
  1924  		alloc.JobID = job.ID
  1925  		alloc.NodeID = uuid.Generate()
  1926  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  1927  		allocs = append(allocs, alloc)
  1928  		alloc.ClientStatus = structs.AllocClientStatusRunning
  1929  	}
  1930  
  1931  	// Mark two as failed
  1932  	allocs[0].ClientStatus = structs.AllocClientStatusFailed
  1933  
  1934  	// Mark one of them as already rescheduled once
  1935  	allocs[0].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
  1936  		{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
  1937  			PrevAllocID: uuid.Generate(),
  1938  			PrevNodeID:  uuid.Generate(),
  1939  		},
  1940  	}}
  1941  	// Set fail time to 5 seconds ago and eval ID
  1942  	evalID := uuid.Generate()
  1943  	allocs[1].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
  1944  		StartedAt:  now.Add(-1 * time.Hour),
  1945  		FinishedAt: now.Add(-5 * time.Second)}}
  1946  	allocs[1].ClientStatus = structs.AllocClientStatusFailed
  1947  	allocs[1].FollowupEvalID = evalID
  1948  
  1949  	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, evalID)
  1950  	reconciler.now = now.Add(-30 * time.Second)
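        	// The reconciler clock is 30s behind the failure time, so the delay
        	// check alone would defer; the matching eval ID forces the reschedule.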
  1951  	r := reconciler.Compute()
  1952  
  1953  	// Verify that no follow up evals were created
  1954  	evals := r.desiredFollowupEvals[tgName]
  1955  	require.Nil(evals)
  1956  
  1957  	// Verify that one rescheduled alloc was placed
  1958  	assertResults(t, r, &resultExpectation{
  1959  		createDeployment:  nil,
  1960  		deploymentUpdates: nil,
  1961  		place:             1,
  1962  		stop:              1,
  1963  		inplace:           0,
  1964  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  1965  			job.TaskGroups[0].Name: {
  1966  				Place:  1,
  1967  				Stop:   1,
  1968  				Ignore: 4,
  1969  			},
  1970  		},
  1971  	})
  1972  
  1973  	// Rescheduled allocs should have previous allocs
  1974  	assertNamesHaveIndexes(t, intRange(1, 1), placeResultsToNames(r.place))
  1975  	assertPlaceResultsHavePreviousAllocs(t, 1, r.place)
  1976  	assertPlacementsAreRescheduled(t, 1, r.place)
  1977  }
  1978  
  1979  // Tests rescheduling failed service allocations when there are canaries
  1980  func TestReconciler_RescheduleNow_Service_WithCanaries(t *testing.T) {
  1981  	require := require.New(t)
  1982  
  1983  	// Set desired 5
  1984  	job := mock.Job()
  1985  	job.TaskGroups[0].Count = 5
  1986  	tgName := job.TaskGroups[0].Name
  1987  	now := time.Now()
  1988  
  1989  	// Set up reschedule policy and update stanza
  1990  	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
  1991  		Attempts:      1,
  1992  		Interval:      24 * time.Hour,
  1993  		Delay:         5 * time.Second,
  1994  		DelayFunction: "",
  1995  		MaxDelay:      1 * time.Hour,
  1996  		Unlimited:     false,
  1997  	}
  1998  	job.TaskGroups[0].Update = canaryUpdate
  1999  
  2000  	job2 := job.Copy()
  2001  	job2.Version++
  2002  
  2003  	d := structs.NewDeployment(job2)
  2004  	d.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion
  2005  	s := &structs.DeploymentState{
  2006  		DesiredCanaries: 2,
  2007  		DesiredTotal:    5,
  2008  	}
  2009  	d.TaskGroups[job.TaskGroups[0].Name] = s
  2010  
  2011  	// Create 5 existing allocations
  2012  	var allocs []*structs.Allocation
  2013  	for i := 0; i < 5; i++ {
  2014  		alloc := mock.Alloc()
  2015  		alloc.Job = job
  2016  		alloc.JobID = job.ID
  2017  		alloc.NodeID = uuid.Generate()
  2018  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2019  		allocs = append(allocs, alloc)
  2020  		alloc.ClientStatus = structs.AllocClientStatusRunning
  2021  	}
  2022  
  2023  	// Mark three as failed
  2024  	allocs[0].ClientStatus = structs.AllocClientStatusFailed
  2025  
  2026  	// Mark one of them as already rescheduled once
  2027  	allocs[0].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
  2028  		{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
  2029  			PrevAllocID: uuid.Generate(),
  2030  			PrevNodeID:  uuid.Generate(),
  2031  		},
  2032  	}}
  2033  	allocs[1].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
  2034  		StartedAt:  now.Add(-1 * time.Hour),
  2035  		FinishedAt: now.Add(-10 * time.Second)}}
  2036  	allocs[1].ClientStatus = structs.AllocClientStatusFailed
  2037  
  2038  	// Mark a third one as failed
  2039  	allocs[4].ClientStatus = structs.AllocClientStatusFailed
  2040  
  2041  	// Create 2 canary allocations
  2042  	for i := 0; i < 2; i++ {
  2043  		alloc := mock.Alloc()
  2044  		alloc.Job = job
  2045  		alloc.JobID = job.ID
  2046  		alloc.NodeID = uuid.Generate()
  2047  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2048  		alloc.ClientStatus = structs.AllocClientStatusRunning
  2049  		alloc.DeploymentID = d.ID
  2050  		alloc.DeploymentStatus = &structs.AllocDeploymentStatus{
  2051  			Canary:  true,
  2052  			Healthy: helper.BoolToPtr(false),
  2053  		}
  2054  		s.PlacedCanaries = append(s.PlacedCanaries, alloc.ID)
  2055  		allocs = append(allocs, alloc)
  2056  	}
  2057  
  2058  	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job2, d, allocs, nil, "")
  2059  	r := reconciler.Compute()
  2060  
  2061  	// Verify that no follow up evals were created
  2062  	evals := r.desiredFollowupEvals[tgName]
  2063  	require.Nil(evals)
  2064  
  2065  	// Verify that both failed allocs were rescheduled
  2066  	assertResults(t, r, &resultExpectation{
  2067  		createDeployment:  nil,
  2068  		deploymentUpdates: nil,
  2069  		place:             2,
  2070  		stop:              2,
  2071  		inplace:           0,
  2072  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  2073  			job.TaskGroups[0].Name: {
  2074  				Place:  2,
  2075  				Stop:   2,
  2076  				Ignore: 5,
  2077  			},
  2078  		},
  2079  	})
  2080  
  2081  	// Rescheduled allocs should have previous allocs
  2082  	assertNamesHaveIndexes(t, intRange(1, 1, 4, 4), placeResultsToNames(r.place))
  2083  	assertPlaceResultsHavePreviousAllocs(t, 2, r.place)
  2084  	assertPlacementsAreRescheduled(t, 2, r.place)
  2085  }
  2086  
  2087  // Tests rescheduling failed canary service allocations
  2088  func TestReconciler_RescheduleNow_Service_Canaries(t *testing.T) {
  2089  	require := require.New(t)
  2090  
  2091  	// Set desired 5
  2092  	job := mock.Job()
  2093  	job.TaskGroups[0].Count = 5
  2094  	tgName := job.TaskGroups[0].Name
  2095  	now := time.Now()
  2096  
  2097  	// Set up reschedule policy and update stanza
  2098  	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
  2099  		Delay:         5 * time.Second,
  2100  		DelayFunction: "constant",
  2101  		MaxDelay:      1 * time.Hour,
  2102  		Unlimited:     true,
  2103  	}
  2104  	job.TaskGroups[0].Update = canaryUpdate
  2105  
  2106  	job2 := job.Copy()
  2107  	job2.Version++
  2108  
  2109  	d := structs.NewDeployment(job2)
  2110  	d.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion
  2111  	s := &structs.DeploymentState{
  2112  		DesiredCanaries: 2,
  2113  		DesiredTotal:    5,
  2114  	}
  2115  	d.TaskGroups[job.TaskGroups[0].Name] = s
  2116  
  2117  	// Create 5 existing allocations
  2118  	var allocs []*structs.Allocation
  2119  	for i := 0; i < 5; i++ {
  2120  		alloc := mock.Alloc()
  2121  		alloc.Job = job
  2122  		alloc.JobID = job.ID
  2123  		alloc.NodeID = uuid.Generate()
  2124  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2125  		allocs = append(allocs, alloc)
  2126  		alloc.ClientStatus = structs.AllocClientStatusRunning
  2127  	}
  2128  
  2129  	// Create 2 running canary allocations that are not yet healthy
  2130  	for i := 0; i < 2; i++ {
  2131  		alloc := mock.Alloc()
  2132  		alloc.Job = job
  2133  		alloc.JobID = job.ID
  2134  		alloc.NodeID = uuid.Generate()
  2135  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2136  		alloc.ClientStatus = structs.AllocClientStatusRunning
  2137  		alloc.DeploymentID = d.ID
  2138  		alloc.DeploymentStatus = &structs.AllocDeploymentStatus{
  2139  			Canary:  true,
  2140  			Healthy: helper.BoolToPtr(false),
  2141  		}
  2142  		s.PlacedCanaries = append(s.PlacedCanaries, alloc.ID)
  2143  		allocs = append(allocs, alloc)
  2144  	}
  2145  
  2146  	// Mark the canaries as failed
  2147  	allocs[5].ClientStatus = structs.AllocClientStatusFailed
  2148  	allocs[5].DesiredTransition.Reschedule = helper.BoolToPtr(true)
  2149  
  2150  	// Mark one of them as already rescheduled once
  2151  	allocs[5].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
  2152  		{RescheduleTime: now.Add(-1 * time.Hour).UTC().UnixNano(),
  2153  			PrevAllocID: uuid.Generate(),
  2154  			PrevNodeID:  uuid.Generate(),
  2155  		},
  2156  	}}
  2157  
  2158  	allocs[6].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
  2159  		StartedAt:  now.Add(-1 * time.Hour),
  2160  		FinishedAt: now.Add(-10 * time.Second)}}
  2161  	allocs[6].ClientStatus = structs.AllocClientStatusFailed
  2162  	allocs[6].DesiredTransition.Reschedule = helper.BoolToPtr(true)
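        	// While a deployment is active, failed canaries are rescheduled only
        	// when DesiredTransition.Reschedule is explicitly set, as done above.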
  2163  
  2164  	// Create 4 unhealthy canary allocations that have already been replaced
  2165  	for i := 0; i < 4; i++ {
  2166  		alloc := mock.Alloc()
  2167  		alloc.Job = job
  2168  		alloc.JobID = job.ID
  2169  		alloc.NodeID = uuid.Generate()
  2170  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i%2))
  2171  		alloc.ClientStatus = structs.AllocClientStatusFailed
  2172  		alloc.DeploymentID = d.ID
  2173  		alloc.DeploymentStatus = &structs.AllocDeploymentStatus{
  2174  			Canary:  true,
  2175  			Healthy: helper.BoolToPtr(false),
  2176  		}
  2177  		s.PlacedCanaries = append(s.PlacedCanaries, alloc.ID)
  2178  		allocs = append(allocs, alloc)
  2179  	}
  2180  
  2181  	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job2, d, allocs, nil, "")
  2182  	reconciler.now = now
  2183  	r := reconciler.Compute()
  2184  
  2185  	// Verify that no follow up evals were created
  2186  	evals := r.desiredFollowupEvals[tgName]
  2187  	require.Nil(evals)
  2188  
  2189  	// Verify that both failed canaries were rescheduled
  2190  	assertResults(t, r, &resultExpectation{
  2191  		createDeployment:  nil,
  2192  		deploymentUpdates: nil,
  2193  		place:             2,
  2194  		stop:              2,
  2195  		inplace:           0,
  2196  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  2197  			job.TaskGroups[0].Name: {
  2198  				Place:  2,
  2199  				Stop:   2,
  2200  				Ignore: 9,
  2201  			},
  2202  		},
  2203  	})
  2204  
  2205  	// Rescheduled allocs should have previous allocs
  2206  	assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place))
  2207  	assertPlaceResultsHavePreviousAllocs(t, 2, r.place)
  2208  	assertPlacementsAreRescheduled(t, 2, r.place)
  2209  }
  2210  
  2211  // Tests rescheduling failed canary service allocations when one has reached its
  2212  // reschedule limit
  2213  func TestReconciler_RescheduleNow_Service_Canaries_Limit(t *testing.T) {
  2214  	require := require.New(t)
  2215  
  2216  	// Set desired 5
  2217  	job := mock.Job()
  2218  	job.TaskGroups[0].Count = 5
  2219  	tgName := job.TaskGroups[0].Name
  2220  	now := time.Now()
  2221  
  2222  	// Set up reschedule policy and update stanza
  2223  	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
  2224  		Attempts:      1,
  2225  		Interval:      24 * time.Hour,
  2226  		Delay:         5 * time.Second,
  2227  		DelayFunction: "",
  2228  		MaxDelay:      1 * time.Hour,
  2229  		Unlimited:     false,
  2230  	}
  2231  	job.TaskGroups[0].Update = canaryUpdate
  2232  
  2233  	job2 := job.Copy()
  2234  	job2.Version++
  2235  
  2236  	d := structs.NewDeployment(job2)
  2237  	d.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion
  2238  	s := &structs.DeploymentState{
  2239  		DesiredCanaries: 2,
  2240  		DesiredTotal:    5,
  2241  	}
  2242  	d.TaskGroups[job.TaskGroups[0].Name] = s
  2243  
  2244  	// Create 5 existing allocations
  2245  	var allocs []*structs.Allocation
  2246  	for i := 0; i < 5; i++ {
  2247  		alloc := mock.Alloc()
  2248  		alloc.Job = job
  2249  		alloc.JobID = job.ID
  2250  		alloc.NodeID = uuid.Generate()
  2251  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2252  		allocs = append(allocs, alloc)
  2253  		alloc.ClientStatus = structs.AllocClientStatusRunning
  2254  	}
  2255  
  2256  	// Create 2 running canary allocations that are not yet healthy
  2257  	for i := 0; i < 2; i++ {
  2258  		alloc := mock.Alloc()
  2259  		alloc.Job = job
  2260  		alloc.JobID = job.ID
  2261  		alloc.NodeID = uuid.Generate()
  2262  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2263  		alloc.ClientStatus = structs.AllocClientStatusRunning
  2264  		alloc.DeploymentID = d.ID
  2265  		alloc.DeploymentStatus = &structs.AllocDeploymentStatus{
  2266  			Canary:  true,
  2267  			Healthy: helper.BoolToPtr(false),
  2268  		}
  2269  		s.PlacedCanaries = append(s.PlacedCanaries, alloc.ID)
  2270  		allocs = append(allocs, alloc)
  2271  	}
  2272  
  2273  	// Mark the canaries as failed
  2274  	allocs[5].ClientStatus = structs.AllocClientStatusFailed
  2275  	allocs[5].DesiredTransition.Reschedule = helper.BoolToPtr(true)
  2276  
  2277  	// Mark one of them as already rescheduled once
  2278  	allocs[5].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
  2279  		{RescheduleTime: now.Add(-1 * time.Hour).UTC().UnixNano(),
  2280  			PrevAllocID: uuid.Generate(),
  2281  			PrevNodeID:  uuid.Generate(),
  2282  		},
  2283  	}}
  2284  
  2285  	allocs[6].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
  2286  		StartedAt:  now.Add(-1 * time.Hour),
  2287  		FinishedAt: now.Add(-10 * time.Second)}}
  2288  	allocs[6].ClientStatus = structs.AllocClientStatusFailed
  2289  	allocs[6].DesiredTransition.Reschedule = helper.BoolToPtr(true)
  2290  
  2291  	// Create 4 unhealthy canary allocations that have already been replaced
  2292  	for i := 0; i < 4; i++ {
  2293  		alloc := mock.Alloc()
  2294  		alloc.Job = job
  2295  		alloc.JobID = job.ID
  2296  		alloc.NodeID = uuid.Generate()
  2297  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i%2))
  2298  		alloc.ClientStatus = structs.AllocClientStatusFailed
  2299  		alloc.DeploymentID = d.ID
  2300  		alloc.DeploymentStatus = &structs.AllocDeploymentStatus{
  2301  			Canary:  true,
  2302  			Healthy: helper.BoolToPtr(false),
  2303  		}
  2304  		s.PlacedCanaries = append(s.PlacedCanaries, alloc.ID)
  2305  		allocs = append(allocs, alloc)
  2306  	}
  2307  
  2308  	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job2, d, allocs, nil, "")
  2309  	reconciler.now = now
  2310  	r := reconciler.Compute()
  2311  
  2312  	// Verify that no follow up evals were created
  2313  	evals := r.desiredFollowupEvals[tgName]
  2314  	require.Nil(evals)
  2315  
  2316  	// Verify that only the canary still below its reschedule limit was replaced
  2317  	assertResults(t, r, &resultExpectation{
  2318  		createDeployment:  nil,
  2319  		deploymentUpdates: nil,
  2320  		place:             1,
  2321  		stop:              1,
  2322  		inplace:           0,
  2323  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  2324  			job.TaskGroups[0].Name: {
  2325  				Place:  1,
  2326  				Stop:   1,
  2327  				Ignore: 10,
  2328  			},
  2329  		},
  2330  	})
  2331  
  2332  	// Rescheduled allocs should have previous allocs
  2333  	assertNamesHaveIndexes(t, intRange(1, 1), placeResultsToNames(r.place))
  2334  	assertPlaceResultsHavePreviousAllocs(t, 1, r.place)
  2335  	assertPlacementsAreRescheduled(t, 1, r.place)
  2336  }
  2337  
  2338  // Tests that failed service allocations which were already rescheduled are not rescheduled again
  2339  func TestReconciler_DontReschedule_PreviouslyRescheduled(t *testing.T) {
  2340  	// Set desired 5
  2341  	job := mock.Job()
  2342  	job.TaskGroups[0].Count = 5
  2343  
  2344  	// Set up reschedule policy
  2345  	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{Attempts: 5, Interval: 24 * time.Hour}
  2346  
  2347  	// Create 7 existing allocations
  2348  	var allocs []*structs.Allocation
  2349  	for i := 0; i < 7; i++ {
  2350  		alloc := mock.Alloc()
  2351  		alloc.Job = job
  2352  		alloc.JobID = job.ID
  2353  		alloc.NodeID = uuid.Generate()
  2354  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2355  		allocs = append(allocs, alloc)
  2356  		alloc.ClientStatus = structs.AllocClientStatusRunning
  2357  	}
  2358  	// Mark two as failed and rescheduled
  2359  	allocs[0].ClientStatus = structs.AllocClientStatusFailed
  2360  	allocs[0].ID = allocs[1].ID
  2361  	allocs[1].ClientStatus = structs.AllocClientStatusFailed
  2362  	allocs[1].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
  2363  		{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
  2364  			PrevAllocID: uuid.Generate(),
  2365  			PrevNodeID:  uuid.Generate(),
  2366  		},
  2367  	}}
  2368  	allocs[1].NextAllocation = allocs[2].ID
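        	// A non-empty NextAllocation marks an alloc as already replaced, so
        	// the reconciler will not reschedule it again.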
  2369  
  2370  	// Mark one as desired state stop
  2371  	allocs[4].DesiredStatus = structs.AllocDesiredStatusStop
  2372  
  2373  	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
  2374  	r := reconciler.Compute()
  2375  
  2376  	// Should place 1 - a new placement to make up the desired count of 5;
  2377  	// previously rescheduled failing allocs are not rescheduled again
  2378  	assertResults(t, r, &resultExpectation{
  2379  		createDeployment:  nil,
  2380  		deploymentUpdates: nil,
  2381  		place:             1,
  2382  		inplace:           0,
  2383  		stop:              0,
  2384  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  2385  			job.TaskGroups[0].Name: {
  2386  				Place:  1,
  2387  				Ignore: 4,
  2388  			},
  2389  		},
  2390  	})
  2391  
  2392  	// name index 0 is used for the replacement because it is the first unused name index
  2393  	assertNamesHaveIndexes(t, intRange(0, 0), placeResultsToNames(r.place))
  2394  }
  2395  
  2396  // Tests the reconciler cancels an old deployment when the job is being stopped
  2397  func TestReconciler_CancelDeployment_JobStop(t *testing.T) {
  2398  	job := mock.Job()
  2399  	job.Stop = true
  2400  
  2401  	running := structs.NewDeployment(job)
  2402  	failed := structs.NewDeployment(job)
  2403  	failed.Status = structs.DeploymentStatusFailed
  2404  
  2405  	cases := []struct {
  2406  		name             string
  2407  		job              *structs.Job
  2408  		jobID, taskGroup string
  2409  		deployment       *structs.Deployment
  2410  		cancel           bool
  2411  	}{
  2412  		{
  2413  			name:       "stopped job, running deployment",
  2414  			job:        job,
  2415  			jobID:      job.ID,
  2416  			taskGroup:  job.TaskGroups[0].Name,
  2417  			deployment: running,
  2418  			cancel:     true,
  2419  		},
  2420  		{
  2421  			name:       "nil job, running deployment",
  2422  			job:        nil,
  2423  			jobID:      "foo",
  2424  			taskGroup:  "bar",
  2425  			deployment: running,
  2426  			cancel:     true,
  2427  		},
  2428  		{
  2429  			name:       "stopped job, failed deployment",
  2430  			job:        job,
  2431  			jobID:      job.ID,
  2432  			taskGroup:  job.TaskGroups[0].Name,
  2433  			deployment: failed,
  2434  			cancel:     false,
  2435  		},
  2436  		{
  2437  			name:       "nil job, failed deployment",
  2438  			job:        nil,
  2439  			jobID:      "foo",
  2440  			taskGroup:  "bar",
  2441  			deployment: failed,
  2442  			cancel:     false,
  2443  		},
  2444  	}
  2445  
  2446  	for _, c := range cases {
  2447  		t.Run(c.name, func(t *testing.T) {
  2448  			// Create 10 allocations
  2449  			var allocs []*structs.Allocation
  2450  			for i := 0; i < 10; i++ {
  2451  				alloc := mock.Alloc()
  2452  				alloc.Job = c.job
  2453  				alloc.JobID = c.jobID
  2454  				alloc.NodeID = uuid.Generate()
  2455  				alloc.Name = structs.AllocName(c.jobID, c.taskGroup, uint(i))
  2456  				alloc.TaskGroup = c.taskGroup
  2457  				allocs = append(allocs, alloc)
  2458  			}
  2459  
  2460  			reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, c.jobID, c.job, c.deployment, allocs, nil, "")
  2461  			r := reconciler.Compute()
  2462  
  2463  			var updates []*structs.DeploymentStatusUpdate
  2464  			if c.cancel {
  2465  				updates = []*structs.DeploymentStatusUpdate{
  2466  					{
  2467  						DeploymentID:      c.deployment.ID,
  2468  						Status:            structs.DeploymentStatusCancelled,
  2469  						StatusDescription: structs.DeploymentStatusDescriptionStoppedJob,
  2470  					},
  2471  				}
  2472  			}
  2473  
  2474  			// Assert the correct results
  2475  			assertResults(t, r, &resultExpectation{
  2476  				createDeployment:  nil,
  2477  				deploymentUpdates: updates,
  2478  				place:             0,
  2479  				inplace:           0,
  2480  				stop:              10,
  2481  				desiredTGUpdates: map[string]*structs.DesiredUpdates{
  2482  					c.taskGroup: {
  2483  						Stop: 10,
  2484  					},
  2485  				},
  2486  			})
  2487  
  2488  			assertNamesHaveIndexes(t, intRange(0, 9), stopResultsToNames(r.stop))
  2489  		})
  2490  	}
  2491  }
  2492  
  2493  // Tests the reconciler cancels an old deployment when the job is updated
  2494  func TestReconciler_CancelDeployment_JobUpdate(t *testing.T) {
  2495  	// Create a base job
  2496  	job := mock.Job()
  2497  
  2498  	// Create two deployments
  2499  	running := structs.NewDeployment(job)
  2500  	failed := structs.NewDeployment(job)
  2501  	failed.Status = structs.DeploymentStatusFailed
  2502  
  2503  	// Make the job newer than the deployment
  2504  	job.Version += 10
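        	// Both deployments were created from the older job version, so only
        	// the running one should be cancelled with the "newer job" description.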
  2505  
  2506  	cases := []struct {
  2507  		name       string
  2508  		deployment *structs.Deployment
  2509  		cancel     bool
  2510  	}{
  2511  		{
  2512  			name:       "running deployment",
  2513  			deployment: running,
  2514  			cancel:     true,
  2515  		},
  2516  		{
  2517  			name:       "failed deployment",
  2518  			deployment: failed,
  2519  			cancel:     false,
  2520  		},
  2521  	}
  2522  
  2523  	for _, c := range cases {
  2524  		t.Run(c.name, func(t *testing.T) {
  2525  			// Create 10 allocations
  2526  			var allocs []*structs.Allocation
  2527  			for i := 0; i < 10; i++ {
  2528  				alloc := mock.Alloc()
  2529  				alloc.Job = job
  2530  				alloc.JobID = job.ID
  2531  				alloc.NodeID = uuid.Generate()
  2532  				alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2533  				alloc.TaskGroup = job.TaskGroups[0].Name
  2534  				allocs = append(allocs, alloc)
  2535  			}
  2536  
  2537  			reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, c.deployment, allocs, nil, "")
  2538  			r := reconciler.Compute()
  2539  
  2540  			var updates []*structs.DeploymentStatusUpdate
  2541  			if c.cancel {
  2542  				updates = []*structs.DeploymentStatusUpdate{
  2543  					{
  2544  						DeploymentID:      c.deployment.ID,
  2545  						Status:            structs.DeploymentStatusCancelled,
  2546  						StatusDescription: structs.DeploymentStatusDescriptionNewerJob,
  2547  					},
  2548  				}
  2549  			}
  2550  
  2551  			// Assert the correct results
  2552  			assertResults(t, r, &resultExpectation{
  2553  				createDeployment:  nil,
  2554  				deploymentUpdates: updates,
  2555  				place:             0,
  2556  				inplace:           0,
  2557  				stop:              0,
  2558  				desiredTGUpdates: map[string]*structs.DesiredUpdates{
  2559  					job.TaskGroups[0].Name: {
  2560  						Ignore: 10,
  2561  					},
  2562  				},
  2563  			})
  2564  		})
  2565  	}
  2566  }
  2567  
  2568  // Tests the reconciler creates a deployment and does a rolling upgrade with
  2569  // destructive changes
  2570  func TestReconciler_CreateDeployment_RollingUpgrade_Destructive(t *testing.T) {
  2571  	job := mock.Job()
  2572  	job.TaskGroups[0].Update = noCanaryUpdate
  2573  
  2574  	// Create 10 allocations from the old job
  2575  	var allocs []*structs.Allocation
  2576  	for i := 0; i < 10; i++ {
  2577  		alloc := mock.Alloc()
  2578  		alloc.Job = job
  2579  		alloc.JobID = job.ID
  2580  		alloc.NodeID = uuid.Generate()
  2581  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2582  		alloc.TaskGroup = job.TaskGroups[0].Name
  2583  		allocs = append(allocs, alloc)
  2584  	}
  2585  
  2586  	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "")
  2587  	r := reconciler.Compute()
  2588  
  2589  	d := structs.NewDeployment(job)
  2590  	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  2591  		DesiredTotal: 10,
  2592  	}
  2593  
  2594  	// Assert the correct results
  2595  	assertResults(t, r, &resultExpectation{
  2596  		createDeployment:  d,
  2597  		deploymentUpdates: nil,
  2598  		destructive:       4,
  2599  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  2600  			job.TaskGroups[0].Name: {
  2601  				DestructiveUpdate: 4,
  2602  				Ignore:            6,
  2603  			},
  2604  		},
  2605  	})
  2606  
  2607  	assertNamesHaveIndexes(t, intRange(0, 3), destructiveResultsToNames(r.destructiveUpdate))
  2608  }
  2609  
  2610  // Tests the reconciler creates a deployment for inplace updates
  2611  func TestReconciler_CreateDeployment_RollingUpgrade_Inplace(t *testing.T) {
  2612  	jobOld := mock.Job()
  2613  	job := jobOld.Copy()
  2614  	job.Version++
  2615  	job.TaskGroups[0].Update = noCanaryUpdate
  2616  
  2617  	// Create 10 allocations from the old job
  2618  	var allocs []*structs.Allocation
  2619  	for i := 0; i < 10; i++ {
  2620  		alloc := mock.Alloc()
  2621  		alloc.Job = jobOld
  2622  		alloc.JobID = job.ID
  2623  		alloc.NodeID = uuid.Generate()
  2624  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2625  		alloc.TaskGroup = job.TaskGroups[0].Name
  2626  		allocs = append(allocs, alloc)
  2627  	}
  2628  
  2629  	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnInplace, false, job.ID, job, nil, allocs, nil, "")
  2630  	r := reconciler.Compute()
  2631  
  2632  	d := structs.NewDeployment(job)
  2633  	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  2634  		DesiredTotal: 10,
  2635  	}
  2636  
  2637  	// Assert the correct results
  2638  	assertResults(t, r, &resultExpectation{
  2639  		createDeployment:  d,
  2640  		deploymentUpdates: nil,
  2641  		place:             0,
  2642  		inplace:           10,
  2643  		stop:              0,
  2644  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  2645  			job.TaskGroups[0].Name: {
  2646  				InPlaceUpdate: 10,
  2647  			},
  2648  		},
  2649  	})
  2650  }
  2651  
  2652  // Tests the reconciler creates a deployment when the job has a newer create index
  2653  func TestReconciler_CreateDeployment_NewerCreateIndex(t *testing.T) {
  2654  	jobOld := mock.Job()
  2655  	job := jobOld.Copy()
  2656  	job.TaskGroups[0].Update = noCanaryUpdate
  2657  	job.CreateIndex += 100
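        	// A newer CreateIndex means the job was re-registered, so a fresh
        	// deployment is expected even though the allocs themselves are unchanged.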
  2658  
  2659  	// Create 5 allocations from the old job
  2660  	var allocs []*structs.Allocation
  2661  	for i := 0; i < 5; i++ {
  2662  		alloc := mock.Alloc()
  2663  		alloc.Job = jobOld
  2664  		alloc.JobID = jobOld.ID
  2665  		alloc.NodeID = uuid.Generate()
  2666  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2667  		alloc.TaskGroup = job.TaskGroups[0].Name
  2668  		allocs = append(allocs, alloc)
  2669  	}
  2670  
  2671  	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
  2672  	r := reconciler.Compute()
  2673  
  2674  	d := structs.NewDeployment(job)
  2675  	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  2676  		DesiredTotal: 5,
  2677  	}
  2678  
  2679  	// Assert the correct results
  2680  	assertResults(t, r, &resultExpectation{
  2681  		createDeployment:  d,
  2682  		deploymentUpdates: nil,
  2683  		place:             5,
  2684  		destructive:       0,
  2685  		inplace:           0,
  2686  		stop:              0,
  2687  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  2688  			job.TaskGroups[0].Name: {
  2689  				InPlaceUpdate:     0,
  2690  				Ignore:            5,
  2691  				Place:             5,
  2692  				DestructiveUpdate: 0,
  2693  			},
  2694  		},
  2695  	})
  2696  }
  2697  
  2698  // Tests the reconciler doesn't create a deployment if there are no changes
  2699  func TestReconciler_DontCreateDeployment_NoChanges(t *testing.T) {
  2700  	job := mock.Job()
  2701  	job.TaskGroups[0].Update = noCanaryUpdate
  2702  
  2703  	// Create 10 allocations from the job
  2704  	var allocs []*structs.Allocation
  2705  	for i := 0; i < 10; i++ {
  2706  		alloc := mock.Alloc()
  2707  		alloc.Job = job
  2708  		alloc.JobID = job.ID
  2709  		alloc.NodeID = uuid.Generate()
  2710  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2711  		alloc.TaskGroup = job.TaskGroups[0].Name
  2712  		allocs = append(allocs, alloc)
  2713  	}
  2714  
  2715  	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
  2716  	r := reconciler.Compute()
  2717  
  2718  	// Assert the correct results
  2719  	assertResults(t, r, &resultExpectation{
  2720  		createDeployment:  nil,
  2721  		deploymentUpdates: nil,
  2722  		place:             0,
  2723  		inplace:           0,
  2724  		stop:              0,
  2725  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  2726  			job.TaskGroups[0].Name: {
  2727  				DestructiveUpdate: 0,
  2728  				Ignore:            10,
  2729  			},
  2730  		},
  2731  	})
  2732  }
  2733  
  2734  // Tests the reconciler doesn't place any more canaries when the deployment is
  2735  // paused or failed
  2736  func TestReconciler_PausedOrFailedDeployment_NoMoreCanaries(t *testing.T) {
  2737  	job := mock.Job()
  2738  	job.TaskGroups[0].Update = canaryUpdate
  2739  
  2740  	cases := []struct {
  2741  		name             string
  2742  		deploymentStatus string
  2743  		stop             uint64
  2744  	}{
  2745  		{
  2746  			name:             "paused deployment",
  2747  			deploymentStatus: structs.DeploymentStatusPaused,
  2748  			stop:             0,
  2749  		},
  2750  		{
  2751  			name:             "failed deployment",
  2752  			deploymentStatus: structs.DeploymentStatusFailed,
  2753  			stop:             1,
  2754  		},
  2755  	}
  2756  
  2757  	for _, c := range cases {
  2758  		t.Run(c.name, func(t *testing.T) {
  2759  			// Create a deployment that is paused/failed and has placed some canaries
  2760  			d := structs.NewDeployment(job)
  2761  			d.Status = c.deploymentStatus
  2762  			d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  2763  				Promoted:        false,
  2764  				DesiredCanaries: 2,
  2765  				DesiredTotal:    10,
  2766  				PlacedAllocs:    1,
  2767  			}
  2768  
  2769  			// Create 10 allocations for the original job
  2770  			var allocs []*structs.Allocation
  2771  			for i := 0; i < 10; i++ {
  2772  				alloc := mock.Alloc()
  2773  				alloc.Job = job
  2774  				alloc.JobID = job.ID
  2775  				alloc.NodeID = uuid.Generate()
  2776  				alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2777  				alloc.TaskGroup = job.TaskGroups[0].Name
  2778  				allocs = append(allocs, alloc)
  2779  			}
  2780  
  2781  			// Create one canary
  2782  			canary := mock.Alloc()
  2783  			canary.Job = job
  2784  			canary.JobID = job.ID
  2785  			canary.NodeID = uuid.Generate()
  2786  			canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, 0)
  2787  			canary.TaskGroup = job.TaskGroups[0].Name
  2788  			canary.DeploymentID = d.ID
  2789  			allocs = append(allocs, canary)
  2790  			d.TaskGroups[canary.TaskGroup].PlacedCanaries = []string{canary.ID}
  2791  
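        			// The update fn leaves the existing canary alone and would treat all
        			// other allocs as destructive, but the paused/failed deployment must
        			// block any further canary placements.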
  2792  			mockUpdateFn := allocUpdateFnMock(map[string]allocUpdateType{canary.ID: allocUpdateFnIgnore}, allocUpdateFnDestructive)
  2793  			reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, d, allocs, nil, "")
  2794  			r := reconciler.Compute()
  2795  
  2796  			// Assert the correct results
  2797  			assertResults(t, r, &resultExpectation{
  2798  				createDeployment:  nil,
  2799  				deploymentUpdates: nil,
  2800  				place:             0,
  2801  				inplace:           0,
  2802  				stop:              int(c.stop),
  2803  				desiredTGUpdates: map[string]*structs.DesiredUpdates{
  2804  					job.TaskGroups[0].Name: {
  2805  						Ignore: 11 - c.stop,
  2806  						Stop:   c.stop,
  2807  					},
  2808  				},
  2809  			})
  2810  		})
  2811  	}
  2812  }
  2813  
  2814  // Tests the reconciler doesn't place any more allocs when the deployment is
  2815  // paused or failed
  2816  func TestReconciler_PausedOrFailedDeployment_NoMorePlacements(t *testing.T) {
  2817  	job := mock.Job()
  2818  	job.TaskGroups[0].Update = noCanaryUpdate
  2819  	job.TaskGroups[0].Count = 15
  2820  
  2821  	cases := []struct {
  2822  		name             string
  2823  		deploymentStatus string
  2824  	}{
  2825  		{
  2826  			name:             "paused deployment",
  2827  			deploymentStatus: structs.DeploymentStatusPaused,
  2828  		},
  2829  		{
  2830  			name:             "failed deployment",
  2831  			deploymentStatus: structs.DeploymentStatusFailed,
  2832  		},
  2833  	}
  2834  
  2835  	for _, c := range cases {
  2836  		t.Run(c.name, func(t *testing.T) {
  2837  			// Create a deployment that is paused/failed and has placed some allocs
  2838  			d := structs.NewDeployment(job)
  2839  			d.Status = c.deploymentStatus
  2840  			d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  2841  				Promoted:     false,
  2842  				DesiredTotal: 15,
  2843  				PlacedAllocs: 10,
  2844  			}
  2845  
  2846  			// Create 10 allocations for the new job
  2847  			var allocs []*structs.Allocation
  2848  			for i := 0; i < 10; i++ {
  2849  				alloc := mock.Alloc()
  2850  				alloc.Job = job
  2851  				alloc.JobID = job.ID
  2852  				alloc.NodeID = uuid.Generate()
  2853  				alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2854  				alloc.TaskGroup = job.TaskGroups[0].Name
  2855  				allocs = append(allocs, alloc)
  2856  			}
  2857  
  2858  			reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, d, allocs, nil, "")
  2859  			r := reconciler.Compute()
  2860  
  2861  			// Assert the correct results
  2862  			assertResults(t, r, &resultExpectation{
  2863  				createDeployment:  nil,
  2864  				deploymentUpdates: nil,
  2865  				place:             0,
  2866  				inplace:           0,
  2867  				stop:              0,
  2868  				desiredTGUpdates: map[string]*structs.DesiredUpdates{
  2869  					job.TaskGroups[0].Name: {
  2870  						Ignore: 10,
  2871  					},
  2872  				},
  2873  			})
  2874  		})
  2875  	}
  2876  }
  2877  
  2878  // Tests the reconciler doesn't do any more destructive updates when the
  2879  // deployment is paused or failed
  2880  func TestReconciler_PausedOrFailedDeployment_NoMoreDestructiveUpdates(t *testing.T) {
  2881  	job := mock.Job()
  2882  	job.TaskGroups[0].Update = noCanaryUpdate
  2883  
  2884  	cases := []struct {
  2885  		name             string
  2886  		deploymentStatus string
  2887  	}{
  2888  		{
  2889  			name:             "paused deployment",
  2890  			deploymentStatus: structs.DeploymentStatusPaused,
  2891  		},
  2892  		{
  2893  			name:             "failed deployment",
  2894  			deploymentStatus: structs.DeploymentStatusFailed,
  2895  		},
  2896  	}
  2897  
  2898  	for _, c := range cases {
  2899  		t.Run(c.name, func(t *testing.T) {
  2900  			// Create a deployment that is paused/failed and has placed some allocs
  2901  			d := structs.NewDeployment(job)
  2902  			d.Status = c.deploymentStatus
  2903  			d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  2904  				Promoted:     false,
  2905  				DesiredTotal: 10,
  2906  				PlacedAllocs: 1,
  2907  			}
  2908  
  2909  			// Create 9 allocations for the original job
  2910  			var allocs []*structs.Allocation
  2911  			for i := 1; i < 10; i++ {
  2912  				alloc := mock.Alloc()
  2913  				alloc.Job = job
  2914  				alloc.JobID = job.ID
  2915  				alloc.NodeID = uuid.Generate()
  2916  				alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2917  				alloc.TaskGroup = job.TaskGroups[0].Name
  2918  				allocs = append(allocs, alloc)
  2919  			}
  2920  
  2921  			// Create one for the new job
  2922  			newAlloc := mock.Alloc()
  2923  			newAlloc.Job = job
  2924  			newAlloc.JobID = job.ID
  2925  			newAlloc.NodeID = uuid.Generate()
  2926  			newAlloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, 0)
  2927  			newAlloc.TaskGroup = job.TaskGroups[0].Name
  2928  			newAlloc.DeploymentID = d.ID
  2929  			allocs = append(allocs, newAlloc)
  2930  
  2931  			mockUpdateFn := allocUpdateFnMock(map[string]allocUpdateType{newAlloc.ID: allocUpdateFnIgnore}, allocUpdateFnDestructive)
  2932  			reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, d, allocs, nil, "")
  2933  			r := reconciler.Compute()
  2934  
  2935  			// Assert the correct results
  2936  			assertResults(t, r, &resultExpectation{
  2937  				createDeployment:  nil,
  2938  				deploymentUpdates: nil,
  2939  				place:             0,
  2940  				inplace:           0,
  2941  				stop:              0,
  2942  				desiredTGUpdates: map[string]*structs.DesiredUpdates{
  2943  					job.TaskGroups[0].Name: {
  2944  						Ignore: 10,
  2945  					},
  2946  				},
  2947  			})
  2948  		})
  2949  	}
  2950  }
  2951  
  2952  // Tests the reconciler handles migrating a canary correctly on a draining node
  2953  func TestReconciler_DrainNode_Canary(t *testing.T) {
  2954  	job := mock.Job()
  2955  	job.TaskGroups[0].Update = canaryUpdate
  2956  
  2957  	// Create a running deployment that has placed two canaries
  2958  	d := structs.NewDeployment(job)
  2959  	s := &structs.DeploymentState{
  2960  		Promoted:        false,
  2961  		DesiredTotal:    10,
  2962  		DesiredCanaries: 2,
  2963  		PlacedAllocs:    2,
  2964  	}
  2965  	d.TaskGroups[job.TaskGroups[0].Name] = s
  2966  
  2967  	// Create 10 allocations from the old job
  2968  	var allocs []*structs.Allocation
  2969  	for i := 0; i < 10; i++ {
  2970  		alloc := mock.Alloc()
  2971  		alloc.Job = job
  2972  		alloc.JobID = job.ID
  2973  		alloc.NodeID = uuid.Generate()
  2974  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2975  		alloc.TaskGroup = job.TaskGroups[0].Name
  2976  		allocs = append(allocs, alloc)
  2977  	}
  2978  
  2979  	// Create two canaries for the new job
  2980  	handled := make(map[string]allocUpdateType)
  2981  	for i := 0; i < 2; i++ {
  2982  		// Create one canary
  2983  		canary := mock.Alloc()
  2984  		canary.Job = job
  2985  		canary.JobID = job.ID
  2986  		canary.NodeID = uuid.Generate()
  2987  		canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  2988  		canary.TaskGroup = job.TaskGroups[0].Name
  2989  		canary.DeploymentID = d.ID
  2990  		s.PlacedCanaries = append(s.PlacedCanaries, canary.ID)
  2991  		allocs = append(allocs, canary)
  2992  		handled[canary.ID] = allocUpdateFnIgnore
  2993  	}
  2994  
  2995  	// Build a map of tainted nodes that contains the last canary
  2996  	tainted := make(map[string]*structs.Node, 1)
  2997  	n := mock.Node()
  2998  	n.ID = allocs[11].NodeID
  2999  	allocs[11].DesiredTransition.Migrate = helper.BoolToPtr(true)
  3000  	n.Drain = true
  3001  	tainted[n.ID] = n
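        	// The drained canary should be stopped and exactly one replacement
        	// canary placed; all other allocs are ignored while unpromoted.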
  3002  
  3003  	mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive)
  3004  	reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, d, allocs, tainted, "")
  3005  	r := reconciler.Compute()
  3006  
  3007  	// Assert the correct results
  3008  	assertResults(t, r, &resultExpectation{
  3009  		createDeployment:  nil,
  3010  		deploymentUpdates: nil,
  3011  		place:             1,
  3012  		inplace:           0,
  3013  		stop:              1,
  3014  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  3015  			job.TaskGroups[0].Name: {
  3016  				Canary: 1,
  3017  				Ignore: 11,
  3018  			},
  3019  		},
  3020  	})
  3021  	assertNamesHaveIndexes(t, intRange(1, 1), stopResultsToNames(r.stop))
  3022  	assertNamesHaveIndexes(t, intRange(1, 1), placeResultsToNames(r.place))
  3023  }
  3024  
  3025  // Tests the reconciler handles migrating a canary correctly on a lost node
  3026  func TestReconciler_LostNode_Canary(t *testing.T) {
  3027  	job := mock.Job()
  3028  	job.TaskGroups[0].Update = canaryUpdate
  3029  
  3030  	// Create an unpromoted deployment that has placed both canaries
  3031  	d := structs.NewDeployment(job)
  3032  	s := &structs.DeploymentState{
  3033  		Promoted:        false,
  3034  		DesiredTotal:    10,
  3035  		DesiredCanaries: 2,
  3036  		PlacedAllocs:    2,
  3037  	}
  3038  	d.TaskGroups[job.TaskGroups[0].Name] = s
  3039  
  3040  	// Create 10 allocations from the old job
  3041  	var allocs []*structs.Allocation
  3042  	for i := 0; i < 10; i++ {
  3043  		alloc := mock.Alloc()
  3044  		alloc.Job = job
  3045  		alloc.JobID = job.ID
  3046  		alloc.NodeID = uuid.Generate()
  3047  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3048  		alloc.TaskGroup = job.TaskGroups[0].Name
  3049  		allocs = append(allocs, alloc)
  3050  	}
  3051  
  3052  	// Create two canaries for the new job
  3053  	handled := make(map[string]allocUpdateType)
  3054  	for i := 0; i < 2; i++ {
  3055  		// Create one canary
  3056  		canary := mock.Alloc()
  3057  		canary.Job = job
  3058  		canary.JobID = job.ID
  3059  		canary.NodeID = uuid.Generate()
  3060  		canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3061  		canary.TaskGroup = job.TaskGroups[0].Name
  3062  		s.PlacedCanaries = append(s.PlacedCanaries, canary.ID)
  3063  		canary.DeploymentID = d.ID
  3064  		allocs = append(allocs, canary)
  3065  		handled[canary.ID] = allocUpdateFnIgnore
  3066  	}
  3067  
  3068  	// Build a map of tainted nodes that contains the last canary
  3069  	tainted := make(map[string]*structs.Node, 1)
  3070  	n := mock.Node()
  3071  	n.ID = allocs[11].NodeID
  3072  	n.Status = structs.NodeStatusDown
  3073  	tainted[n.ID] = n
  3074  
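        	// The node is down rather than draining, so no migrate flag is set;
        	// the canary on it is lost and should be stopped and replaced.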
  3075  	mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive)
  3076  	reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, d, allocs, tainted, "")
  3077  	r := reconciler.Compute()
  3078  
  3079  	// Assert the correct results
  3080  	assertResults(t, r, &resultExpectation{
  3081  		createDeployment:  nil,
  3082  		deploymentUpdates: nil,
  3083  		place:             1,
  3084  		inplace:           0,
  3085  		stop:              1,
  3086  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  3087  			job.TaskGroups[0].Name: {
  3088  				Canary: 1,
  3089  				Ignore: 11,
  3090  			},
  3091  		},
  3092  	})
  3093  
  3094  	assertNamesHaveIndexes(t, intRange(1, 1), stopResultsToNames(r.stop))
  3095  	assertNamesHaveIndexes(t, intRange(1, 1), placeResultsToNames(r.place))
  3096  }
  3097  
  3098  // Tests the reconciler handles stopping canaries from older deployments
  3099  func TestReconciler_StopOldCanaries(t *testing.T) {
  3100  	job := mock.Job()
  3101  	job.TaskGroups[0].Update = canaryUpdate
  3102  
  3103  	// Create an old deployment that has placed some canaries
  3104  	d := structs.NewDeployment(job)
  3105  	s := &structs.DeploymentState{
  3106  		Promoted:        false,
  3107  		DesiredTotal:    10,
  3108  		DesiredCanaries: 2,
  3109  		PlacedAllocs:    2,
  3110  	}
  3111  	d.TaskGroups[job.TaskGroups[0].Name] = s
  3112  
  3113  	// Update the job
  3114  	job.Version += 10
  3115  
  3116  	// Create 10 allocations from the old job
  3117  	var allocs []*structs.Allocation
  3118  	for i := 0; i < 10; i++ {
  3119  		alloc := mock.Alloc()
  3120  		alloc.Job = job
  3121  		alloc.JobID = job.ID
  3122  		alloc.NodeID = uuid.Generate()
  3123  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3124  		alloc.TaskGroup = job.TaskGroups[0].Name
  3125  		allocs = append(allocs, alloc)
  3126  	}
  3127  
  3128  	// Create canaries
  3129  	for i := 0; i < 2; i++ {
  3130  		// Create one canary
  3131  		canary := mock.Alloc()
  3132  		canary.Job = job
  3133  		canary.JobID = job.ID
  3134  		canary.NodeID = uuid.Generate()
  3135  		canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3136  		canary.TaskGroup = job.TaskGroups[0].Name
  3137  		s.PlacedCanaries = append(s.PlacedCanaries, canary.ID)
  3138  		canary.DeploymentID = d.ID
  3139  		allocs = append(allocs, canary)
  3140  	}
  3141  
  3142  	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, d, allocs, nil, "")
  3143  	r := reconciler.Compute()
  3144  
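        	// The reconciler should cancel the old deployment in favor of one for
        	// the updated job, stop its two canaries, and place two fresh ones.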
  3145  	newD := structs.NewDeployment(job)
  3146  	newD.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion
  3147  	newD.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  3148  		DesiredCanaries: 2,
  3149  		DesiredTotal:    10,
  3150  	}
  3151  
  3152  	// Assert the correct results
  3153  	assertResults(t, r, &resultExpectation{
  3154  		createDeployment: newD,
  3155  		deploymentUpdates: []*structs.DeploymentStatusUpdate{
  3156  			{
  3157  				DeploymentID:      d.ID,
  3158  				Status:            structs.DeploymentStatusCancelled,
  3159  				StatusDescription: structs.DeploymentStatusDescriptionNewerJob,
  3160  			},
  3161  		},
  3162  		place:   2,
  3163  		inplace: 0,
  3164  		stop:    2,
  3165  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  3166  			job.TaskGroups[0].Name: {
  3167  				Canary: 2,
  3168  				Stop:   2,
  3169  				Ignore: 10,
  3170  			},
  3171  		},
  3172  	})
  3173  
  3174  	assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop))
  3175  	assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place))
  3176  }
  3177  
  3178  // Tests the reconciler creates new canaries when the job changes
  3179  func TestReconciler_NewCanaries(t *testing.T) {
  3180  	job := mock.Job()
  3181  	job.TaskGroups[0].Update = canaryUpdate
  3182  
  3183  	// Create 10 allocations from the old job
  3184  	var allocs []*structs.Allocation
  3185  	for i := 0; i < 10; i++ {
  3186  		alloc := mock.Alloc()
  3187  		alloc.Job = job
  3188  		alloc.JobID = job.ID
  3189  		alloc.NodeID = uuid.Generate()
  3190  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3191  		alloc.TaskGroup = job.TaskGroups[0].Name
  3192  		allocs = append(allocs, alloc)
  3193  	}
  3194  
  3195  	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "")
  3196  	r := reconciler.Compute()
  3197  
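        	// A new deployment that needs promotion should be created, with two
        	// canaries placed and the 10 existing allocs ignored.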
  3198  	newD := structs.NewDeployment(job)
  3199  	newD.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion
  3200  	newD.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  3201  		DesiredCanaries: 2,
  3202  		DesiredTotal:    10,
  3203  	}
  3204  
  3205  	// Assert the correct results
  3206  	assertResults(t, r, &resultExpectation{
  3207  		createDeployment:  newD,
  3208  		deploymentUpdates: nil,
  3209  		place:             2,
  3210  		inplace:           0,
  3211  		stop:              0,
  3212  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  3213  			job.TaskGroups[0].Name: {
  3214  				Canary: 2,
  3215  				Ignore: 10,
  3216  			},
  3217  		},
  3218  	})
  3219  
  3220  	assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place))
  3221  }
  3222  
  3223  // Tests the reconciler creates new canaries when the job changes and the
  3224  // canary count is greater than the task group count
  3225  func TestReconciler_NewCanaries_CountGreater(t *testing.T) {
  3226  	job := mock.Job()
  3227  	job.TaskGroups[0].Count = 3
  3228  	job.TaskGroups[0].Update = canaryUpdate.Copy()
  3229  	job.TaskGroups[0].Update.Canary = 7
  3230  
  3231  	// Create 3 allocations from the old job
  3232  	var allocs []*structs.Allocation
  3233  	for i := 0; i < 3; i++ {
  3234  		alloc := mock.Alloc()
  3235  		alloc.Job = job
  3236  		alloc.JobID = job.ID
  3237  		alloc.NodeID = uuid.Generate()
  3238  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3239  		alloc.TaskGroup = job.TaskGroups[0].Name
  3240  		allocs = append(allocs, alloc)
  3241  	}
  3242  
  3243  	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "")
  3244  	r := reconciler.Compute()
  3245  
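        	// All 7 canaries should be placed even though the group count is only
        	// 3; the canary names span indexes 0 through 6.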
  3246  	newD := structs.NewDeployment(job)
  3247  	newD.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion
  3248  	state := &structs.DeploymentState{
  3249  		DesiredCanaries: 7,
  3250  		DesiredTotal:    3,
  3251  	}
  3252  	newD.TaskGroups[job.TaskGroups[0].Name] = state
  3253  
  3254  	// Assert the correct results
  3255  	assertResults(t, r, &resultExpectation{
  3256  		createDeployment:  newD,
  3257  		deploymentUpdates: nil,
  3258  		place:             7,
  3259  		inplace:           0,
  3260  		stop:              0,
  3261  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  3262  			job.TaskGroups[0].Name: {
  3263  				Canary: 7,
  3264  				Ignore: 3,
  3265  			},
  3266  		},
  3267  	})
  3268  
  3269  	assertNamesHaveIndexes(t, intRange(0, 2, 3, 6), placeResultsToNames(r.place))
  3270  }
  3271  
  3272  // Tests the reconciler creates new canaries when the job changes for multiple
  3273  // task groups
  3274  func TestReconciler_NewCanaries_MultiTG(t *testing.T) {
  3275  	job := mock.Job()
  3276  	job.TaskGroups[0].Update = canaryUpdate
  3277  	job.TaskGroups = append(job.TaskGroups, job.TaskGroups[0].Copy())
  3278  	job.TaskGroups[0].Name = "tg2"
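        	// Note: the rename targets the original group; its copy keeps the
        	// default name, leaving two distinctly named groups.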
  3279  
  3280  	// Create 10 allocations from the old job for each tg
  3281  	var allocs []*structs.Allocation
  3282  	for j := 0; j < 2; j++ {
  3283  		for i := 0; i < 10; i++ {
  3284  			alloc := mock.Alloc()
  3285  			alloc.Job = job
  3286  			alloc.JobID = job.ID
  3287  			alloc.NodeID = uuid.Generate()
  3288  			alloc.Name = structs.AllocName(job.ID, job.TaskGroups[j].Name, uint(i))
  3289  			alloc.TaskGroup = job.TaskGroups[j].Name
  3290  			allocs = append(allocs, alloc)
  3291  		}
  3292  	}
  3293  
  3294  	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "")
  3295  	r := reconciler.Compute()
  3296  
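        	// Each group should get its own canaries: two per group, four
        	// placements in total.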
  3297  	newD := structs.NewDeployment(job)
  3298  	newD.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion
  3299  	state := &structs.DeploymentState{
  3300  		DesiredCanaries: 2,
  3301  		DesiredTotal:    10,
  3302  	}
  3303  	newD.TaskGroups[job.TaskGroups[0].Name] = state
  3304  	newD.TaskGroups[job.TaskGroups[1].Name] = state.Copy()
  3305  
  3306  	// Assert the correct results
  3307  	assertResults(t, r, &resultExpectation{
  3308  		createDeployment:  newD,
  3309  		deploymentUpdates: nil,
  3310  		place:             4,
  3311  		inplace:           0,
  3312  		stop:              0,
  3313  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  3314  			job.TaskGroups[0].Name: {
  3315  				Canary: 2,
  3316  				Ignore: 10,
  3317  			},
  3318  			job.TaskGroups[1].Name: {
  3319  				Canary: 2,
  3320  				Ignore: 10,
  3321  			},
  3322  		},
  3323  	})
  3324  
  3325  	assertNamesHaveIndexes(t, intRange(0, 1, 0, 1), placeResultsToNames(r.place))
  3326  }
  3327  
  3328  // Tests the reconciler creates new canaries when the job changes and scales up
  3329  func TestReconciler_NewCanaries_ScaleUp(t *testing.T) {
  3330  	// Scale the job up to 15
  3331  	job := mock.Job()
  3332  	job.TaskGroups[0].Update = canaryUpdate
  3333  	job.TaskGroups[0].Count = 15
  3334  
  3335  	// Create 10 allocations from the old job
  3336  	var allocs []*structs.Allocation
  3337  	for i := 0; i < 10; i++ {
  3338  		alloc := mock.Alloc()
  3339  		alloc.Job = job
  3340  		alloc.JobID = job.ID
  3341  		alloc.NodeID = uuid.Generate()
  3342  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3343  		alloc.TaskGroup = job.TaskGroups[0].Name
  3344  		allocs = append(allocs, alloc)
  3345  	}
  3346  
  3347  	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "")
  3348  	r := reconciler.Compute()
  3349  
  3350  	newD := structs.NewDeployment(job)
  3351  	newD.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion
  3352  	newD.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  3353  		DesiredCanaries: 2,
  3354  		DesiredTotal:    15,
  3355  	}
  3356  
  3357  	// Assert the correct results
  3358  	assertResults(t, r, &resultExpectation{
  3359  		createDeployment:  newD,
  3360  		deploymentUpdates: nil,
  3361  		place:             2,
  3362  		inplace:           0,
  3363  		stop:              0,
  3364  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  3365  			job.TaskGroups[0].Name: {
  3366  				Canary: 2,
  3367  				Ignore: 10,
  3368  			},
  3369  		},
  3370  	})
  3371  
  3372  	assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place))
  3373  }
  3374  
  3375  // Tests the reconciler creates new canaries when the job changes and scales
  3376  // down
  3377  func TestReconciler_NewCanaries_ScaleDown(t *testing.T) {
  3378  	// Scale the job down to 5
  3379  	job := mock.Job()
  3380  	job.TaskGroups[0].Update = canaryUpdate
  3381  	job.TaskGroups[0].Count = 5
  3382  
  3383  	// Create 10 allocations from the old job
  3384  	var allocs []*structs.Allocation
  3385  	for i := 0; i < 10; i++ {
  3386  		alloc := mock.Alloc()
  3387  		alloc.Job = job
  3388  		alloc.JobID = job.ID
  3389  		alloc.NodeID = uuid.Generate()
  3390  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3391  		alloc.TaskGroup = job.TaskGroups[0].Name
  3392  		allocs = append(allocs, alloc)
  3393  	}
  3394  
  3395  	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "")
  3396  	r := reconciler.Compute()
  3397  
  3398  	newD := structs.NewDeployment(job)
  3399  	newD.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion
  3400  	newD.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  3401  		DesiredCanaries: 2,
  3402  		DesiredTotal:    5,
  3403  	}
  3404  
  3405  	// Assert the correct results
  3406  	assertResults(t, r, &resultExpectation{
  3407  		createDeployment:  newD,
  3408  		deploymentUpdates: nil,
  3409  		place:             2,
  3410  		inplace:           0,
  3411  		stop:              5,
  3412  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  3413  			job.TaskGroups[0].Name: {
  3414  				Canary: 2,
  3415  				Stop:   5,
  3416  				Ignore: 5,
  3417  			},
  3418  		},
  3419  	})
  3420  
  3421  	assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place))
  3422  	assertNamesHaveIndexes(t, intRange(5, 9), stopResultsToNames(r.stop))
  3423  }
  3424  
  3425  // Tests the reconciler handles filling the names of partially placed canaries
  3426  func TestReconciler_NewCanaries_FillNames(t *testing.T) {
  3427  	job := mock.Job()
  3428  	job.TaskGroups[0].Update = &structs.UpdateStrategy{
  3429  		Canary:          4,
  3430  		MaxParallel:     2,
  3431  		HealthCheck:     structs.UpdateStrategyHealthCheck_Checks,
  3432  		MinHealthyTime:  10 * time.Second,
  3433  		HealthyDeadline: 10 * time.Minute,
  3434  	}
  3435  
  3436  	// Create an existing deployment that has placed some canaries
  3437  	d := structs.NewDeployment(job)
  3438  	s := &structs.DeploymentState{
  3439  		Promoted:        false,
  3440  		DesiredTotal:    10,
  3441  		DesiredCanaries: 4,
  3442  		PlacedAllocs:    2,
  3443  	}
  3444  	d.TaskGroups[job.TaskGroups[0].Name] = s
  3445  
  3446  	// Create 10 allocations from the old job
  3447  	var allocs []*structs.Allocation
  3448  	for i := 0; i < 10; i++ {
  3449  		alloc := mock.Alloc()
  3450  		alloc.Job = job
  3451  		alloc.JobID = job.ID
  3452  		alloc.NodeID = uuid.Generate()
  3453  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3454  		alloc.TaskGroup = job.TaskGroups[0].Name
  3455  		allocs = append(allocs, alloc)
  3456  	}
  3457  
  3458  	// Create canaries, picking names at the ends of the index range (0 and 3)
  3459  	for i := 0; i < 4; i += 3 {
  3460  		// Create one canary
  3461  		canary := mock.Alloc()
  3462  		canary.Job = job
  3463  		canary.JobID = job.ID
  3464  		canary.NodeID = uuid.Generate()
  3465  		canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3466  		canary.TaskGroup = job.TaskGroups[0].Name
  3467  		s.PlacedCanaries = append(s.PlacedCanaries, canary.ID)
  3468  		canary.DeploymentID = d.ID
  3469  		allocs = append(allocs, canary)
  3470  	}
  3471  
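        	// The placed canaries hold indexes 0 and 3, so the reconciler should
        	// fill the gaps by placing canaries with indexes 1 and 2.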
  3472  	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, d, allocs, nil, "")
  3473  	r := reconciler.Compute()
  3474  
  3475  	// Assert the correct results
  3476  	assertResults(t, r, &resultExpectation{
  3477  		createDeployment:  nil,
  3478  		deploymentUpdates: nil,
  3479  		place:             2,
  3480  		inplace:           0,
  3481  		stop:              0,
  3482  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  3483  			job.TaskGroups[0].Name: {
  3484  				Canary: 2,
  3485  				Ignore: 12,
  3486  			},
  3487  		},
  3488  	})
  3489  
  3490  	assertNamesHaveIndexes(t, intRange(1, 2), placeResultsToNames(r.place))
  3491  }
  3492  
  3493  // Tests the reconciler handles canary promotion by unblocking max_parallel
  3494  func TestReconciler_PromoteCanaries_Unblock(t *testing.T) {
  3495  	job := mock.Job()
  3496  	job.TaskGroups[0].Update = canaryUpdate
  3497  
  3498  	// Create an existing deployment that has placed some canaries and mark them
  3499  	// promoted
  3500  	d := structs.NewDeployment(job)
  3501  	s := &structs.DeploymentState{
  3502  		Promoted:        true,
  3503  		DesiredTotal:    10,
  3504  		DesiredCanaries: 2,
  3505  		PlacedAllocs:    2,
  3506  	}
  3507  	d.TaskGroups[job.TaskGroups[0].Name] = s
  3508  
  3509  	// Create 10 allocations from the old job
  3510  	var allocs []*structs.Allocation
  3511  	for i := 0; i < 10; i++ {
  3512  		alloc := mock.Alloc()
  3513  		alloc.Job = job
  3514  		alloc.JobID = job.ID
  3515  		alloc.NodeID = uuid.Generate()
  3516  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3517  		alloc.TaskGroup = job.TaskGroups[0].Name
  3518  		allocs = append(allocs, alloc)
  3519  	}
  3520  
  3521  	// Create the canaries
  3522  	handled := make(map[string]allocUpdateType)
  3523  	for i := 0; i < 2; i++ {
  3524  		// Create one canary
  3525  		canary := mock.Alloc()
  3526  		canary.Job = job
  3527  		canary.JobID = job.ID
  3528  		canary.NodeID = uuid.Generate()
  3529  		canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3530  		canary.TaskGroup = job.TaskGroups[0].Name
  3531  		s.PlacedCanaries = append(s.PlacedCanaries, canary.ID)
  3532  		canary.DeploymentID = d.ID
  3533  		canary.DeploymentStatus = &structs.AllocDeploymentStatus{
  3534  			Healthy: helper.BoolToPtr(true),
  3535  		}
  3536  		allocs = append(allocs, canary)
  3537  		handled[canary.ID] = allocUpdateFnIgnore
  3538  	}
  3539  
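        	// After promotion the healthy canaries keep names 0 and 1, so their
        	// old counterparts are stopped and max_parallel (2) admits two
        	// destructive updates.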
  3540  	mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive)
  3541  	reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, d, allocs, nil, "")
  3542  	r := reconciler.Compute()
  3543  
  3544  	// Assert the correct results
  3545  	assertResults(t, r, &resultExpectation{
  3546  		createDeployment:  nil,
  3547  		deploymentUpdates: nil,
  3548  		destructive:       2,
  3549  		stop:              2,
  3550  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  3551  			job.TaskGroups[0].Name: {
  3552  				Stop:              2,
  3553  				DestructiveUpdate: 2,
  3554  				Ignore:            8,
  3555  			},
  3556  		},
  3557  	})
  3558  
  3559  	assertNoCanariesStopped(t, d, r.stop)
  3560  	assertNamesHaveIndexes(t, intRange(2, 3), destructiveResultsToNames(r.destructiveUpdate))
  3561  	assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop))
  3562  }
  3563  
  3564  // Tests the reconciler correctly handles canary promotion when the canary
  3565  // count equals the group count
  3566  func TestReconciler_PromoteCanaries_CanariesEqualCount(t *testing.T) {
  3567  	job := mock.Job()
  3568  	job.TaskGroups[0].Update = canaryUpdate
  3569  	job.TaskGroups[0].Count = 2
  3570  
  3571  	// Create an existing deployment that has placed some canaries and mark them
  3572  	// promoted
  3573  	d := structs.NewDeployment(job)
  3574  	s := &structs.DeploymentState{
  3575  		Promoted:        true,
  3576  		DesiredTotal:    2,
  3577  		DesiredCanaries: 2,
  3578  		PlacedAllocs:    2,
  3579  		HealthyAllocs:   2,
  3580  	}
  3581  	d.TaskGroups[job.TaskGroups[0].Name] = s
  3582  
  3583  	// Create 2 allocations from the old job
  3584  	var allocs []*structs.Allocation
  3585  	for i := 0; i < 2; i++ {
  3586  		alloc := mock.Alloc()
  3587  		alloc.Job = job
  3588  		alloc.JobID = job.ID
  3589  		alloc.NodeID = uuid.Generate()
  3590  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3591  		alloc.TaskGroup = job.TaskGroups[0].Name
  3592  		allocs = append(allocs, alloc)
  3593  	}
  3594  
  3595  	// Create the canaries
  3596  	handled := make(map[string]allocUpdateType)
  3597  	for i := 0; i < 2; i++ {
  3598  		// Create one canary
  3599  		canary := mock.Alloc()
  3600  		canary.Job = job
  3601  		canary.JobID = job.ID
  3602  		canary.NodeID = uuid.Generate()
  3603  		canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3604  		canary.TaskGroup = job.TaskGroups[0].Name
  3605  		s.PlacedCanaries = append(s.PlacedCanaries, canary.ID)
  3606  		canary.DeploymentID = d.ID
  3607  		canary.DeploymentStatus = &structs.AllocDeploymentStatus{
  3608  			Healthy: helper.BoolToPtr(true),
  3609  		}
  3610  		allocs = append(allocs, canary)
  3611  		handled[canary.ID] = allocUpdateFnIgnore
  3612  	}
  3613  
  3614  	mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive)
  3615  	reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, d, allocs, nil, "")
  3616  	r := reconciler.Compute()
  3617  
  3618  	updates := []*structs.DeploymentStatusUpdate{
  3619  		{
  3620  			DeploymentID:      d.ID,
  3621  			Status:            structs.DeploymentStatusSuccessful,
  3622  			StatusDescription: structs.DeploymentStatusDescriptionSuccessful,
  3623  		},
  3624  	}
  3625  
  3626  	// Assert the correct results
  3627  	assertResults(t, r, &resultExpectation{
  3628  		createDeployment:  nil,
  3629  		deploymentUpdates: updates,
  3630  		place:             0,
  3631  		inplace:           0,
  3632  		stop:              2,
  3633  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  3634  			job.TaskGroups[0].Name: {
  3635  				Stop:   2,
  3636  				Ignore: 2,
  3637  			},
  3638  		},
  3639  	})
  3640  
  3641  	assertNoCanariesStopped(t, d, r.stop)
  3642  	assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop))
  3643  }
  3644  
  3645  // Tests the reconciler checks the health of placed allocs to determine the
  3646  // limit
  3647  func TestReconciler_DeploymentLimit_HealthAccounting(t *testing.T) {
  3648  	job := mock.Job()
  3649  	job.TaskGroups[0].Update = noCanaryUpdate
  3650  
  3651  	cases := []struct {
  3652  		healthy int
  3653  	}{
  3654  		{
  3655  			healthy: 0,
  3656  		},
  3657  		{
  3658  			healthy: 1,
  3659  		},
  3660  		{
  3661  			healthy: 2,
  3662  		},
  3663  		{
  3664  			healthy: 3,
  3665  		},
  3666  		{
  3667  			healthy: 4,
  3668  		},
  3669  	}
  3670  
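        	// With max_parallel of 4 and 4 outstanding placements, each alloc that
        	// becomes healthy should free exactly one slot for a further
        	// destructive update.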
  3671  	for _, c := range cases {
  3672  		t.Run(fmt.Sprintf("%d healthy", c.healthy), func(t *testing.T) {
  3673  			// Create an existing promoted deployment that has placed 4 allocs
  3675  			d := structs.NewDeployment(job)
  3676  			d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  3677  				Promoted:     true,
  3678  				DesiredTotal: 10,
  3679  				PlacedAllocs: 4,
  3680  			}
  3681  
  3682  			// Create 6 allocations from the old job
  3683  			var allocs []*structs.Allocation
  3684  			for i := 4; i < 10; i++ {
  3685  				alloc := mock.Alloc()
  3686  				alloc.Job = job
  3687  				alloc.JobID = job.ID
  3688  				alloc.NodeID = uuid.Generate()
  3689  				alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3690  				alloc.TaskGroup = job.TaskGroups[0].Name
  3691  				allocs = append(allocs, alloc)
  3692  			}
  3693  
  3694  			// Create the new allocs
  3695  			handled := make(map[string]allocUpdateType)
  3696  			for i := 0; i < 4; i++ {
  3697  				new := mock.Alloc()
  3698  				new.Job = job
  3699  				new.JobID = job.ID
  3700  				new.NodeID = uuid.Generate()
  3701  				new.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3702  				new.TaskGroup = job.TaskGroups[0].Name
  3703  				new.DeploymentID = d.ID
  3704  				if i < c.healthy {
  3705  					new.DeploymentStatus = &structs.AllocDeploymentStatus{
  3706  						Healthy: helper.BoolToPtr(true),
  3707  					}
  3708  				}
  3709  				allocs = append(allocs, new)
  3710  				handled[new.ID] = allocUpdateFnIgnore
  3711  			}
  3712  
  3713  			mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive)
  3714  			reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, d, allocs, nil, "")
  3715  			r := reconciler.Compute()
  3716  
  3717  			// Assert the correct results
  3718  			assertResults(t, r, &resultExpectation{
  3719  				createDeployment:  nil,
  3720  				deploymentUpdates: nil,
  3721  				destructive:       c.healthy,
  3722  				desiredTGUpdates: map[string]*structs.DesiredUpdates{
  3723  					job.TaskGroups[0].Name: {
  3724  						DestructiveUpdate: uint64(c.healthy),
  3725  						Ignore:            uint64(10 - c.healthy),
  3726  					},
  3727  				},
  3728  			})
  3729  
  3730  			if c.healthy != 0 {
  3731  				assertNamesHaveIndexes(t, intRange(4, 3+c.healthy), destructiveResultsToNames(r.destructiveUpdate))
  3732  			}
  3733  		})
  3734  	}
  3735  }
  3736  
  3737  // Tests the reconciler handles an alloc on a tainted node during a rolling
  3738  // update
  3739  func TestReconciler_TaintedNode_RollingUpgrade(t *testing.T) {
  3740  	job := mock.Job()
  3741  	job.TaskGroups[0].Update = noCanaryUpdate
  3742  
  3743  	// Create an existing deployment that has some placed allocs
  3744  	d := structs.NewDeployment(job)
  3745  	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  3746  		Promoted:     true,
  3747  		DesiredTotal: 10,
  3748  		PlacedAllocs: 7,
  3749  	}
  3750  
  3751  	// Create 2 allocations from the old job
  3752  	var allocs []*structs.Allocation
  3753  	for i := 8; i < 10; i++ {
  3754  		alloc := mock.Alloc()
  3755  		alloc.Job = job
  3756  		alloc.JobID = job.ID
  3757  		alloc.NodeID = uuid.Generate()
  3758  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3759  		alloc.TaskGroup = job.TaskGroups[0].Name
  3760  		allocs = append(allocs, alloc)
  3761  	}
  3762  
  3763  	// Create the healthy replacements
  3764  	handled := make(map[string]allocUpdateType)
  3765  	for i := 0; i < 8; i++ {
  3766  		new := mock.Alloc()
  3767  		new.Job = job
  3768  		new.JobID = job.ID
  3769  		new.NodeID = uuid.Generate()
  3770  		new.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3771  		new.TaskGroup = job.TaskGroups[0].Name
  3772  		new.DeploymentID = d.ID
  3773  		new.DeploymentStatus = &structs.AllocDeploymentStatus{
  3774  			Healthy: helper.BoolToPtr(true),
  3775  		}
  3776  		allocs = append(allocs, new)
  3777  		handled[new.ID] = allocUpdateFnIgnore
  3778  	}
  3779  
  3780  	// Build a map of tainted nodes
  3781  	tainted := make(map[string]*structs.Node, 3)
  3782  	for i := 0; i < 3; i++ {
  3783  		n := mock.Node()
  3784  		n.ID = allocs[2+i].NodeID
  3785  		if i == 0 {
  3786  			n.Status = structs.NodeStatusDown
  3787  		} else {
  3788  			n.Drain = true
  3789  			allocs[2+i].DesiredTransition.Migrate = helper.BoolToPtr(true)
  3790  		}
  3791  		tainted[n.ID] = n
  3792  	}
  3793  
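        	// Expect the alloc on the down node to be lost (stop and place), the
        	// two draining allocs to migrate, and two destructive updates to
        	// proceed under max_parallel.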
  3794  	mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive)
  3795  	reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, d, allocs, tainted, "")
  3796  	r := reconciler.Compute()
  3797  
  3798  	// Assert the correct results
  3799  	assertResults(t, r, &resultExpectation{
  3800  		createDeployment:  nil,
  3801  		deploymentUpdates: nil,
  3802  		place:             3,
  3803  		destructive:       2,
  3804  		stop:              3,
  3805  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  3806  			job.TaskGroups[0].Name: {
  3807  				Place:             1, // Place the lost
  3808  				Stop:              1, // Stop the lost
  3809  				Migrate:           2, // Migrate the tainted
  3810  				DestructiveUpdate: 2,
  3811  				Ignore:            5,
  3812  			},
  3813  		},
  3814  	})
  3815  
  3816  	assertNamesHaveIndexes(t, intRange(8, 9), destructiveResultsToNames(r.destructiveUpdate))
  3817  	assertNamesHaveIndexes(t, intRange(0, 2), placeResultsToNames(r.place))
  3818  	assertNamesHaveIndexes(t, intRange(0, 2), stopResultsToNames(r.stop))
  3819  }
  3820  
  3821  // Tests the reconciler handles a failed deployment with allocs on tainted
  3822  // nodes
  3823  func TestReconciler_FailedDeployment_TaintedNodes(t *testing.T) {
  3824  	job := mock.Job()
  3825  	job.TaskGroups[0].Update = noCanaryUpdate
  3826  
  3827  	// Create an existing failed deployment that has some placed allocs
  3828  	d := structs.NewDeployment(job)
  3829  	d.Status = structs.DeploymentStatusFailed
  3830  	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  3831  		Promoted:     true,
  3832  		DesiredTotal: 10,
  3833  		PlacedAllocs: 4,
  3834  	}
  3835  
  3836  	// Create 6 allocations from the old job
  3837  	var allocs []*structs.Allocation
  3838  	for i := 4; i < 10; i++ {
  3839  		alloc := mock.Alloc()
  3840  		alloc.Job = job
  3841  		alloc.JobID = job.ID
  3842  		alloc.NodeID = uuid.Generate()
  3843  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3844  		alloc.TaskGroup = job.TaskGroups[0].Name
  3845  		allocs = append(allocs, alloc)
  3846  	}
  3847  
  3848  	// Create the healthy replacements
  3849  	handled := make(map[string]allocUpdateType)
  3850  	for i := 0; i < 4; i++ {
  3851  		new := mock.Alloc()
  3852  		new.Job = job
  3853  		new.JobID = job.ID
  3854  		new.NodeID = uuid.Generate()
  3855  		new.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3856  		new.TaskGroup = job.TaskGroups[0].Name
  3857  		new.DeploymentID = d.ID
  3858  		new.DeploymentStatus = &structs.AllocDeploymentStatus{
  3859  			Healthy: helper.BoolToPtr(true),
  3860  		}
  3861  		allocs = append(allocs, new)
  3862  		handled[new.ID] = allocUpdateFnIgnore
  3863  	}
  3864  
  3865  	// Build a map of tainted nodes
  3866  	tainted := make(map[string]*structs.Node, 2)
  3867  	for i := 0; i < 2; i++ {
  3868  		n := mock.Node()
  3869  		n.ID = allocs[6+i].NodeID
  3870  		if i == 0 {
  3871  			n.Status = structs.NodeStatusDown
  3872  		} else {
  3873  			n.Drain = true
  3874  			allocs[6+i].DesiredTransition.Migrate = helper.BoolToPtr(true)
  3875  		}
  3876  		tainted[n.ID] = n
  3877  	}
  3878  
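        	// Even though the deployment failed, allocs on tainted nodes should
        	// still be replaced: one lost (stop and place) and one migrated.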
  3879  	mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive)
  3880  	reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, d, allocs, tainted, "")
  3881  	r := reconciler.Compute()
  3882  
  3883  	// Assert the correct results
  3884  	assertResults(t, r, &resultExpectation{
  3885  		createDeployment:  nil,
  3886  		deploymentUpdates: nil,
  3887  		place:             2,
  3888  		inplace:           0,
  3889  		stop:              2,
  3890  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  3891  			job.TaskGroups[0].Name: {
  3892  				Place:   1,
  3893  				Migrate: 1,
  3894  				Stop:    1,
  3895  				Ignore:  8,
  3896  			},
  3897  		},
  3898  	})
  3899  
  3900  	assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place))
  3901  	assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop))
  3902  }
  3903  
  3904  // Tests the reconciler handles a run after a deployment has completed
  3905  // successfully.
  3906  func TestReconciler_CompleteDeployment(t *testing.T) {
  3907  	job := mock.Job()
  3908  	job.TaskGroups[0].Update = canaryUpdate
  3909  
  3910  	d := structs.NewDeployment(job)
  3911  	d.Status = structs.DeploymentStatusSuccessful
  3912  	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  3913  		Promoted:        true,
  3914  		DesiredTotal:    10,
  3915  		DesiredCanaries: 2,
  3916  		PlacedAllocs:    10,
  3917  		HealthyAllocs:   10,
  3918  	}
  3919  
  3920  	// Create 10 healthy allocations tied to the completed deployment
  3921  	var allocs []*structs.Allocation
  3922  	for i := 0; i < 10; i++ {
  3923  		alloc := mock.Alloc()
  3924  		alloc.Job = job
  3925  		alloc.JobID = job.ID
  3926  		alloc.NodeID = uuid.Generate()
  3927  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  3928  		alloc.TaskGroup = job.TaskGroups[0].Name
  3929  		alloc.DeploymentID = d.ID
  3930  		alloc.DeploymentStatus = &structs.AllocDeploymentStatus{
  3931  			Healthy: helper.BoolToPtr(true),
  3932  		}
  3933  		allocs = append(allocs, alloc)
  3934  	}
  3935  
  3936  	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, d, allocs, nil, "")
  3937  	r := reconciler.Compute()
  3938  
  3939  	// Assert the correct results
  3940  	assertResults(t, r, &resultExpectation{
  3941  		createDeployment:  nil,
  3942  		deploymentUpdates: nil,
  3943  		place:             0,
  3944  		inplace:           0,
  3945  		stop:              0,
  3946  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  3947  			job.TaskGroups[0].Name: {
  3948  				Ignore: 10,
  3949  			},
  3950  		},
  3951  	})
  3952  }
  3953  
  3954  // Tests that the reconciler marks a deployment as complete once there is
  3955  // nothing left to place even if there are failed allocations that are part of
  3956  // the deployment.
  3957  func TestReconciler_MarkDeploymentComplete_FailedAllocations(t *testing.T) {
  3958  	job := mock.Job()
  3959  	job.TaskGroups[0].Update = noCanaryUpdate
  3960  
  3961  	d := structs.NewDeployment(job)
  3962  	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  3963  		DesiredTotal:  10,
  3964  		PlacedAllocs:  20,
  3965  		HealthyAllocs: 10,
  3966  	}
  3967  
  3968  	// Create 10 healthy allocs and 10 allocs that are failed
  3969  	var allocs []*structs.Allocation
  3970  	for i := 0; i < 20; i++ {
  3971  		alloc := mock.Alloc()
  3972  		alloc.Job = job
  3973  		alloc.JobID = job.ID
  3974  		alloc.NodeID = uuid.Generate()
  3975  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i%10))
  3976  		alloc.TaskGroup = job.TaskGroups[0].Name
  3977  		alloc.DeploymentID = d.ID
  3978  		alloc.DeploymentStatus = &structs.AllocDeploymentStatus{}
  3979  		if i < 10 {
  3980  			alloc.ClientStatus = structs.AllocClientStatusRunning
  3981  			alloc.DeploymentStatus.Healthy = helper.BoolToPtr(true)
  3982  		} else {
  3983  			alloc.DesiredStatus = structs.AllocDesiredStatusStop
  3984  			alloc.ClientStatus = structs.AllocClientStatusFailed
  3985  			alloc.DeploymentStatus.Healthy = helper.BoolToPtr(false)
  3986  		}
  3987  
  3988  		allocs = append(allocs, alloc)
  3989  	}
  3990  
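        	// The failed allocs are already marked for stopping, so nothing is
        	// left to place and the deployment should be marked successful.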
  3991  	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, d, allocs, nil, "")
  3992  	r := reconciler.Compute()
  3993  
  3994  	updates := []*structs.DeploymentStatusUpdate{
  3995  		{
  3996  			DeploymentID:      d.ID,
  3997  			Status:            structs.DeploymentStatusSuccessful,
  3998  			StatusDescription: structs.DeploymentStatusDescriptionSuccessful,
  3999  		},
  4000  	}
  4001  
  4002  	// Assert the correct results
  4003  	assertResults(t, r, &resultExpectation{
  4004  		createDeployment:  nil,
  4005  		deploymentUpdates: updates,
  4006  		place:             0,
  4007  		inplace:           0,
  4008  		stop:              0,
  4009  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  4010  			job.TaskGroups[0].Name: {
  4011  				Ignore: 10,
  4012  			},
  4013  		},
  4014  	})
  4015  }
  4016  
  4017  // Test that a failed deployment cancels non-promoted canaries
  4018  func TestReconciler_FailedDeployment_CancelCanaries(t *testing.T) {
  4019  	// Create a job with two task groups
  4020  	job := mock.Job()
  4021  	job.TaskGroups[0].Update = canaryUpdate
  4022  	job.TaskGroups = append(job.TaskGroups, job.TaskGroups[0].Copy())
  4023  	job.TaskGroups[1].Name = "two"
  4024  
  4025  	// Create an existing failed deployment that has promoted one task group
  4026  	d := structs.NewDeployment(job)
  4027  	d.Status = structs.DeploymentStatusFailed
  4028  	s0 := &structs.DeploymentState{
  4029  		Promoted:        true,
  4030  		DesiredTotal:    10,
  4031  		DesiredCanaries: 2,
  4032  		PlacedAllocs:    4,
  4033  	}
  4034  	s1 := &structs.DeploymentState{
  4035  		Promoted:        false,
  4036  		DesiredTotal:    10,
  4037  		DesiredCanaries: 2,
  4038  		PlacedAllocs:    2,
  4039  	}
  4040  	d.TaskGroups[job.TaskGroups[0].Name] = s0
  4041  	d.TaskGroups[job.TaskGroups[1].Name] = s1
  4042  
  4043  	// Create the allocations for both groups: healthy replacements from the
        	// deployment plus the remaining allocs from the old job
  4044  	var allocs []*structs.Allocation
  4045  	handled := make(map[string]allocUpdateType)
  4046  	for _, group := range []int{0, 1} {
  4047  		replacements := 4
  4048  		state := s0
  4049  		if group == 1 {
  4050  			replacements = 2
  4051  			state = s1
  4052  		}
  4053  
  4054  		// Create the healthy replacements
  4055  		for i := 0; i < replacements; i++ {
  4056  			new := mock.Alloc()
  4057  			new.Job = job
  4058  			new.JobID = job.ID
  4059  			new.NodeID = uuid.Generate()
  4060  			new.Name = structs.AllocName(job.ID, job.TaskGroups[group].Name, uint(i))
  4061  			new.TaskGroup = job.TaskGroups[group].Name
  4062  			new.DeploymentID = d.ID
  4063  			new.DeploymentStatus = &structs.AllocDeploymentStatus{
  4064  				Healthy: helper.BoolToPtr(true),
  4065  			}
  4066  			allocs = append(allocs, new)
  4067  			handled[new.ID] = allocUpdateFnIgnore
  4068  
  4069  			// Add the alloc to the canary list
  4070  			if i < 2 {
  4071  				state.PlacedCanaries = append(state.PlacedCanaries, new.ID)
  4072  			}
  4073  		}
  4074  		for i := replacements; i < 10; i++ {
  4075  			alloc := mock.Alloc()
  4076  			alloc.Job = job
  4077  			alloc.JobID = job.ID
  4078  			alloc.NodeID = uuid.Generate()
  4079  			alloc.Name = structs.AllocName(job.ID, job.TaskGroups[group].Name, uint(i))
  4080  			alloc.TaskGroup = job.TaskGroups[group].Name
  4081  			allocs = append(allocs, alloc)
  4082  		}
  4083  	}
  4084  
  4085  	mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive)
  4086  	reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, d, allocs, nil, "")
  4087  	r := reconciler.Compute()
  4088  
  4089  	// Assert the correct results
  4090  	assertResults(t, r, &resultExpectation{
  4091  		createDeployment:  nil,
  4092  		deploymentUpdates: nil,
  4093  		place:             0,
  4094  		inplace:           0,
  4095  		stop:              2,
  4096  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  4097  			job.TaskGroups[0].Name: {
  4098  				Ignore: 10,
  4099  			},
  4100  			job.TaskGroups[1].Name: {
  4101  				Stop:   2,
  4102  				Ignore: 8,
  4103  			},
  4104  		},
  4105  	})
  4106  
  4107  	assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop))
  4108  }
  4109  
  4110  // Test that a failed deployment followed by an updated job rolls forward
  4111  func TestReconciler_FailedDeployment_NewJob(t *testing.T) {
  4112  	job := mock.Job()
  4113  	job.TaskGroups[0].Update = noCanaryUpdate
  4114  
  4115  	// Create an existing failed deployment that has some placed allocs
  4116  	d := structs.NewDeployment(job)
  4117  	d.Status = structs.DeploymentStatusFailed
  4118  	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  4119  		Promoted:     true,
  4120  		DesiredTotal: 10,
  4121  		PlacedAllocs: 4,
  4122  	}
  4123  
  4124  	// Create 6 allocations from the old job
  4125  	var allocs []*structs.Allocation
  4126  	for i := 4; i < 10; i++ {
  4127  		alloc := mock.Alloc()
  4128  		alloc.Job = job
  4129  		alloc.JobID = job.ID
  4130  		alloc.NodeID = uuid.Generate()
  4131  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  4132  		alloc.TaskGroup = job.TaskGroups[0].Name
  4133  		allocs = append(allocs, alloc)
  4134  	}
  4135  
  4136  	// Create the healthy replacements
  4137  	for i := 0; i < 4; i++ {
  4138  		new := mock.Alloc()
  4139  		new.Job = job
  4140  		new.JobID = job.ID
  4141  		new.NodeID = uuid.Generate()
  4142  		new.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  4143  		new.TaskGroup = job.TaskGroups[0].Name
  4144  		new.DeploymentID = d.ID
  4145  		new.DeploymentStatus = &structs.AllocDeploymentStatus{
  4146  			Healthy: helper.BoolToPtr(true),
  4147  		}
  4148  		allocs = append(allocs, new)
  4149  	}
  4150  
  4151  	// Up the job version
  4152  	jobNew := job.Copy()
  4153  	jobNew.Version += 100
  4154  
  4155  	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, jobNew, d, allocs, nil, "")
  4156  	r := reconciler.Compute()
  4157  
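        	// The failed deployment should be abandoned: a new deployment is
        	// created for the updated job and max_parallel (4) destructive updates
        	// begin.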
  4158  	dnew := structs.NewDeployment(jobNew)
  4159  	dnew.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  4160  		DesiredTotal: 10,
  4161  	}
  4162  
  4163  	// Assert the correct results
  4164  	assertResults(t, r, &resultExpectation{
  4165  		createDeployment:  dnew,
  4166  		deploymentUpdates: nil,
  4167  		destructive:       4,
  4168  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  4169  			job.TaskGroups[0].Name: {
  4170  				DestructiveUpdate: 4,
  4171  				Ignore:            6,
  4172  			},
  4173  		},
  4174  	})
  4175  
  4176  	assertNamesHaveIndexes(t, intRange(0, 3), destructiveResultsToNames(r.destructiveUpdate))
  4177  }
  4178  
  4179  // Tests the reconciler marks a deployment as complete
  4180  func TestReconciler_MarkDeploymentComplete(t *testing.T) {
  4181  	job := mock.Job()
  4182  	job.TaskGroups[0].Update = noCanaryUpdate
  4183  
  4184  	d := structs.NewDeployment(job)
  4185  	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  4186  		Promoted:      true,
  4187  		DesiredTotal:  10,
  4188  		PlacedAllocs:  10,
  4189  		HealthyAllocs: 10,
  4190  	}
  4191  
  4192  	// Create 10 healthy allocations tied to the deployment
  4193  	var allocs []*structs.Allocation
  4194  	for i := 0; i < 10; i++ {
  4195  		alloc := mock.Alloc()
  4196  		alloc.Job = job
  4197  		alloc.JobID = job.ID
  4198  		alloc.NodeID = uuid.Generate()
  4199  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  4200  		alloc.TaskGroup = job.TaskGroups[0].Name
  4201  		alloc.DeploymentID = d.ID
  4202  		alloc.DeploymentStatus = &structs.AllocDeploymentStatus{
  4203  			Healthy: helper.BoolToPtr(true),
  4204  		}
  4205  		allocs = append(allocs, alloc)
  4206  	}
  4207  
  4208  	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, d, allocs, nil, "")
  4209  	r := reconciler.Compute()
  4210  
  4211  	updates := []*structs.DeploymentStatusUpdate{
  4212  		{
  4213  			DeploymentID:      d.ID,
  4214  			Status:            structs.DeploymentStatusSuccessful,
  4215  			StatusDescription: structs.DeploymentStatusDescriptionSuccessful,
  4216  		},
  4217  	}
  4218  
  4219  	// Assert the correct results
  4220  	assertResults(t, r, &resultExpectation{
  4221  		createDeployment:  nil,
  4222  		deploymentUpdates: updates,
  4223  		place:             0,
  4224  		inplace:           0,
  4225  		stop:              0,
  4226  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  4227  			job.TaskGroups[0].Name: {
  4228  				Ignore: 10,
  4229  			},
  4230  		},
  4231  	})
  4232  }
  4233  
  4234  // Tests the reconciler handles a job change that scales up while a deployment
  4235  // is already in flight, as happens on a second evaluation.
  4236  func TestReconciler_JobChange_ScaleUp_SecondEval(t *testing.T) {
  4237  	// Scale the job up to 30
  4238  	job := mock.Job()
  4239  	job.TaskGroups[0].Update = noCanaryUpdate
  4240  	job.TaskGroups[0].Count = 30
  4241  
  4242  	// Create a deployment that has placed 20 of the 30 desired allocs
  4243  	d := structs.NewDeployment(job)
  4244  	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  4245  		Promoted:     false,
  4246  		DesiredTotal: 30,
  4247  		PlacedAllocs: 20,
  4248  	}
  4249  
  4250  	// Create 10 allocations from the old job
  4251  	var allocs []*structs.Allocation
  4252  	for i := 0; i < 10; i++ {
  4253  		alloc := mock.Alloc()
  4254  		alloc.Job = job
  4255  		alloc.JobID = job.ID
  4256  		alloc.NodeID = uuid.Generate()
  4257  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  4258  		alloc.TaskGroup = job.TaskGroups[0].Name
  4259  		allocs = append(allocs, alloc)
  4260  	}
  4261  
  4262  	// Create 20 from new job
  4263  	handled := make(map[string]allocUpdateType)
  4264  	for i := 10; i < 30; i++ {
  4265  		alloc := mock.Alloc()
  4266  		alloc.Job = job
  4267  		alloc.JobID = job.ID
  4268  		alloc.DeploymentID = d.ID
  4269  		alloc.NodeID = uuid.Generate()
  4270  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  4271  		alloc.TaskGroup = job.TaskGroups[0].Name
  4272  		allocs = append(allocs, alloc)
  4273  		handled[alloc.ID] = allocUpdateFnIgnore
  4274  	}
  4275  
  4276  	mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive)
  4277  	reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, d, allocs, nil, "")
  4278  	r := reconciler.Compute()
  4279  
  4280  	// Assert the correct results
  4281  	assertResults(t, r, &resultExpectation{
  4282  		createDeployment:  nil,
  4283  		deploymentUpdates: nil,
  4284  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  4285  			job.TaskGroups[0].Name: {
  4286  				// All should be ignored because nothing has been marked as
  4287  				// healthy.
  4288  				Ignore: 30,
  4289  			},
  4290  		},
  4291  	})
  4292  }
  4293  
  4294  // Tests the reconciler doesn't stop allocations when doing a rolling upgrade
  4295  // where the count of the old job allocs is < desired count.
  4296  func TestReconciler_RollingUpgrade_MissingAllocs(t *testing.T) {
  4297  	job := mock.Job()
  4298  	job.TaskGroups[0].Update = noCanaryUpdate
  4299  
  4300  	// Create 7 allocations from the old job
  4301  	var allocs []*structs.Allocation
  4302  	for i := 0; i < 7; i++ {
  4303  		alloc := mock.Alloc()
  4304  		alloc.Job = job
  4305  		alloc.JobID = job.ID
  4306  		alloc.NodeID = uuid.Generate()
  4307  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  4308  		alloc.TaskGroup = job.TaskGroups[0].Name
  4309  		allocs = append(allocs, alloc)
  4310  	}
  4311  
  4312  	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "")
  4313  	r := reconciler.Compute()
  4314  
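        	// The three placements fill the missing names (7-9); together with the
        	// single destructive update they use up max_parallel (4).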
  4315  	d := structs.NewDeployment(job)
  4316  	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  4317  		DesiredTotal: 10,
  4318  	}
  4319  
  4320  	// Assert the correct results
  4321  	assertResults(t, r, &resultExpectation{
  4322  		createDeployment:  d,
  4323  		deploymentUpdates: nil,
  4324  		place:             3,
  4325  		destructive:       1,
  4326  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  4327  			job.TaskGroups[0].Name: {
  4328  				Place:             3,
  4329  				DestructiveUpdate: 1,
  4330  				Ignore:            6,
  4331  			},
  4332  		},
  4333  	})
  4334  
  4335  	assertNamesHaveIndexes(t, intRange(7, 9), placeResultsToNames(r.place))
  4336  	assertNamesHaveIndexes(t, intRange(0, 0), destructiveResultsToNames(r.destructiveUpdate))
  4337  }
  4338  
  4339  // Tests that the reconciler handles rerunning a batch job in the case that the
  4340  // allocations are from an older instance of the job.
  4341  func TestReconciler_Batch_Rerun(t *testing.T) {
  4342  	job := mock.Job()
  4343  	job.Type = structs.JobTypeBatch
  4344  	job.TaskGroups[0].Update = nil
  4345  
  4346  	// Create 10 allocations from the old job and have them be complete
  4347  	var allocs []*structs.Allocation
  4348  	for i := 0; i < 10; i++ {
  4349  		alloc := mock.Alloc()
  4350  		alloc.Job = job
  4351  		alloc.JobID = job.ID
  4352  		alloc.NodeID = uuid.Generate()
  4353  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  4354  		alloc.TaskGroup = job.TaskGroups[0].Name
  4355  		alloc.ClientStatus = structs.AllocClientStatusComplete
  4356  		alloc.DesiredStatus = structs.AllocDesiredStatusStop
  4357  		allocs = append(allocs, alloc)
  4358  	}
  4359  
  4360  	// Create a copy of the job that is "new"
  4361  	job2 := job.Copy()
  4362  	job2.CreateIndex++
  4363  
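        	// The completed allocs belong to the older instance of the batch job,
        	// so all 10 names should be placed again.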
  4364  	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, true, job2.ID, job2, nil, allocs, nil, "")
  4365  	r := reconciler.Compute()
  4366  
  4367  	// Assert the correct results
  4368  	assertResults(t, r, &resultExpectation{
  4369  		createDeployment:  nil,
  4370  		deploymentUpdates: nil,
  4371  		place:             10,
  4372  		destructive:       0,
  4373  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  4374  			job.TaskGroups[0].Name: {
  4375  				Place:             10,
  4376  				DestructiveUpdate: 0,
  4377  				Ignore:            10,
  4378  			},
  4379  		},
  4380  	})
  4381  
  4382  	assertNamesHaveIndexes(t, intRange(0, 9), placeResultsToNames(r.place))
  4383  }
  4384  
  4385  // Test that a failed deployment will not result in rescheduling failed allocations
  4386  func TestReconciler_FailedDeployment_DontReschedule(t *testing.T) {
  4387  	job := mock.Job()
  4388  	job.TaskGroups[0].Update = noCanaryUpdate
  4389  
  4390  	tgName := job.TaskGroups[0].Name
  4391  	now := time.Now()
  4392  	// Create an existing failed deployment that has some placed allocs
  4393  	d := structs.NewDeployment(job)
  4394  	d.Status = structs.DeploymentStatusFailed
  4395  	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  4396  		Promoted:     true,
  4397  		DesiredTotal: 5,
  4398  		PlacedAllocs: 4,
  4399  	}
  4400  
  4401  	// Create 4 allocations and mark two as failed
  4402  	var allocs []*structs.Allocation
  4403  	for i := 0; i < 4; i++ {
  4404  		alloc := mock.Alloc()
  4405  		alloc.Job = job
  4406  		alloc.JobID = job.ID
  4407  		alloc.NodeID = uuid.Generate()
  4408  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  4409  		alloc.TaskGroup = job.TaskGroups[0].Name
  4410  		alloc.DeploymentID = d.ID
  4411  		allocs = append(allocs, alloc)
  4412  	}
  4413  
  4414  	// Mark two of the allocations as failed and reschedulable now
  4415  	allocs[2].ClientStatus = structs.AllocClientStatusFailed
  4416  	allocs[2].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
  4417  		StartedAt:  now.Add(-1 * time.Hour),
  4418  		FinishedAt: now.Add(-10 * time.Second)}}
  4419  
  4420  	allocs[3].ClientStatus = structs.AllocClientStatusFailed
  4421  	allocs[3].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
  4422  		StartedAt:  now.Add(-1 * time.Hour),
  4423  		FinishedAt: now.Add(-10 * time.Second)}}
  4424  
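        	// Because the deployment itself has failed, these failed allocs should
        	// not be rescheduled.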
  4425  	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, d, allocs, nil, "")
  4426  	r := reconciler.Compute()
  4427  
  4428  	// Assert that no rescheduled placements were created
  4429  	assertResults(t, r, &resultExpectation{
  4430  		place:             0,
  4431  		createDeployment:  nil,
  4432  		deploymentUpdates: nil,
  4433  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  4434  			job.TaskGroups[0].Name: {
  4435  				Ignore: 2,
  4436  			},
  4437  		},
  4438  	})
  4439  }
  4440  
  4441  // Test that a running deployment with failed allocs will not result in
  4442  // rescheduling failed allocations unless they are marked as reschedulable.
  4443  func TestReconciler_DeploymentWithFailedAllocs_DontReschedule(t *testing.T) {
  4444  	job := mock.Job()
  4445  	job.TaskGroups[0].Update = noCanaryUpdate
  4446  	tgName := job.TaskGroups[0].Name
  4447  	now := time.Now()
  4448  
  4449  	// Mock deployment with failed allocs, but deployment watcher hasn't marked it as failed yet
  4450  	d := structs.NewDeployment(job)
  4451  	d.Status = structs.DeploymentStatusRunning
  4452  	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  4453  		Promoted:     false,
  4454  		DesiredTotal: 10,
  4455  		PlacedAllocs: 10,
  4456  	}
  4457  
  4458  	// Create 10 allocations
  4459  	var allocs []*structs.Allocation
  4460  	for i := 0; i < 10; i++ {
  4461  		alloc := mock.Alloc()
  4462  		alloc.Job = job
  4463  		alloc.JobID = job.ID
  4464  		alloc.NodeID = uuid.Generate()
  4465  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  4466  		alloc.TaskGroup = job.TaskGroups[0].Name
  4467  		alloc.DeploymentID = d.ID
  4468  		alloc.ClientStatus = structs.AllocClientStatusFailed
  4469  		alloc.TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
  4470  			StartedAt:  now.Add(-1 * time.Hour),
  4471  			FinishedAt: now.Add(-10 * time.Second)}}
  4472  		allocs = append(allocs, alloc)
  4473  	}
  4474  
  4475  	// Mark half of them as reschedulable
  4476  	for i := 0; i < 5; i++ {
  4477  		allocs[i].DesiredTransition.Reschedule = helper.BoolToPtr(true)
  4478  	}
  4479  
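        	// Only the five allocs explicitly marked for rescheduling should be
        	// replaced while the deployment is still running.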
  4480  	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, d, allocs, nil, "")
  4481  	r := reconciler.Compute()
  4482  
  4483  	// Assert that only the allocs marked reschedulable are replaced
  4484  	assertResults(t, r, &resultExpectation{
  4485  		place:             5,
  4486  		stop:              5,
  4487  		createDeployment:  nil,
  4488  		deploymentUpdates: nil,
  4489  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  4490  			job.TaskGroups[0].Name: {
  4491  				Place:  5,
  4492  				Stop:   5,
  4493  				Ignore: 5,
  4494  			},
  4495  		},
  4496  	})
  4497  }
  4498  
  4499  // Test that a deployment that completed after an auto-revert is marked
        // successful and the stopped allocs from the reverted version are ignored
  4500  func TestReconciler_FailedDeployment_AutoRevert_CancelCanaries(t *testing.T) {
  4501  	// Create a job
  4502  	job := mock.Job()
  4503  	job.TaskGroups[0].Count = 3
  4504  	job.TaskGroups[0].Update = &structs.UpdateStrategy{
  4505  		Canary:          3,
  4506  		MaxParallel:     2,
  4507  		HealthCheck:     structs.UpdateStrategyHealthCheck_Checks,
  4508  		MinHealthyTime:  10 * time.Second,
  4509  		HealthyDeadline: 10 * time.Minute,
  4510  		Stagger:         31 * time.Second,
  4511  	}
  4512  
  4513  	// Create v1 of the job
  4514  	jobv1 := job.Copy()
  4515  	jobv1.Version = 1
  4516  	jobv1.TaskGroups[0].Meta = map[string]string{"version": "1"}
  4517  
  4518  	// Create v2 of the job
  4519  	jobv2 := job.Copy()
  4520  	jobv2.Version = 2
  4521  	jobv2.TaskGroups[0].Meta = map[string]string{"version": "2"}
  4522  
  4523  	d := structs.NewDeployment(jobv2)
  4524  	state := &structs.DeploymentState{
  4525  		Promoted:      true,
  4526  		DesiredTotal:  3,
  4527  		PlacedAllocs:  3,
  4528  		HealthyAllocs: 3,
  4529  	}
  4530  	d.TaskGroups[job.TaskGroups[0].Name] = state
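        	// The v2 deployment is fully placed, healthy, and promoted, so the
        	// reconciler should be able to mark it successful.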
  4531  
  4532  	// Create 3 healthy allocations for the updated job (v2) under the new deployment
  4533  	var allocs []*structs.Allocation
  4534  	for i := 0; i < 3; i++ {
  4535  		alloc := mock.Alloc()
  4536  		alloc.Job = jobv2
  4537  		alloc.JobID = job.ID
  4538  		alloc.NodeID = uuid.Generate()
  4539  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  4540  		alloc.TaskGroup = job.TaskGroups[0].Name
  4541  		alloc.DeploymentID = d.ID
  4542  		alloc.DeploymentStatus = &structs.AllocDeploymentStatus{
  4543  			Healthy: helper.BoolToPtr(true),
  4544  		}
  4545  		alloc.ClientStatus = structs.AllocClientStatusRunning
  4546  		allocs = append(allocs, alloc)
  4548  	}
  4549  	for i := 0; i < 3; i++ {
  4550  		alloc := mock.Alloc()
  4551  		alloc.Job = jobv1
  4552  		alloc.JobID = jobv1.ID
  4553  		alloc.NodeID = uuid.Generate()
  4554  		alloc.Name = structs.AllocName(jobv1.ID, jobv1.TaskGroups[0].Name, uint(i))
  4555  		alloc.TaskGroup = job.TaskGroups[0].Name
  4556  		alloc.DeploymentID = uuid.Generate()
  4557  		alloc.DeploymentStatus = &structs.AllocDeploymentStatus{
  4558  			Healthy: helper.BoolToPtr(false),
  4559  		}
  4560  		alloc.DesiredStatus = structs.AllocDesiredStatusStop
  4561  		alloc.ClientStatus = structs.AllocClientStatusFailed
  4562  		allocs = append(allocs, alloc)
  4563  	}
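        	// The v1 allocs belong to a different deployment, are failed, and are
        	// already marked for stop, so they are terminal and should generate no
        	// further work for the reconciler.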
  4564  
  4565  	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, jobv2, d, allocs, nil, "")
  4566  	r := reconciler.Compute()
  4567  
  4568  	updates := []*structs.DeploymentStatusUpdate{
  4569  		{
  4570  			DeploymentID:      d.ID,
  4571  			Status:            structs.DeploymentStatusSuccessful,
  4572  			StatusDescription: structs.DeploymentStatusDescriptionSuccessful,
  4573  		},
  4574  	}
  4575  
  4576  	// Assert the correct results
  4577  	assertResults(t, r, &resultExpectation{
  4578  		createDeployment:  nil,
  4579  		deploymentUpdates: updates,
  4580  		place:             0,
  4581  		inplace:           0,
  4582  		stop:              0,
  4583  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  4584  			job.TaskGroups[0].Name: {
  4585  				Stop:          0,
  4586  				InPlaceUpdate: 0,
  4587  				Ignore:        3,
  4588  			},
  4589  		},
  4590  	})
  4591  }
  4592  
  4593  // Test that a successful deployment with failed allocs will result in
  4594  // rescheduling failed allocations
  4595  func TestReconciler_SuccessfulDeploymentWithFailedAllocs_Reschedule(t *testing.T) {
  4596  	job := mock.Job()
  4597  	job.TaskGroups[0].Update = noCanaryUpdate
  4598  	tgName := job.TaskGroups[0].Name
  4599  	now := time.Now()
  4600  
  4601  	// Mock a deployment that has already been marked successful but still has failed allocs
  4602  	d := structs.NewDeployment(job)
  4603  	d.Status = structs.DeploymentStatusSuccessful
  4604  	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
  4605  		Promoted:     false,
  4606  		DesiredTotal: 10,
  4607  		PlacedAllocs: 10,
  4608  	}
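        	// Unlike the running-deployment case above, a deployment that has
        	// already succeeded no longer shields its failed allocs, so all of
        	// them become candidates for rescheduling.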
  4609  
  4610  	// Create 10 allocations
  4611  	var allocs []*structs.Allocation
  4612  	for i := 0; i < 10; i++ {
  4613  		alloc := mock.Alloc()
  4614  		alloc.Job = job
  4615  		alloc.JobID = job.ID
  4616  		alloc.NodeID = uuid.Generate()
  4617  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  4618  		alloc.TaskGroup = job.TaskGroups[0].Name
  4619  		alloc.DeploymentID = d.ID
  4620  		alloc.ClientStatus = structs.AllocClientStatusFailed
  4621  		alloc.TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
  4622  			StartedAt:  now.Add(-1 * time.Hour),
  4623  			FinishedAt: now.Add(-10 * time.Second)}}
  4624  		allocs = append(allocs, alloc)
  4625  	}
  4626  
  4627  	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, d, allocs, nil, "")
  4628  	r := reconciler.Compute()
  4629  
  4630  	// Assert that rescheduled placements were created
  4631  	assertResults(t, r, &resultExpectation{
  4632  		place:             10,
  4633  		stop:              10,
  4634  		createDeployment:  nil,
  4635  		deploymentUpdates: nil,
  4636  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  4637  			job.TaskGroups[0].Name: {
  4638  				Place:  10,
  4639  				Stop:   10,
  4640  				Ignore: 0,
  4641  			},
  4642  		},
  4643  	})
  4644  	assertPlaceResultsHavePreviousAllocs(t, 10, r.place)
  4645  }
  4646  
  4647  // Tests force rescheduling a failed alloc that is past its reschedule limit
  4648  func TestReconciler_ForceReschedule_Service(t *testing.T) {
  4649  	require := require.New(t)
  4650  
  4651  	// Set desired 5
  4652  	job := mock.Job()
  4653  	job.TaskGroups[0].Count = 5
  4654  	tgName := job.TaskGroups[0].Name
  4655  
  4656  	// Set up reschedule policy and update stanza
  4657  	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
  4658  		Attempts:      1,
  4659  		Interval:      24 * time.Hour,
  4660  		Delay:         5 * time.Second,
  4661  		DelayFunction: "",
  4662  		MaxDelay:      1 * time.Hour,
  4663  		Unlimited:     false,
  4664  	}
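        	// One attempt per 24h: the RescheduleTracker event added below already
        	// consumes that attempt, putting the failed alloc past its limit.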
  4665  	job.TaskGroups[0].Update = noCanaryUpdate
  4666  
  4667  	// Create 5 existing allocations
  4668  	var allocs []*structs.Allocation
  4669  	for i := 0; i < 5; i++ {
  4670  		alloc := mock.Alloc()
  4671  		alloc.Job = job
  4672  		alloc.JobID = job.ID
  4673  		alloc.NodeID = uuid.Generate()
  4674  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  4675  		allocs = append(allocs, alloc)
  4676  		alloc.ClientStatus = structs.AllocClientStatusRunning
  4677  	}
  4678  
  4679  	// Mark one as failed and past its reschedule limit, so it is not normally eligible to reschedule
  4680  	allocs[0].ClientStatus = structs.AllocClientStatusFailed
  4681  	allocs[0].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
  4682  		{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
  4683  			PrevAllocID: uuid.Generate(),
  4684  			PrevNodeID:  uuid.Generate(),
  4685  		},
  4686  	}}
  4687  
  4688  	// Mark DesiredTransition ForceReschedule
  4689  	allocs[0].DesiredTransition = structs.DesiredTransition{ForceReschedule: helper.BoolToPtr(true)}
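        	// ForceReschedule overrides both the exhausted attempt limit and the
        	// reschedule delay, which is why no follow-up eval is expected below.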
  4690  
  4691  	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
  4692  	r := reconciler.Compute()
  4693  
  4694  	// Verify that no follow-up evals were created
  4695  	evals := r.desiredFollowupEvals[tgName]
  4696  	require.Nil(evals)
  4697  
  4698  	// Verify that one rescheduled alloc was created because of the forced reschedule
  4699  	assertResults(t, r, &resultExpectation{
  4700  		createDeployment:  nil,
  4701  		deploymentUpdates: nil,
  4702  		place:             1,
  4703  		stop:              1,
  4704  		inplace:           0,
  4705  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  4706  			job.TaskGroups[0].Name: {
  4707  				Place:  1,
  4708  				Stop:   1,
  4709  				Ignore: 4,
  4710  			},
  4711  		},
  4712  	})
  4713  
  4714  	// Rescheduled allocs should have previous allocs
  4715  	assertNamesHaveIndexes(t, intRange(0, 0), placeResultsToNames(r.place))
  4716  	assertPlaceResultsHavePreviousAllocs(t, 1, r.place)
  4717  	assertPlacementsAreRescheduled(t, 1, r.place)
  4718  }
  4719  
  4720  // Tests behavior of service failure with rescheduling policy preventing rescheduling:
  4721  // new allocs should be placed to satisfy the job count, and current allocations are
  4722  // left unmodified
  4723  func TestReconciler_RescheduleNot_Service(t *testing.T) {
  4724  	require := require.New(t)
  4725  
  4726  	// Set desired 5
  4727  	job := mock.Job()
  4728  	job.TaskGroups[0].Count = 5
  4729  	tgName := job.TaskGroups[0].Name
  4730  	now := time.Now()
  4731  
  4732  	// Set up reschedule policy and update stanza
  4733  	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
  4734  		Attempts:      0,
  4735  		Interval:      24 * time.Hour,
  4736  		Delay:         5 * time.Second,
  4737  		DelayFunction: "",
  4738  		MaxDelay:      1 * time.Hour,
  4739  		Unlimited:     false,
  4740  	}
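        	// Attempts: 0 with Unlimited: false disables rescheduling for this
        	// group entirely; failed allocs should only ever be ignored.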
  4741  	job.TaskGroups[0].Update = noCanaryUpdate
  4742  
  4743  	// Create 5 existing allocations
  4744  	var allocs []*structs.Allocation
  4745  	for i := 0; i < 5; i++ {
  4746  		alloc := mock.Alloc()
  4747  		alloc.Job = job
  4748  		alloc.JobID = job.ID
  4749  		alloc.NodeID = uuid.Generate()
  4750  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  4751  		allocs = append(allocs, alloc)
  4752  		alloc.ClientStatus = structs.AllocClientStatusRunning
  4753  	}
  4754  
  4755  	// Mark two as failed
  4756  	allocs[0].ClientStatus = structs.AllocClientStatusFailed
  4757  
  4758  	// Mark one of them as already rescheduled once
  4759  	allocs[0].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
  4760  		{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
  4761  			PrevAllocID: uuid.Generate(),
  4762  			PrevNodeID:  uuid.Generate(),
  4763  		},
  4764  	}}
  4765  	allocs[1].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
  4766  		StartedAt:  now.Add(-1 * time.Hour),
  4767  		FinishedAt: now.Add(-10 * time.Second)}}
  4768  	allocs[1].ClientStatus = structs.AllocClientStatusFailed
  4769  
  4770  	// Mark one as desired state stop
  4771  	allocs[4].DesiredStatus = structs.AllocDesiredStatusStop
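        	// Replacing a user-stopped alloc to maintain the group count is a
        	// plain placement rather than a reschedule, so the policy above does
        	// not prevent it.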
  4772  
  4773  	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
  4774  	r := reconciler.Compute()
  4775  
  4776  	// Verify that no follow-up evals were created
  4777  	evals := r.desiredFollowupEvals[tgName]
  4778  	require.Nil(evals)
  4779  
  4780  	// No rescheduling should occur: the four remaining allocs are ignored,
  4781  	// but one placement substitutes for allocs[4], which was stopped explicitly
  4782  	assertResults(t, r, &resultExpectation{
  4783  		createDeployment:  nil,
  4784  		deploymentUpdates: nil,
  4785  		place:             1,
  4786  		inplace:           0,
  4787  		stop:              0,
  4788  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  4789  			job.TaskGroups[0].Name: {
  4790  				Place:  1,
  4791  				Ignore: 4,
  4792  				Stop:   0,
  4793  			},
  4794  		},
  4795  	})
  4796  
  4797  	// None of the placements should have previous allocs or be marked as rescheduled
  4798  	assertPlaceResultsHavePreviousAllocs(t, 0, r.place)
  4799  	assertPlacementsAreRescheduled(t, 0, r.place)
  4800  }
  4801  
  4802  // Tests behavior of batch failure with rescheduling policy preventing rescheduling:
  4803  // current allocations are left unmodified and no follow-up evals are created
  4804  func TestReconciler_RescheduleNot_Batch(t *testing.T) {
  4805  	require := require.New(t)
  4806  	// Set desired 4
  4807  	job := mock.Job()
  4808  	job.TaskGroups[0].Count = 4
  4809  	now := time.Now()
  4810  	// Set up reschedule policy
  4811  	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
  4812  		Attempts:      0,
  4813  		Interval:      24 * time.Hour,
  4814  		Delay:         5 * time.Second,
  4815  		DelayFunction: "constant",
  4816  	}
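        	// Attempts: 0 disables rescheduling here as well; the constant
        	// DelayFunction never comes into play since no attempts are allowed.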
  4817  	tgName := job.TaskGroups[0].Name
  4818  	// Create 6 existing allocations - 2 running, 1 complete and 3 failed
  4819  	var allocs []*structs.Allocation
  4820  	for i := 0; i < 6; i++ {
  4821  		alloc := mock.Alloc()
  4822  		alloc.Job = job
  4823  		alloc.JobID = job.ID
  4824  		alloc.NodeID = uuid.Generate()
  4825  		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
  4826  		allocs = append(allocs, alloc)
  4827  		alloc.ClientStatus = structs.AllocClientStatusRunning
  4828  	}
  4829  	// Mark 3 as failed with reschedule tracking info
  4830  	allocs[0].ClientStatus = structs.AllocClientStatusFailed
  4831  	allocs[0].NextAllocation = allocs[1].ID
  4832  	allocs[1].ClientStatus = structs.AllocClientStatusFailed
  4833  	allocs[1].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
  4834  		{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
  4835  			PrevAllocID: allocs[0].ID,
  4836  			PrevNodeID:  uuid.Generate(),
  4837  		},
  4838  	}}
  4839  	allocs[1].NextAllocation = allocs[2].ID
  4840  	allocs[2].ClientStatus = structs.AllocClientStatusFailed
  4841  	allocs[2].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
  4842  		StartedAt:  now.Add(-1 * time.Hour),
  4843  		FinishedAt: now.Add(-5 * time.Second)}}
  4844  	allocs[2].FollowupEvalID = uuid.Generate()
  4845  	allocs[2].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
  4846  		{RescheduleTime: time.Now().Add(-2 * time.Hour).UTC().UnixNano(),
  4847  			PrevAllocID: allocs[0].ID,
  4848  			PrevNodeID:  uuid.Generate(),
  4849  		},
  4850  		{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
  4851  			PrevAllocID: allocs[1].ID,
  4852  			PrevNodeID:  uuid.Generate(),
  4853  		},
  4854  	}}
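        	// allocs[0] and allocs[1] each record a NextAllocation, marking them
        	// as already replaced; allocs[2] carries the reschedule history of the
        	// whole chain.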
  4855  	// Mark one as complete
  4856  	allocs[5].ClientStatus = structs.AllocClientStatusComplete
  4857  
  4858  	reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, true, job.ID, job, nil, allocs, nil, "")
  4859  	reconciler.now = now
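        	// Pinning reconciler.now above keeps the eligibility checks against
        	// the task FinishedAt timestamps deterministic.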
  4860  	r := reconciler.Compute()
  4861  
  4862  	// Verify that no follow-up evals were created
  4863  	evals := r.desiredFollowupEvals[tgName]
  4864  	require.Nil(evals)
  4865  
  4866  	// No reschedule attempts were made and all allocs are untouched
  4867  	assertResults(t, r, &resultExpectation{
  4868  		createDeployment:  nil,
  4869  		deploymentUpdates: nil,
  4870  		place:             0,
  4871  		stop:              0,
  4872  		inplace:           0,
  4873  		desiredTGUpdates: map[string]*structs.DesiredUpdates{
  4874  			job.TaskGroups[0].Name: {
  4875  				Place:  0,
  4876  				Stop:   0,
  4877  				Ignore: 4,
  4878  			},
  4879  		},
  4880  	})
  4882  }