github.com/smithx10/nomad@v0.9.1-rc1/nomad/deploymentwatcher/deployments_watcher_test.go (about)

     1  package deploymentwatcher
     2  
     3  import (
     4  	"fmt"
     5  	"testing"
     6  	"time"
     7  
     8  	memdb "github.com/hashicorp/go-memdb"
     9  	"github.com/hashicorp/nomad/helper"
    10  	"github.com/hashicorp/nomad/helper/testlog"
    11  	"github.com/hashicorp/nomad/helper/uuid"
    12  	"github.com/hashicorp/nomad/nomad/mock"
    13  	"github.com/hashicorp/nomad/nomad/structs"
    14  	"github.com/hashicorp/nomad/testutil"
    15  	"github.com/stretchr/testify/assert"
    16  	mocker "github.com/stretchr/testify/mock"
    17  	"github.com/stretchr/testify/require"
    18  )
    19  
    20  func testDeploymentWatcher(t *testing.T, qps float64, batchDur time.Duration) (*Watcher, *mockBackend) {
    21  	m := newMockBackend(t)
    22  	w := NewDeploymentsWatcher(testlog.HCLogger(t), m, qps, batchDur)
    23  	return w, m
    24  }
    25  
    26  func defaultTestDeploymentWatcher(t *testing.T) (*Watcher, *mockBackend) {
    27  	return testDeploymentWatcher(t, LimitStateQueriesPerSecond, CrossDeploymentUpdateBatchDuration)
    28  }
    29  
    30  // Tests that the watcher properly watches for deployments and reconciles them
    31  func TestWatcher_WatchDeployments(t *testing.T) {
    32  	t.Parallel()
    33  	require := require.New(t)
    34  	w, m := defaultTestDeploymentWatcher(t)
    35  
    36  	// Create three jobs
    37  	j1, j2, j3 := mock.Job(), mock.Job(), mock.Job()
    38  	require.Nil(m.state.UpsertJob(100, j1))
    39  	require.Nil(m.state.UpsertJob(101, j2))
    40  	require.Nil(m.state.UpsertJob(102, j3))
    41  
    42  	// Create three deployments all running
    43  	d1, d2, d3 := mock.Deployment(), mock.Deployment(), mock.Deployment()
    44  	d1.JobID = j1.ID
    45  	d2.JobID = j2.ID
    46  	d3.JobID = j3.ID
    47  
    48  	// Upsert the first deployment
    49  	require.Nil(m.state.UpsertDeployment(103, d1))
    50  
    51  	// Next list 3
    52  	block1 := make(chan time.Time)
    53  	go func() {
    54  		<-block1
    55  		require.Nil(m.state.UpsertDeployment(104, d2))
    56  		require.Nil(m.state.UpsertDeployment(105, d3))
    57  	}()
    58  
    59  	//// Next list 3 but have one be terminal
    60  	block2 := make(chan time.Time)
    61  	d3terminal := d3.Copy()
    62  	d3terminal.Status = structs.DeploymentStatusFailed
    63  	go func() {
    64  		<-block2
    65  		require.Nil(m.state.UpsertDeployment(106, d3terminal))
    66  	}()
    67  
    68  	w.SetEnabled(true, m.state)
    69  	testutil.WaitForResult(func() (bool, error) { return 1 == len(w.watchers), nil },
    70  		func(err error) { require.Equal(1, len(w.watchers), "1 deployment returned") })
    71  
    72  	close(block1)
    73  	testutil.WaitForResult(func() (bool, error) { return 3 == len(w.watchers), nil },
    74  		func(err error) { require.Equal(3, len(w.watchers), "3 deployment returned") })
    75  
    76  	close(block2)
    77  	testutil.WaitForResult(func() (bool, error) { return 2 == len(w.watchers), nil },
    78  		func(err error) { require.Equal(3, len(w.watchers), "3 deployment returned - 1 terminal") })
    79  }
    80  
    81  // Tests that calls against an unknown deployment fail
    82  func TestWatcher_UnknownDeployment(t *testing.T) {
    83  	t.Parallel()
    84  	assert := assert.New(t)
    85  	require := require.New(t)
    86  	w, m := defaultTestDeploymentWatcher(t)
    87  	w.SetEnabled(true, m.state)
    88  
    89  	// The expected error is that it should be an unknown deployment
    90  	dID := uuid.Generate()
    91  	expected := fmt.Sprintf("unknown deployment %q", dID)
    92  
    93  	// Request setting the health against an unknown deployment
    94  	req := &structs.DeploymentAllocHealthRequest{
    95  		DeploymentID:         dID,
    96  		HealthyAllocationIDs: []string{uuid.Generate()},
    97  	}
    98  	var resp structs.DeploymentUpdateResponse
    99  	err := w.SetAllocHealth(req, &resp)
   100  	if assert.NotNil(err, "should have error for unknown deployment") {
   101  		require.Contains(err.Error(), expected)
   102  	}
   103  
   104  	// Request promoting against an unknown deployment
   105  	req2 := &structs.DeploymentPromoteRequest{
   106  		DeploymentID: dID,
   107  		All:          true,
   108  	}
   109  	err = w.PromoteDeployment(req2, &resp)
   110  	if assert.NotNil(err, "should have error for unknown deployment") {
   111  		require.Contains(err.Error(), expected)
   112  	}
   113  
   114  	// Request pausing against an unknown deployment
   115  	req3 := &structs.DeploymentPauseRequest{
   116  		DeploymentID: dID,
   117  		Pause:        true,
   118  	}
   119  	err = w.PauseDeployment(req3, &resp)
   120  	if assert.NotNil(err, "should have error for unknown deployment") {
   121  		require.Contains(err.Error(), expected)
   122  	}
   123  
   124  	// Request failing against an unknown deployment
   125  	req4 := &structs.DeploymentFailRequest{
   126  		DeploymentID: dID,
   127  	}
   128  	err = w.FailDeployment(req4, &resp)
   129  	if assert.NotNil(err, "should have error for unknown deployment") {
   130  		require.Contains(err.Error(), expected)
   131  	}
   132  }
   133  
   134  // Test setting an unknown allocation's health
   135  func TestWatcher_SetAllocHealth_Unknown(t *testing.T) {
   136  	t.Parallel()
   137  	assert := assert.New(t)
   138  	require := require.New(t)
   139  	w, m := defaultTestDeploymentWatcher(t)
   140  
   141  	// Create a job, and a deployment
   142  	j := mock.Job()
   143  	d := mock.Deployment()
   144  	d.JobID = j.ID
   145  	require.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob")
   146  	require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
   147  
   148  	// require that we get a call to UpsertDeploymentAllocHealth
   149  	a := mock.Alloc()
   150  	matchConfig := &matchDeploymentAllocHealthRequestConfig{
   151  		DeploymentID: d.ID,
   152  		Healthy:      []string{a.ID},
   153  		Eval:         true,
   154  	}
   155  	matcher := matchDeploymentAllocHealthRequest(matchConfig)
   156  	m.On("UpdateDeploymentAllocHealth", mocker.MatchedBy(matcher)).Return(nil)
   157  
   158  	w.SetEnabled(true, m.state)
   159  	testutil.WaitForResult(func() (bool, error) { return 1 == len(w.watchers), nil },
   160  		func(err error) { require.Equal(1, len(w.watchers), "Should have 1 deployment") })
   161  
   162  	// Call SetAllocHealth
   163  	req := &structs.DeploymentAllocHealthRequest{
   164  		DeploymentID:         d.ID,
   165  		HealthyAllocationIDs: []string{a.ID},
   166  	}
   167  	var resp structs.DeploymentUpdateResponse
   168  	err := w.SetAllocHealth(req, &resp)
   169  	if assert.NotNil(err, "Set health of unknown allocation") {
   170  		require.Contains(err.Error(), "unknown")
   171  	}
   172  	require.Equal(1, len(w.watchers), "Deployment should still be active")
   173  }
   174  
   175  // Test setting allocation health
   176  func TestWatcher_SetAllocHealth_Healthy(t *testing.T) {
   177  	t.Parallel()
   178  	require := require.New(t)
   179  	w, m := defaultTestDeploymentWatcher(t)
   180  
   181  	// Create a job, alloc, and a deployment
   182  	j := mock.Job()
   183  	d := mock.Deployment()
   184  	d.JobID = j.ID
   185  	a := mock.Alloc()
   186  	a.DeploymentID = d.ID
   187  	require.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob")
   188  	require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
   189  	require.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs")
   190  
   191  	// require that we get a call to UpsertDeploymentAllocHealth
   192  	matchConfig := &matchDeploymentAllocHealthRequestConfig{
   193  		DeploymentID: d.ID,
   194  		Healthy:      []string{a.ID},
   195  		Eval:         true,
   196  	}
   197  	matcher := matchDeploymentAllocHealthRequest(matchConfig)
   198  	m.On("UpdateDeploymentAllocHealth", mocker.MatchedBy(matcher)).Return(nil)
   199  
   200  	w.SetEnabled(true, m.state)
   201  	testutil.WaitForResult(func() (bool, error) { return 1 == len(w.watchers), nil },
   202  		func(err error) { require.Equal(1, len(w.watchers), "Should have 1 deployment") })
   203  
   204  	// Call SetAllocHealth
   205  	req := &structs.DeploymentAllocHealthRequest{
   206  		DeploymentID:         d.ID,
   207  		HealthyAllocationIDs: []string{a.ID},
   208  	}
   209  	var resp structs.DeploymentUpdateResponse
   210  	err := w.SetAllocHealth(req, &resp)
   211  	require.Nil(err, "SetAllocHealth")
   212  	require.Equal(1, len(w.watchers), "Deployment should still be active")
   213  	m.AssertCalled(t, "UpdateDeploymentAllocHealth", mocker.MatchedBy(matcher))
   214  }
   215  
   216  // Test setting allocation unhealthy
   217  func TestWatcher_SetAllocHealth_Unhealthy(t *testing.T) {
   218  	t.Parallel()
   219  	require := require.New(t)
   220  	w, m := defaultTestDeploymentWatcher(t)
   221  
   222  	// Create a job, alloc, and a deployment
   223  	j := mock.Job()
   224  	d := mock.Deployment()
   225  	d.JobID = j.ID
   226  	a := mock.Alloc()
   227  	a.DeploymentID = d.ID
   228  	require.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob")
   229  	require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
   230  	require.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs")
   231  
   232  	// require that we get a call to UpsertDeploymentAllocHealth
   233  	matchConfig := &matchDeploymentAllocHealthRequestConfig{
   234  		DeploymentID: d.ID,
   235  		Unhealthy:    []string{a.ID},
   236  		Eval:         true,
   237  		DeploymentUpdate: &structs.DeploymentStatusUpdate{
   238  			DeploymentID:      d.ID,
   239  			Status:            structs.DeploymentStatusFailed,
   240  			StatusDescription: structs.DeploymentStatusDescriptionFailedAllocations,
   241  		},
   242  	}
   243  	matcher := matchDeploymentAllocHealthRequest(matchConfig)
   244  	m.On("UpdateDeploymentAllocHealth", mocker.MatchedBy(matcher)).Return(nil)
   245  
   246  	w.SetEnabled(true, m.state)
   247  	testutil.WaitForResult(func() (bool, error) { return 1 == len(w.watchers), nil },
   248  		func(err error) { require.Equal(1, len(w.watchers), "Should have 1 deployment") })
   249  
   250  	// Call SetAllocHealth
   251  	req := &structs.DeploymentAllocHealthRequest{
   252  		DeploymentID:           d.ID,
   253  		UnhealthyAllocationIDs: []string{a.ID},
   254  	}
   255  	var resp structs.DeploymentUpdateResponse
   256  	err := w.SetAllocHealth(req, &resp)
   257  	require.Nil(err, "SetAllocHealth")
   258  
   259  	testutil.WaitForResult(func() (bool, error) { return 0 == len(w.watchers), nil },
   260  		func(err error) { require.Equal(0, len(w.watchers), "Should have no deployment") })
   261  	m.AssertNumberOfCalls(t, "UpdateDeploymentAllocHealth", 1)
   262  }
   263  
   264  // Test setting allocation unhealthy and that there should be a rollback
   265  func TestWatcher_SetAllocHealth_Unhealthy_Rollback(t *testing.T) {
   266  	t.Parallel()
   267  	require := require.New(t)
   268  	w, m := defaultTestDeploymentWatcher(t)
   269  
   270  	// Create a job, alloc, and a deployment
   271  	j := mock.Job()
   272  	j.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
   273  	j.TaskGroups[0].Update.MaxParallel = 2
   274  	j.TaskGroups[0].Update.AutoRevert = true
   275  	j.TaskGroups[0].Update.ProgressDeadline = 0
   276  	j.Stable = true
   277  	d := mock.Deployment()
   278  	d.JobID = j.ID
   279  	d.TaskGroups["web"].AutoRevert = true
   280  	a := mock.Alloc()
   281  	a.DeploymentID = d.ID
   282  	require.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob")
   283  	require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
   284  	require.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs")
   285  
   286  	// Upsert the job again to get a new version
   287  	j2 := j.Copy()
   288  	j2.Stable = false
   289  	// Modify the job to make its specification different
   290  	j2.Meta["foo"] = "bar"
   291  
   292  	require.Nil(m.state.UpsertJob(m.nextIndex(), j2), "UpsertJob2")
   293  
   294  	// require that we get a call to UpsertDeploymentAllocHealth
   295  	matchConfig := &matchDeploymentAllocHealthRequestConfig{
   296  		DeploymentID: d.ID,
   297  		Unhealthy:    []string{a.ID},
   298  		Eval:         true,
   299  		DeploymentUpdate: &structs.DeploymentStatusUpdate{
   300  			DeploymentID:      d.ID,
   301  			Status:            structs.DeploymentStatusFailed,
   302  			StatusDescription: structs.DeploymentStatusDescriptionFailedAllocations,
   303  		},
   304  		JobVersion: helper.Uint64ToPtr(0),
   305  	}
   306  	matcher := matchDeploymentAllocHealthRequest(matchConfig)
   307  	m.On("UpdateDeploymentAllocHealth", mocker.MatchedBy(matcher)).Return(nil)
   308  
   309  	w.SetEnabled(true, m.state)
   310  	testutil.WaitForResult(func() (bool, error) { return 1 == len(w.watchers), nil },
   311  		func(err error) { require.Equal(1, len(w.watchers), "Should have 1 deployment") })
   312  
   313  	// Call SetAllocHealth
   314  	req := &structs.DeploymentAllocHealthRequest{
   315  		DeploymentID:           d.ID,
   316  		UnhealthyAllocationIDs: []string{a.ID},
   317  	}
   318  	var resp structs.DeploymentUpdateResponse
   319  	err := w.SetAllocHealth(req, &resp)
   320  	require.Nil(err, "SetAllocHealth")
   321  
   322  	testutil.WaitForResult(func() (bool, error) { return 0 == len(w.watchers), nil },
   323  		func(err error) { require.Equal(0, len(w.watchers), "Should have no deployment") })
   324  	m.AssertNumberOfCalls(t, "UpdateDeploymentAllocHealth", 1)
   325  }
   326  
   327  // Test setting allocation unhealthy on job with identical spec and there should be no rollback
   328  func TestWatcher_SetAllocHealth_Unhealthy_NoRollback(t *testing.T) {
   329  	t.Parallel()
   330  	require := require.New(t)
   331  	w, m := defaultTestDeploymentWatcher(t)
   332  
   333  	// Create a job, alloc, and a deployment
   334  	j := mock.Job()
   335  	j.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
   336  	j.TaskGroups[0].Update.MaxParallel = 2
   337  	j.TaskGroups[0].Update.AutoRevert = true
   338  	j.TaskGroups[0].Update.ProgressDeadline = 0
   339  	j.Stable = true
   340  	d := mock.Deployment()
   341  	d.JobID = j.ID
   342  	d.TaskGroups["web"].AutoRevert = true
   343  	a := mock.Alloc()
   344  	a.DeploymentID = d.ID
   345  	require.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob")
   346  	require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
   347  	require.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs")
   348  
   349  	// Upsert the job again to get a new version
   350  	j2 := j.Copy()
   351  	j2.Stable = false
   352  
   353  	require.Nil(m.state.UpsertJob(m.nextIndex(), j2), "UpsertJob2")
   354  
   355  	// require that we get a call to UpsertDeploymentAllocHealth
   356  	matchConfig := &matchDeploymentAllocHealthRequestConfig{
   357  		DeploymentID: d.ID,
   358  		Unhealthy:    []string{a.ID},
   359  		Eval:         true,
   360  		DeploymentUpdate: &structs.DeploymentStatusUpdate{
   361  			DeploymentID:      d.ID,
   362  			Status:            structs.DeploymentStatusFailed,
   363  			StatusDescription: structs.DeploymentStatusDescriptionFailedAllocations,
   364  		},
   365  		JobVersion: nil,
   366  	}
   367  	matcher := matchDeploymentAllocHealthRequest(matchConfig)
   368  	m.On("UpdateDeploymentAllocHealth", mocker.MatchedBy(matcher)).Return(nil)
   369  
   370  	w.SetEnabled(true, m.state)
   371  	testutil.WaitForResult(func() (bool, error) { return 1 == len(w.watchers), nil },
   372  		func(err error) { require.Equal(1, len(w.watchers), "Should have 1 deployment") })
   373  
   374  	// Call SetAllocHealth
   375  	req := &structs.DeploymentAllocHealthRequest{
   376  		DeploymentID:           d.ID,
   377  		UnhealthyAllocationIDs: []string{a.ID},
   378  	}
   379  	var resp structs.DeploymentUpdateResponse
   380  	err := w.SetAllocHealth(req, &resp)
   381  	require.Nil(err, "SetAllocHealth")
   382  
   383  	testutil.WaitForResult(func() (bool, error) { return 0 == len(w.watchers), nil },
   384  		func(err error) { require.Equal(0, len(w.watchers), "Should have no deployment") })
   385  	m.AssertNumberOfCalls(t, "UpdateDeploymentAllocHealth", 1)
   386  }
   387  
   388  // Test promoting a deployment
   389  func TestWatcher_PromoteDeployment_HealthyCanaries(t *testing.T) {
   390  	t.Parallel()
   391  	require := require.New(t)
   392  	w, m := defaultTestDeploymentWatcher(t)
   393  
   394  	// Create a job, canary alloc, and a deployment
   395  	j := mock.Job()
   396  	j.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
   397  	j.TaskGroups[0].Update.MaxParallel = 2
   398  	j.TaskGroups[0].Update.Canary = 1
   399  	j.TaskGroups[0].Update.ProgressDeadline = 0
   400  	d := mock.Deployment()
   401  	d.JobID = j.ID
   402  	a := mock.Alloc()
   403  	d.TaskGroups[a.TaskGroup].DesiredCanaries = 1
   404  	d.TaskGroups[a.TaskGroup].PlacedCanaries = []string{a.ID}
   405  	a.DeploymentStatus = &structs.AllocDeploymentStatus{
   406  		Healthy: helper.BoolToPtr(true),
   407  	}
   408  	a.DeploymentID = d.ID
   409  	require.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob")
   410  	require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
   411  	require.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs")
   412  
   413  	// require that we get a call to UpsertDeploymentPromotion
   414  	matchConfig := &matchDeploymentPromoteRequestConfig{
   415  		Promotion: &structs.DeploymentPromoteRequest{
   416  			DeploymentID: d.ID,
   417  			All:          true,
   418  		},
   419  		Eval: true,
   420  	}
   421  	matcher := matchDeploymentPromoteRequest(matchConfig)
   422  	m.On("UpdateDeploymentPromotion", mocker.MatchedBy(matcher)).Return(nil)
   423  
   424  	// We may get an update for the desired transition.
   425  	m1 := matchUpdateAllocDesiredTransitions([]string{d.ID})
   426  	m.On("UpdateAllocDesiredTransition", mocker.MatchedBy(m1)).Return(nil).Once()
   427  
   428  	w.SetEnabled(true, m.state)
   429  	testutil.WaitForResult(func() (bool, error) { return 1 == len(w.watchers), nil },
   430  		func(err error) { require.Equal(1, len(w.watchers), "Should have 1 deployment") })
   431  
   432  	// Call PromoteDeployment
   433  	req := &structs.DeploymentPromoteRequest{
   434  		DeploymentID: d.ID,
   435  		All:          true,
   436  	}
   437  	var resp structs.DeploymentUpdateResponse
   438  	err := w.PromoteDeployment(req, &resp)
   439  	require.Nil(err, "PromoteDeployment")
   440  	require.Equal(1, len(w.watchers), "Deployment should still be active")
   441  	m.AssertCalled(t, "UpdateDeploymentPromotion", mocker.MatchedBy(matcher))
   442  }
   443  
   444  // Test promoting a deployment with unhealthy canaries
   445  func TestWatcher_PromoteDeployment_UnhealthyCanaries(t *testing.T) {
   446  	t.Parallel()
   447  	require := require.New(t)
   448  	w, m := defaultTestDeploymentWatcher(t)
   449  
   450  	// Create a job, canary alloc, and a deployment
   451  	j := mock.Job()
   452  	j.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
   453  	j.TaskGroups[0].Update.MaxParallel = 2
   454  	j.TaskGroups[0].Update.Canary = 2
   455  	j.TaskGroups[0].Update.ProgressDeadline = 0
   456  	d := mock.Deployment()
   457  	d.JobID = j.ID
   458  	a := mock.Alloc()
   459  	d.TaskGroups[a.TaskGroup].PlacedCanaries = []string{a.ID}
   460  	d.TaskGroups[a.TaskGroup].DesiredCanaries = 2
   461  	a.DeploymentID = d.ID
   462  	require.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob")
   463  	require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
   464  	require.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs")
   465  
   466  	// require that we get a call to UpsertDeploymentPromotion
   467  	matchConfig := &matchDeploymentPromoteRequestConfig{
   468  		Promotion: &structs.DeploymentPromoteRequest{
   469  			DeploymentID: d.ID,
   470  			All:          true,
   471  		},
   472  		Eval: true,
   473  	}
   474  	matcher := matchDeploymentPromoteRequest(matchConfig)
   475  	m.On("UpdateDeploymentPromotion", mocker.MatchedBy(matcher)).Return(nil)
   476  
   477  	w.SetEnabled(true, m.state)
   478  	testutil.WaitForResult(func() (bool, error) { return 1 == len(w.watchers), nil },
   479  		func(err error) { require.Equal(1, len(w.watchers), "Should have 1 deployment") })
   480  
   481  	// Call SetAllocHealth
   482  	req := &structs.DeploymentPromoteRequest{
   483  		DeploymentID: d.ID,
   484  		All:          true,
   485  	}
   486  	var resp structs.DeploymentUpdateResponse
   487  	err := w.PromoteDeployment(req, &resp)
   488  	if assert.NotNil(t, err, "PromoteDeployment") {
   489  		require.Contains(err.Error(), `Task group "web" has 0/2 healthy allocations`, "Should error because canary isn't marked healthy")
   490  	}
   491  
   492  	require.Equal(1, len(w.watchers), "Deployment should still be active")
   493  	m.AssertCalled(t, "UpdateDeploymentPromotion", mocker.MatchedBy(matcher))
   494  }
   495  
   496  // Test pausing a deployment that is running
   497  func TestWatcher_PauseDeployment_Pause_Running(t *testing.T) {
   498  	t.Parallel()
   499  	require := require.New(t)
   500  	w, m := defaultTestDeploymentWatcher(t)
   501  
   502  	// Create a job and a deployment
   503  	j := mock.Job()
   504  	d := mock.Deployment()
   505  	d.JobID = j.ID
   506  	require.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob")
   507  	require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
   508  
   509  	// require that we get a call to UpsertDeploymentStatusUpdate
   510  	matchConfig := &matchDeploymentStatusUpdateConfig{
   511  		DeploymentID:      d.ID,
   512  		Status:            structs.DeploymentStatusPaused,
   513  		StatusDescription: structs.DeploymentStatusDescriptionPaused,
   514  	}
   515  	matcher := matchDeploymentStatusUpdateRequest(matchConfig)
   516  	m.On("UpdateDeploymentStatus", mocker.MatchedBy(matcher)).Return(nil)
   517  
   518  	w.SetEnabled(true, m.state)
   519  	testutil.WaitForResult(func() (bool, error) { return 1 == len(w.watchers), nil },
   520  		func(err error) { require.Equal(1, len(w.watchers), "Should have 1 deployment") })
   521  
   522  	// Call PauseDeployment
   523  	req := &structs.DeploymentPauseRequest{
   524  		DeploymentID: d.ID,
   525  		Pause:        true,
   526  	}
   527  	var resp structs.DeploymentUpdateResponse
   528  	err := w.PauseDeployment(req, &resp)
   529  	require.Nil(err, "PauseDeployment")
   530  
   531  	require.Equal(1, len(w.watchers), "Deployment should still be active")
   532  	m.AssertCalled(t, "UpdateDeploymentStatus", mocker.MatchedBy(matcher))
   533  }
   534  
   535  // Test pausing a deployment that is paused
   536  func TestWatcher_PauseDeployment_Pause_Paused(t *testing.T) {
   537  	t.Parallel()
   538  	require := require.New(t)
   539  	w, m := defaultTestDeploymentWatcher(t)
   540  
   541  	// Create a job and a deployment
   542  	j := mock.Job()
   543  	d := mock.Deployment()
   544  	d.JobID = j.ID
   545  	d.Status = structs.DeploymentStatusPaused
   546  	require.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob")
   547  	require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
   548  
   549  	// require that we get a call to UpsertDeploymentStatusUpdate
   550  	matchConfig := &matchDeploymentStatusUpdateConfig{
   551  		DeploymentID:      d.ID,
   552  		Status:            structs.DeploymentStatusPaused,
   553  		StatusDescription: structs.DeploymentStatusDescriptionPaused,
   554  	}
   555  	matcher := matchDeploymentStatusUpdateRequest(matchConfig)
   556  	m.On("UpdateDeploymentStatus", mocker.MatchedBy(matcher)).Return(nil)
   557  
   558  	w.SetEnabled(true, m.state)
   559  	testutil.WaitForResult(func() (bool, error) { return 1 == len(w.watchers), nil },
   560  		func(err error) { require.Equal(1, len(w.watchers), "Should have 1 deployment") })
   561  
   562  	// Call PauseDeployment
   563  	req := &structs.DeploymentPauseRequest{
   564  		DeploymentID: d.ID,
   565  		Pause:        true,
   566  	}
   567  	var resp structs.DeploymentUpdateResponse
   568  	err := w.PauseDeployment(req, &resp)
   569  	require.Nil(err, "PauseDeployment")
   570  
   571  	require.Equal(1, len(w.watchers), "Deployment should still be active")
   572  	m.AssertCalled(t, "UpdateDeploymentStatus", mocker.MatchedBy(matcher))
   573  }
   574  
   575  // Test unpausing a deployment that is paused
   576  func TestWatcher_PauseDeployment_Unpause_Paused(t *testing.T) {
   577  	t.Parallel()
   578  	require := require.New(t)
   579  	w, m := defaultTestDeploymentWatcher(t)
   580  
   581  	// Create a job and a deployment
   582  	j := mock.Job()
   583  	d := mock.Deployment()
   584  	d.JobID = j.ID
   585  	d.Status = structs.DeploymentStatusPaused
   586  	require.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob")
   587  	require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
   588  
   589  	// require that we get a call to UpsertDeploymentStatusUpdate
   590  	matchConfig := &matchDeploymentStatusUpdateConfig{
   591  		DeploymentID:      d.ID,
   592  		Status:            structs.DeploymentStatusRunning,
   593  		StatusDescription: structs.DeploymentStatusDescriptionRunning,
   594  		Eval:              true,
   595  	}
   596  	matcher := matchDeploymentStatusUpdateRequest(matchConfig)
   597  	m.On("UpdateDeploymentStatus", mocker.MatchedBy(matcher)).Return(nil)
   598  
   599  	w.SetEnabled(true, m.state)
   600  	testutil.WaitForResult(func() (bool, error) { return 1 == len(w.watchers), nil },
   601  		func(err error) { require.Equal(1, len(w.watchers), "Should have 1 deployment") })
   602  
   603  	// Call PauseDeployment
   604  	req := &structs.DeploymentPauseRequest{
   605  		DeploymentID: d.ID,
   606  		Pause:        false,
   607  	}
   608  	var resp structs.DeploymentUpdateResponse
   609  	err := w.PauseDeployment(req, &resp)
   610  	require.Nil(err, "PauseDeployment")
   611  
   612  	require.Equal(1, len(w.watchers), "Deployment should still be active")
   613  	m.AssertCalled(t, "UpdateDeploymentStatus", mocker.MatchedBy(matcher))
   614  }
   615  
   616  // Test unpausing a deployment that is running
   617  func TestWatcher_PauseDeployment_Unpause_Running(t *testing.T) {
   618  	t.Parallel()
   619  	require := require.New(t)
   620  	w, m := defaultTestDeploymentWatcher(t)
   621  
   622  	// Create a job and a deployment
   623  	j := mock.Job()
   624  	d := mock.Deployment()
   625  	d.JobID = j.ID
   626  	require.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob")
   627  	require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
   628  
   629  	// require that we get a call to UpsertDeploymentStatusUpdate
   630  	matchConfig := &matchDeploymentStatusUpdateConfig{
   631  		DeploymentID:      d.ID,
   632  		Status:            structs.DeploymentStatusRunning,
   633  		StatusDescription: structs.DeploymentStatusDescriptionRunning,
   634  		Eval:              true,
   635  	}
   636  	matcher := matchDeploymentStatusUpdateRequest(matchConfig)
   637  	m.On("UpdateDeploymentStatus", mocker.MatchedBy(matcher)).Return(nil)
   638  
   639  	w.SetEnabled(true, m.state)
   640  	testutil.WaitForResult(func() (bool, error) { return 1 == len(w.watchers), nil },
   641  		func(err error) { require.Equal(1, len(w.watchers), "Should have 1 deployment") })
   642  
   643  	// Call PauseDeployment
   644  	req := &structs.DeploymentPauseRequest{
   645  		DeploymentID: d.ID,
   646  		Pause:        false,
   647  	}
   648  	var resp structs.DeploymentUpdateResponse
   649  	err := w.PauseDeployment(req, &resp)
   650  	require.Nil(err, "PauseDeployment")
   651  
   652  	require.Equal(1, len(w.watchers), "Deployment should still be active")
   653  	m.AssertCalled(t, "UpdateDeploymentStatus", mocker.MatchedBy(matcher))
   654  }
   655  
   656  // Test failing a deployment that is running
   657  func TestWatcher_FailDeployment_Running(t *testing.T) {
   658  	t.Parallel()
   659  	require := require.New(t)
   660  	w, m := defaultTestDeploymentWatcher(t)
   661  
   662  	// Create a job and a deployment
   663  	j := mock.Job()
   664  	d := mock.Deployment()
   665  	d.JobID = j.ID
   666  	require.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob")
   667  	require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
   668  
   669  	// require that we get a call to UpsertDeploymentStatusUpdate
   670  	matchConfig := &matchDeploymentStatusUpdateConfig{
   671  		DeploymentID:      d.ID,
   672  		Status:            structs.DeploymentStatusFailed,
   673  		StatusDescription: structs.DeploymentStatusDescriptionFailedByUser,
   674  		Eval:              true,
   675  	}
   676  	matcher := matchDeploymentStatusUpdateRequest(matchConfig)
   677  	m.On("UpdateDeploymentStatus", mocker.MatchedBy(matcher)).Return(nil)
   678  
   679  	w.SetEnabled(true, m.state)
   680  	testutil.WaitForResult(func() (bool, error) { return 1 == len(w.watchers), nil },
   681  		func(err error) { require.Equal(1, len(w.watchers), "Should have 1 deployment") })
   682  
   683  	// Call PauseDeployment
   684  	req := &structs.DeploymentFailRequest{
   685  		DeploymentID: d.ID,
   686  	}
   687  	var resp structs.DeploymentUpdateResponse
   688  	err := w.FailDeployment(req, &resp)
   689  	require.Nil(err, "FailDeployment")
   690  
   691  	require.Equal(1, len(w.watchers), "Deployment should still be active")
   692  	m.AssertCalled(t, "UpdateDeploymentStatus", mocker.MatchedBy(matcher))
   693  }
   694  
   695  // Tests that the watcher properly watches for allocation changes and takes the
   696  // proper actions
   697  func TestDeploymentWatcher_Watch_NoProgressDeadline(t *testing.T) {
   698  	t.Parallel()
   699  	require := require.New(t)
   700  	w, m := testDeploymentWatcher(t, 1000.0, 1*time.Millisecond)
   701  
   702  	// Create a job, alloc, and a deployment
   703  	j := mock.Job()
   704  	j.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
   705  	j.TaskGroups[0].Update.MaxParallel = 2
   706  	j.TaskGroups[0].Update.AutoRevert = true
   707  	j.TaskGroups[0].Update.ProgressDeadline = 0
   708  	j.Stable = true
   709  	d := mock.Deployment()
   710  	d.JobID = j.ID
   711  	d.TaskGroups["web"].AutoRevert = true
   712  	a := mock.Alloc()
   713  	a.DeploymentID = d.ID
   714  	require.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob")
   715  	require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
   716  	require.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs")
   717  
   718  	// Upsert the job again to get a new version
   719  	j2 := j.Copy()
   720  	// Modify the job to make its specification different
   721  	j2.Meta["foo"] = "bar"
   722  	j2.Stable = false
   723  	require.Nil(m.state.UpsertJob(m.nextIndex(), j2), "UpsertJob2")
   724  
   725  	// require that we will get a update allocation call only once. This will
   726  	// verify that the watcher is batching allocation changes
   727  	m1 := matchUpdateAllocDesiredTransitions([]string{d.ID})
   728  	m.On("UpdateAllocDesiredTransition", mocker.MatchedBy(m1)).Return(nil).Once()
   729  
   730  	// require that we get a call to UpsertDeploymentStatusUpdate
   731  	c := &matchDeploymentStatusUpdateConfig{
   732  		DeploymentID:      d.ID,
   733  		Status:            structs.DeploymentStatusFailed,
   734  		StatusDescription: structs.DeploymentStatusDescriptionRollback(structs.DeploymentStatusDescriptionFailedAllocations, 0),
   735  		JobVersion:        helper.Uint64ToPtr(0),
   736  		Eval:              true,
   737  	}
   738  	m2 := matchDeploymentStatusUpdateRequest(c)
   739  	m.On("UpdateDeploymentStatus", mocker.MatchedBy(m2)).Return(nil)
   740  
   741  	w.SetEnabled(true, m.state)
   742  	testutil.WaitForResult(func() (bool, error) { return 1 == len(w.watchers), nil },
   743  		func(err error) { require.Equal(1, len(w.watchers), "Should have 1 deployment") })
   744  
   745  	// Update the allocs health to healthy which should create an evaluation
   746  	for i := 0; i < 5; i++ {
   747  		req := &structs.ApplyDeploymentAllocHealthRequest{
   748  			DeploymentAllocHealthRequest: structs.DeploymentAllocHealthRequest{
   749  				DeploymentID:         d.ID,
   750  				HealthyAllocationIDs: []string{a.ID},
   751  			},
   752  		}
   753  		require.Nil(m.state.UpdateDeploymentAllocHealth(m.nextIndex(), req), "UpsertDeploymentAllocHealth")
   754  	}
   755  
   756  	// Wait for there to be one eval
   757  	testutil.WaitForResult(func() (bool, error) {
   758  		ws := memdb.NewWatchSet()
   759  		evals, err := m.state.EvalsByJob(ws, j.Namespace, j.ID)
   760  		if err != nil {
   761  			return false, err
   762  		}
   763  
   764  		if l := len(evals); l != 1 {
   765  			return false, fmt.Errorf("Got %d evals; want 1", l)
   766  		}
   767  
   768  		return true, nil
   769  	}, func(err error) {
   770  		t.Fatal(err)
   771  	})
   772  
   773  	// Update the allocs health to unhealthy which should create a job rollback,
   774  	// status update and eval
   775  	req2 := &structs.ApplyDeploymentAllocHealthRequest{
   776  		DeploymentAllocHealthRequest: structs.DeploymentAllocHealthRequest{
   777  			DeploymentID:           d.ID,
   778  			UnhealthyAllocationIDs: []string{a.ID},
   779  		},
   780  	}
   781  	require.Nil(m.state.UpdateDeploymentAllocHealth(m.nextIndex(), req2), "UpsertDeploymentAllocHealth")
   782  
   783  	// Wait for there to be one eval
   784  	testutil.WaitForResult(func() (bool, error) {
   785  		ws := memdb.NewWatchSet()
   786  		evals, err := m.state.EvalsByJob(ws, j.Namespace, j.ID)
   787  		if err != nil {
   788  			return false, err
   789  		}
   790  
   791  		if l := len(evals); l != 2 {
   792  			return false, fmt.Errorf("Got %d evals; want 1", l)
   793  		}
   794  
   795  		return true, nil
   796  	}, func(err error) {
   797  		t.Fatal(err)
   798  	})
   799  
   800  	m.AssertCalled(t, "UpdateAllocDesiredTransition", mocker.MatchedBy(m1))
   801  
   802  	// After we upsert the job version will go to 2. So use this to require the
   803  	// original call happened.
   804  	c2 := &matchDeploymentStatusUpdateConfig{
   805  		DeploymentID:      d.ID,
   806  		Status:            structs.DeploymentStatusFailed,
   807  		StatusDescription: structs.DeploymentStatusDescriptionRollback(structs.DeploymentStatusDescriptionFailedAllocations, 0),
   808  		JobVersion:        helper.Uint64ToPtr(2),
   809  		Eval:              true,
   810  	}
   811  	m3 := matchDeploymentStatusUpdateRequest(c2)
   812  	m.AssertCalled(t, "UpdateDeploymentStatus", mocker.MatchedBy(m3))
   813  	testutil.WaitForResult(func() (bool, error) { return 0 == len(w.watchers), nil },
   814  		func(err error) { require.Equal(0, len(w.watchers), "Should have no deployment") })
   815  }
   816  
   817  func TestDeploymentWatcher_Watch_ProgressDeadline(t *testing.T) {
   818  	t.Parallel()
   819  	require := require.New(t)
   820  	w, m := testDeploymentWatcher(t, 1000.0, 1*time.Millisecond)
   821  
   822  	// Create a job, alloc, and a deployment
   823  	j := mock.Job()
   824  	j.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
   825  	j.TaskGroups[0].Update.MaxParallel = 2
   826  	j.TaskGroups[0].Update.ProgressDeadline = 500 * time.Millisecond
   827  	j.Stable = true
   828  	d := mock.Deployment()
   829  	d.JobID = j.ID
   830  	d.TaskGroups["web"].ProgressDeadline = 500 * time.Millisecond
   831  	a := mock.Alloc()
   832  	now := time.Now()
   833  	a.CreateTime = now.UnixNano()
   834  	a.ModifyTime = now.UnixNano()
   835  	a.DeploymentID = d.ID
   836  	require.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob")
   837  	require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
   838  	require.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs")
   839  
   840  	// require that we get a call to UpsertDeploymentStatusUpdate
   841  	c := &matchDeploymentStatusUpdateConfig{
   842  		DeploymentID:      d.ID,
   843  		Status:            structs.DeploymentStatusFailed,
   844  		StatusDescription: structs.DeploymentStatusDescriptionProgressDeadline,
   845  		Eval:              true,
   846  	}
   847  	m2 := matchDeploymentStatusUpdateRequest(c)
   848  	m.On("UpdateDeploymentStatus", mocker.MatchedBy(m2)).Return(nil)
   849  
   850  	w.SetEnabled(true, m.state)
   851  	testutil.WaitForResult(func() (bool, error) { return 1 == len(w.watchers), nil },
   852  		func(err error) { require.Equal(1, len(w.watchers), "Should have 1 deployment") })
   853  
   854  	// Update the alloc to be unhealthy and require that nothing happens.
   855  	a2 := a.Copy()
   856  	a2.DeploymentStatus = &structs.AllocDeploymentStatus{
   857  		Healthy:   helper.BoolToPtr(false),
   858  		Timestamp: now,
   859  	}
   860  	require.Nil(m.state.UpdateAllocsFromClient(100, []*structs.Allocation{a2}))
   861  
   862  	// Wait for the deployment to be failed
   863  	testutil.WaitForResult(func() (bool, error) {
   864  		d, err := m.state.DeploymentByID(nil, d.ID)
   865  		if err != nil {
   866  			return false, err
   867  		}
   868  
   869  		return d.Status == structs.DeploymentStatusFailed, fmt.Errorf("bad status %q", d.Status)
   870  	}, func(err error) {
   871  		t.Fatal(err)
   872  	})
   873  
   874  	// require there are is only one evaluation
   875  	testutil.WaitForResult(func() (bool, error) {
   876  		ws := memdb.NewWatchSet()
   877  		evals, err := m.state.EvalsByJob(ws, j.Namespace, j.ID)
   878  		if err != nil {
   879  			return false, err
   880  		}
   881  
   882  		if l := len(evals); l != 1 {
   883  			return false, fmt.Errorf("Got %d evals; want 1", l)
   884  		}
   885  
   886  		return true, nil
   887  	}, func(err error) {
   888  		t.Fatal(err)
   889  	})
   890  }
   891  
   892  // Test that progress deadline handling works when there are multiple groups
   893  func TestDeploymentWatcher_ProgressCutoff(t *testing.T) {
   894  	t.Parallel()
   895  	require := require.New(t)
   896  	w, m := testDeploymentWatcher(t, 1000.0, 1*time.Millisecond)
   897  
   898  	// Create a job, alloc, and a deployment
   899  	j := mock.Job()
   900  	j.TaskGroups[0].Count = 1
   901  	j.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
   902  	j.TaskGroups[0].Update.ProgressDeadline = 500 * time.Millisecond
   903  	j.TaskGroups = append(j.TaskGroups, j.TaskGroups[0].Copy())
   904  	j.TaskGroups[1].Name = "foo"
   905  	j.TaskGroups[1].Update.ProgressDeadline = 1 * time.Second
   906  	j.Stable = true
   907  
   908  	d := mock.Deployment()
   909  	d.JobID = j.ID
   910  	d.TaskGroups["web"].DesiredTotal = 1
   911  	d.TaskGroups["foo"] = d.TaskGroups["web"].Copy()
   912  	d.TaskGroups["web"].ProgressDeadline = 500 * time.Millisecond
   913  	d.TaskGroups["foo"].ProgressDeadline = 1 * time.Second
   914  
   915  	a := mock.Alloc()
   916  	now := time.Now()
   917  	a.CreateTime = now.UnixNano()
   918  	a.ModifyTime = now.UnixNano()
   919  	a.DeploymentID = d.ID
   920  
   921  	a2 := mock.Alloc()
   922  	a2.TaskGroup = "foo"
   923  	a2.CreateTime = now.UnixNano()
   924  	a2.ModifyTime = now.UnixNano()
   925  	a2.DeploymentID = d.ID
   926  
   927  	require.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob")
   928  	require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
   929  	require.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a, a2}), "UpsertAllocs")
   930  
   931  	// We may get an update for the desired transition.
   932  	m1 := matchUpdateAllocDesiredTransitions([]string{d.ID})
   933  	m.On("UpdateAllocDesiredTransition", mocker.MatchedBy(m1)).Return(nil).Once()
   934  
   935  	w.SetEnabled(true, m.state)
   936  	testutil.WaitForResult(func() (bool, error) { return 1 == len(w.watchers), nil },
   937  		func(err error) { require.Equal(1, len(w.watchers), "Should have 1 deployment") })
   938  
   939  	watcher, err := w.getOrCreateWatcher(d.ID)
   940  	require.NoError(err)
   941  	require.NotNil(watcher)
   942  
   943  	d1, err := m.state.DeploymentByID(nil, d.ID)
   944  	require.NoError(err)
   945  
   946  	done := watcher.doneGroups(d1)
   947  	require.Contains(done, "web")
   948  	require.False(done["web"])
   949  	require.Contains(done, "foo")
   950  	require.False(done["foo"])
   951  
   952  	cutoff1 := watcher.getDeploymentProgressCutoff(d1)
   953  	require.False(cutoff1.IsZero())
   954  
   955  	// Update the first allocation to be healthy
   956  	a3 := a.Copy()
   957  	a3.DeploymentStatus = &structs.AllocDeploymentStatus{Healthy: helper.BoolToPtr(true)}
   958  	require.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a3}), "UpsertAllocs")
   959  
   960  	// Get the updated deployment
   961  	d2, err := m.state.DeploymentByID(nil, d.ID)
   962  	require.NoError(err)
   963  
   964  	done = watcher.doneGroups(d2)
   965  	require.Contains(done, "web")
   966  	require.True(done["web"])
   967  	require.Contains(done, "foo")
   968  	require.False(done["foo"])
   969  
   970  	cutoff2 := watcher.getDeploymentProgressCutoff(d2)
   971  	require.False(cutoff2.IsZero())
   972  	require.True(cutoff1.UnixNano() < cutoff2.UnixNano())
   973  
   974  	// Update the second allocation to be healthy
   975  	a4 := a2.Copy()
   976  	a4.DeploymentStatus = &structs.AllocDeploymentStatus{Healthy: helper.BoolToPtr(true)}
   977  	require.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a4}), "UpsertAllocs")
   978  
   979  	// Get the updated deployment
   980  	d3, err := m.state.DeploymentByID(nil, d.ID)
   981  	require.NoError(err)
   982  
   983  	done = watcher.doneGroups(d3)
   984  	require.Contains(done, "web")
   985  	require.True(done["web"])
   986  	require.Contains(done, "foo")
   987  	require.True(done["foo"])
   988  
   989  	cutoff3 := watcher.getDeploymentProgressCutoff(d2)
   990  	require.True(cutoff3.IsZero())
   991  }
   992  
   993  // Test that we will allow the progress deadline to be reached when the canaries
   994  // are healthy but we haven't promoted
   995  func TestDeploymentWatcher_Watch_ProgressDeadline_Canaries(t *testing.T) {
   996  	t.Parallel()
   997  	require := require.New(t)
   998  	w, m := testDeploymentWatcher(t, 1000.0, 1*time.Millisecond)
   999  
  1000  	// Create a job, alloc, and a deployment
  1001  	j := mock.Job()
  1002  	j.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
  1003  	j.TaskGroups[0].Update.Canary = 1
  1004  	j.TaskGroups[0].Update.MaxParallel = 1
  1005  	j.TaskGroups[0].Update.ProgressDeadline = 500 * time.Millisecond
  1006  	j.Stable = true
  1007  	d := mock.Deployment()
  1008  	d.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion
  1009  	d.JobID = j.ID
  1010  	d.TaskGroups["web"].ProgressDeadline = 500 * time.Millisecond
  1011  	d.TaskGroups["web"].DesiredCanaries = 1
  1012  	a := mock.Alloc()
  1013  	now := time.Now()
  1014  	a.CreateTime = now.UnixNano()
  1015  	a.ModifyTime = now.UnixNano()
  1016  	a.DeploymentID = d.ID
  1017  	require.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob")
  1018  	require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
  1019  	require.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs")
  1020  
  1021  	// require that we will get a createEvaluation call only once. This will
  1022  	// verify that the watcher is batching allocation changes
  1023  	m1 := matchUpdateAllocDesiredTransitions([]string{d.ID})
  1024  	m.On("UpdateAllocDesiredTransition", mocker.MatchedBy(m1)).Return(nil).Once()
  1025  
  1026  	w.SetEnabled(true, m.state)
  1027  	testutil.WaitForResult(func() (bool, error) { return 1 == len(w.watchers), nil },
  1028  		func(err error) { require.Equal(1, len(w.watchers), "Should have 1 deployment") })
  1029  
  1030  	// Update the alloc to be unhealthy and require that nothing happens.
  1031  	a2 := a.Copy()
  1032  	a2.DeploymentStatus = &structs.AllocDeploymentStatus{
  1033  		Healthy:   helper.BoolToPtr(true),
  1034  		Timestamp: now,
  1035  	}
  1036  	require.Nil(m.state.UpdateAllocsFromClient(m.nextIndex(), []*structs.Allocation{a2}))
  1037  
  1038  	// Wait for the deployment to cross the deadline
  1039  	dout, err := m.state.DeploymentByID(nil, d.ID)
  1040  	require.NoError(err)
  1041  	require.NotNil(dout)
  1042  	state := dout.TaskGroups["web"]
  1043  	require.NotNil(state)
  1044  	time.Sleep(state.RequireProgressBy.Add(time.Second).Sub(now))
  1045  
  1046  	// Require the deployment is still running
  1047  	dout, err = m.state.DeploymentByID(nil, d.ID)
  1048  	require.NoError(err)
  1049  	require.NotNil(dout)
  1050  	require.Equal(structs.DeploymentStatusRunning, dout.Status)
  1051  	require.Equal(structs.DeploymentStatusDescriptionRunningNeedsPromotion, dout.StatusDescription)
  1052  
  1053  	// require there are is only one evaluation
  1054  	testutil.WaitForResult(func() (bool, error) {
  1055  		ws := memdb.NewWatchSet()
  1056  		evals, err := m.state.EvalsByJob(ws, j.Namespace, j.ID)
  1057  		if err != nil {
  1058  			return false, err
  1059  		}
  1060  
  1061  		if l := len(evals); l != 1 {
  1062  			return false, fmt.Errorf("Got %d evals; want 1", l)
  1063  		}
  1064  
  1065  		return true, nil
  1066  	}, func(err error) {
  1067  		t.Fatal(err)
  1068  	})
  1069  }
  1070  
  1071  // Test that a promoted deployment with alloc healthy updates create
  1072  // evals to move the deployment forward
  1073  func TestDeploymentWatcher_PromotedCanary_UpdatedAllocs(t *testing.T) {
  1074  	t.Parallel()
  1075  	require := require.New(t)
  1076  	w, m := testDeploymentWatcher(t, 1000.0, 1*time.Millisecond)
  1077  
  1078  	// Create a job, alloc, and a deployment
  1079  	j := mock.Job()
  1080  	j.TaskGroups[0].Count = 2
  1081  	j.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
  1082  	j.TaskGroups[0].Update.Canary = 1
  1083  	j.TaskGroups[0].Update.MaxParallel = 1
  1084  	j.TaskGroups[0].Update.ProgressDeadline = 50 * time.Millisecond
  1085  	j.Stable = true
  1086  
  1087  	d := mock.Deployment()
  1088  	d.TaskGroups["web"].DesiredTotal = 2
  1089  	d.TaskGroups["web"].DesiredCanaries = 1
  1090  	d.TaskGroups["web"].HealthyAllocs = 1
  1091  	d.StatusDescription = structs.DeploymentStatusDescriptionRunning
  1092  	d.JobID = j.ID
  1093  	d.TaskGroups["web"].ProgressDeadline = 50 * time.Millisecond
  1094  	d.TaskGroups["web"].RequireProgressBy = time.Now().Add(50 * time.Millisecond)
  1095  
  1096  	a := mock.Alloc()
  1097  	now := time.Now()
  1098  	a.CreateTime = now.UnixNano()
  1099  	a.ModifyTime = now.UnixNano()
  1100  	a.DeploymentID = d.ID
  1101  	a.DeploymentStatus = &structs.AllocDeploymentStatus{
  1102  		Healthy:   helper.BoolToPtr(true),
  1103  		Timestamp: now,
  1104  	}
  1105  	require.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob")
  1106  	require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
  1107  	require.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs")
  1108  
  1109  	w.SetEnabled(true, m.state)
  1110  	testutil.WaitForResult(func() (bool, error) { return 1 == len(w.watchers), nil },
  1111  		func(err error) { require.Equal(1, len(w.watchers), "Should have 1 deployment") })
  1112  
  1113  	m1 := matchUpdateAllocDesiredTransitions([]string{d.ID})
  1114  	m.On("UpdateAllocDesiredTransition", mocker.MatchedBy(m1)).Return(nil).Twice()
  1115  
  1116  	// Create another alloc
  1117  	a2 := a.Copy()
  1118  	a2.ID = uuid.Generate()
  1119  	now = time.Now()
  1120  	a2.CreateTime = now.UnixNano()
  1121  	a2.ModifyTime = now.UnixNano()
  1122  	a2.DeploymentStatus = &structs.AllocDeploymentStatus{
  1123  		Healthy:   helper.BoolToPtr(true),
  1124  		Timestamp: now,
  1125  	}
  1126  	d.TaskGroups["web"].RequireProgressBy = time.Now().Add(2 * time.Second)
  1127  	require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
  1128  	// Wait until batch eval period passes before updating another alloc
  1129  	time.Sleep(1 * time.Second)
  1130  	require.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a2}), "UpsertAllocs")
  1131  
  1132  	// Wait for the deployment to cross the deadline
  1133  	dout, err := m.state.DeploymentByID(nil, d.ID)
  1134  	require.NoError(err)
  1135  	require.NotNil(dout)
  1136  	state := dout.TaskGroups["web"]
  1137  	require.NotNil(state)
  1138  	time.Sleep(state.RequireProgressBy.Add(time.Second).Sub(now))
  1139  
  1140  	// There should be two evals
  1141  	testutil.WaitForResult(func() (bool, error) {
  1142  		ws := memdb.NewWatchSet()
  1143  		evals, err := m.state.EvalsByJob(ws, j.Namespace, j.ID)
  1144  		if err != nil {
  1145  			return false, err
  1146  		}
  1147  
  1148  		if l := len(evals); l != 2 {
  1149  			return false, fmt.Errorf("Got %d evals; want 2", l)
  1150  		}
  1151  
  1152  		return true, nil
  1153  	}, func(err error) {
  1154  		t.Fatal(err)
  1155  	})
  1156  }
  1157  
  1158  // Test scenario where deployment initially has no progress deadline
  1159  // After the deployment is updated, a failed alloc's DesiredTransition should be set
  1160  func TestDeploymentWatcher_Watch_StartWithoutProgressDeadline(t *testing.T) {
  1161  	t.Parallel()
  1162  	require := require.New(t)
  1163  	w, m := testDeploymentWatcher(t, 1000.0, 1*time.Millisecond)
  1164  
  1165  	// Create a job, and a deployment
  1166  	j := mock.Job()
  1167  	j.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
  1168  	j.TaskGroups[0].Update.MaxParallel = 2
  1169  	j.TaskGroups[0].Update.ProgressDeadline = 500 * time.Millisecond
  1170  	j.Stable = true
  1171  	d := mock.Deployment()
  1172  	d.JobID = j.ID
  1173  
  1174  	require.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob")
  1175  	require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
  1176  
  1177  	a := mock.Alloc()
  1178  	a.CreateTime = time.Now().UnixNano()
  1179  	a.DeploymentID = d.ID
  1180  
  1181  	require.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs")
  1182  
  1183  	d.TaskGroups["web"].ProgressDeadline = 500 * time.Millisecond
  1184  	// Update the deployment with a progress deadline
  1185  	require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
  1186  
  1187  	// Match on DesiredTransition set to Reschedule for the failed alloc
  1188  	m1 := matchUpdateAllocDesiredTransitionReschedule([]string{a.ID})
  1189  	m.On("UpdateAllocDesiredTransition", mocker.MatchedBy(m1)).Return(nil).Once()
  1190  
  1191  	w.SetEnabled(true, m.state)
  1192  	testutil.WaitForResult(func() (bool, error) { return 1 == len(w.watchers), nil },
  1193  		func(err error) { require.Equal(1, len(w.watchers), "Should have 1 deployment") })
  1194  
  1195  	// Update the alloc to be unhealthy
  1196  	a2 := a.Copy()
  1197  	a2.DeploymentStatus = &structs.AllocDeploymentStatus{
  1198  		Healthy:   helper.BoolToPtr(false),
  1199  		Timestamp: time.Now(),
  1200  	}
  1201  	require.Nil(m.state.UpdateAllocsFromClient(m.nextIndex(), []*structs.Allocation{a2}))
  1202  
  1203  	// Wait for the alloc's DesiredState to set reschedule
  1204  	testutil.WaitForResult(func() (bool, error) {
  1205  		a, err := m.state.AllocByID(nil, a.ID)
  1206  		if err != nil {
  1207  			return false, err
  1208  		}
  1209  		dt := a.DesiredTransition
  1210  		shouldReschedule := dt.Reschedule != nil && *dt.Reschedule
  1211  		return shouldReschedule, fmt.Errorf("Desired Transition Reschedule should be set but got %v", shouldReschedule)
  1212  	}, func(err error) {
  1213  		t.Fatal(err)
  1214  	})
  1215  }
  1216  
  1217  // Tests that the watcher fails rollback when the spec hasn't changed
  1218  func TestDeploymentWatcher_RollbackFailed(t *testing.T) {
  1219  	t.Parallel()
  1220  	require := require.New(t)
  1221  	w, m := testDeploymentWatcher(t, 1000.0, 1*time.Millisecond)
  1222  
  1223  	// Create a job, alloc, and a deployment
  1224  	j := mock.Job()
  1225  	j.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
  1226  	j.TaskGroups[0].Update.MaxParallel = 2
  1227  	j.TaskGroups[0].Update.AutoRevert = true
  1228  	j.TaskGroups[0].Update.ProgressDeadline = 0
  1229  	j.Stable = true
  1230  	d := mock.Deployment()
  1231  	d.JobID = j.ID
  1232  	d.TaskGroups["web"].AutoRevert = true
  1233  	a := mock.Alloc()
  1234  	a.DeploymentID = d.ID
  1235  	require.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob")
  1236  	require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
  1237  	require.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs")
  1238  
  1239  	// Upsert the job again to get a new version
  1240  	j2 := j.Copy()
  1241  	// Modify the job to make its specification different
  1242  	j2.Stable = false
  1243  	require.Nil(m.state.UpsertJob(m.nextIndex(), j2), "UpsertJob2")
  1244  
  1245  	// require that we will get a createEvaluation call only once. This will
  1246  	// verify that the watcher is batching allocation changes
  1247  	m1 := matchUpdateAllocDesiredTransitions([]string{d.ID})
  1248  	m.On("UpdateAllocDesiredTransition", mocker.MatchedBy(m1)).Return(nil).Once()
  1249  
  1250  	// require that we get a call to UpsertDeploymentStatusUpdate with roll back failed as the status
  1251  	c := &matchDeploymentStatusUpdateConfig{
  1252  		DeploymentID:      d.ID,
  1253  		Status:            structs.DeploymentStatusFailed,
  1254  		StatusDescription: structs.DeploymentStatusDescriptionRollbackNoop(structs.DeploymentStatusDescriptionFailedAllocations, 0),
  1255  		JobVersion:        nil,
  1256  		Eval:              true,
  1257  	}
  1258  	m2 := matchDeploymentStatusUpdateRequest(c)
  1259  	m.On("UpdateDeploymentStatus", mocker.MatchedBy(m2)).Return(nil)
  1260  
  1261  	w.SetEnabled(true, m.state)
  1262  	testutil.WaitForResult(func() (bool, error) { return 1 == len(w.watchers), nil },
  1263  		func(err error) { require.Equal(1, len(w.watchers), "Should have 1 deployment") })
  1264  
  1265  	// Update the allocs health to healthy which should create an evaluation
  1266  	for i := 0; i < 5; i++ {
  1267  		req := &structs.ApplyDeploymentAllocHealthRequest{
  1268  			DeploymentAllocHealthRequest: structs.DeploymentAllocHealthRequest{
  1269  				DeploymentID:         d.ID,
  1270  				HealthyAllocationIDs: []string{a.ID},
  1271  			},
  1272  		}
  1273  		require.Nil(m.state.UpdateDeploymentAllocHealth(m.nextIndex(), req), "UpsertDeploymentAllocHealth")
  1274  	}
  1275  
  1276  	// Wait for there to be one eval
  1277  	testutil.WaitForResult(func() (bool, error) {
  1278  		ws := memdb.NewWatchSet()
  1279  		evals, err := m.state.EvalsByJob(ws, j.Namespace, j.ID)
  1280  		if err != nil {
  1281  			return false, err
  1282  		}
  1283  
  1284  		if l := len(evals); l != 1 {
  1285  			return false, fmt.Errorf("Got %d evals; want 1", l)
  1286  		}
  1287  
  1288  		return true, nil
  1289  	}, func(err error) {
  1290  		t.Fatal(err)
  1291  	})
  1292  
  1293  	// Update the allocs health to unhealthy which will cause attempting a rollback,
  1294  	// fail in that step, do status update and eval
  1295  	req2 := &structs.ApplyDeploymentAllocHealthRequest{
  1296  		DeploymentAllocHealthRequest: structs.DeploymentAllocHealthRequest{
  1297  			DeploymentID:           d.ID,
  1298  			UnhealthyAllocationIDs: []string{a.ID},
  1299  		},
  1300  	}
  1301  	require.Nil(m.state.UpdateDeploymentAllocHealth(m.nextIndex(), req2), "UpsertDeploymentAllocHealth")
  1302  
  1303  	// Wait for there to be one eval
  1304  	testutil.WaitForResult(func() (bool, error) {
  1305  		ws := memdb.NewWatchSet()
  1306  		evals, err := m.state.EvalsByJob(ws, j.Namespace, j.ID)
  1307  		if err != nil {
  1308  			return false, err
  1309  		}
  1310  
  1311  		if l := len(evals); l != 2 {
  1312  			return false, fmt.Errorf("Got %d evals; want 1", l)
  1313  		}
  1314  
  1315  		return true, nil
  1316  	}, func(err error) {
  1317  		t.Fatal(err)
  1318  	})
  1319  
  1320  	m.AssertCalled(t, "UpdateAllocDesiredTransition", mocker.MatchedBy(m1))
  1321  
  1322  	// verify that the job version hasn't changed after upsert
  1323  	m.state.JobByID(nil, structs.DefaultNamespace, j.ID)
  1324  	require.Equal(uint64(0), j.Version, "Expected job version 0 but got ", j.Version)
  1325  }
  1326  
  1327  // Test allocation updates and evaluation creation is batched between watchers
  1328  func TestWatcher_BatchAllocUpdates(t *testing.T) {
  1329  	t.Parallel()
  1330  	require := require.New(t)
  1331  	w, m := testDeploymentWatcher(t, 1000.0, 1*time.Second)
  1332  
  1333  	// Create a job, alloc, for two deployments
  1334  	j1 := mock.Job()
  1335  	j1.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
  1336  	j1.TaskGroups[0].Update.ProgressDeadline = 0
  1337  	d1 := mock.Deployment()
  1338  	d1.JobID = j1.ID
  1339  	a1 := mock.Alloc()
  1340  	a1.Job = j1
  1341  	a1.JobID = j1.ID
  1342  	a1.DeploymentID = d1.ID
  1343  
  1344  	j2 := mock.Job()
  1345  	j2.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
  1346  	j2.TaskGroups[0].Update.ProgressDeadline = 0
  1347  	d2 := mock.Deployment()
  1348  	d2.JobID = j2.ID
  1349  	a2 := mock.Alloc()
  1350  	a2.Job = j2
  1351  	a2.JobID = j2.ID
  1352  	a2.DeploymentID = d2.ID
  1353  
  1354  	require.Nil(m.state.UpsertJob(m.nextIndex(), j1), "UpsertJob")
  1355  	require.Nil(m.state.UpsertJob(m.nextIndex(), j2), "UpsertJob")
  1356  	require.Nil(m.state.UpsertDeployment(m.nextIndex(), d1), "UpsertDeployment")
  1357  	require.Nil(m.state.UpsertDeployment(m.nextIndex(), d2), "UpsertDeployment")
  1358  	require.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a1}), "UpsertAllocs")
  1359  	require.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a2}), "UpsertAllocs")
  1360  
  1361  	// require that we will get a createEvaluation call only once and it contains
  1362  	// both deployments. This will verify that the watcher is batching
  1363  	// allocation changes
  1364  	m1 := matchUpdateAllocDesiredTransitions([]string{d1.ID, d2.ID})
  1365  	m.On("UpdateAllocDesiredTransition", mocker.MatchedBy(m1)).Return(nil).Once()
  1366  
  1367  	w.SetEnabled(true, m.state)
  1368  	testutil.WaitForResult(func() (bool, error) { return 2 == len(w.watchers), nil },
  1369  		func(err error) { require.Equal(2, len(w.watchers), "Should have 2 deployment") })
  1370  
  1371  	// Update the allocs health to healthy which should create an evaluation
  1372  	req := &structs.ApplyDeploymentAllocHealthRequest{
  1373  		DeploymentAllocHealthRequest: structs.DeploymentAllocHealthRequest{
  1374  			DeploymentID:         d1.ID,
  1375  			HealthyAllocationIDs: []string{a1.ID},
  1376  		},
  1377  	}
  1378  	require.Nil(m.state.UpdateDeploymentAllocHealth(m.nextIndex(), req), "UpsertDeploymentAllocHealth")
  1379  
  1380  	req2 := &structs.ApplyDeploymentAllocHealthRequest{
  1381  		DeploymentAllocHealthRequest: structs.DeploymentAllocHealthRequest{
  1382  			DeploymentID:         d2.ID,
  1383  			HealthyAllocationIDs: []string{a2.ID},
  1384  		},
  1385  	}
  1386  	require.Nil(m.state.UpdateDeploymentAllocHealth(m.nextIndex(), req2), "UpsertDeploymentAllocHealth")
  1387  
  1388  	// Wait for there to be one eval for each job
  1389  	testutil.WaitForResult(func() (bool, error) {
  1390  		ws := memdb.NewWatchSet()
  1391  		evals1, err := m.state.EvalsByJob(ws, j1.Namespace, j1.ID)
  1392  		if err != nil {
  1393  			return false, err
  1394  		}
  1395  
  1396  		evals2, err := m.state.EvalsByJob(ws, j2.Namespace, j2.ID)
  1397  		if err != nil {
  1398  			return false, err
  1399  		}
  1400  
  1401  		if l := len(evals1); l != 1 {
  1402  			return false, fmt.Errorf("Got %d evals for job %v; want 1", l, j1.ID)
  1403  		}
  1404  
  1405  		if l := len(evals2); l != 1 {
  1406  			return false, fmt.Errorf("Got %d evals for job 2; want 1", l)
  1407  		}
  1408  
  1409  		return true, nil
  1410  	}, func(err error) {
  1411  		t.Fatal(err)
  1412  	})
  1413  
  1414  	m.AssertCalled(t, "UpdateAllocDesiredTransition", mocker.MatchedBy(m1))
  1415  	testutil.WaitForResult(func() (bool, error) { return 2 == len(w.watchers), nil },
  1416  		func(err error) { require.Equal(2, len(w.watchers), "Should have 2 deployment") })
  1417  }