github.com/hernad/nomad@v1.6.112/nomad/deploymentwatcher/deployments_watcher_test.go (about)

     1  // Copyright (c) HashiCorp, Inc.
     2  // SPDX-License-Identifier: MPL-2.0
     3  
     4  package deploymentwatcher
     5  
     6  import (
     7  	"fmt"
     8  	"testing"
     9  	"time"
    10  
    11  	memdb "github.com/hashicorp/go-memdb"
    12  	"github.com/hernad/nomad/ci"
    13  	"github.com/hernad/nomad/helper/pointer"
    14  	"github.com/hernad/nomad/helper/testlog"
    15  	"github.com/hernad/nomad/helper/uuid"
    16  	"github.com/hernad/nomad/nomad/mock"
    17  	"github.com/hernad/nomad/nomad/structs"
    18  	"github.com/hernad/nomad/testutil"
    19  	"github.com/shoenig/test/must"
    20  	"github.com/stretchr/testify/assert"
    21  	mocker "github.com/stretchr/testify/mock"
    22  	"github.com/stretchr/testify/require"
    23  )
    24  
    25  func testDeploymentWatcher(t *testing.T, qps float64, batchDur time.Duration) (*Watcher, *mockBackend) {
    26  	m := newMockBackend(t)
    27  	w := NewDeploymentsWatcher(testlog.HCLogger(t), m, nil, nil, qps, batchDur)
    28  	return w, m
    29  }
    30  
    31  func defaultTestDeploymentWatcher(t *testing.T) (*Watcher, *mockBackend) {
    32  	return testDeploymentWatcher(t, LimitStateQueriesPerSecond, CrossDeploymentUpdateBatchDuration)
    33  }
    34  
    35  // Tests that the watcher properly watches for deployments and reconciles them
    36  func TestWatcher_WatchDeployments(t *testing.T) {
    37  	ci.Parallel(t)
    38  	require := require.New(t)
    39  	w, m := defaultTestDeploymentWatcher(t)
    40  
    41  	m.On("UpdateDeploymentStatus", mocker.MatchedBy(func(args *structs.DeploymentStatusUpdateRequest) bool {
    42  		return true
    43  	})).Return(nil).Maybe()
    44  
    45  	// Create three jobs
    46  	j1, j2, j3 := mock.Job(), mock.Job(), mock.Job()
    47  	require.Nil(m.state.UpsertJob(structs.MsgTypeTestSetup, 100, nil, j1))
    48  	require.Nil(m.state.UpsertJob(structs.MsgTypeTestSetup, 101, nil, j2))
    49  	require.Nil(m.state.UpsertJob(structs.MsgTypeTestSetup, 102, nil, j3))
    50  
    51  	// Create three deployments all running
    52  	d1, d2, d3 := mock.Deployment(), mock.Deployment(), mock.Deployment()
    53  	d1.JobID = j1.ID
    54  	d2.JobID = j2.ID
    55  	d3.JobID = j3.ID
    56  
    57  	// Upsert the first deployment
    58  	require.Nil(m.state.UpsertDeployment(103, d1))
    59  
    60  	// Next list 3
    61  	block1 := make(chan time.Time)
    62  	go func() {
    63  		<-block1
    64  		require.Nil(m.state.UpsertDeployment(104, d2))
    65  		require.Nil(m.state.UpsertDeployment(105, d3))
    66  	}()
    67  
    68  	//// Next list 3 but have one be terminal
    69  	block2 := make(chan time.Time)
    70  	d3terminal := d3.Copy()
    71  	d3terminal.Status = structs.DeploymentStatusFailed
    72  	go func() {
    73  		<-block2
    74  		require.Nil(m.state.UpsertDeployment(106, d3terminal))
    75  	}()
    76  
    77  	w.SetEnabled(true, m.state)
    78  	testutil.WaitForResult(func() (bool, error) { return 1 == watchersCount(w), nil },
    79  		func(err error) { require.Equal(1, watchersCount(w), "1 deployment returned") })
    80  
    81  	close(block1)
    82  	testutil.WaitForResult(func() (bool, error) { return 3 == watchersCount(w), nil },
    83  		func(err error) { require.Equal(3, watchersCount(w), "3 deployment returned") })
    84  
    85  	close(block2)
    86  	testutil.WaitForResult(func() (bool, error) { return 2 == watchersCount(w), nil },
    87  		func(err error) { require.Equal(3, watchersCount(w), "3 deployment returned - 1 terminal") })
    88  }
    89  
    90  // Tests that calls against an unknown deployment fail
    91  func TestWatcher_UnknownDeployment(t *testing.T) {
    92  	ci.Parallel(t)
    93  	assert := assert.New(t)
    94  	require := require.New(t)
    95  	w, m := defaultTestDeploymentWatcher(t)
    96  	w.SetEnabled(true, m.state)
    97  
    98  	m.On("UpdateDeploymentStatus", mocker.MatchedBy(func(args *structs.DeploymentStatusUpdateRequest) bool {
    99  		return true
   100  	})).Return(nil).Maybe()
   101  
   102  	// The expected error is that it should be an unknown deployment
   103  	dID := uuid.Generate()
   104  	expected := fmt.Sprintf("unknown deployment %q", dID)
   105  
   106  	// Request setting the health against an unknown deployment
   107  	req := &structs.DeploymentAllocHealthRequest{
   108  		DeploymentID:         dID,
   109  		HealthyAllocationIDs: []string{uuid.Generate()},
   110  	}
   111  	var resp structs.DeploymentUpdateResponse
   112  	err := w.SetAllocHealth(req, &resp)
   113  	if assert.NotNil(err, "should have error for unknown deployment") {
   114  		require.Contains(err.Error(), expected)
   115  	}
   116  
   117  	// Request promoting against an unknown deployment
   118  	req2 := &structs.DeploymentPromoteRequest{
   119  		DeploymentID: dID,
   120  		All:          true,
   121  	}
   122  	err = w.PromoteDeployment(req2, &resp)
   123  	if assert.NotNil(err, "should have error for unknown deployment") {
   124  		require.Contains(err.Error(), expected)
   125  	}
   126  
   127  	// Request pausing against an unknown deployment
   128  	req3 := &structs.DeploymentPauseRequest{
   129  		DeploymentID: dID,
   130  		Pause:        true,
   131  	}
   132  	err = w.PauseDeployment(req3, &resp)
   133  	if assert.NotNil(err, "should have error for unknown deployment") {
   134  		require.Contains(err.Error(), expected)
   135  	}
   136  
   137  	// Request failing against an unknown deployment
   138  	req4 := &structs.DeploymentFailRequest{
   139  		DeploymentID: dID,
   140  	}
   141  	err = w.FailDeployment(req4, &resp)
   142  	if assert.NotNil(err, "should have error for unknown deployment") {
   143  		require.Contains(err.Error(), expected)
   144  	}
   145  }
   146  
   147  // Test setting an unknown allocation's health
   148  func TestWatcher_SetAllocHealth_Unknown(t *testing.T) {
   149  	ci.Parallel(t)
   150  	assert := assert.New(t)
   151  	require := require.New(t)
   152  	w, m := defaultTestDeploymentWatcher(t)
   153  
   154  	m.On("UpdateDeploymentStatus", mocker.MatchedBy(func(args *structs.DeploymentStatusUpdateRequest) bool {
   155  		return true
   156  	})).Return(nil).Maybe()
   157  
   158  	// Create a job, and a deployment
   159  	j := mock.Job()
   160  	d := mock.Deployment()
   161  	d.JobID = j.ID
   162  	require.Nil(m.state.UpsertJob(structs.MsgTypeTestSetup, m.nextIndex(), nil, j), "UpsertJob")
   163  	require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
   164  
   165  	// require that we get a call to UpsertDeploymentAllocHealth
   166  	a := mock.Alloc()
   167  	matchConfig := &matchDeploymentAllocHealthRequestConfig{
   168  		DeploymentID: d.ID,
   169  		Healthy:      []string{a.ID},
   170  		Eval:         true,
   171  	}
   172  	matcher := matchDeploymentAllocHealthRequest(matchConfig)
   173  	m.On("UpdateDeploymentAllocHealth", mocker.MatchedBy(matcher)).Return(nil)
   174  
   175  	w.SetEnabled(true, m.state)
   176  	testutil.WaitForResult(func() (bool, error) { return 1 == watchersCount(w), nil },
   177  		func(err error) { require.Equal(1, watchersCount(w), "Should have 1 deployment") })
   178  
   179  	// Call SetAllocHealth
   180  	req := &structs.DeploymentAllocHealthRequest{
   181  		DeploymentID:         d.ID,
   182  		HealthyAllocationIDs: []string{a.ID},
   183  	}
   184  	var resp structs.DeploymentUpdateResponse
   185  	err := w.SetAllocHealth(req, &resp)
   186  	if assert.NotNil(err, "Set health of unknown allocation") {
   187  		require.Contains(err.Error(), "unknown")
   188  	}
   189  	require.Equal(1, watchersCount(w), "Deployment should still be active")
   190  }
   191  
   192  // Test setting allocation health
   193  func TestWatcher_SetAllocHealth_Healthy(t *testing.T) {
   194  	ci.Parallel(t)
   195  	require := require.New(t)
   196  	w, m := defaultTestDeploymentWatcher(t)
   197  
   198  	m.On("UpdateDeploymentStatus", mocker.MatchedBy(func(args *structs.DeploymentStatusUpdateRequest) bool {
   199  		return true
   200  	})).Return(nil).Maybe()
   201  
   202  	// Create a job, alloc, and a deployment
   203  	j := mock.Job()
   204  	d := mock.Deployment()
   205  	d.JobID = j.ID
   206  	a := mock.Alloc()
   207  	a.DeploymentID = d.ID
   208  	require.Nil(m.state.UpsertJob(structs.MsgTypeTestSetup, m.nextIndex(), nil, j), "UpsertJob")
   209  	require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
   210  	require.Nil(m.state.UpsertAllocs(structs.MsgTypeTestSetup, m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs")
   211  
   212  	// require that we get a call to UpsertDeploymentAllocHealth
   213  	matchConfig := &matchDeploymentAllocHealthRequestConfig{
   214  		DeploymentID: d.ID,
   215  		Healthy:      []string{a.ID},
   216  		Eval:         true,
   217  	}
   218  	matcher := matchDeploymentAllocHealthRequest(matchConfig)
   219  	m.On("UpdateDeploymentAllocHealth", mocker.MatchedBy(matcher)).Return(nil)
   220  
   221  	w.SetEnabled(true, m.state)
   222  	testutil.WaitForResult(func() (bool, error) { return 1 == watchersCount(w), nil },
   223  		func(err error) { require.Equal(1, watchersCount(w), "Should have 1 deployment") })
   224  
   225  	// Call SetAllocHealth
   226  	req := &structs.DeploymentAllocHealthRequest{
   227  		DeploymentID:         d.ID,
   228  		HealthyAllocationIDs: []string{a.ID},
   229  	}
   230  	var resp structs.DeploymentUpdateResponse
   231  	err := w.SetAllocHealth(req, &resp)
   232  	require.Nil(err, "SetAllocHealth")
   233  	require.Equal(1, watchersCount(w), "Deployment should still be active")
   234  	m.AssertCalled(t, "UpdateDeploymentAllocHealth", mocker.MatchedBy(matcher))
   235  }
   236  
   237  // Test setting allocation unhealthy
   238  func TestWatcher_SetAllocHealth_Unhealthy(t *testing.T) {
   239  	ci.Parallel(t)
   240  	require := require.New(t)
   241  	w, m := defaultTestDeploymentWatcher(t)
   242  
   243  	m.On("UpdateDeploymentStatus", mocker.MatchedBy(func(args *structs.DeploymentStatusUpdateRequest) bool {
   244  		return true
   245  	})).Return(nil).Maybe()
   246  
   247  	// Create a job, alloc, and a deployment
   248  	j := mock.Job()
   249  	d := mock.Deployment()
   250  	d.JobID = j.ID
   251  	a := mock.Alloc()
   252  	a.DeploymentID = d.ID
   253  	require.Nil(m.state.UpsertJob(structs.MsgTypeTestSetup, m.nextIndex(), nil, j), "UpsertJob")
   254  	require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
   255  	require.Nil(m.state.UpsertAllocs(structs.MsgTypeTestSetup, m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs")
   256  
   257  	// require that we get a call to UpsertDeploymentAllocHealth
   258  	matchConfig := &matchDeploymentAllocHealthRequestConfig{
   259  		DeploymentID: d.ID,
   260  		Unhealthy:    []string{a.ID},
   261  		Eval:         true,
   262  		DeploymentUpdate: &structs.DeploymentStatusUpdate{
   263  			DeploymentID:      d.ID,
   264  			Status:            structs.DeploymentStatusFailed,
   265  			StatusDescription: structs.DeploymentStatusDescriptionFailedAllocations,
   266  		},
   267  	}
   268  	matcher := matchDeploymentAllocHealthRequest(matchConfig)
   269  	m.On("UpdateDeploymentAllocHealth", mocker.MatchedBy(matcher)).Return(nil)
   270  
   271  	w.SetEnabled(true, m.state)
   272  	testutil.WaitForResult(func() (bool, error) { return 1 == watchersCount(w), nil },
   273  		func(err error) { require.Equal(1, watchersCount(w), "Should have 1 deployment") })
   274  
   275  	// Call SetAllocHealth
   276  	req := &structs.DeploymentAllocHealthRequest{
   277  		DeploymentID:           d.ID,
   278  		UnhealthyAllocationIDs: []string{a.ID},
   279  	}
   280  	var resp structs.DeploymentUpdateResponse
   281  	err := w.SetAllocHealth(req, &resp)
   282  	require.Nil(err, "SetAllocHealth")
   283  
   284  	testutil.WaitForResult(func() (bool, error) { return 0 == watchersCount(w), nil },
   285  		func(err error) { require.Equal(0, watchersCount(w), "Should have no deployment") })
   286  	m.AssertNumberOfCalls(t, "UpdateDeploymentAllocHealth", 1)
   287  }
   288  
   289  // Test setting allocation unhealthy and that there should be a rollback
   290  func TestWatcher_SetAllocHealth_Unhealthy_Rollback(t *testing.T) {
   291  	ci.Parallel(t)
   292  	require := require.New(t)
   293  	w, m := defaultTestDeploymentWatcher(t)
   294  
   295  	m.On("UpdateDeploymentStatus", mocker.MatchedBy(func(args *structs.DeploymentStatusUpdateRequest) bool {
   296  		return true
   297  	})).Return(nil).Maybe()
   298  
   299  	// Create a job, alloc, and a deployment
   300  	j := mock.Job()
   301  	j.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
   302  	j.TaskGroups[0].Update.MaxParallel = 2
   303  	j.TaskGroups[0].Update.AutoRevert = true
   304  	j.TaskGroups[0].Update.ProgressDeadline = 0
   305  	j.Stable = true
   306  	d := mock.Deployment()
   307  	d.JobID = j.ID
   308  	d.TaskGroups["web"].AutoRevert = true
   309  	a := mock.Alloc()
   310  	a.DeploymentID = d.ID
   311  	require.Nil(m.state.UpsertJob(structs.MsgTypeTestSetup, m.nextIndex(), nil, j), "UpsertJob")
   312  	require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
   313  	require.Nil(m.state.UpsertAllocs(structs.MsgTypeTestSetup, m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs")
   314  
   315  	// Upsert the job again to get a new version
   316  	j2 := j.Copy()
   317  	j2.Stable = false
   318  	// Modify the job to make its specification different
   319  	j2.Meta["foo"] = "bar"
   320  
   321  	require.Nil(m.state.UpsertJob(structs.MsgTypeTestSetup, m.nextIndex(), nil, j2), "UpsertJob2")
   322  
   323  	// require that we get a call to UpsertDeploymentAllocHealth
   324  	matchConfig := &matchDeploymentAllocHealthRequestConfig{
   325  		DeploymentID: d.ID,
   326  		Unhealthy:    []string{a.ID},
   327  		Eval:         true,
   328  		DeploymentUpdate: &structs.DeploymentStatusUpdate{
   329  			DeploymentID:      d.ID,
   330  			Status:            structs.DeploymentStatusFailed,
   331  			StatusDescription: structs.DeploymentStatusDescriptionFailedAllocations,
   332  		},
   333  		JobVersion: pointer.Of(uint64(0)),
   334  	}
   335  	matcher := matchDeploymentAllocHealthRequest(matchConfig)
   336  	m.On("UpdateDeploymentAllocHealth", mocker.MatchedBy(matcher)).Return(nil)
   337  
   338  	w.SetEnabled(true, m.state)
   339  	testutil.WaitForResult(func() (bool, error) { return 1 == watchersCount(w), nil },
   340  		func(err error) { require.Equal(1, watchersCount(w), "Should have 1 deployment") })
   341  
   342  	// Call SetAllocHealth
   343  	req := &structs.DeploymentAllocHealthRequest{
   344  		DeploymentID:           d.ID,
   345  		UnhealthyAllocationIDs: []string{a.ID},
   346  	}
   347  	var resp structs.DeploymentUpdateResponse
   348  	err := w.SetAllocHealth(req, &resp)
   349  	require.Nil(err, "SetAllocHealth")
   350  
   351  	testutil.WaitForResult(func() (bool, error) { return 0 == watchersCount(w), nil },
   352  		func(err error) { require.Equal(0, watchersCount(w), "Should have no deployment") })
   353  	m.AssertNumberOfCalls(t, "UpdateDeploymentAllocHealth", 1)
   354  }
   355  
   356  // Test setting allocation unhealthy on job with identical spec and there should be no rollback
   357  func TestWatcher_SetAllocHealth_Unhealthy_NoRollback(t *testing.T) {
   358  	ci.Parallel(t)
   359  	require := require.New(t)
   360  	w, m := defaultTestDeploymentWatcher(t)
   361  
   362  	m.On("UpdateDeploymentStatus", mocker.MatchedBy(func(args *structs.DeploymentStatusUpdateRequest) bool {
   363  		return true
   364  	})).Return(nil).Maybe()
   365  
   366  	// Create a job, alloc, and a deployment
   367  	j := mock.Job()
   368  	j.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
   369  	j.TaskGroups[0].Update.MaxParallel = 2
   370  	j.TaskGroups[0].Update.AutoRevert = true
   371  	j.TaskGroups[0].Update.ProgressDeadline = 0
   372  	j.Stable = true
   373  	d := mock.Deployment()
   374  	d.JobID = j.ID
   375  	d.TaskGroups["web"].AutoRevert = true
   376  	a := mock.Alloc()
   377  	a.DeploymentID = d.ID
   378  	require.Nil(m.state.UpsertJob(structs.MsgTypeTestSetup, m.nextIndex(), nil, j), "UpsertJob")
   379  	require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
   380  	require.Nil(m.state.UpsertAllocs(structs.MsgTypeTestSetup, m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs")
   381  
   382  	// Upsert the job again to get a new version
   383  	j2 := j.Copy()
   384  	j2.Stable = false
   385  
   386  	require.Nil(m.state.UpsertJob(structs.MsgTypeTestSetup, m.nextIndex(), nil, j2), "UpsertJob2")
   387  
   388  	// require that we get a call to UpsertDeploymentAllocHealth
   389  	matchConfig := &matchDeploymentAllocHealthRequestConfig{
   390  		DeploymentID: d.ID,
   391  		Unhealthy:    []string{a.ID},
   392  		Eval:         true,
   393  		DeploymentUpdate: &structs.DeploymentStatusUpdate{
   394  			DeploymentID:      d.ID,
   395  			Status:            structs.DeploymentStatusFailed,
   396  			StatusDescription: structs.DeploymentStatusDescriptionFailedAllocations,
   397  		},
   398  		JobVersion: nil,
   399  	}
   400  	matcher := matchDeploymentAllocHealthRequest(matchConfig)
   401  	m.On("UpdateDeploymentAllocHealth", mocker.MatchedBy(matcher)).Return(nil)
   402  
   403  	w.SetEnabled(true, m.state)
   404  	testutil.WaitForResult(func() (bool, error) { return 1 == watchersCount(w), nil },
   405  		func(err error) { require.Equal(1, watchersCount(w), "Should have 1 deployment") })
   406  
   407  	// Call SetAllocHealth
   408  	req := &structs.DeploymentAllocHealthRequest{
   409  		DeploymentID:           d.ID,
   410  		UnhealthyAllocationIDs: []string{a.ID},
   411  	}
   412  	var resp structs.DeploymentUpdateResponse
   413  	err := w.SetAllocHealth(req, &resp)
   414  	require.Nil(err, "SetAllocHealth")
   415  
   416  	testutil.WaitForResult(func() (bool, error) { return 0 == watchersCount(w), nil },
   417  		func(err error) { require.Equal(0, watchersCount(w), "Should have no deployment") })
   418  	m.AssertNumberOfCalls(t, "UpdateDeploymentAllocHealth", 1)
   419  }
   420  
   421  // Test promoting a deployment
   422  func TestWatcher_PromoteDeployment_HealthyCanaries(t *testing.T) {
   423  	ci.Parallel(t)
   424  	require := require.New(t)
   425  	w, m := defaultTestDeploymentWatcher(t)
   426  
   427  	m.On("UpdateDeploymentStatus", mocker.MatchedBy(func(args *structs.DeploymentStatusUpdateRequest) bool {
   428  		return true
   429  	})).Return(nil).Maybe()
   430  
   431  	// Create a job, canary alloc, and a deployment
   432  	j := mock.Job()
   433  	j.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
   434  	j.TaskGroups[0].Update.MaxParallel = 2
   435  	j.TaskGroups[0].Update.Canary = 1
   436  	j.TaskGroups[0].Update.ProgressDeadline = 0
   437  	d := mock.Deployment()
   438  	d.JobID = j.ID
   439  	a := mock.Alloc()
   440  	d.TaskGroups[a.TaskGroup].DesiredCanaries = 1
   441  	d.TaskGroups[a.TaskGroup].PlacedCanaries = []string{a.ID}
   442  	a.DeploymentStatus = &structs.AllocDeploymentStatus{
   443  		Healthy: pointer.Of(true),
   444  	}
   445  	a.DeploymentID = d.ID
   446  	require.Nil(m.state.UpsertJob(structs.MsgTypeTestSetup, m.nextIndex(), nil, j), "UpsertJob")
   447  	require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
   448  	require.Nil(m.state.UpsertAllocs(structs.MsgTypeTestSetup, m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs")
   449  
   450  	// require that we get a call to UpsertDeploymentPromotion
   451  	matchConfig := &matchDeploymentPromoteRequestConfig{
   452  		Promotion: &structs.DeploymentPromoteRequest{
   453  			DeploymentID: d.ID,
   454  			All:          true,
   455  		},
   456  		Eval: true,
   457  	}
   458  	matcher := matchDeploymentPromoteRequest(matchConfig)
   459  	m.On("UpdateDeploymentPromotion", mocker.MatchedBy(matcher)).Return(nil)
   460  
   461  	// We may get an update for the desired transition.
   462  	m1 := matchUpdateAllocDesiredTransitions([]string{d.ID})
   463  	m.On("UpdateAllocDesiredTransition", mocker.MatchedBy(m1)).Return(nil).Once()
   464  
   465  	w.SetEnabled(true, m.state)
   466  	testutil.WaitForResult(func() (bool, error) { return 1 == watchersCount(w), nil },
   467  		func(err error) { require.Equal(1, watchersCount(w), "Should have 1 deployment") })
   468  
   469  	// Call PromoteDeployment
   470  	req := &structs.DeploymentPromoteRequest{
   471  		DeploymentID: d.ID,
   472  		All:          true,
   473  	}
   474  	var resp structs.DeploymentUpdateResponse
   475  	err := w.PromoteDeployment(req, &resp)
   476  	require.Nil(err, "PromoteDeployment")
   477  	require.Equal(1, watchersCount(w), "Deployment should still be active")
   478  	m.AssertCalled(t, "UpdateDeploymentPromotion", mocker.MatchedBy(matcher))
   479  }
   480  
   481  // Test promoting a deployment with unhealthy canaries
   482  func TestWatcher_PromoteDeployment_UnhealthyCanaries(t *testing.T) {
   483  	ci.Parallel(t)
   484  	require := require.New(t)
   485  	w, m := defaultTestDeploymentWatcher(t)
   486  
   487  	m.On("UpdateDeploymentStatus", mocker.MatchedBy(func(args *structs.DeploymentStatusUpdateRequest) bool {
   488  		return true
   489  	})).Return(nil).Maybe()
   490  
   491  	// Create a job, canary alloc, and a deployment
   492  	j := mock.Job()
   493  	j.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
   494  	j.TaskGroups[0].Update.MaxParallel = 2
   495  	j.TaskGroups[0].Update.Canary = 2
   496  	j.TaskGroups[0].Update.ProgressDeadline = 0
   497  	d := mock.Deployment()
   498  	d.JobID = j.ID
   499  	a := mock.Alloc()
   500  	d.TaskGroups[a.TaskGroup].PlacedCanaries = []string{a.ID}
   501  	d.TaskGroups[a.TaskGroup].DesiredCanaries = 2
   502  	a.DeploymentID = d.ID
   503  	require.Nil(m.state.UpsertJob(structs.MsgTypeTestSetup, m.nextIndex(), nil, j), "UpsertJob")
   504  	require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
   505  	require.Nil(m.state.UpsertAllocs(structs.MsgTypeTestSetup, m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs")
   506  
   507  	// require that we get a call to UpsertDeploymentPromotion
   508  	matchConfig := &matchDeploymentPromoteRequestConfig{
   509  		Promotion: &structs.DeploymentPromoteRequest{
   510  			DeploymentID: d.ID,
   511  			All:          true,
   512  		},
   513  		Eval: true,
   514  	}
   515  	matcher := matchDeploymentPromoteRequest(matchConfig)
   516  	m.On("UpdateDeploymentPromotion", mocker.MatchedBy(matcher)).Return(nil)
   517  
   518  	w.SetEnabled(true, m.state)
   519  	testutil.WaitForResult(func() (bool, error) { return 1 == watchersCount(w), nil },
   520  		func(err error) { require.Equal(1, watchersCount(w), "Should have 1 deployment") })
   521  
   522  	// Call SetAllocHealth
   523  	req := &structs.DeploymentPromoteRequest{
   524  		DeploymentID: d.ID,
   525  		All:          true,
   526  	}
   527  	var resp structs.DeploymentUpdateResponse
   528  	err := w.PromoteDeployment(req, &resp)
   529  	if assert.NotNil(t, err, "PromoteDeployment") {
   530  		// 0/2 because the old version has been stopped but the canary isn't marked healthy yet
   531  		require.Contains(err.Error(), `Task group "web" has 0/2 healthy allocations`, "Should error because canary isn't marked healthy")
   532  	}
   533  
   534  	require.Equal(1, watchersCount(w), "Deployment should still be active")
   535  	m.AssertCalled(t, "UpdateDeploymentPromotion", mocker.MatchedBy(matcher))
   536  }
   537  
   538  func TestWatcher_AutoPromoteDeployment(t *testing.T) {
   539  	ci.Parallel(t)
   540  	w, m := defaultTestDeploymentWatcher(t)
   541  	now := time.Now()
   542  
   543  	// Create 1 UpdateStrategy, 1 job (2 TaskGroups), 2 canaries, and 1 deployment
   544  	canaryUpd := structs.DefaultUpdateStrategy.Copy()
   545  	canaryUpd.AutoPromote = true
   546  	canaryUpd.MaxParallel = 2
   547  	canaryUpd.Canary = 2
   548  	canaryUpd.ProgressDeadline = 5 * time.Second
   549  
   550  	rollingUpd := structs.DefaultUpdateStrategy.Copy()
   551  	rollingUpd.ProgressDeadline = 5 * time.Second
   552  
   553  	j := mock.MultiTaskGroupJob()
   554  	j.TaskGroups[0].Update = canaryUpd
   555  	j.TaskGroups[1].Update = rollingUpd
   556  
   557  	d := mock.Deployment()
   558  	d.JobID = j.ID
   559  	// This is created in scheduler.computeGroup at runtime, where properties from the
   560  	// UpdateStrategy are copied in
   561  	d.TaskGroups = map[string]*structs.DeploymentState{
   562  		"web": {
   563  			AutoPromote:      canaryUpd.AutoPromote,
   564  			AutoRevert:       canaryUpd.AutoRevert,
   565  			ProgressDeadline: canaryUpd.ProgressDeadline,
   566  			DesiredTotal:     2,
   567  		},
   568  		"api": {
   569  			AutoPromote:      rollingUpd.AutoPromote,
   570  			AutoRevert:       rollingUpd.AutoRevert,
   571  			ProgressDeadline: rollingUpd.ProgressDeadline,
   572  			DesiredTotal:     2,
   573  		},
   574  	}
   575  
   576  	canaryAlloc := func() *structs.Allocation {
   577  		a := mock.Alloc()
   578  		a.DeploymentID = d.ID
   579  		a.CreateTime = now.UnixNano()
   580  		a.ModifyTime = now.UnixNano()
   581  		a.DeploymentStatus = &structs.AllocDeploymentStatus{
   582  			Canary: true,
   583  		}
   584  		return a
   585  	}
   586  
   587  	rollingAlloc := func() *structs.Allocation {
   588  		a := mock.Alloc()
   589  		a.DeploymentID = d.ID
   590  		a.CreateTime = now.UnixNano()
   591  		a.ModifyTime = now.UnixNano()
   592  		a.TaskGroup = "api"
   593  		a.AllocatedResources.Tasks["api"] = a.AllocatedResources.Tasks["web"].Copy()
   594  		delete(a.AllocatedResources.Tasks, "web")
   595  		a.TaskResources["api"] = a.TaskResources["web"].Copy()
   596  		delete(a.TaskResources, "web")
   597  		a.DeploymentStatus = &structs.AllocDeploymentStatus{
   598  			Canary: false,
   599  		}
   600  		return a
   601  	}
   602  
   603  	// Web taskgroup (0)
   604  	ca1 := canaryAlloc()
   605  	ca2 := canaryAlloc()
   606  
   607  	// Api taskgroup (1)
   608  	ra1 := rollingAlloc()
   609  	ra2 := rollingAlloc()
   610  
   611  	d.TaskGroups[ca1.TaskGroup].PlacedCanaries = []string{ca1.ID, ca2.ID}
   612  	d.TaskGroups[ca1.TaskGroup].DesiredCanaries = 2
   613  	d.TaskGroups[ra1.TaskGroup].PlacedAllocs = 2
   614  	require.NoError(t, m.state.UpsertJob(structs.MsgTypeTestSetup, m.nextIndex(), nil, j), "UpsertJob")
   615  	require.NoError(t, m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
   616  	require.NoError(t, m.state.UpsertAllocs(structs.MsgTypeTestSetup, m.nextIndex(), []*structs.Allocation{ca1, ca2, ra1, ra2}), "UpsertAllocs")
   617  
   618  	// =============================================================
   619  	// Support method calls
   620  
   621  	// clear UpdateDeploymentStatus default expectation
   622  	m.Mock.ExpectedCalls = nil
   623  
   624  	matchConfig0 := &matchDeploymentStatusUpdateConfig{
   625  		DeploymentID:      d.ID,
   626  		Status:            structs.DeploymentStatusFailed,
   627  		StatusDescription: structs.DeploymentStatusDescriptionProgressDeadline,
   628  		Eval:              true,
   629  	}
   630  	matcher0 := matchDeploymentStatusUpdateRequest(matchConfig0)
   631  	m.On("UpdateDeploymentStatus", mocker.MatchedBy(matcher0)).Return(nil)
   632  
   633  	matchConfig1 := &matchDeploymentAllocHealthRequestConfig{
   634  		DeploymentID: d.ID,
   635  		Healthy:      []string{ca1.ID, ca2.ID, ra1.ID, ra2.ID},
   636  		Eval:         true,
   637  	}
   638  	matcher1 := matchDeploymentAllocHealthRequest(matchConfig1)
   639  	m.On("UpdateDeploymentAllocHealth", mocker.MatchedBy(matcher1)).Return(nil)
   640  
   641  	matchConfig2 := &matchDeploymentPromoteRequestConfig{
   642  		Promotion: &structs.DeploymentPromoteRequest{
   643  			DeploymentID: d.ID,
   644  			All:          true,
   645  		},
   646  		Eval: true,
   647  	}
   648  	matcher2 := matchDeploymentPromoteRequest(matchConfig2)
   649  	m.On("UpdateDeploymentPromotion", mocker.MatchedBy(matcher2)).Return(nil)
   650  	// =============================================================
   651  
   652  	// Start the deployment
   653  	w.SetEnabled(true, m.state)
   654  	testutil.WaitForResult(func() (bool, error) {
   655  		w.l.RLock()
   656  		defer w.l.RUnlock()
   657  		return 1 == len(w.watchers), nil
   658  	},
   659  		func(err error) {
   660  			w.l.RLock()
   661  			defer w.l.RUnlock()
   662  			require.Equal(t, 1, len(w.watchers), "Should have 1 deployment")
   663  		},
   664  	)
   665  
   666  	// Mark the canaries healthy
   667  	req := &structs.DeploymentAllocHealthRequest{
   668  		DeploymentID:         d.ID,
   669  		HealthyAllocationIDs: []string{ca1.ID, ca2.ID, ra1.ID, ra2.ID},
   670  	}
   671  	var resp structs.DeploymentUpdateResponse
   672  	// Calls w.raft.UpdateDeploymentAllocHealth, which is implemented by StateStore in
   673  	// state.UpdateDeploymentAllocHealth via a raft shim?
   674  	err := w.SetAllocHealth(req, &resp)
   675  	require.NoError(t, err)
   676  
   677  	ws := memdb.NewWatchSet()
   678  
   679  	testutil.WaitForResult(
   680  		func() (bool, error) {
   681  			ds, _ := m.state.DeploymentsByJobID(ws, j.Namespace, j.ID, true)
   682  			d = ds[0]
   683  			return 2 == d.TaskGroups["web"].HealthyAllocs, nil
   684  		},
   685  		func(err error) { require.NoError(t, err) },
   686  	)
   687  
   688  	require.Equal(t, 1, len(w.watchers), "Deployment should still be active")
   689  	m.AssertCalled(t, "UpdateDeploymentPromotion", mocker.MatchedBy(matcher2))
   690  
   691  	require.Equal(t, "running", d.Status)
   692  	require.True(t, d.TaskGroups["web"].Promoted)
   693  
   694  	a1, _ := m.state.AllocByID(ws, ca1.ID)
   695  	require.False(t, a1.DeploymentStatus.Canary)
   696  	require.Equal(t, "pending", a1.ClientStatus)
   697  	require.Equal(t, "run", a1.DesiredStatus)
   698  
   699  	b1, _ := m.state.AllocByID(ws, ca2.ID)
   700  	require.False(t, b1.DeploymentStatus.Canary)
   701  }
   702  
   703  func TestWatcher_AutoPromoteDeployment_UnhealthyCanaries(t *testing.T) {
   704  	ci.Parallel(t)
   705  	w, m := defaultTestDeploymentWatcher(t)
   706  	now := time.Now()
   707  
   708  	// Create 1 UpdateStrategy, 1 job (2 TaskGroups), 2 canaries, and 1 deployment
   709  	canaryUpd := structs.DefaultUpdateStrategy.Copy()
   710  	canaryUpd.AutoPromote = true
   711  	canaryUpd.MaxParallel = 2
   712  	canaryUpd.Canary = 2
   713  	canaryUpd.ProgressDeadline = 5 * time.Second
   714  
   715  	j := mock.MultiTaskGroupJob()
   716  	j.TaskGroups[0].Update = canaryUpd
   717  
   718  	d := mock.Deployment()
   719  	d.JobID = j.ID
   720  	// This is created in scheduler.computeGroup at runtime, where properties from the
   721  	// UpdateStrategy are copied in
   722  	d.TaskGroups = map[string]*structs.DeploymentState{
   723  		"web": {
   724  			AutoPromote:      canaryUpd.AutoPromote,
   725  			AutoRevert:       canaryUpd.AutoRevert,
   726  			ProgressDeadline: canaryUpd.ProgressDeadline,
   727  			DesiredTotal:     2,
   728  		},
   729  	}
   730  
   731  	canaryAlloc := func() *structs.Allocation {
   732  		a := mock.Alloc()
   733  		a.DeploymentID = d.ID
   734  		a.CreateTime = now.UnixNano()
   735  		a.ModifyTime = now.UnixNano()
   736  		a.DeploymentStatus = &structs.AllocDeploymentStatus{
   737  			Canary: true,
   738  		}
   739  		return a
   740  	}
   741  
   742  	// Web taskgroup
   743  	ca1 := canaryAlloc()
   744  	ca2 := canaryAlloc()
   745  	ca3 := canaryAlloc()
   746  
   747  	d.TaskGroups[ca1.TaskGroup].PlacedCanaries = []string{ca1.ID, ca2.ID, ca3.ID}
   748  	d.TaskGroups[ca1.TaskGroup].DesiredCanaries = 2
   749  	require.NoError(t, m.state.UpsertJob(structs.MsgTypeTestSetup, m.nextIndex(), nil, j), "UpsertJob")
   750  	require.NoError(t, m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
   751  	require.NoError(t, m.state.UpsertAllocs(structs.MsgTypeTestSetup, m.nextIndex(), []*structs.Allocation{ca1, ca2, ca3}), "UpsertAllocs")
   752  
   753  	// =============================================================
   754  	// Support method calls
   755  
   756  	// clear UpdateDeploymentStatus default expectation
   757  	m.Mock.ExpectedCalls = nil
   758  
   759  	matchConfig0 := &matchDeploymentStatusUpdateConfig{
   760  		DeploymentID:      d.ID,
   761  		Status:            structs.DeploymentStatusFailed,
   762  		StatusDescription: structs.DeploymentStatusDescriptionProgressDeadline,
   763  		Eval:              true,
   764  	}
   765  	matcher0 := matchDeploymentStatusUpdateRequest(matchConfig0)
   766  	m.On("UpdateDeploymentStatus", mocker.MatchedBy(matcher0)).Return(nil)
   767  
   768  	matchConfig1 := &matchDeploymentAllocHealthRequestConfig{
   769  		DeploymentID: d.ID,
   770  		Healthy:      []string{ca1.ID, ca2.ID},
   771  		Eval:         true,
   772  	}
   773  	matcher1 := matchDeploymentAllocHealthRequest(matchConfig1)
   774  	m.On("UpdateDeploymentAllocHealth", mocker.MatchedBy(matcher1)).Return(nil)
   775  
   776  	matchConfig2 := &matchDeploymentPromoteRequestConfig{
   777  		Promotion: &structs.DeploymentPromoteRequest{
   778  			DeploymentID: d.ID,
   779  			All:          true,
   780  		},
   781  		Eval: true,
   782  	}
   783  	matcher2 := matchDeploymentPromoteRequest(matchConfig2)
   784  	m.On("UpdateDeploymentPromotion", mocker.MatchedBy(matcher2)).Return(nil)
   785  	// =============================================================
   786  
   787  	// Start the deployment
   788  	w.SetEnabled(true, m.state)
   789  	testutil.WaitForResult(func() (bool, error) {
   790  		w.l.RLock()
   791  		defer w.l.RUnlock()
   792  		return 1 == len(w.watchers), nil
   793  	},
   794  		func(err error) {
   795  			w.l.RLock()
   796  			defer w.l.RUnlock()
   797  			require.Equal(t, 1, len(w.watchers), "Should have 1 deployment")
   798  		},
   799  	)
   800  
   801  	// Mark only 2 canaries as healthy
   802  	req := &structs.DeploymentAllocHealthRequest{
   803  		DeploymentID:         d.ID,
   804  		HealthyAllocationIDs: []string{ca1.ID, ca2.ID},
   805  	}
   806  	var resp structs.DeploymentUpdateResponse
   807  	// Calls w.raft.UpdateDeploymentAllocHealth, which is implemented by StateStore in
   808  	// state.UpdateDeploymentAllocHealth via a raft shim?
   809  	err := w.SetAllocHealth(req, &resp)
   810  	require.NoError(t, err)
   811  
   812  	ws := memdb.NewWatchSet()
   813  
   814  	testutil.WaitForResult(
   815  		func() (bool, error) {
   816  			ds, _ := m.state.DeploymentsByJobID(ws, j.Namespace, j.ID, true)
   817  			d = ds[0]
   818  			return 2 == d.TaskGroups["web"].HealthyAllocs, nil
   819  		},
   820  		func(err error) { require.NoError(t, err) },
   821  	)
   822  
   823  	// Verify that a promotion request was submitted.
   824  	require.Equal(t, 1, len(w.watchers), "Deployment should still be active")
   825  	m.AssertCalled(t, "UpdateDeploymentPromotion", mocker.MatchedBy(matcher2))
   826  
   827  	require.Equal(t, "running", d.Status)
   828  	require.True(t, d.TaskGroups["web"].Promoted)
   829  
   830  	a1, _ := m.state.AllocByID(ws, ca1.ID)
   831  	require.False(t, a1.DeploymentStatus.Canary)
   832  	require.Equal(t, "pending", a1.ClientStatus)
   833  	require.Equal(t, "run", a1.DesiredStatus)
   834  
   835  	b1, _ := m.state.AllocByID(ws, ca2.ID)
   836  	require.False(t, b1.DeploymentStatus.Canary)
   837  }
   838  
   839  // Test pausing a deployment that is running
   840  func TestWatcher_PauseDeployment_Pause_Running(t *testing.T) {
   841  	ci.Parallel(t)
   842  	require := require.New(t)
   843  	w, m := defaultTestDeploymentWatcher(t)
   844  
   845  	// clear UpdateDeploymentStatus default expectation
   846  	m.Mock.ExpectedCalls = nil
   847  
   848  	// Create a job and a deployment
   849  	j := mock.Job()
   850  	d := mock.Deployment()
   851  	d.JobID = j.ID
   852  	require.Nil(m.state.UpsertJob(structs.MsgTypeTestSetup, m.nextIndex(), nil, j), "UpsertJob")
   853  	require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
   854  
   855  	// require that we get a call to UpsertDeploymentStatusUpdate
   856  	matchConfig := &matchDeploymentStatusUpdateConfig{
   857  		DeploymentID:      d.ID,
   858  		Status:            structs.DeploymentStatusPaused,
   859  		StatusDescription: structs.DeploymentStatusDescriptionPaused,
   860  	}
   861  	matcher := matchDeploymentStatusUpdateRequest(matchConfig)
   862  	m.On("UpdateDeploymentStatus", mocker.MatchedBy(matcher)).Return(nil)
   863  
   864  	w.SetEnabled(true, m.state)
   865  	testutil.WaitForResult(func() (bool, error) { return 1 == watchersCount(w), nil },
   866  		func(err error) { require.Equal(1, watchersCount(w), "Should have 1 deployment") })
   867  
   868  	// Call PauseDeployment
   869  	req := &structs.DeploymentPauseRequest{
   870  		DeploymentID: d.ID,
   871  		Pause:        true,
   872  	}
   873  	var resp structs.DeploymentUpdateResponse
   874  	err := w.PauseDeployment(req, &resp)
   875  	require.Nil(err, "PauseDeployment")
   876  
   877  	require.Equal(1, watchersCount(w), "Deployment should still be active")
   878  	m.AssertCalled(t, "UpdateDeploymentStatus", mocker.MatchedBy(matcher))
   879  }
   880  
   881  // Test pausing a deployment that is paused
   882  func TestWatcher_PauseDeployment_Pause_Paused(t *testing.T) {
   883  	ci.Parallel(t)
   884  	require := require.New(t)
   885  	w, m := defaultTestDeploymentWatcher(t)
   886  
   887  	// clear UpdateDeploymentStatus default expectation
   888  	m.Mock.ExpectedCalls = nil
   889  
   890  	// Create a job and a deployment
   891  	j := mock.Job()
   892  	d := mock.Deployment()
   893  	d.JobID = j.ID
   894  	d.Status = structs.DeploymentStatusPaused
   895  	require.Nil(m.state.UpsertJob(structs.MsgTypeTestSetup, m.nextIndex(), nil, j), "UpsertJob")
   896  	require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
   897  
   898  	// require that we get a call to UpsertDeploymentStatusUpdate
   899  	matchConfig := &matchDeploymentStatusUpdateConfig{
   900  		DeploymentID:      d.ID,
   901  		Status:            structs.DeploymentStatusPaused,
   902  		StatusDescription: structs.DeploymentStatusDescriptionPaused,
   903  	}
   904  	matcher := matchDeploymentStatusUpdateRequest(matchConfig)
   905  	m.On("UpdateDeploymentStatus", mocker.MatchedBy(matcher)).Return(nil)
   906  
   907  	w.SetEnabled(true, m.state)
   908  	testutil.WaitForResult(func() (bool, error) { return 1 == watchersCount(w), nil },
   909  		func(err error) { require.Equal(1, watchersCount(w), "Should have 1 deployment") })
   910  
   911  	// Call PauseDeployment
   912  	req := &structs.DeploymentPauseRequest{
   913  		DeploymentID: d.ID,
   914  		Pause:        true,
   915  	}
   916  	var resp structs.DeploymentUpdateResponse
   917  	err := w.PauseDeployment(req, &resp)
   918  	require.Nil(err, "PauseDeployment")
   919  
   920  	require.Equal(1, watchersCount(w), "Deployment should still be active")
   921  	m.AssertCalled(t, "UpdateDeploymentStatus", mocker.MatchedBy(matcher))
   922  }
   923  
   924  // Test unpausing a deployment that is paused
   925  func TestWatcher_PauseDeployment_Unpause_Paused(t *testing.T) {
   926  	ci.Parallel(t)
   927  	require := require.New(t)
   928  	w, m := defaultTestDeploymentWatcher(t)
   929  
   930  	// Create a job and a deployment
   931  	j := mock.Job()
   932  	d := mock.Deployment()
   933  	d.JobID = j.ID
   934  	d.Status = structs.DeploymentStatusPaused
   935  	require.Nil(m.state.UpsertJob(structs.MsgTypeTestSetup, m.nextIndex(), nil, j), "UpsertJob")
   936  	require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
   937  
   938  	// require that we get a call to UpsertDeploymentStatusUpdate
   939  	matchConfig := &matchDeploymentStatusUpdateConfig{
   940  		DeploymentID:      d.ID,
   941  		Status:            structs.DeploymentStatusRunning,
   942  		StatusDescription: structs.DeploymentStatusDescriptionRunning,
   943  		Eval:              true,
   944  	}
   945  	matcher := matchDeploymentStatusUpdateRequest(matchConfig)
   946  	m.On("UpdateDeploymentStatus", mocker.MatchedBy(matcher)).Return(nil)
   947  
   948  	w.SetEnabled(true, m.state)
   949  	testutil.WaitForResult(func() (bool, error) { return 1 == watchersCount(w), nil },
   950  		func(err error) { require.Equal(1, watchersCount(w), "Should have 1 deployment") })
   951  
   952  	// Call PauseDeployment
   953  	req := &structs.DeploymentPauseRequest{
   954  		DeploymentID: d.ID,
   955  		Pause:        false,
   956  	}
   957  	var resp structs.DeploymentUpdateResponse
   958  	err := w.PauseDeployment(req, &resp)
   959  	require.Nil(err, "PauseDeployment")
   960  
   961  	require.Equal(1, watchersCount(w), "Deployment should still be active")
   962  	m.AssertCalled(t, "UpdateDeploymentStatus", mocker.MatchedBy(matcher))
   963  }
   964  
   965  // Test unpausing a deployment that is running
   966  func TestWatcher_PauseDeployment_Unpause_Running(t *testing.T) {
   967  	ci.Parallel(t)
   968  	require := require.New(t)
   969  	w, m := defaultTestDeploymentWatcher(t)
   970  
   971  	// Create a job and a deployment
   972  	j := mock.Job()
   973  	d := mock.Deployment()
   974  	d.JobID = j.ID
   975  	require.Nil(m.state.UpsertJob(structs.MsgTypeTestSetup, m.nextIndex(), nil, j), "UpsertJob")
   976  	require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
   977  
   978  	// require that we get a call to UpsertDeploymentStatusUpdate
   979  	matchConfig := &matchDeploymentStatusUpdateConfig{
   980  		DeploymentID:      d.ID,
   981  		Status:            structs.DeploymentStatusRunning,
   982  		StatusDescription: structs.DeploymentStatusDescriptionRunning,
   983  		Eval:              true,
   984  	}
   985  	matcher := matchDeploymentStatusUpdateRequest(matchConfig)
   986  	m.On("UpdateDeploymentStatus", mocker.MatchedBy(matcher)).Return(nil)
   987  
   988  	w.SetEnabled(true, m.state)
   989  	testutil.WaitForResult(func() (bool, error) { return 1 == watchersCount(w), nil },
   990  		func(err error) { require.Equal(1, watchersCount(w), "Should have 1 deployment") })
   991  
   992  	// Call PauseDeployment
   993  	req := &structs.DeploymentPauseRequest{
   994  		DeploymentID: d.ID,
   995  		Pause:        false,
   996  	}
   997  	var resp structs.DeploymentUpdateResponse
   998  	err := w.PauseDeployment(req, &resp)
   999  	require.Nil(err, "PauseDeployment")
  1000  
  1001  	require.Equal(1, watchersCount(w), "Deployment should still be active")
  1002  	m.AssertCalled(t, "UpdateDeploymentStatus", mocker.MatchedBy(matcher))
  1003  }
  1004  
  1005  // Test failing a deployment that is running
  1006  func TestWatcher_FailDeployment_Running(t *testing.T) {
  1007  	ci.Parallel(t)
  1008  	require := require.New(t)
  1009  	w, m := defaultTestDeploymentWatcher(t)
  1010  
  1011  	// Create a job and a deployment
  1012  	j := mock.Job()
  1013  	d := mock.Deployment()
  1014  	d.JobID = j.ID
  1015  	require.Nil(m.state.UpsertJob(structs.MsgTypeTestSetup, m.nextIndex(), nil, j), "UpsertJob")
  1016  	require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
  1017  
  1018  	// require that we get a call to UpsertDeploymentStatusUpdate
  1019  	matchConfig := &matchDeploymentStatusUpdateConfig{
  1020  		DeploymentID:      d.ID,
  1021  		Status:            structs.DeploymentStatusFailed,
  1022  		StatusDescription: structs.DeploymentStatusDescriptionFailedByUser,
  1023  		Eval:              true,
  1024  	}
  1025  	matcher := matchDeploymentStatusUpdateRequest(matchConfig)
  1026  	m.On("UpdateDeploymentStatus", mocker.MatchedBy(matcher)).Return(nil)
  1027  
  1028  	w.SetEnabled(true, m.state)
  1029  	testutil.WaitForResult(func() (bool, error) { return 1 == watchersCount(w), nil },
  1030  		func(err error) { require.Equal(1, watchersCount(w), "Should have 1 deployment") })
  1031  
  1032  	// Call PauseDeployment
  1033  	req := &structs.DeploymentFailRequest{
  1034  		DeploymentID: d.ID,
  1035  	}
  1036  	var resp structs.DeploymentUpdateResponse
  1037  	err := w.FailDeployment(req, &resp)
  1038  	require.Nil(err, "FailDeployment")
  1039  
  1040  	require.Equal(1, watchersCount(w), "Deployment should still be active")
  1041  	m.AssertCalled(t, "UpdateDeploymentStatus", mocker.MatchedBy(matcher))
  1042  }
  1043  
  1044  // Tests that the watcher properly watches for allocation changes and takes the
  1045  // proper actions
  1046  func TestDeploymentWatcher_Watch_NoProgressDeadline(t *testing.T) {
  1047  	ci.Parallel(t)
  1048  	require := require.New(t)
  1049  	w, m := testDeploymentWatcher(t, 1000.0, 1*time.Millisecond)
  1050  
  1051  	// Create a job, alloc, and a deployment
  1052  	j := mock.Job()
  1053  	j.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
  1054  	j.TaskGroups[0].Update.MaxParallel = 2
  1055  	j.TaskGroups[0].Update.AutoRevert = true
  1056  	j.TaskGroups[0].Update.ProgressDeadline = 0
  1057  	j.Stable = true
  1058  	d := mock.Deployment()
  1059  	d.JobID = j.ID
  1060  	d.TaskGroups["web"].AutoRevert = true
  1061  	a := mock.Alloc()
  1062  	a.DeploymentID = d.ID
  1063  	require.Nil(m.state.UpsertJob(structs.MsgTypeTestSetup, m.nextIndex(), nil, j), "UpsertJob")
  1064  	require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
  1065  	require.Nil(m.state.UpsertAllocs(structs.MsgTypeTestSetup, m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs")
  1066  
  1067  	// Upsert the job again to get a new version
  1068  	j2 := j.Copy()
  1069  	// Modify the job to make its specification different
  1070  	j2.Meta["foo"] = "bar"
  1071  	j2.Stable = false
  1072  	require.Nil(m.state.UpsertJob(structs.MsgTypeTestSetup, m.nextIndex(), nil, j2), "UpsertJob2")
  1073  
  1074  	// require that we will get a update allocation call only once. This will
  1075  	// verify that the watcher is batching allocation changes
  1076  	m1 := matchUpdateAllocDesiredTransitions([]string{d.ID})
  1077  	m.On("UpdateAllocDesiredTransition", mocker.MatchedBy(m1)).Return(nil).Once()
  1078  
  1079  	// require that we get a call to UpsertDeploymentStatusUpdate
  1080  	c := &matchDeploymentStatusUpdateConfig{
  1081  		DeploymentID:      d.ID,
  1082  		Status:            structs.DeploymentStatusFailed,
  1083  		StatusDescription: structs.DeploymentStatusDescriptionRollback(structs.DeploymentStatusDescriptionFailedAllocations, 0),
  1084  		JobVersion:        pointer.Of(uint64(0)),
  1085  		Eval:              true,
  1086  	}
  1087  	m2 := matchDeploymentStatusUpdateRequest(c)
  1088  	m.On("UpdateDeploymentStatus", mocker.MatchedBy(m2)).Return(nil)
  1089  
  1090  	w.SetEnabled(true, m.state)
  1091  	testutil.WaitForResult(func() (bool, error) { return 1 == watchersCount(w), nil },
  1092  		func(err error) { require.Equal(1, watchersCount(w), "Should have 1 deployment") })
  1093  
  1094  	// Update the allocs health to healthy which should create an evaluation
  1095  	for i := 0; i < 5; i++ {
  1096  		req := &structs.ApplyDeploymentAllocHealthRequest{
  1097  			DeploymentAllocHealthRequest: structs.DeploymentAllocHealthRequest{
  1098  				DeploymentID:         d.ID,
  1099  				HealthyAllocationIDs: []string{a.ID},
  1100  			},
  1101  		}
  1102  		require.Nil(m.state.UpdateDeploymentAllocHealth(structs.MsgTypeTestSetup, m.nextIndex(), req), "UpsertDeploymentAllocHealth")
  1103  	}
  1104  
  1105  	// Wait for there to be one eval
  1106  	testutil.WaitForResult(func() (bool, error) {
  1107  		ws := memdb.NewWatchSet()
  1108  		evals, err := m.state.EvalsByJob(ws, j.Namespace, j.ID)
  1109  		if err != nil {
  1110  			return false, err
  1111  		}
  1112  
  1113  		if l := len(evals); l != 1 {
  1114  			return false, fmt.Errorf("Got %d evals; want 1", l)
  1115  		}
  1116  
  1117  		return true, nil
  1118  	}, func(err error) {
  1119  		t.Fatal(err)
  1120  	})
  1121  
  1122  	// Update the allocs health to unhealthy which should create a job rollback,
  1123  	// status update and eval
  1124  	req2 := &structs.ApplyDeploymentAllocHealthRequest{
  1125  		DeploymentAllocHealthRequest: structs.DeploymentAllocHealthRequest{
  1126  			DeploymentID:           d.ID,
  1127  			UnhealthyAllocationIDs: []string{a.ID},
  1128  		},
  1129  	}
  1130  	require.Nil(m.state.UpdateDeploymentAllocHealth(structs.MsgTypeTestSetup, m.nextIndex(), req2), "UpsertDeploymentAllocHealth")
  1131  
  1132  	// Wait for there to be one eval
  1133  	testutil.WaitForResult(func() (bool, error) {
  1134  		ws := memdb.NewWatchSet()
  1135  		evals, err := m.state.EvalsByJob(ws, j.Namespace, j.ID)
  1136  		if err != nil {
  1137  			return false, err
  1138  		}
  1139  
  1140  		if l := len(evals); l != 2 {
  1141  			return false, fmt.Errorf("Got %d evals; want 1", l)
  1142  		}
  1143  
  1144  		return true, nil
  1145  	}, func(err error) {
  1146  		t.Fatal(err)
  1147  	})
  1148  
  1149  	m.AssertCalled(t, "UpdateAllocDesiredTransition", mocker.MatchedBy(m1))
  1150  
  1151  	// After we upsert the job version will go to 2. So use this to require the
  1152  	// original call happened.
  1153  	c2 := &matchDeploymentStatusUpdateConfig{
  1154  		DeploymentID:      d.ID,
  1155  		Status:            structs.DeploymentStatusFailed,
  1156  		StatusDescription: structs.DeploymentStatusDescriptionRollback(structs.DeploymentStatusDescriptionFailedAllocations, 0),
  1157  		JobVersion:        pointer.Of(uint64(2)),
  1158  		Eval:              true,
  1159  	}
  1160  	m3 := matchDeploymentStatusUpdateRequest(c2)
  1161  	m.AssertCalled(t, "UpdateDeploymentStatus", mocker.MatchedBy(m3))
  1162  	testutil.WaitForResult(func() (bool, error) { return 0 == watchersCount(w), nil },
  1163  		func(err error) { require.Equal(0, watchersCount(w), "Should have no deployment") })
  1164  }
  1165  
  1166  func TestDeploymentWatcher_Watch_ProgressDeadline(t *testing.T) {
  1167  	ci.Parallel(t)
  1168  	require := require.New(t)
  1169  	w, m := testDeploymentWatcher(t, 1000.0, 1*time.Millisecond)
  1170  
  1171  	// Create a job, alloc, and a deployment
  1172  	j := mock.Job()
  1173  	j.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
  1174  	j.TaskGroups[0].Update.MaxParallel = 2
  1175  	j.TaskGroups[0].Update.ProgressDeadline = 500 * time.Millisecond
  1176  	j.Stable = true
  1177  	d := mock.Deployment()
  1178  	d.JobID = j.ID
  1179  	d.TaskGroups["web"].ProgressDeadline = 500 * time.Millisecond
  1180  	a := mock.Alloc()
  1181  	now := time.Now()
  1182  	a.CreateTime = now.UnixNano()
  1183  	a.ModifyTime = now.UnixNano()
  1184  	a.DeploymentID = d.ID
  1185  	require.Nil(m.state.UpsertJob(structs.MsgTypeTestSetup, m.nextIndex(), nil, j), "UpsertJob")
  1186  	require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
  1187  	require.Nil(m.state.UpsertAllocs(structs.MsgTypeTestSetup, m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs")
  1188  
  1189  	// require that we get a call to UpsertDeploymentStatusUpdate
  1190  	c := &matchDeploymentStatusUpdateConfig{
  1191  		DeploymentID:      d.ID,
  1192  		Status:            structs.DeploymentStatusFailed,
  1193  		StatusDescription: structs.DeploymentStatusDescriptionProgressDeadline,
  1194  		Eval:              true,
  1195  	}
  1196  	m2 := matchDeploymentStatusUpdateRequest(c)
  1197  	m.On("UpdateDeploymentStatus", mocker.MatchedBy(m2)).Return(nil)
  1198  
  1199  	w.SetEnabled(true, m.state)
  1200  	testutil.WaitForResult(func() (bool, error) { return 1 == watchersCount(w), nil },
  1201  		func(err error) { require.Equal(1, watchersCount(w), "Should have 1 deployment") })
  1202  
  1203  	// Update the alloc to be unhealthy and require that nothing happens.
  1204  	a2 := a.Copy()
  1205  	a2.DeploymentStatus = &structs.AllocDeploymentStatus{
  1206  		Healthy:   pointer.Of(false),
  1207  		Timestamp: now,
  1208  	}
  1209  	require.Nil(m.state.UpdateAllocsFromClient(structs.MsgTypeTestSetup, 100, []*structs.Allocation{a2}))
  1210  
  1211  	// Wait for the deployment to be failed
  1212  	testutil.WaitForResult(func() (bool, error) {
  1213  		d, err := m.state.DeploymentByID(nil, d.ID)
  1214  		if err != nil {
  1215  			return false, err
  1216  		}
  1217  
  1218  		return d.Status == structs.DeploymentStatusFailed, fmt.Errorf("bad status %q", d.Status)
  1219  	}, func(err error) {
  1220  		t.Fatal(err)
  1221  	})
  1222  
  1223  	// require there are is only one evaluation
  1224  	testutil.WaitForResult(func() (bool, error) {
  1225  		ws := memdb.NewWatchSet()
  1226  		evals, err := m.state.EvalsByJob(ws, j.Namespace, j.ID)
  1227  		if err != nil {
  1228  			return false, err
  1229  		}
  1230  
  1231  		if l := len(evals); l != 1 {
  1232  			return false, fmt.Errorf("Got %d evals; want 1", l)
  1233  		}
  1234  
  1235  		return true, nil
  1236  	}, func(err error) {
  1237  		t.Fatal(err)
  1238  	})
  1239  }
  1240  
  1241  // Test that progress deadline handling works when there are multiple groups
  1242  func TestDeploymentWatcher_ProgressCutoff(t *testing.T) {
  1243  	ci.Parallel(t)
  1244  	require := require.New(t)
  1245  	w, m := testDeploymentWatcher(t, 1000.0, 1*time.Millisecond)
  1246  
  1247  	m.On("UpdateDeploymentStatus", mocker.MatchedBy(func(args *structs.DeploymentStatusUpdateRequest) bool {
  1248  		return true
  1249  	})).Return(nil).Maybe()
  1250  
  1251  	// Create a job, alloc, and a deployment
  1252  	j := mock.Job()
  1253  	j.TaskGroups[0].Count = 1
  1254  	j.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
  1255  	j.TaskGroups[0].Update.ProgressDeadline = 500 * time.Millisecond
  1256  	j.TaskGroups = append(j.TaskGroups, j.TaskGroups[0].Copy())
  1257  	j.TaskGroups[1].Name = "foo"
  1258  	j.TaskGroups[1].Update.ProgressDeadline = 1 * time.Second
  1259  	j.Stable = true
  1260  
  1261  	d := mock.Deployment()
  1262  	d.JobID = j.ID
  1263  	d.TaskGroups["web"].DesiredTotal = 1
  1264  	d.TaskGroups["foo"] = d.TaskGroups["web"].Copy()
  1265  	d.TaskGroups["web"].ProgressDeadline = 500 * time.Millisecond
  1266  	d.TaskGroups["foo"].ProgressDeadline = 1 * time.Second
  1267  
  1268  	a := mock.Alloc()
  1269  	now := time.Now()
  1270  	a.CreateTime = now.UnixNano()
  1271  	a.ModifyTime = now.UnixNano()
  1272  	a.DeploymentID = d.ID
  1273  
  1274  	a2 := mock.Alloc()
  1275  	a2.TaskGroup = "foo"
  1276  	a2.CreateTime = now.UnixNano()
  1277  	a2.ModifyTime = now.UnixNano()
  1278  	a2.DeploymentID = d.ID
  1279  
  1280  	require.Nil(m.state.UpsertJob(structs.MsgTypeTestSetup, m.nextIndex(), nil, j), "UpsertJob")
  1281  	require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
  1282  	require.Nil(m.state.UpsertAllocs(structs.MsgTypeTestSetup, m.nextIndex(), []*structs.Allocation{a, a2}), "UpsertAllocs")
  1283  
  1284  	// We may get an update for the desired transition.
  1285  	m1 := matchUpdateAllocDesiredTransitions([]string{d.ID})
  1286  	m.On("UpdateAllocDesiredTransition", mocker.MatchedBy(m1)).Return(nil).Once()
  1287  
  1288  	w.SetEnabled(true, m.state)
  1289  	testutil.WaitForResult(func() (bool, error) { return 1 == watchersCount(w), nil },
  1290  		func(err error) { require.Equal(1, watchersCount(w), "Should have 1 deployment") })
  1291  
  1292  	watcher, err := w.getOrCreateWatcher(d.ID)
  1293  	require.NoError(err)
  1294  	require.NotNil(watcher)
  1295  
  1296  	d1, err := m.state.DeploymentByID(nil, d.ID)
  1297  	require.NoError(err)
  1298  
  1299  	done := watcher.doneGroups(d1)
  1300  	require.Contains(done, "web")
  1301  	require.False(done["web"])
  1302  	require.Contains(done, "foo")
  1303  	require.False(done["foo"])
  1304  
  1305  	cutoff1 := watcher.getDeploymentProgressCutoff(d1)
  1306  	require.False(cutoff1.IsZero())
  1307  
  1308  	// Update the first allocation to be healthy
  1309  	a3 := a.Copy()
  1310  	a3.DeploymentStatus = &structs.AllocDeploymentStatus{Healthy: pointer.Of(true)}
  1311  	require.Nil(m.state.UpsertAllocs(structs.MsgTypeTestSetup, m.nextIndex(), []*structs.Allocation{a3}), "UpsertAllocs")
  1312  
  1313  	// Get the updated deployment
  1314  	d2, err := m.state.DeploymentByID(nil, d.ID)
  1315  	require.NoError(err)
  1316  
  1317  	done = watcher.doneGroups(d2)
  1318  	require.Contains(done, "web")
  1319  	require.True(done["web"])
  1320  	require.Contains(done, "foo")
  1321  	require.False(done["foo"])
  1322  
  1323  	cutoff2 := watcher.getDeploymentProgressCutoff(d2)
  1324  	require.False(cutoff2.IsZero())
  1325  	require.True(cutoff1.UnixNano() < cutoff2.UnixNano())
  1326  
  1327  	// Update the second allocation to be healthy
  1328  	a4 := a2.Copy()
  1329  	a4.DeploymentStatus = &structs.AllocDeploymentStatus{Healthy: pointer.Of(true)}
  1330  	require.Nil(m.state.UpsertAllocs(structs.MsgTypeTestSetup, m.nextIndex(), []*structs.Allocation{a4}), "UpsertAllocs")
  1331  
  1332  	// Get the updated deployment
  1333  	d3, err := m.state.DeploymentByID(nil, d.ID)
  1334  	require.NoError(err)
  1335  
  1336  	done = watcher.doneGroups(d3)
  1337  	require.Contains(done, "web")
  1338  	require.True(done["web"])
  1339  	require.Contains(done, "foo")
  1340  	require.True(done["foo"])
  1341  
  1342  	cutoff3 := watcher.getDeploymentProgressCutoff(d2)
  1343  	require.True(cutoff3.IsZero())
  1344  }
  1345  
  1346  // Test that we will allow the progress deadline to be reached when the canaries
  1347  // are healthy but we haven't promoted
  1348  func TestDeploymentWatcher_Watch_ProgressDeadline_Canaries(t *testing.T) {
  1349  	ci.Parallel(t)
  1350  	require := require.New(t)
  1351  	w, m := testDeploymentWatcher(t, 1000.0, 1*time.Millisecond)
  1352  
  1353  	m.On("UpdateDeploymentStatus", mocker.MatchedBy(func(args *structs.DeploymentStatusUpdateRequest) bool {
  1354  		return true
  1355  	})).Return(nil).Maybe()
  1356  
  1357  	// Create a job, alloc, and a deployment
  1358  	j := mock.Job()
  1359  	j.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
  1360  	j.TaskGroups[0].Update.Canary = 1
  1361  	j.TaskGroups[0].Update.MaxParallel = 1
  1362  	j.TaskGroups[0].Update.ProgressDeadline = 500 * time.Millisecond
  1363  	j.Stable = true
  1364  	d := mock.Deployment()
  1365  	d.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion
  1366  	d.JobID = j.ID
  1367  	d.TaskGroups["web"].ProgressDeadline = 500 * time.Millisecond
  1368  	d.TaskGroups["web"].DesiredCanaries = 1
  1369  	a := mock.Alloc()
  1370  	now := time.Now()
  1371  	a.CreateTime = now.UnixNano()
  1372  	a.ModifyTime = now.UnixNano()
  1373  	a.DeploymentID = d.ID
  1374  	require.Nil(m.state.UpsertJob(structs.MsgTypeTestSetup, m.nextIndex(), nil, j), "UpsertJob")
  1375  	require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
  1376  	require.Nil(m.state.UpsertAllocs(structs.MsgTypeTestSetup, m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs")
  1377  
  1378  	// require that we will get a createEvaluation call only once. This will
  1379  	// verify that the watcher is batching allocation changes
  1380  	m1 := matchUpdateAllocDesiredTransitions([]string{d.ID})
  1381  	m.On("UpdateAllocDesiredTransition", mocker.MatchedBy(m1)).Return(nil).Once()
  1382  
  1383  	w.SetEnabled(true, m.state)
  1384  	testutil.WaitForResult(func() (bool, error) { return 1 == watchersCount(w), nil },
  1385  		func(err error) { require.Equal(1, watchersCount(w), "Should have 1 deployment") })
  1386  
  1387  	// Update the alloc to be unhealthy and require that nothing happens.
  1388  	a2 := a.Copy()
  1389  	a2.DeploymentStatus = &structs.AllocDeploymentStatus{
  1390  		Healthy:   pointer.Of(true),
  1391  		Timestamp: now,
  1392  	}
  1393  	require.Nil(m.state.UpdateAllocsFromClient(structs.MsgTypeTestSetup, m.nextIndex(), []*structs.Allocation{a2}))
  1394  
  1395  	// Wait for the deployment to cross the deadline
  1396  	dout, err := m.state.DeploymentByID(nil, d.ID)
  1397  	require.NoError(err)
  1398  	require.NotNil(dout)
  1399  	state := dout.TaskGroups["web"]
  1400  	require.NotNil(state)
  1401  	time.Sleep(state.RequireProgressBy.Add(time.Second).Sub(now))
  1402  
  1403  	// Require the deployment is still running
  1404  	dout, err = m.state.DeploymentByID(nil, d.ID)
  1405  	require.NoError(err)
  1406  	require.NotNil(dout)
  1407  	require.Equal(structs.DeploymentStatusRunning, dout.Status)
  1408  	require.Equal(structs.DeploymentStatusDescriptionRunningNeedsPromotion, dout.StatusDescription)
  1409  
  1410  	// require there are is only one evaluation
  1411  	testutil.WaitForResult(func() (bool, error) {
  1412  		ws := memdb.NewWatchSet()
  1413  		evals, err := m.state.EvalsByJob(ws, j.Namespace, j.ID)
  1414  		if err != nil {
  1415  			return false, err
  1416  		}
  1417  
  1418  		if l := len(evals); l != 1 {
  1419  			return false, fmt.Errorf("Got %d evals; want 1", l)
  1420  		}
  1421  
  1422  		return true, nil
  1423  	}, func(err error) {
  1424  		t.Fatal(err)
  1425  	})
  1426  }
  1427  
  1428  // Test that a promoted deployment with alloc healthy updates create
  1429  // evals to move the deployment forward
  1430  func TestDeploymentWatcher_PromotedCanary_UpdatedAllocs(t *testing.T) {
  1431  	ci.Parallel(t)
  1432  	require := require.New(t)
  1433  	w, m := testDeploymentWatcher(t, 1000.0, 1*time.Millisecond)
  1434  
  1435  	m.On("UpdateDeploymentStatus", mocker.MatchedBy(func(args *structs.DeploymentStatusUpdateRequest) bool {
  1436  		return true
  1437  	})).Return(nil).Maybe()
  1438  
  1439  	// Create a job, alloc, and a deployment
  1440  	j := mock.Job()
  1441  	j.TaskGroups[0].Count = 2
  1442  	j.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
  1443  	j.TaskGroups[0].Update.Canary = 1
  1444  	j.TaskGroups[0].Update.MaxParallel = 1
  1445  	j.TaskGroups[0].Update.ProgressDeadline = 50 * time.Millisecond
  1446  	j.Stable = true
  1447  
  1448  	d := mock.Deployment()
  1449  	d.TaskGroups["web"].DesiredTotal = 2
  1450  	d.TaskGroups["web"].DesiredCanaries = 1
  1451  	d.TaskGroups["web"].HealthyAllocs = 1
  1452  	d.StatusDescription = structs.DeploymentStatusDescriptionRunning
  1453  	d.JobID = j.ID
  1454  	d.TaskGroups["web"].ProgressDeadline = 50 * time.Millisecond
  1455  	d.TaskGroups["web"].RequireProgressBy = time.Now().Add(50 * time.Millisecond)
  1456  
  1457  	a := mock.Alloc()
  1458  	now := time.Now()
  1459  	a.CreateTime = now.UnixNano()
  1460  	a.ModifyTime = now.UnixNano()
  1461  	a.DeploymentID = d.ID
  1462  	a.DeploymentStatus = &structs.AllocDeploymentStatus{
  1463  		Healthy:   pointer.Of(true),
  1464  		Timestamp: now,
  1465  	}
  1466  	require.Nil(m.state.UpsertJob(structs.MsgTypeTestSetup, m.nextIndex(), nil, j), "UpsertJob")
  1467  	require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
  1468  	require.Nil(m.state.UpsertAllocs(structs.MsgTypeTestSetup, m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs")
  1469  
  1470  	w.SetEnabled(true, m.state)
  1471  	testutil.WaitForResult(func() (bool, error) { return 1 == watchersCount(w), nil },
  1472  		func(err error) { require.Equal(1, watchersCount(w), "Should have 1 deployment") })
  1473  
  1474  	m1 := matchUpdateAllocDesiredTransitions([]string{d.ID})
  1475  	m.On("UpdateAllocDesiredTransition", mocker.MatchedBy(m1)).Return(nil).Twice()
  1476  
  1477  	// Create another alloc
  1478  	a2 := a.Copy()
  1479  	a2.ID = uuid.Generate()
  1480  	now = time.Now()
  1481  	a2.CreateTime = now.UnixNano()
  1482  	a2.ModifyTime = now.UnixNano()
  1483  	a2.DeploymentStatus = &structs.AllocDeploymentStatus{
  1484  		Healthy:   pointer.Of(true),
  1485  		Timestamp: now,
  1486  	}
  1487  	d.TaskGroups["web"].RequireProgressBy = time.Now().Add(2 * time.Second)
  1488  	require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
  1489  	// Wait until batch eval period passes before updating another alloc
  1490  	time.Sleep(1 * time.Second)
  1491  	require.Nil(m.state.UpsertAllocs(structs.MsgTypeTestSetup, m.nextIndex(), []*structs.Allocation{a2}), "UpsertAllocs")
  1492  
  1493  	// Wait for the deployment to cross the deadline
  1494  	dout, err := m.state.DeploymentByID(nil, d.ID)
  1495  	require.NoError(err)
  1496  	require.NotNil(dout)
  1497  	state := dout.TaskGroups["web"]
  1498  	require.NotNil(state)
  1499  	time.Sleep(state.RequireProgressBy.Add(time.Second).Sub(now))
  1500  
  1501  	// There should be two evals
  1502  	testutil.WaitForResult(func() (bool, error) {
  1503  		ws := memdb.NewWatchSet()
  1504  		evals, err := m.state.EvalsByJob(ws, j.Namespace, j.ID)
  1505  		if err != nil {
  1506  			return false, err
  1507  		}
  1508  
  1509  		if l := len(evals); l != 2 {
  1510  			return false, fmt.Errorf("Got %d evals; want 2", l)
  1511  		}
  1512  
  1513  		return true, nil
  1514  	}, func(err error) {
  1515  		t.Fatal(err)
  1516  	})
  1517  }
  1518  
  1519  func TestDeploymentWatcher_ProgressDeadline_LatePromote(t *testing.T) {
  1520  	ci.Parallel(t)
  1521  	require := require.New(t)
  1522  	mtype := structs.MsgTypeTestSetup
  1523  
  1524  	w, m := defaultTestDeploymentWatcher(t)
  1525  	w.SetEnabled(true, m.state)
  1526  
  1527  	m.On("UpdateDeploymentStatus", mocker.MatchedBy(func(args *structs.DeploymentStatusUpdateRequest) bool {
  1528  		return true
  1529  	})).Return(nil).Maybe()
  1530  
  1531  	progressTimeout := time.Millisecond * 1000
  1532  	j := mock.Job()
  1533  	j.TaskGroups[0].Name = "group1"
  1534  	j.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
  1535  	j.TaskGroups[0].Update.MaxParallel = 2
  1536  	j.TaskGroups[0].Update.AutoRevert = false
  1537  	j.TaskGroups[0].Update.ProgressDeadline = progressTimeout
  1538  	j.TaskGroups = append(j.TaskGroups, j.TaskGroups[0].Copy())
  1539  	j.TaskGroups[0].Name = "group2"
  1540  
  1541  	d := mock.Deployment()
  1542  	d.JobID = j.ID
  1543  	d.TaskGroups = map[string]*structs.DeploymentState{
  1544  		"group1": {
  1545  			ProgressDeadline: progressTimeout,
  1546  			Promoted:         false,
  1547  			PlacedCanaries:   []string{},
  1548  			DesiredCanaries:  1,
  1549  			DesiredTotal:     3,
  1550  			PlacedAllocs:     0,
  1551  			HealthyAllocs:    0,
  1552  			UnhealthyAllocs:  0,
  1553  		},
  1554  		"group2": {
  1555  			ProgressDeadline: progressTimeout,
  1556  			Promoted:         false,
  1557  			PlacedCanaries:   []string{},
  1558  			DesiredCanaries:  1,
  1559  			DesiredTotal:     1,
  1560  			PlacedAllocs:     0,
  1561  			HealthyAllocs:    0,
  1562  			UnhealthyAllocs:  0,
  1563  		},
  1564  	}
  1565  
  1566  	require.NoError(m.state.UpsertJob(mtype, m.nextIndex(), nil, j))
  1567  	require.NoError(m.state.UpsertDeployment(m.nextIndex(), d))
  1568  
  1569  	// require that we get a call to UpsertDeploymentPromotion
  1570  	matchConfig := &matchDeploymentPromoteRequestConfig{
  1571  		Promotion: &structs.DeploymentPromoteRequest{
  1572  			DeploymentID: d.ID,
  1573  			All:          true,
  1574  		},
  1575  		Eval: true,
  1576  	}
  1577  	matcher := matchDeploymentPromoteRequest(matchConfig)
  1578  	m.On("UpdateDeploymentPromotion", mocker.MatchedBy(matcher)).Return(nil)
  1579  	m1 := matchUpdateAllocDesiredTransitions([]string{d.ID})
  1580  	m.On("UpdateAllocDesiredTransition", mocker.MatchedBy(m1)).Return(nil)
  1581  
  1582  	// create canaries
  1583  
  1584  	now := time.Now()
  1585  
  1586  	canary1 := mock.Alloc()
  1587  	canary1.Job = j
  1588  	canary1.DeploymentID = d.ID
  1589  	canary1.TaskGroup = "group1"
  1590  	canary1.DesiredStatus = structs.AllocDesiredStatusRun
  1591  	canary1.ModifyTime = now.UnixNano()
  1592  
  1593  	canary2 := mock.Alloc()
  1594  	canary2.Job = j
  1595  	canary2.DeploymentID = d.ID
  1596  	canary2.TaskGroup = "group2"
  1597  	canary2.DesiredStatus = structs.AllocDesiredStatusRun
  1598  	canary2.ModifyTime = now.UnixNano()
  1599  
  1600  	allocs := []*structs.Allocation{canary1, canary2}
  1601  	err := m.state.UpsertAllocs(mtype, m.nextIndex(), allocs)
  1602  	require.NoError(err)
  1603  
  1604  	// 2nd group's canary becomes healthy
  1605  
  1606  	now = time.Now()
  1607  
  1608  	canary2 = canary2.Copy()
  1609  	canary2.ModifyTime = now.UnixNano()
  1610  	canary2.DeploymentStatus = &structs.AllocDeploymentStatus{
  1611  		Canary:    true,
  1612  		Healthy:   pointer.Of(true),
  1613  		Timestamp: now,
  1614  	}
  1615  
  1616  	allocs = []*structs.Allocation{canary2}
  1617  	err = m.state.UpdateAllocsFromClient(mtype, m.nextIndex(), allocs)
  1618  	require.NoError(err)
  1619  
  1620  	// wait for long enough to ensure we read deployment update channel
  1621  	// this sleep creates the race condition associated with #7058
  1622  	time.Sleep(50 * time.Millisecond)
  1623  
  1624  	// 1st group's canary becomes healthy
  1625  	now = time.Now()
  1626  
  1627  	canary1 = canary1.Copy()
  1628  	canary1.ModifyTime = now.UnixNano()
  1629  	canary1.DeploymentStatus = &structs.AllocDeploymentStatus{
  1630  		Canary:    true,
  1631  		Healthy:   pointer.Of(true),
  1632  		Timestamp: now,
  1633  	}
  1634  
  1635  	allocs = []*structs.Allocation{canary1}
  1636  	err = m.state.UpdateAllocsFromClient(mtype, m.nextIndex(), allocs)
  1637  	require.NoError(err)
  1638  
  1639  	// ensure progress_deadline has definitely expired
  1640  	time.Sleep(progressTimeout)
  1641  
  1642  	// promote the deployment
  1643  
  1644  	req := &structs.DeploymentPromoteRequest{
  1645  		DeploymentID: d.ID,
  1646  		All:          true,
  1647  	}
  1648  	err = w.PromoteDeployment(req, &structs.DeploymentUpdateResponse{})
  1649  	require.NoError(err)
  1650  
  1651  	// wait for long enough to ensure we read deployment update channel
  1652  	time.Sleep(50 * time.Millisecond)
  1653  
  1654  	// create new allocs for promoted deployment
  1655  	// (these come from plan_apply, not a client update)
  1656  	now = time.Now()
  1657  
  1658  	alloc1a := mock.Alloc()
  1659  	alloc1a.Job = j
  1660  	alloc1a.DeploymentID = d.ID
  1661  	alloc1a.TaskGroup = "group1"
  1662  	alloc1a.ClientStatus = structs.AllocClientStatusPending
  1663  	alloc1a.DesiredStatus = structs.AllocDesiredStatusRun
  1664  	alloc1a.ModifyTime = now.UnixNano()
  1665  
  1666  	alloc1b := mock.Alloc()
  1667  	alloc1b.Job = j
  1668  	alloc1b.DeploymentID = d.ID
  1669  	alloc1b.TaskGroup = "group1"
  1670  	alloc1b.ClientStatus = structs.AllocClientStatusPending
  1671  	alloc1b.DesiredStatus = structs.AllocDesiredStatusRun
  1672  	alloc1b.ModifyTime = now.UnixNano()
  1673  
  1674  	allocs = []*structs.Allocation{alloc1a, alloc1b}
  1675  	err = m.state.UpsertAllocs(mtype, m.nextIndex(), allocs)
  1676  	require.NoError(err)
  1677  
  1678  	// allocs become healthy
  1679  
  1680  	now = time.Now()
  1681  
  1682  	alloc1a = alloc1a.Copy()
  1683  	alloc1a.ClientStatus = structs.AllocClientStatusRunning
  1684  	alloc1a.ModifyTime = now.UnixNano()
  1685  	alloc1a.DeploymentStatus = &structs.AllocDeploymentStatus{
  1686  		Canary:    false,
  1687  		Healthy:   pointer.Of(true),
  1688  		Timestamp: now,
  1689  	}
  1690  
  1691  	alloc1b = alloc1b.Copy()
  1692  	alloc1b.ClientStatus = structs.AllocClientStatusRunning
  1693  	alloc1b.ModifyTime = now.UnixNano()
  1694  	alloc1b.DeploymentStatus = &structs.AllocDeploymentStatus{
  1695  		Canary:    false,
  1696  		Healthy:   pointer.Of(true),
  1697  		Timestamp: now,
  1698  	}
  1699  
  1700  	allocs = []*structs.Allocation{alloc1a, alloc1b}
  1701  	err = m.state.UpdateAllocsFromClient(mtype, m.nextIndex(), allocs)
  1702  	require.NoError(err)
  1703  
  1704  	// ensure any progress deadline has expired
  1705  	time.Sleep(progressTimeout)
  1706  
  1707  	// without a scheduler running we'll never mark the deployment as
  1708  	// successful, so test that healthy == desired and that we haven't failed
  1709  	deployment, err := m.state.DeploymentByID(nil, d.ID)
  1710  	require.NoError(err)
  1711  	require.Equal(structs.DeploymentStatusRunning, deployment.Status)
  1712  
  1713  	group1 := deployment.TaskGroups["group1"]
  1714  
  1715  	require.Equal(group1.DesiredTotal, group1.HealthyAllocs, "not enough healthy")
  1716  	require.Equal(group1.DesiredTotal, group1.PlacedAllocs, "not enough placed")
  1717  	require.Equal(0, group1.UnhealthyAllocs)
  1718  
  1719  	group2 := deployment.TaskGroups["group2"]
  1720  	require.Equal(group2.DesiredTotal, group2.HealthyAllocs, "not enough healthy")
  1721  	require.Equal(group2.DesiredTotal, group2.PlacedAllocs, "not enough placed")
  1722  	require.Equal(0, group2.UnhealthyAllocs)
  1723  }
  1724  
  1725  // Test scenario where deployment initially has no progress deadline
  1726  // After the deployment is updated, a failed alloc's DesiredTransition should be set
  1727  func TestDeploymentWatcher_Watch_StartWithoutProgressDeadline(t *testing.T) {
  1728  	ci.Parallel(t)
  1729  	require := require.New(t)
  1730  	w, m := testDeploymentWatcher(t, 1000.0, 1*time.Millisecond)
  1731  
  1732  	m.On("UpdateDeploymentStatus", mocker.MatchedBy(func(args *structs.DeploymentStatusUpdateRequest) bool {
  1733  		return true
  1734  	})).Return(nil).Maybe()
  1735  
  1736  	// Create a job, and a deployment
  1737  	j := mock.Job()
  1738  	j.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
  1739  	j.TaskGroups[0].Update.MaxParallel = 2
  1740  	j.TaskGroups[0].Update.ProgressDeadline = 500 * time.Millisecond
  1741  	j.Stable = true
  1742  	d := mock.Deployment()
  1743  	d.JobID = j.ID
  1744  
  1745  	require.Nil(m.state.UpsertJob(structs.MsgTypeTestSetup, m.nextIndex(), nil, j), "UpsertJob")
  1746  	require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
  1747  
  1748  	a := mock.Alloc()
  1749  	a.CreateTime = time.Now().UnixNano()
  1750  	a.DeploymentID = d.ID
  1751  
  1752  	require.Nil(m.state.UpsertAllocs(structs.MsgTypeTestSetup, m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs")
  1753  
  1754  	d.TaskGroups["web"].ProgressDeadline = 500 * time.Millisecond
  1755  	// Update the deployment with a progress deadline
  1756  	require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
  1757  
  1758  	// Match on DesiredTransition set to Reschedule for the failed alloc
  1759  	m1 := matchUpdateAllocDesiredTransitionReschedule([]string{a.ID})
  1760  	m.On("UpdateAllocDesiredTransition", mocker.MatchedBy(m1)).Return(nil).Once()
  1761  
  1762  	w.SetEnabled(true, m.state)
  1763  	testutil.WaitForResult(func() (bool, error) { return 1 == watchersCount(w), nil },
  1764  		func(err error) { require.Equal(1, watchersCount(w), "Should have 1 deployment") })
  1765  
  1766  	// Update the alloc to be unhealthy
  1767  	a2 := a.Copy()
  1768  	a2.DeploymentStatus = &structs.AllocDeploymentStatus{
  1769  		Healthy:   pointer.Of(false),
  1770  		Timestamp: time.Now(),
  1771  	}
  1772  	require.Nil(m.state.UpdateAllocsFromClient(structs.MsgTypeTestSetup, m.nextIndex(), []*structs.Allocation{a2}))
  1773  
  1774  	// Wait for the alloc's DesiredState to set reschedule
  1775  	testutil.WaitForResult(func() (bool, error) {
  1776  		a, err := m.state.AllocByID(nil, a.ID)
  1777  		if err != nil {
  1778  			return false, err
  1779  		}
  1780  		dt := a.DesiredTransition
  1781  		shouldReschedule := dt.Reschedule != nil && *dt.Reschedule
  1782  		return shouldReschedule, fmt.Errorf("Desired Transition Reschedule should be set but got %v", shouldReschedule)
  1783  	}, func(err error) {
  1784  		t.Fatal(err)
  1785  	})
  1786  }
  1787  
  1788  // Test that we exit before hitting the Progress Deadline when we run out of reschedule attempts
  1789  // for a failing deployment
  1790  func TestDeploymentWatcher_Watch_FailEarly(t *testing.T) {
  1791  	ci.Parallel(t)
  1792  	w, m := testDeploymentWatcher(t, 1000.0, 1*time.Millisecond)
  1793  
  1794  	// Create a job, alloc, and a deployment
  1795  	j := mock.Job()
  1796  	j.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
  1797  	j.TaskGroups[0].Update.MaxParallel = 2
  1798  	j.TaskGroups[0].Update.ProgressDeadline = 500 * time.Millisecond
  1799  	// Allow only 1 allocation for that deployment
  1800  	j.TaskGroups[0].ReschedulePolicy.Attempts = 0
  1801  	j.TaskGroups[0].ReschedulePolicy.Unlimited = false
  1802  	j.Stable = true
  1803  	d := mock.Deployment()
  1804  	d.JobID = j.ID
  1805  	d.TaskGroups["web"].ProgressDeadline = 500 * time.Millisecond
  1806  	d.TaskGroups["web"].RequireProgressBy = time.Now().Add(d.TaskGroups["web"].ProgressDeadline)
  1807  	a := mock.Alloc()
  1808  	now := time.Now()
  1809  	a.CreateTime = now.UnixNano()
  1810  	a.ModifyTime = now.UnixNano()
  1811  	a.DeploymentID = d.ID
  1812  	must.Nil(t, m.state.UpsertJob(structs.MsgTypeTestSetup, m.nextIndex(), nil, j), must.Sprint("UpsertJob"))
  1813  	must.Nil(t, m.state.UpsertDeployment(m.nextIndex(), d), must.Sprint("UpsertDeployment"))
  1814  	must.Nil(t, m.state.UpsertAllocs(structs.MsgTypeTestSetup, m.nextIndex(), []*structs.Allocation{a}), must.Sprint("UpsertAllocs"))
  1815  
  1816  	// require that we get a call to UpsertDeploymentStatusUpdate
  1817  	c := &matchDeploymentStatusUpdateConfig{
  1818  		DeploymentID:      d.ID,
  1819  		Status:            structs.DeploymentStatusFailed,
  1820  		StatusDescription: structs.DeploymentStatusDescriptionFailedAllocations,
  1821  		Eval:              true,
  1822  	}
  1823  	m2 := matchDeploymentStatusUpdateRequest(c)
  1824  	m.On("UpdateDeploymentStatus", mocker.MatchedBy(m2)).Return(nil)
  1825  
  1826  	w.SetEnabled(true, m.state)
  1827  	testutil.WaitForResult(func() (bool, error) { return 1 == watchersCount(w), nil },
  1828  		func(err error) { must.Eq(t, 1, watchersCount(w), must.Sprint("Should have 1 deployment")) })
  1829  
  1830  	// Update the alloc to be unhealthy
  1831  	a2 := a.Copy()
  1832  	a2.DeploymentStatus = &structs.AllocDeploymentStatus{
  1833  		Healthy:   pointer.Of(false),
  1834  		Timestamp: now,
  1835  	}
  1836  	must.Nil(t, m.state.UpdateAllocsFromClient(structs.MsgTypeTestSetup, m.nextIndex(), []*structs.Allocation{a2}))
  1837  
  1838  	// Wait for the deployment to be failed
  1839  	testutil.WaitForResult(func() (bool, error) {
  1840  		d, err := m.state.DeploymentByID(nil, d.ID)
  1841  		if err != nil {
  1842  			return false, err
  1843  		}
  1844  
  1845  		if d.Status != structs.DeploymentStatusFailed {
  1846  			return false, fmt.Errorf("bad status %q", d.Status)
  1847  		}
  1848  
  1849  		return d.StatusDescription == structs.DeploymentStatusDescriptionFailedAllocations, fmt.Errorf("bad status description %q", d.StatusDescription)
  1850  	}, func(err error) {
  1851  		t.Fatal(err)
  1852  	})
  1853  
  1854  	// require there are is only one evaluation
  1855  	testutil.WaitForResult(func() (bool, error) {
  1856  		ws := memdb.NewWatchSet()
  1857  		evals, err := m.state.EvalsByJob(ws, j.Namespace, j.ID)
  1858  		if err != nil {
  1859  			return false, err
  1860  		}
  1861  
  1862  		if l := len(evals); l != 1 {
  1863  			return false, fmt.Errorf("Got %d evals; want 1", l)
  1864  		}
  1865  
  1866  		return true, nil
  1867  	}, func(err error) {
  1868  		t.Fatal(err)
  1869  	})
  1870  }
  1871  
  1872  // Tests that the watcher fails rollback when the spec hasn't changed
  1873  func TestDeploymentWatcher_RollbackFailed(t *testing.T) {
  1874  	ci.Parallel(t)
  1875  	require := require.New(t)
  1876  	w, m := testDeploymentWatcher(t, 1000.0, 1*time.Millisecond)
  1877  
  1878  	// Create a job, alloc, and a deployment
  1879  	j := mock.Job()
  1880  	j.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
  1881  	j.TaskGroups[0].Update.MaxParallel = 2
  1882  	j.TaskGroups[0].Update.AutoRevert = true
  1883  	j.TaskGroups[0].Update.ProgressDeadline = 0
  1884  	j.Stable = true
  1885  	d := mock.Deployment()
  1886  	d.JobID = j.ID
  1887  	d.TaskGroups["web"].AutoRevert = true
  1888  	a := mock.Alloc()
  1889  	a.DeploymentID = d.ID
  1890  	require.Nil(m.state.UpsertJob(structs.MsgTypeTestSetup, m.nextIndex(), nil, j), "UpsertJob")
  1891  	require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
  1892  	require.Nil(m.state.UpsertAllocs(structs.MsgTypeTestSetup, m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs")
  1893  
  1894  	// Upsert the job again to get a new version
  1895  	j2 := j.Copy()
  1896  	// Modify the job to make its specification different
  1897  	j2.Stable = false
  1898  	require.Nil(m.state.UpsertJob(structs.MsgTypeTestSetup, m.nextIndex(), nil, j2), "UpsertJob2")
  1899  
  1900  	// require that we will get a createEvaluation call only once. This will
  1901  	// verify that the watcher is batching allocation changes
  1902  	m1 := matchUpdateAllocDesiredTransitions([]string{d.ID})
  1903  	m.On("UpdateAllocDesiredTransition", mocker.MatchedBy(m1)).Return(nil).Once()
  1904  
  1905  	// require that we get a call to UpsertDeploymentStatusUpdate with roll back failed as the status
  1906  	c := &matchDeploymentStatusUpdateConfig{
  1907  		DeploymentID:      d.ID,
  1908  		Status:            structs.DeploymentStatusFailed,
  1909  		StatusDescription: structs.DeploymentStatusDescriptionRollbackNoop(structs.DeploymentStatusDescriptionFailedAllocations, 0),
  1910  		JobVersion:        nil,
  1911  		Eval:              true,
  1912  	}
  1913  	m2 := matchDeploymentStatusUpdateRequest(c)
  1914  	m.On("UpdateDeploymentStatus", mocker.MatchedBy(m2)).Return(nil)
  1915  
  1916  	w.SetEnabled(true, m.state)
  1917  	testutil.WaitForResult(func() (bool, error) { return 1 == watchersCount(w), nil },
  1918  		func(err error) { require.Equal(1, watchersCount(w), "Should have 1 deployment") })
  1919  
  1920  	// Update the allocs health to healthy which should create an evaluation
  1921  	for i := 0; i < 5; i++ {
  1922  		req := &structs.ApplyDeploymentAllocHealthRequest{
  1923  			DeploymentAllocHealthRequest: structs.DeploymentAllocHealthRequest{
  1924  				DeploymentID:         d.ID,
  1925  				HealthyAllocationIDs: []string{a.ID},
  1926  			},
  1927  		}
  1928  		require.Nil(m.state.UpdateDeploymentAllocHealth(structs.MsgTypeTestSetup, m.nextIndex(), req), "UpsertDeploymentAllocHealth")
  1929  	}
  1930  
  1931  	// Wait for there to be one eval
  1932  	testutil.WaitForResult(func() (bool, error) {
  1933  		ws := memdb.NewWatchSet()
  1934  		evals, err := m.state.EvalsByJob(ws, j.Namespace, j.ID)
  1935  		if err != nil {
  1936  			return false, err
  1937  		}
  1938  
  1939  		if l := len(evals); l != 1 {
  1940  			return false, fmt.Errorf("Got %d evals; want 1", l)
  1941  		}
  1942  
  1943  		return true, nil
  1944  	}, func(err error) {
  1945  		t.Fatal(err)
  1946  	})
  1947  
  1948  	// Update the allocs health to unhealthy which will cause attempting a rollback,
  1949  	// fail in that step, do status update and eval
  1950  	req2 := &structs.ApplyDeploymentAllocHealthRequest{
  1951  		DeploymentAllocHealthRequest: structs.DeploymentAllocHealthRequest{
  1952  			DeploymentID:           d.ID,
  1953  			UnhealthyAllocationIDs: []string{a.ID},
  1954  		},
  1955  	}
  1956  	require.Nil(m.state.UpdateDeploymentAllocHealth(structs.MsgTypeTestSetup, m.nextIndex(), req2), "UpsertDeploymentAllocHealth")
  1957  
  1958  	// Wait for there to be one eval
  1959  	testutil.WaitForResult(func() (bool, error) {
  1960  		ws := memdb.NewWatchSet()
  1961  		evals, err := m.state.EvalsByJob(ws, j.Namespace, j.ID)
  1962  		if err != nil {
  1963  			return false, err
  1964  		}
  1965  
  1966  		if l := len(evals); l != 2 {
  1967  			return false, fmt.Errorf("Got %d evals; want 1", l)
  1968  		}
  1969  
  1970  		return true, nil
  1971  	}, func(err error) {
  1972  		t.Fatal(err)
  1973  	})
  1974  
  1975  	m.AssertCalled(t, "UpdateAllocDesiredTransition", mocker.MatchedBy(m1))
  1976  
  1977  	// verify that the job version hasn't changed after upsert
  1978  	m.state.JobByID(nil, structs.DefaultNamespace, j.ID)
  1979  	require.Equal(uint64(0), j.Version, "Expected job version 0 but got ", j.Version)
  1980  }
  1981  
  1982  // Test allocation updates and evaluation creation is batched between watchers
  1983  func TestWatcher_BatchAllocUpdates(t *testing.T) {
  1984  	ci.Parallel(t)
  1985  	require := require.New(t)
  1986  	w, m := testDeploymentWatcher(t, 1000.0, 1*time.Second)
  1987  
  1988  	m.On("UpdateDeploymentStatus", mocker.MatchedBy(func(args *structs.DeploymentStatusUpdateRequest) bool {
  1989  		return true
  1990  	})).Return(nil).Maybe()
  1991  
  1992  	// Create a job, alloc, for two deployments
  1993  	j1 := mock.Job()
  1994  	j1.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
  1995  	j1.TaskGroups[0].Update.ProgressDeadline = 0
  1996  	d1 := mock.Deployment()
  1997  	d1.JobID = j1.ID
  1998  	a1 := mock.Alloc()
  1999  	a1.Job = j1
  2000  	a1.JobID = j1.ID
  2001  	a1.DeploymentID = d1.ID
  2002  
  2003  	j2 := mock.Job()
  2004  	j2.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
  2005  	j2.TaskGroups[0].Update.ProgressDeadline = 0
  2006  	d2 := mock.Deployment()
  2007  	d2.JobID = j2.ID
  2008  	a2 := mock.Alloc()
  2009  	a2.Job = j2
  2010  	a2.JobID = j2.ID
  2011  	a2.DeploymentID = d2.ID
  2012  
  2013  	require.Nil(m.state.UpsertJob(structs.MsgTypeTestSetup, m.nextIndex(), nil, j1), "UpsertJob")
  2014  	require.Nil(m.state.UpsertJob(structs.MsgTypeTestSetup, m.nextIndex(), nil, j2), "UpsertJob")
  2015  	require.Nil(m.state.UpsertDeployment(m.nextIndex(), d1), "UpsertDeployment")
  2016  	require.Nil(m.state.UpsertDeployment(m.nextIndex(), d2), "UpsertDeployment")
  2017  	require.Nil(m.state.UpsertAllocs(structs.MsgTypeTestSetup, m.nextIndex(), []*structs.Allocation{a1}), "UpsertAllocs")
  2018  	require.Nil(m.state.UpsertAllocs(structs.MsgTypeTestSetup, m.nextIndex(), []*structs.Allocation{a2}), "UpsertAllocs")
  2019  
  2020  	// require that we will get a createEvaluation call only once and it contains
  2021  	// both deployments. This will verify that the watcher is batching
  2022  	// allocation changes
  2023  	m1 := matchUpdateAllocDesiredTransitions([]string{d1.ID, d2.ID})
  2024  	m.On("UpdateAllocDesiredTransition", mocker.MatchedBy(m1)).Return(nil).Once()
  2025  
  2026  	w.SetEnabled(true, m.state)
  2027  	testutil.WaitForResult(func() (bool, error) { return 2 == watchersCount(w), nil },
  2028  		func(err error) { require.Equal(2, watchersCount(w), "Should have 2 deployment") })
  2029  
  2030  	// Update the allocs health to healthy which should create an evaluation
  2031  	req := &structs.ApplyDeploymentAllocHealthRequest{
  2032  		DeploymentAllocHealthRequest: structs.DeploymentAllocHealthRequest{
  2033  			DeploymentID:         d1.ID,
  2034  			HealthyAllocationIDs: []string{a1.ID},
  2035  		},
  2036  	}
  2037  	require.Nil(m.state.UpdateDeploymentAllocHealth(structs.MsgTypeTestSetup, m.nextIndex(), req), "UpsertDeploymentAllocHealth")
  2038  
  2039  	req2 := &structs.ApplyDeploymentAllocHealthRequest{
  2040  		DeploymentAllocHealthRequest: structs.DeploymentAllocHealthRequest{
  2041  			DeploymentID:         d2.ID,
  2042  			HealthyAllocationIDs: []string{a2.ID},
  2043  		},
  2044  	}
  2045  	require.Nil(m.state.UpdateDeploymentAllocHealth(structs.MsgTypeTestSetup, m.nextIndex(), req2), "UpsertDeploymentAllocHealth")
  2046  
  2047  	// Wait for there to be one eval for each job
  2048  	testutil.WaitForResult(func() (bool, error) {
  2049  		ws := memdb.NewWatchSet()
  2050  		evals1, err := m.state.EvalsByJob(ws, j1.Namespace, j1.ID)
  2051  		if err != nil {
  2052  			return false, err
  2053  		}
  2054  
  2055  		evals2, err := m.state.EvalsByJob(ws, j2.Namespace, j2.ID)
  2056  		if err != nil {
  2057  			return false, err
  2058  		}
  2059  
  2060  		if l := len(evals1); l != 1 {
  2061  			return false, fmt.Errorf("Got %d evals for job %v; want 1", l, j1.ID)
  2062  		}
  2063  
  2064  		if l := len(evals2); l != 1 {
  2065  			return false, fmt.Errorf("Got %d evals for job 2; want 1", l)
  2066  		}
  2067  
  2068  		return true, nil
  2069  	}, func(err error) {
  2070  		t.Fatal(err)
  2071  	})
  2072  
  2073  	m.AssertCalled(t, "UpdateAllocDesiredTransition", mocker.MatchedBy(m1))
  2074  	testutil.WaitForResult(func() (bool, error) { return 2 == watchersCount(w), nil },
  2075  		func(err error) { require.Equal(2, watchersCount(w), "Should have 2 deployment") })
  2076  }
  2077  
  2078  func watchersCount(w *Watcher) int {
  2079  	w.l.Lock()
  2080  	defer w.l.Unlock()
  2081  
  2082  	return len(w.watchers)
  2083  }