github.com/quite/nomad@v0.8.6/nomad/deploymentwatcher/deployments_watcher_test.go

github.com/quite/nomad@v0.8.6/nomad/deploymentwatcher/deployments_watcher_test.go (about)

     1  package deploymentwatcher
     2  
     3  import (
     4  	"fmt"
     5  	"testing"
     6  	"time"
     7  
     8  	memdb "github.com/hashicorp/go-memdb"
     9  	"github.com/hashicorp/nomad/helper"
    10  	"github.com/hashicorp/nomad/helper/uuid"
    11  	"github.com/hashicorp/nomad/nomad/mock"
    12  	"github.com/hashicorp/nomad/nomad/structs"
    13  	"github.com/hashicorp/nomad/testutil"
    14  	"github.com/stretchr/testify/assert"
    15  	mocker "github.com/stretchr/testify/mock"
    16  	"github.com/stretchr/testify/require"
    17  )
    18  
    19  func testDeploymentWatcher(t *testing.T, qps float64, batchDur time.Duration) (*Watcher, *mockBackend) {
    20  	m := newMockBackend(t)
    21  	w := NewDeploymentsWatcher(testLogger(), m, qps, batchDur)
    22  	return w, m
    23  }
    24  
    25  func defaultTestDeploymentWatcher(t *testing.T) (*Watcher, *mockBackend) {
    26  	return testDeploymentWatcher(t, LimitStateQueriesPerSecond, CrossDeploymentUpdateBatchDuration)
    27  }
    28  
    29  // Tests that the watcher properly watches for deployments and reconciles them
    30  func TestWatcher_WatchDeployments(t *testing.T) {
    31  	t.Parallel()
    32  	assert := assert.New(t)
    33  	w, m := defaultTestDeploymentWatcher(t)
    34  
    35  	// Create three jobs
    36  	j1, j2, j3 := mock.Job(), mock.Job(), mock.Job()
    37  	assert.Nil(m.state.UpsertJob(100, j1))
    38  	assert.Nil(m.state.UpsertJob(101, j2))
    39  	assert.Nil(m.state.UpsertJob(102, j3))
    40  
    41  	// Create three deployments all running
    42  	d1, d2, d3 := mock.Deployment(), mock.Deployment(), mock.Deployment()
    43  	d1.JobID = j1.ID
    44  	d2.JobID = j2.ID
    45  	d3.JobID = j3.ID
    46  
    47  	// Upsert the first deployment
    48  	assert.Nil(m.state.UpsertDeployment(103, d1))
    49  
    50  	// Next list 3
    51  	block1 := make(chan time.Time)
    52  	go func() {
    53  		<-block1
    54  		assert.Nil(m.state.UpsertDeployment(104, d2))
    55  		assert.Nil(m.state.UpsertDeployment(105, d3))
    56  	}()
    57  
    58  	//// Next list 3 but have one be terminal
    59  	block2 := make(chan time.Time)
    60  	d3terminal := d3.Copy()
    61  	d3terminal.Status = structs.DeploymentStatusFailed
    62  	go func() {
    63  		<-block2
    64  		assert.Nil(m.state.UpsertDeployment(106, d3terminal))
    65  	}()
    66  
    67  	w.SetEnabled(true, m.state)
    68  	testutil.WaitForResult(func() (bool, error) { return 1 == len(w.watchers), nil },
    69  		func(err error) { assert.Equal(1, len(w.watchers), "1 deployment returned") })
    70  
    71  	close(block1)
    72  	testutil.WaitForResult(func() (bool, error) { return 3 == len(w.watchers), nil },
    73  		func(err error) { assert.Equal(3, len(w.watchers), "3 deployment returned") })
    74  
    75  	close(block2)
    76  	testutil.WaitForResult(func() (bool, error) { return 2 == len(w.watchers), nil },
    77  		func(err error) { assert.Equal(3, len(w.watchers), "3 deployment returned - 1 terminal") })
    78  }
    79  
    80  // Tests that calls against an unknown deployment fail
    81  func TestWatcher_UnknownDeployment(t *testing.T) {
    82  	t.Parallel()
    83  	assert := assert.New(t)
    84  	w, m := defaultTestDeploymentWatcher(t)
    85  	w.SetEnabled(true, m.state)
    86  
    87  	// The expected error is that it should be an unknown deployment
    88  	dID := uuid.Generate()
    89  	expected := fmt.Sprintf("unknown deployment %q", dID)
    90  
    91  	// Request setting the health against an unknown deployment
    92  	req := &structs.DeploymentAllocHealthRequest{
    93  		DeploymentID:         dID,
    94  		HealthyAllocationIDs: []string{uuid.Generate()},
    95  	}
    96  	var resp structs.DeploymentUpdateResponse
    97  	err := w.SetAllocHealth(req, &resp)
    98  	if assert.NotNil(err, "should have error for unknown deployment") {
    99  		assert.Contains(err.Error(), expected)
   100  	}
   101  
   102  	// Request promoting against an unknown deployment
   103  	req2 := &structs.DeploymentPromoteRequest{
   104  		DeploymentID: dID,
   105  		All:          true,
   106  	}
   107  	err = w.PromoteDeployment(req2, &resp)
   108  	if assert.NotNil(err, "should have error for unknown deployment") {
   109  		assert.Contains(err.Error(), expected)
   110  	}
   111  
   112  	// Request pausing against an unknown deployment
   113  	req3 := &structs.DeploymentPauseRequest{
   114  		DeploymentID: dID,
   115  		Pause:        true,
   116  	}
   117  	err = w.PauseDeployment(req3, &resp)
   118  	if assert.NotNil(err, "should have error for unknown deployment") {
   119  		assert.Contains(err.Error(), expected)
   120  	}
   121  
   122  	// Request failing against an unknown deployment
   123  	req4 := &structs.DeploymentFailRequest{
   124  		DeploymentID: dID,
   125  	}
   126  	err = w.FailDeployment(req4, &resp)
   127  	if assert.NotNil(err, "should have error for unknown deployment") {
   128  		assert.Contains(err.Error(), expected)
   129  	}
   130  }
   131  
   132  // Test setting an unknown allocation's health
   133  func TestWatcher_SetAllocHealth_Unknown(t *testing.T) {
   134  	t.Parallel()
   135  	assert := assert.New(t)
   136  	w, m := defaultTestDeploymentWatcher(t)
   137  
   138  	// Create a job, and a deployment
   139  	j := mock.Job()
   140  	d := mock.Deployment()
   141  	d.JobID = j.ID
   142  	assert.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob")
   143  	assert.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
   144  
   145  	// Assert that we get a call to UpsertDeploymentAllocHealth
   146  	a := mock.Alloc()
   147  	matchConfig := &matchDeploymentAllocHealthRequestConfig{
   148  		DeploymentID: d.ID,
   149  		Healthy:      []string{a.ID},
   150  		Eval:         true,
   151  	}
   152  	matcher := matchDeploymentAllocHealthRequest(matchConfig)
   153  	m.On("UpdateDeploymentAllocHealth", mocker.MatchedBy(matcher)).Return(nil)
   154  
   155  	w.SetEnabled(true, m.state)
   156  	testutil.WaitForResult(func() (bool, error) { return 1 == len(w.watchers), nil },
   157  		func(err error) { assert.Equal(1, len(w.watchers), "Should have 1 deployment") })
   158  
   159  	// Call SetAllocHealth
   160  	req := &structs.DeploymentAllocHealthRequest{
   161  		DeploymentID:         d.ID,
   162  		HealthyAllocationIDs: []string{a.ID},
   163  	}
   164  	var resp structs.DeploymentUpdateResponse
   165  	err := w.SetAllocHealth(req, &resp)
   166  	if assert.NotNil(err, "Set health of unknown allocation") {
   167  		assert.Contains(err.Error(), "unknown")
   168  	}
   169  	assert.Equal(1, len(w.watchers), "Deployment should still be active")
   170  }
   171  
   172  // Test setting allocation health
   173  func TestWatcher_SetAllocHealth_Healthy(t *testing.T) {
   174  	t.Parallel()
   175  	assert := assert.New(t)
   176  	w, m := defaultTestDeploymentWatcher(t)
   177  
   178  	// Create a job, alloc, and a deployment
   179  	j := mock.Job()
   180  	d := mock.Deployment()
   181  	d.JobID = j.ID
   182  	a := mock.Alloc()
   183  	a.DeploymentID = d.ID
   184  	assert.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob")
   185  	assert.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
   186  	assert.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs")
   187  
   188  	// Assert that we get a call to UpsertDeploymentAllocHealth
   189  	matchConfig := &matchDeploymentAllocHealthRequestConfig{
   190  		DeploymentID: d.ID,
   191  		Healthy:      []string{a.ID},
   192  		Eval:         true,
   193  	}
   194  	matcher := matchDeploymentAllocHealthRequest(matchConfig)
   195  	m.On("UpdateDeploymentAllocHealth", mocker.MatchedBy(matcher)).Return(nil)
   196  
   197  	w.SetEnabled(true, m.state)
   198  	testutil.WaitForResult(func() (bool, error) { return 1 == len(w.watchers), nil },
   199  		func(err error) { assert.Equal(1, len(w.watchers), "Should have 1 deployment") })
   200  
   201  	// Call SetAllocHealth
   202  	req := &structs.DeploymentAllocHealthRequest{
   203  		DeploymentID:         d.ID,
   204  		HealthyAllocationIDs: []string{a.ID},
   205  	}
   206  	var resp structs.DeploymentUpdateResponse
   207  	err := w.SetAllocHealth(req, &resp)
   208  	assert.Nil(err, "SetAllocHealth")
   209  	assert.Equal(1, len(w.watchers), "Deployment should still be active")
   210  	m.AssertCalled(t, "UpdateDeploymentAllocHealth", mocker.MatchedBy(matcher))
   211  }
   212  
   213  // Test setting allocation unhealthy
   214  func TestWatcher_SetAllocHealth_Unhealthy(t *testing.T) {
   215  	t.Parallel()
   216  	assert := assert.New(t)
   217  	w, m := defaultTestDeploymentWatcher(t)
   218  
   219  	// Create a job, alloc, and a deployment
   220  	j := mock.Job()
   221  	d := mock.Deployment()
   222  	d.JobID = j.ID
   223  	a := mock.Alloc()
   224  	a.DeploymentID = d.ID
   225  	assert.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob")
   226  	assert.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
   227  	assert.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs")
   228  
   229  	// Assert that we get a call to UpsertDeploymentAllocHealth
   230  	matchConfig := &matchDeploymentAllocHealthRequestConfig{
   231  		DeploymentID: d.ID,
   232  		Unhealthy:    []string{a.ID},
   233  		Eval:         true,
   234  		DeploymentUpdate: &structs.DeploymentStatusUpdate{
   235  			DeploymentID:      d.ID,
   236  			Status:            structs.DeploymentStatusFailed,
   237  			StatusDescription: structs.DeploymentStatusDescriptionFailedAllocations,
   238  		},
   239  	}
   240  	matcher := matchDeploymentAllocHealthRequest(matchConfig)
   241  	m.On("UpdateDeploymentAllocHealth", mocker.MatchedBy(matcher)).Return(nil)
   242  
   243  	w.SetEnabled(true, m.state)
   244  	testutil.WaitForResult(func() (bool, error) { return 1 == len(w.watchers), nil },
   245  		func(err error) { assert.Equal(1, len(w.watchers), "Should have 1 deployment") })
   246  
   247  	// Call SetAllocHealth
   248  	req := &structs.DeploymentAllocHealthRequest{
   249  		DeploymentID:           d.ID,
   250  		UnhealthyAllocationIDs: []string{a.ID},
   251  	}
   252  	var resp structs.DeploymentUpdateResponse
   253  	err := w.SetAllocHealth(req, &resp)
   254  	assert.Nil(err, "SetAllocHealth")
   255  
   256  	testutil.WaitForResult(func() (bool, error) { return 0 == len(w.watchers), nil },
   257  		func(err error) { assert.Equal(0, len(w.watchers), "Should have no deployment") })
   258  	m.AssertNumberOfCalls(t, "UpdateDeploymentAllocHealth", 1)
   259  }
   260  
   261  // Test setting allocation unhealthy and that there should be a rollback
   262  func TestWatcher_SetAllocHealth_Unhealthy_Rollback(t *testing.T) {
   263  	t.Parallel()
   264  	assert := assert.New(t)
   265  	w, m := defaultTestDeploymentWatcher(t)
   266  
   267  	// Create a job, alloc, and a deployment
   268  	j := mock.Job()
   269  	j.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
   270  	j.TaskGroups[0].Update.MaxParallel = 2
   271  	j.TaskGroups[0].Update.AutoRevert = true
   272  	j.TaskGroups[0].Update.ProgressDeadline = 0
   273  	j.Stable = true
   274  	d := mock.Deployment()
   275  	d.JobID = j.ID
   276  	d.TaskGroups["web"].AutoRevert = true
   277  	a := mock.Alloc()
   278  	a.DeploymentID = d.ID
   279  	assert.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob")
   280  	assert.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
   281  	assert.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs")
   282  
   283  	// Upsert the job again to get a new version
   284  	j2 := j.Copy()
   285  	j2.Stable = false
   286  	// Modify the job to make its specification different
   287  	j2.Meta["foo"] = "bar"
   288  
   289  	assert.Nil(m.state.UpsertJob(m.nextIndex(), j2), "UpsertJob2")
   290  
   291  	// Assert that we get a call to UpsertDeploymentAllocHealth
   292  	matchConfig := &matchDeploymentAllocHealthRequestConfig{
   293  		DeploymentID: d.ID,
   294  		Unhealthy:    []string{a.ID},
   295  		Eval:         true,
   296  		DeploymentUpdate: &structs.DeploymentStatusUpdate{
   297  			DeploymentID:      d.ID,
   298  			Status:            structs.DeploymentStatusFailed,
   299  			StatusDescription: structs.DeploymentStatusDescriptionFailedAllocations,
   300  		},
   301  		JobVersion: helper.Uint64ToPtr(0),
   302  	}
   303  	matcher := matchDeploymentAllocHealthRequest(matchConfig)
   304  	m.On("UpdateDeploymentAllocHealth", mocker.MatchedBy(matcher)).Return(nil)
   305  
   306  	w.SetEnabled(true, m.state)
   307  	testutil.WaitForResult(func() (bool, error) { return 1 == len(w.watchers), nil },
   308  		func(err error) { assert.Equal(1, len(w.watchers), "Should have 1 deployment") })
   309  
   310  	// Call SetAllocHealth
   311  	req := &structs.DeploymentAllocHealthRequest{
   312  		DeploymentID:           d.ID,
   313  		UnhealthyAllocationIDs: []string{a.ID},
   314  	}
   315  	var resp structs.DeploymentUpdateResponse
   316  	err := w.SetAllocHealth(req, &resp)
   317  	assert.Nil(err, "SetAllocHealth")
   318  
   319  	testutil.WaitForResult(func() (bool, error) { return 0 == len(w.watchers), nil },
   320  		func(err error) { assert.Equal(0, len(w.watchers), "Should have no deployment") })
   321  	m.AssertNumberOfCalls(t, "UpdateDeploymentAllocHealth", 1)
   322  }
   323  
   324  // Test setting allocation unhealthy on job with identical spec and there should be no rollback
   325  func TestWatcher_SetAllocHealth_Unhealthy_NoRollback(t *testing.T) {
   326  	t.Parallel()
   327  	assert := assert.New(t)
   328  	w, m := defaultTestDeploymentWatcher(t)
   329  
   330  	// Create a job, alloc, and a deployment
   331  	j := mock.Job()
   332  	j.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
   333  	j.TaskGroups[0].Update.MaxParallel = 2
   334  	j.TaskGroups[0].Update.AutoRevert = true
   335  	j.TaskGroups[0].Update.ProgressDeadline = 0
   336  	j.Stable = true
   337  	d := mock.Deployment()
   338  	d.JobID = j.ID
   339  	d.TaskGroups["web"].AutoRevert = true
   340  	a := mock.Alloc()
   341  	a.DeploymentID = d.ID
   342  	assert.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob")
   343  	assert.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
   344  	assert.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs")
   345  
   346  	// Upsert the job again to get a new version
   347  	j2 := j.Copy()
   348  	j2.Stable = false
   349  
   350  	assert.Nil(m.state.UpsertJob(m.nextIndex(), j2), "UpsertJob2")
   351  
   352  	// Assert that we get a call to UpsertDeploymentAllocHealth
   353  	matchConfig := &matchDeploymentAllocHealthRequestConfig{
   354  		DeploymentID: d.ID,
   355  		Unhealthy:    []string{a.ID},
   356  		Eval:         true,
   357  		DeploymentUpdate: &structs.DeploymentStatusUpdate{
   358  			DeploymentID:      d.ID,
   359  			Status:            structs.DeploymentStatusFailed,
   360  			StatusDescription: structs.DeploymentStatusDescriptionFailedAllocations,
   361  		},
   362  		JobVersion: nil,
   363  	}
   364  	matcher := matchDeploymentAllocHealthRequest(matchConfig)
   365  	m.On("UpdateDeploymentAllocHealth", mocker.MatchedBy(matcher)).Return(nil)
   366  
   367  	w.SetEnabled(true, m.state)
   368  	testutil.WaitForResult(func() (bool, error) { return 1 == len(w.watchers), nil },
   369  		func(err error) { assert.Equal(1, len(w.watchers), "Should have 1 deployment") })
   370  
   371  	// Call SetAllocHealth
   372  	req := &structs.DeploymentAllocHealthRequest{
   373  		DeploymentID:           d.ID,
   374  		UnhealthyAllocationIDs: []string{a.ID},
   375  	}
   376  	var resp structs.DeploymentUpdateResponse
   377  	err := w.SetAllocHealth(req, &resp)
   378  	assert.Nil(err, "SetAllocHealth")
   379  
   380  	testutil.WaitForResult(func() (bool, error) { return 0 == len(w.watchers), nil },
   381  		func(err error) { assert.Equal(0, len(w.watchers), "Should have no deployment") })
   382  	m.AssertNumberOfCalls(t, "UpdateDeploymentAllocHealth", 1)
   383  }
   384  
   385  // Test promoting a deployment
   386  func TestWatcher_PromoteDeployment_HealthyCanaries(t *testing.T) {
   387  	t.Parallel()
   388  	assert := assert.New(t)
   389  	w, m := defaultTestDeploymentWatcher(t)
   390  
   391  	// Create a job, canary alloc, and a deployment
   392  	j := mock.Job()
   393  	j.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
   394  	j.TaskGroups[0].Update.MaxParallel = 2
   395  	j.TaskGroups[0].Update.Canary = 1
   396  	j.TaskGroups[0].Update.ProgressDeadline = 0
   397  	d := mock.Deployment()
   398  	d.JobID = j.ID
   399  	a := mock.Alloc()
   400  	d.TaskGroups[a.TaskGroup].DesiredCanaries = 1
   401  	d.TaskGroups[a.TaskGroup].PlacedCanaries = []string{a.ID}
   402  	a.DeploymentStatus = &structs.AllocDeploymentStatus{
   403  		Healthy: helper.BoolToPtr(true),
   404  	}
   405  	a.DeploymentID = d.ID
   406  	assert.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob")
   407  	assert.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
   408  	assert.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs")
   409  
   410  	// Assert that we get a call to UpsertDeploymentPromotion
   411  	matchConfig := &matchDeploymentPromoteRequestConfig{
   412  		Promotion: &structs.DeploymentPromoteRequest{
   413  			DeploymentID: d.ID,
   414  			All:          true,
   415  		},
   416  		Eval: true,
   417  	}
   418  	matcher := matchDeploymentPromoteRequest(matchConfig)
   419  	m.On("UpdateDeploymentPromotion", mocker.MatchedBy(matcher)).Return(nil)
   420  
   421  	// We may get an update for the desired transition.
   422  	m1 := matchUpdateAllocDesiredTransitions([]string{d.ID})
   423  	m.On("UpdateAllocDesiredTransition", mocker.MatchedBy(m1)).Return(nil).Once()
   424  
   425  	w.SetEnabled(true, m.state)
   426  	testutil.WaitForResult(func() (bool, error) { return 1 == len(w.watchers), nil },
   427  		func(err error) { assert.Equal(1, len(w.watchers), "Should have 1 deployment") })
   428  
   429  	// Call PromoteDeployment
   430  	req := &structs.DeploymentPromoteRequest{
   431  		DeploymentID: d.ID,
   432  		All:          true,
   433  	}
   434  	var resp structs.DeploymentUpdateResponse
   435  	err := w.PromoteDeployment(req, &resp)
   436  	assert.Nil(err, "PromoteDeployment")
   437  	assert.Equal(1, len(w.watchers), "Deployment should still be active")
   438  	m.AssertCalled(t, "UpdateDeploymentPromotion", mocker.MatchedBy(matcher))
   439  }
   440  
   441  // Test promoting a deployment with unhealthy canaries
   442  func TestWatcher_PromoteDeployment_UnhealthyCanaries(t *testing.T) {
   443  	t.Parallel()
   444  	assert := assert.New(t)
   445  	w, m := defaultTestDeploymentWatcher(t)
   446  
   447  	// Create a job, canary alloc, and a deployment
   448  	j := mock.Job()
   449  	j.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
   450  	j.TaskGroups[0].Update.MaxParallel = 2
   451  	j.TaskGroups[0].Update.Canary = 2
   452  	j.TaskGroups[0].Update.ProgressDeadline = 0
   453  	d := mock.Deployment()
   454  	d.JobID = j.ID
   455  	a := mock.Alloc()
   456  	d.TaskGroups[a.TaskGroup].PlacedCanaries = []string{a.ID}
   457  	d.TaskGroups[a.TaskGroup].DesiredCanaries = 2
   458  	a.DeploymentID = d.ID
   459  	assert.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob")
   460  	assert.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
   461  	assert.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs")
   462  
   463  	// Assert that we get a call to UpsertDeploymentPromotion
   464  	matchConfig := &matchDeploymentPromoteRequestConfig{
   465  		Promotion: &structs.DeploymentPromoteRequest{
   466  			DeploymentID: d.ID,
   467  			All:          true,
   468  		},
   469  		Eval: true,
   470  	}
   471  	matcher := matchDeploymentPromoteRequest(matchConfig)
   472  	m.On("UpdateDeploymentPromotion", mocker.MatchedBy(matcher)).Return(nil)
   473  
   474  	w.SetEnabled(true, m.state)
   475  	testutil.WaitForResult(func() (bool, error) { return 1 == len(w.watchers), nil },
   476  		func(err error) { assert.Equal(1, len(w.watchers), "Should have 1 deployment") })
   477  
   478  	// Call SetAllocHealth
   479  	req := &structs.DeploymentPromoteRequest{
   480  		DeploymentID: d.ID,
   481  		All:          true,
   482  	}
   483  	var resp structs.DeploymentUpdateResponse
   484  	err := w.PromoteDeployment(req, &resp)
   485  	if assert.NotNil(err, "PromoteDeployment") {
   486  		assert.Contains(err.Error(), `Task group "web" has 0/2 healthy allocations`, "Should error because canary isn't marked healthy")
   487  	}
   488  
   489  	assert.Equal(1, len(w.watchers), "Deployment should still be active")
   490  	m.AssertCalled(t, "UpdateDeploymentPromotion", mocker.MatchedBy(matcher))
   491  }
   492  
   493  // Test pausing a deployment that is running
   494  func TestWatcher_PauseDeployment_Pause_Running(t *testing.T) {
   495  	t.Parallel()
   496  	assert := assert.New(t)
   497  	w, m := defaultTestDeploymentWatcher(t)
   498  
   499  	// Create a job and a deployment
   500  	j := mock.Job()
   501  	d := mock.Deployment()
   502  	d.JobID = j.ID
   503  	assert.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob")
   504  	assert.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
   505  
   506  	// Assert that we get a call to UpsertDeploymentStatusUpdate
   507  	matchConfig := &matchDeploymentStatusUpdateConfig{
   508  		DeploymentID:      d.ID,
   509  		Status:            structs.DeploymentStatusPaused,
   510  		StatusDescription: structs.DeploymentStatusDescriptionPaused,
   511  	}
   512  	matcher := matchDeploymentStatusUpdateRequest(matchConfig)
   513  	m.On("UpdateDeploymentStatus", mocker.MatchedBy(matcher)).Return(nil)
   514  
   515  	w.SetEnabled(true, m.state)
   516  	testutil.WaitForResult(func() (bool, error) { return 1 == len(w.watchers), nil },
   517  		func(err error) { assert.Equal(1, len(w.watchers), "Should have 1 deployment") })
   518  
   519  	// Call PauseDeployment
   520  	req := &structs.DeploymentPauseRequest{
   521  		DeploymentID: d.ID,
   522  		Pause:        true,
   523  	}
   524  	var resp structs.DeploymentUpdateResponse
   525  	err := w.PauseDeployment(req, &resp)
   526  	assert.Nil(err, "PauseDeployment")
   527  
   528  	assert.Equal(1, len(w.watchers), "Deployment should still be active")
   529  	m.AssertCalled(t, "UpdateDeploymentStatus", mocker.MatchedBy(matcher))
   530  }
   531  
   532  // Test pausing a deployment that is paused
   533  func TestWatcher_PauseDeployment_Pause_Paused(t *testing.T) {
   534  	t.Parallel()
   535  	assert := assert.New(t)
   536  	w, m := defaultTestDeploymentWatcher(t)
   537  
   538  	// Create a job and a deployment
   539  	j := mock.Job()
   540  	d := mock.Deployment()
   541  	d.JobID = j.ID
   542  	d.Status = structs.DeploymentStatusPaused
   543  	assert.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob")
   544  	assert.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
   545  
   546  	// Assert that we get a call to UpsertDeploymentStatusUpdate
   547  	matchConfig := &matchDeploymentStatusUpdateConfig{
   548  		DeploymentID:      d.ID,
   549  		Status:            structs.DeploymentStatusPaused,
   550  		StatusDescription: structs.DeploymentStatusDescriptionPaused,
   551  	}
   552  	matcher := matchDeploymentStatusUpdateRequest(matchConfig)
   553  	m.On("UpdateDeploymentStatus", mocker.MatchedBy(matcher)).Return(nil)
   554  
   555  	w.SetEnabled(true, m.state)
   556  	testutil.WaitForResult(func() (bool, error) { return 1 == len(w.watchers), nil },
   557  		func(err error) { assert.Equal(1, len(w.watchers), "Should have 1 deployment") })
   558  
   559  	// Call PauseDeployment
   560  	req := &structs.DeploymentPauseRequest{
   561  		DeploymentID: d.ID,
   562  		Pause:        true,
   563  	}
   564  	var resp structs.DeploymentUpdateResponse
   565  	err := w.PauseDeployment(req, &resp)
   566  	assert.Nil(err, "PauseDeployment")
   567  
   568  	assert.Equal(1, len(w.watchers), "Deployment should still be active")
   569  	m.AssertCalled(t, "UpdateDeploymentStatus", mocker.MatchedBy(matcher))
   570  }
   571  
   572  // Test unpausing a deployment that is paused
   573  func TestWatcher_PauseDeployment_Unpause_Paused(t *testing.T) {
   574  	t.Parallel()
   575  	assert := assert.New(t)
   576  	w, m := defaultTestDeploymentWatcher(t)
   577  
   578  	// Create a job and a deployment
   579  	j := mock.Job()
   580  	d := mock.Deployment()
   581  	d.JobID = j.ID
   582  	d.Status = structs.DeploymentStatusPaused
   583  	assert.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob")
   584  	assert.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
   585  
   586  	// Assert that we get a call to UpsertDeploymentStatusUpdate
   587  	matchConfig := &matchDeploymentStatusUpdateConfig{
   588  		DeploymentID:      d.ID,
   589  		Status:            structs.DeploymentStatusRunning,
   590  		StatusDescription: structs.DeploymentStatusDescriptionRunning,
   591  		Eval:              true,
   592  	}
   593  	matcher := matchDeploymentStatusUpdateRequest(matchConfig)
   594  	m.On("UpdateDeploymentStatus", mocker.MatchedBy(matcher)).Return(nil)
   595  
   596  	w.SetEnabled(true, m.state)
   597  	testutil.WaitForResult(func() (bool, error) { return 1 == len(w.watchers), nil },
   598  		func(err error) { assert.Equal(1, len(w.watchers), "Should have 1 deployment") })
   599  
   600  	// Call PauseDeployment
   601  	req := &structs.DeploymentPauseRequest{
   602  		DeploymentID: d.ID,
   603  		Pause:        false,
   604  	}
   605  	var resp structs.DeploymentUpdateResponse
   606  	err := w.PauseDeployment(req, &resp)
   607  	assert.Nil(err, "PauseDeployment")
   608  
   609  	assert.Equal(1, len(w.watchers), "Deployment should still be active")
   610  	m.AssertCalled(t, "UpdateDeploymentStatus", mocker.MatchedBy(matcher))
   611  }
   612  
   613  // Test unpausing a deployment that is running
   614  func TestWatcher_PauseDeployment_Unpause_Running(t *testing.T) {
   615  	t.Parallel()
   616  	assert := assert.New(t)
   617  	w, m := defaultTestDeploymentWatcher(t)
   618  
   619  	// Create a job and a deployment
   620  	j := mock.Job()
   621  	d := mock.Deployment()
   622  	d.JobID = j.ID
   623  	assert.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob")
   624  	assert.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
   625  
   626  	// Assert that we get a call to UpsertDeploymentStatusUpdate
   627  	matchConfig := &matchDeploymentStatusUpdateConfig{
   628  		DeploymentID:      d.ID,
   629  		Status:            structs.DeploymentStatusRunning,
   630  		StatusDescription: structs.DeploymentStatusDescriptionRunning,
   631  		Eval:              true,
   632  	}
   633  	matcher := matchDeploymentStatusUpdateRequest(matchConfig)
   634  	m.On("UpdateDeploymentStatus", mocker.MatchedBy(matcher)).Return(nil)
   635  
   636  	w.SetEnabled(true, m.state)
   637  	testutil.WaitForResult(func() (bool, error) { return 1 == len(w.watchers), nil },
   638  		func(err error) { assert.Equal(1, len(w.watchers), "Should have 1 deployment") })
   639  
   640  	// Call PauseDeployment
   641  	req := &structs.DeploymentPauseRequest{
   642  		DeploymentID: d.ID,
   643  		Pause:        false,
   644  	}
   645  	var resp structs.DeploymentUpdateResponse
   646  	err := w.PauseDeployment(req, &resp)
   647  	assert.Nil(err, "PauseDeployment")
   648  
   649  	assert.Equal(1, len(w.watchers), "Deployment should still be active")
   650  	m.AssertCalled(t, "UpdateDeploymentStatus", mocker.MatchedBy(matcher))
   651  }
   652  
   653  // Test failing a deployment that is running
   654  func TestWatcher_FailDeployment_Running(t *testing.T) {
   655  	t.Parallel()
   656  	assert := assert.New(t)
   657  	w, m := defaultTestDeploymentWatcher(t)
   658  
   659  	// Create a job and a deployment
   660  	j := mock.Job()
   661  	d := mock.Deployment()
   662  	d.JobID = j.ID
   663  	assert.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob")
   664  	assert.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
   665  
   666  	// Assert that we get a call to UpsertDeploymentStatusUpdate
   667  	matchConfig := &matchDeploymentStatusUpdateConfig{
   668  		DeploymentID:      d.ID,
   669  		Status:            structs.DeploymentStatusFailed,
   670  		StatusDescription: structs.DeploymentStatusDescriptionFailedByUser,
   671  		Eval:              true,
   672  	}
   673  	matcher := matchDeploymentStatusUpdateRequest(matchConfig)
   674  	m.On("UpdateDeploymentStatus", mocker.MatchedBy(matcher)).Return(nil)
   675  
   676  	w.SetEnabled(true, m.state)
   677  	testutil.WaitForResult(func() (bool, error) { return 1 == len(w.watchers), nil },
   678  		func(err error) { assert.Equal(1, len(w.watchers), "Should have 1 deployment") })
   679  
   680  	// Call PauseDeployment
   681  	req := &structs.DeploymentFailRequest{
   682  		DeploymentID: d.ID,
   683  	}
   684  	var resp structs.DeploymentUpdateResponse
   685  	err := w.FailDeployment(req, &resp)
   686  	assert.Nil(err, "FailDeployment")
   687  
   688  	assert.Equal(1, len(w.watchers), "Deployment should still be active")
   689  	m.AssertCalled(t, "UpdateDeploymentStatus", mocker.MatchedBy(matcher))
   690  }
   691  
   692  // Tests that the watcher properly watches for allocation changes and takes the
   693  // proper actions
   694  func TestDeploymentWatcher_Watch_NoProgressDeadline(t *testing.T) {
   695  	t.Parallel()
   696  	assert := assert.New(t)
   697  	w, m := testDeploymentWatcher(t, 1000.0, 1*time.Millisecond)
   698  
   699  	// Create a job, alloc, and a deployment
   700  	j := mock.Job()
   701  	j.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
   702  	j.TaskGroups[0].Update.MaxParallel = 2
   703  	j.TaskGroups[0].Update.AutoRevert = true
   704  	j.TaskGroups[0].Update.ProgressDeadline = 0
   705  	j.Stable = true
   706  	d := mock.Deployment()
   707  	d.JobID = j.ID
   708  	d.TaskGroups["web"].AutoRevert = true
   709  	a := mock.Alloc()
   710  	a.DeploymentID = d.ID
   711  	assert.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob")
   712  	assert.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
   713  	assert.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs")
   714  
   715  	// Upsert the job again to get a new version
   716  	j2 := j.Copy()
   717  	// Modify the job to make its specification different
   718  	j2.Meta["foo"] = "bar"
   719  	j2.Stable = false
   720  	assert.Nil(m.state.UpsertJob(m.nextIndex(), j2), "UpsertJob2")
   721  
   722  	// Assert that we will get a update allocation call only once. This will
   723  	// verify that the watcher is batching allocation changes
   724  	m1 := matchUpdateAllocDesiredTransitions([]string{d.ID})
   725  	m.On("UpdateAllocDesiredTransition", mocker.MatchedBy(m1)).Return(nil).Once()
   726  
   727  	// Assert that we get a call to UpsertDeploymentStatusUpdate
   728  	c := &matchDeploymentStatusUpdateConfig{
   729  		DeploymentID:      d.ID,
   730  		Status:            structs.DeploymentStatusFailed,
   731  		StatusDescription: structs.DeploymentStatusDescriptionRollback(structs.DeploymentStatusDescriptionFailedAllocations, 0),
   732  		JobVersion:        helper.Uint64ToPtr(0),
   733  		Eval:              true,
   734  	}
   735  	m2 := matchDeploymentStatusUpdateRequest(c)
   736  	m.On("UpdateDeploymentStatus", mocker.MatchedBy(m2)).Return(nil)
   737  
   738  	w.SetEnabled(true, m.state)
   739  	testutil.WaitForResult(func() (bool, error) { return 1 == len(w.watchers), nil },
   740  		func(err error) { assert.Equal(1, len(w.watchers), "Should have 1 deployment") })
   741  
   742  	// Update the allocs health to healthy which should create an evaluation
   743  	for i := 0; i < 5; i++ {
   744  		req := &structs.ApplyDeploymentAllocHealthRequest{
   745  			DeploymentAllocHealthRequest: structs.DeploymentAllocHealthRequest{
   746  				DeploymentID:         d.ID,
   747  				HealthyAllocationIDs: []string{a.ID},
   748  			},
   749  		}
   750  		assert.Nil(m.state.UpdateDeploymentAllocHealth(m.nextIndex(), req), "UpsertDeploymentAllocHealth")
   751  	}
   752  
   753  	// Wait for there to be one eval
   754  	testutil.WaitForResult(func() (bool, error) {
   755  		ws := memdb.NewWatchSet()
   756  		evals, err := m.state.EvalsByJob(ws, j.Namespace, j.ID)
   757  		if err != nil {
   758  			return false, err
   759  		}
   760  
   761  		if l := len(evals); l != 1 {
   762  			return false, fmt.Errorf("Got %d evals; want 1", l)
   763  		}
   764  
   765  		return true, nil
   766  	}, func(err error) {
   767  		t.Fatal(err)
   768  	})
   769  
   770  	// Update the allocs health to unhealthy which should create a job rollback,
   771  	// status update and eval
   772  	req2 := &structs.ApplyDeploymentAllocHealthRequest{
   773  		DeploymentAllocHealthRequest: structs.DeploymentAllocHealthRequest{
   774  			DeploymentID:           d.ID,
   775  			UnhealthyAllocationIDs: []string{a.ID},
   776  		},
   777  	}
   778  	assert.Nil(m.state.UpdateDeploymentAllocHealth(m.nextIndex(), req2), "UpsertDeploymentAllocHealth")
   779  
   780  	// Wait for there to be one eval
   781  	testutil.WaitForResult(func() (bool, error) {
   782  		ws := memdb.NewWatchSet()
   783  		evals, err := m.state.EvalsByJob(ws, j.Namespace, j.ID)
   784  		if err != nil {
   785  			return false, err
   786  		}
   787  
   788  		if l := len(evals); l != 2 {
   789  			return false, fmt.Errorf("Got %d evals; want 1", l)
   790  		}
   791  
   792  		return true, nil
   793  	}, func(err error) {
   794  		t.Fatal(err)
   795  	})
   796  
   797  	m.AssertCalled(t, "UpdateAllocDesiredTransition", mocker.MatchedBy(m1))
   798  
   799  	// After we upsert the job version will go to 2. So use this to assert the
   800  	// original call happened.
   801  	c2 := &matchDeploymentStatusUpdateConfig{
   802  		DeploymentID:      d.ID,
   803  		Status:            structs.DeploymentStatusFailed,
   804  		StatusDescription: structs.DeploymentStatusDescriptionRollback(structs.DeploymentStatusDescriptionFailedAllocations, 0),
   805  		JobVersion:        helper.Uint64ToPtr(2),
   806  		Eval:              true,
   807  	}
   808  	m3 := matchDeploymentStatusUpdateRequest(c2)
   809  	m.AssertCalled(t, "UpdateDeploymentStatus", mocker.MatchedBy(m3))
   810  	testutil.WaitForResult(func() (bool, error) { return 0 == len(w.watchers), nil },
   811  		func(err error) { assert.Equal(0, len(w.watchers), "Should have no deployment") })
   812  }
   813  
   814  func TestDeploymentWatcher_Watch_ProgressDeadline(t *testing.T) {
   815  	t.Parallel()
   816  	assert := assert.New(t)
   817  	w, m := testDeploymentWatcher(t, 1000.0, 1*time.Millisecond)
   818  
   819  	// Create a job, alloc, and a deployment
   820  	j := mock.Job()
   821  	j.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
   822  	j.TaskGroups[0].Update.MaxParallel = 2
   823  	j.TaskGroups[0].Update.ProgressDeadline = 500 * time.Millisecond
   824  	j.Stable = true
   825  	d := mock.Deployment()
   826  	d.JobID = j.ID
   827  	d.TaskGroups["web"].ProgressDeadline = 500 * time.Millisecond
   828  	a := mock.Alloc()
   829  	now := time.Now()
   830  	a.CreateTime = now.UnixNano()
   831  	a.ModifyTime = now.UnixNano()
   832  	a.DeploymentID = d.ID
   833  	assert.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob")
   834  	assert.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
   835  	assert.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs")
   836  
   837  	// Assert that we get a call to UpsertDeploymentStatusUpdate
   838  	c := &matchDeploymentStatusUpdateConfig{
   839  		DeploymentID:      d.ID,
   840  		Status:            structs.DeploymentStatusFailed,
   841  		StatusDescription: structs.DeploymentStatusDescriptionProgressDeadline,
   842  		Eval:              true,
   843  	}
   844  	m2 := matchDeploymentStatusUpdateRequest(c)
   845  	m.On("UpdateDeploymentStatus", mocker.MatchedBy(m2)).Return(nil)
   846  
   847  	w.SetEnabled(true, m.state)
   848  	testutil.WaitForResult(func() (bool, error) { return 1 == len(w.watchers), nil },
   849  		func(err error) { assert.Equal(1, len(w.watchers), "Should have 1 deployment") })
   850  
   851  	// Update the alloc to be unhealthy and assert that nothing happens.
   852  	a2 := a.Copy()
   853  	a2.DeploymentStatus = &structs.AllocDeploymentStatus{
   854  		Healthy:   helper.BoolToPtr(false),
   855  		Timestamp: now,
   856  	}
   857  	assert.Nil(m.state.UpdateAllocsFromClient(100, []*structs.Allocation{a2}))
   858  
   859  	// Wait for the deployment to be failed
   860  	testutil.WaitForResult(func() (bool, error) {
   861  		d, err := m.state.DeploymentByID(nil, d.ID)
   862  		if err != nil {
   863  			return false, err
   864  		}
   865  
   866  		return d.Status == structs.DeploymentStatusFailed, fmt.Errorf("bad status %q", d.Status)
   867  	}, func(err error) {
   868  		t.Fatal(err)
   869  	})
   870  
   871  	// Assert there are is only one evaluation
   872  	testutil.WaitForResult(func() (bool, error) {
   873  		ws := memdb.NewWatchSet()
   874  		evals, err := m.state.EvalsByJob(ws, j.Namespace, j.ID)
   875  		if err != nil {
   876  			return false, err
   877  		}
   878  
   879  		if l := len(evals); l != 1 {
   880  			return false, fmt.Errorf("Got %d evals; want 1", l)
   881  		}
   882  
   883  		return true, nil
   884  	}, func(err error) {
   885  		t.Fatal(err)
   886  	})
   887  }
   888  
   889  // Test that we will allow the progress deadline to be reached when the canaries
   890  // are healthy but we haven't promoted
   891  func TestDeploymentWatcher_Watch_ProgressDeadline_Canaries(t *testing.T) {
   892  	t.Parallel()
   893  	require := require.New(t)
   894  	w, m := testDeploymentWatcher(t, 1000.0, 1*time.Millisecond)
   895  
   896  	// Create a job, alloc, and a deployment
   897  	j := mock.Job()
   898  	j.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
   899  	j.TaskGroups[0].Update.Canary = 1
   900  	j.TaskGroups[0].Update.MaxParallel = 1
   901  	j.TaskGroups[0].Update.ProgressDeadline = 500 * time.Millisecond
   902  	j.Stable = true
   903  	d := mock.Deployment()
   904  	d.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion
   905  	d.JobID = j.ID
   906  	d.TaskGroups["web"].ProgressDeadline = 500 * time.Millisecond
   907  	d.TaskGroups["web"].DesiredCanaries = 1
   908  	a := mock.Alloc()
   909  	now := time.Now()
   910  	a.CreateTime = now.UnixNano()
   911  	a.ModifyTime = now.UnixNano()
   912  	a.DeploymentID = d.ID
   913  	require.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob")
   914  	require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
   915  	require.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs")
   916  
   917  	// Assert that we will get a createEvaluation call only once. This will
   918  	// verify that the watcher is batching allocation changes
   919  	m1 := matchUpdateAllocDesiredTransitions([]string{d.ID})
   920  	m.On("UpdateAllocDesiredTransition", mocker.MatchedBy(m1)).Return(nil).Once()
   921  
   922  	w.SetEnabled(true, m.state)
   923  	testutil.WaitForResult(func() (bool, error) { return 1 == len(w.watchers), nil },
   924  		func(err error) { require.Equal(1, len(w.watchers), "Should have 1 deployment") })
   925  
   926  	// Update the alloc to be unhealthy and require that nothing happens.
   927  	a2 := a.Copy()
   928  	a2.DeploymentStatus = &structs.AllocDeploymentStatus{
   929  		Healthy:   helper.BoolToPtr(true),
   930  		Timestamp: now,
   931  	}
   932  	require.Nil(m.state.UpdateAllocsFromClient(m.nextIndex(), []*structs.Allocation{a2}))
   933  
   934  	// Wait for the deployment to cross the deadline
   935  	dout, err := m.state.DeploymentByID(nil, d.ID)
   936  	require.NoError(err)
   937  	require.NotNil(dout)
   938  	state := dout.TaskGroups["web"]
   939  	require.NotNil(state)
   940  	time.Sleep(state.RequireProgressBy.Add(time.Second).Sub(now))
   941  
   942  	// Require the deployment is still running
   943  	dout, err = m.state.DeploymentByID(nil, d.ID)
   944  	require.NoError(err)
   945  	require.NotNil(dout)
   946  	require.Equal(structs.DeploymentStatusRunning, dout.Status)
   947  	require.Equal(structs.DeploymentStatusDescriptionRunningNeedsPromotion, dout.StatusDescription)
   948  
   949  	// require there are is only one evaluation
   950  	testutil.WaitForResult(func() (bool, error) {
   951  		ws := memdb.NewWatchSet()
   952  		evals, err := m.state.EvalsByJob(ws, j.Namespace, j.ID)
   953  		if err != nil {
   954  			return false, err
   955  		}
   956  
   957  		if l := len(evals); l != 1 {
   958  			return false, fmt.Errorf("Got %d evals; want 1", l)
   959  		}
   960  
   961  		return true, nil
   962  	}, func(err error) {
   963  		t.Fatal(err)
   964  	})
   965  }
   966  
   967  // Test that a promoted deployment with alloc healthy updates create
   968  // evals to move the deployment forward
   969  func TestDeploymentWatcher_PromotedCanary_UpdatedAllocs(t *testing.T) {
   970  	t.Parallel()
   971  	require := require.New(t)
   972  	w, m := testDeploymentWatcher(t, 1000.0, 1*time.Millisecond)
   973  
   974  	// Create a job, alloc, and a deployment
   975  	j := mock.Job()
   976  	j.TaskGroups[0].Count = 2
   977  	j.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
   978  	j.TaskGroups[0].Update.Canary = 1
   979  	j.TaskGroups[0].Update.MaxParallel = 1
   980  	j.TaskGroups[0].Update.ProgressDeadline = 50 * time.Millisecond
   981  	j.Stable = true
   982  
   983  	d := mock.Deployment()
   984  	d.TaskGroups["web"].DesiredTotal = 2
   985  	d.TaskGroups["web"].DesiredCanaries = 1
   986  	d.TaskGroups["web"].HealthyAllocs = 1
   987  	d.StatusDescription = structs.DeploymentStatusDescriptionRunning
   988  	d.JobID = j.ID
   989  	d.TaskGroups["web"].ProgressDeadline = 50 * time.Millisecond
   990  	d.TaskGroups["web"].RequireProgressBy = time.Now().Add(50 * time.Millisecond)
   991  
   992  	a := mock.Alloc()
   993  	now := time.Now()
   994  	a.CreateTime = now.UnixNano()
   995  	a.ModifyTime = now.UnixNano()
   996  	a.DeploymentID = d.ID
   997  	a.DeploymentStatus = &structs.AllocDeploymentStatus{
   998  		Healthy:   helper.BoolToPtr(true),
   999  		Timestamp: now,
  1000  	}
  1001  	require.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob")
  1002  	require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
  1003  	require.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs")
  1004  
  1005  	w.SetEnabled(true, m.state)
  1006  	testutil.WaitForResult(func() (bool, error) { return 1 == len(w.watchers), nil },
  1007  		func(err error) { require.Equal(1, len(w.watchers), "Should have 1 deployment") })
  1008  
  1009  	m1 := matchUpdateAllocDesiredTransitions([]string{d.ID})
  1010  	m.On("UpdateAllocDesiredTransition", mocker.MatchedBy(m1)).Return(nil).Twice()
  1011  
  1012  	// Create another alloc
  1013  	a2 := a.Copy()
  1014  	a2.ID = uuid.Generate()
  1015  	now = time.Now()
  1016  	a2.CreateTime = now.UnixNano()
  1017  	a2.ModifyTime = now.UnixNano()
  1018  	a2.DeploymentStatus = &structs.AllocDeploymentStatus{
  1019  		Healthy:   helper.BoolToPtr(true),
  1020  		Timestamp: now,
  1021  	}
  1022  	d.TaskGroups["web"].RequireProgressBy = time.Now().Add(2 * time.Second)
  1023  	require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
  1024  	// Wait until batch eval period passes before updating another alloc
  1025  	time.Sleep(1 * time.Second)
  1026  	require.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a2}), "UpsertAllocs")
  1027  
  1028  	// Wait for the deployment to cross the deadline
  1029  	dout, err := m.state.DeploymentByID(nil, d.ID)
  1030  	require.NoError(err)
  1031  	require.NotNil(dout)
  1032  	state := dout.TaskGroups["web"]
  1033  	require.NotNil(state)
  1034  	time.Sleep(state.RequireProgressBy.Add(time.Second).Sub(now))
  1035  
  1036  	// There should be two evals
  1037  	testutil.WaitForResult(func() (bool, error) {
  1038  		ws := memdb.NewWatchSet()
  1039  		evals, err := m.state.EvalsByJob(ws, j.Namespace, j.ID)
  1040  		if err != nil {
  1041  			return false, err
  1042  		}
  1043  
  1044  		if l := len(evals); l != 2 {
  1045  			return false, fmt.Errorf("Got %d evals; want 2", l)
  1046  		}
  1047  
  1048  		return true, nil
  1049  	}, func(err error) {
  1050  		t.Fatal(err)
  1051  	})
  1052  }
  1053  
  1054  // Test scenario where deployment initially has no progress deadline
  1055  // After the deployment is updated, a failed alloc's DesiredTransition should be set
  1056  func TestDeploymentWatcher_Watch_StartWithoutProgressDeadline(t *testing.T) {
  1057  	t.Parallel()
  1058  	assert := assert.New(t)
  1059  	w, m := testDeploymentWatcher(t, 1000.0, 1*time.Millisecond)
  1060  
  1061  	// Create a job, and a deployment
  1062  	j := mock.Job()
  1063  	j.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
  1064  	j.TaskGroups[0].Update.MaxParallel = 2
  1065  	j.TaskGroups[0].Update.ProgressDeadline = 500 * time.Millisecond
  1066  	j.Stable = true
  1067  	d := mock.Deployment()
  1068  	d.JobID = j.ID
  1069  
  1070  	assert.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob")
  1071  	assert.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
  1072  
  1073  	a := mock.Alloc()
  1074  	a.CreateTime = time.Now().UnixNano()
  1075  	a.DeploymentID = d.ID
  1076  
  1077  	assert.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs")
  1078  
  1079  	d.TaskGroups["web"].ProgressDeadline = 500 * time.Millisecond
  1080  	// Update the deployment with a progress deadline
  1081  	assert.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
  1082  
  1083  	// Match on DesiredTransition set to Reschedule for the failed alloc
  1084  	m1 := matchUpdateAllocDesiredTransitionReschedule([]string{a.ID})
  1085  	m.On("UpdateAllocDesiredTransition", mocker.MatchedBy(m1)).Return(nil).Once()
  1086  
  1087  	w.SetEnabled(true, m.state)
  1088  	testutil.WaitForResult(func() (bool, error) { return 1 == len(w.watchers), nil },
  1089  		func(err error) { assert.Equal(1, len(w.watchers), "Should have 1 deployment") })
  1090  
  1091  	// Update the alloc to be unhealthy
  1092  	a2 := a.Copy()
  1093  	a2.DeploymentStatus = &structs.AllocDeploymentStatus{
  1094  		Healthy:   helper.BoolToPtr(false),
  1095  		Timestamp: time.Now(),
  1096  	}
  1097  	assert.Nil(m.state.UpdateAllocsFromClient(m.nextIndex(), []*structs.Allocation{a2}))
  1098  
  1099  	// Wait for the alloc's DesiredState to set reschedule
  1100  	testutil.WaitForResult(func() (bool, error) {
  1101  		a, err := m.state.AllocByID(nil, a.ID)
  1102  		if err != nil {
  1103  			return false, err
  1104  		}
  1105  		dt := a.DesiredTransition
  1106  		shouldReschedule := dt.Reschedule != nil && *dt.Reschedule
  1107  		return shouldReschedule, fmt.Errorf("Desired Transition Reschedule should be set but got %v", shouldReschedule)
  1108  	}, func(err error) {
  1109  		t.Fatal(err)
  1110  	})
  1111  }
  1112  
  1113  // Tests that the watcher fails rollback when the spec hasn't changed
  1114  func TestDeploymentWatcher_RollbackFailed(t *testing.T) {
  1115  	t.Parallel()
  1116  	assert := assert.New(t)
  1117  	w, m := testDeploymentWatcher(t, 1000.0, 1*time.Millisecond)
  1118  
  1119  	// Create a job, alloc, and a deployment
  1120  	j := mock.Job()
  1121  	j.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
  1122  	j.TaskGroups[0].Update.MaxParallel = 2
  1123  	j.TaskGroups[0].Update.AutoRevert = true
  1124  	j.TaskGroups[0].Update.ProgressDeadline = 0
  1125  	j.Stable = true
  1126  	d := mock.Deployment()
  1127  	d.JobID = j.ID
  1128  	d.TaskGroups["web"].AutoRevert = true
  1129  	a := mock.Alloc()
  1130  	a.DeploymentID = d.ID
  1131  	assert.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob")
  1132  	assert.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
  1133  	assert.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs")
  1134  
  1135  	// Upsert the job again to get a new version
  1136  	j2 := j.Copy()
  1137  	// Modify the job to make its specification different
  1138  	j2.Stable = false
  1139  	assert.Nil(m.state.UpsertJob(m.nextIndex(), j2), "UpsertJob2")
  1140  
  1141  	// Assert that we will get a createEvaluation call only once. This will
  1142  	// verify that the watcher is batching allocation changes
  1143  	m1 := matchUpdateAllocDesiredTransitions([]string{d.ID})
  1144  	m.On("UpdateAllocDesiredTransition", mocker.MatchedBy(m1)).Return(nil).Once()
  1145  
  1146  	// Assert that we get a call to UpsertDeploymentStatusUpdate with roll back failed as the status
  1147  	c := &matchDeploymentStatusUpdateConfig{
  1148  		DeploymentID:      d.ID,
  1149  		Status:            structs.DeploymentStatusFailed,
  1150  		StatusDescription: structs.DeploymentStatusDescriptionRollbackNoop(structs.DeploymentStatusDescriptionFailedAllocations, 0),
  1151  		JobVersion:        nil,
  1152  		Eval:              true,
  1153  	}
  1154  	m2 := matchDeploymentStatusUpdateRequest(c)
  1155  	m.On("UpdateDeploymentStatus", mocker.MatchedBy(m2)).Return(nil)
  1156  
  1157  	w.SetEnabled(true, m.state)
  1158  	testutil.WaitForResult(func() (bool, error) { return 1 == len(w.watchers), nil },
  1159  		func(err error) { assert.Equal(1, len(w.watchers), "Should have 1 deployment") })
  1160  
  1161  	// Update the allocs health to healthy which should create an evaluation
  1162  	for i := 0; i < 5; i++ {
  1163  		req := &structs.ApplyDeploymentAllocHealthRequest{
  1164  			DeploymentAllocHealthRequest: structs.DeploymentAllocHealthRequest{
  1165  				DeploymentID:         d.ID,
  1166  				HealthyAllocationIDs: []string{a.ID},
  1167  			},
  1168  		}
  1169  		assert.Nil(m.state.UpdateDeploymentAllocHealth(m.nextIndex(), req), "UpsertDeploymentAllocHealth")
  1170  	}
  1171  
  1172  	// Wait for there to be one eval
  1173  	testutil.WaitForResult(func() (bool, error) {
  1174  		ws := memdb.NewWatchSet()
  1175  		evals, err := m.state.EvalsByJob(ws, j.Namespace, j.ID)
  1176  		if err != nil {
  1177  			return false, err
  1178  		}
  1179  
  1180  		if l := len(evals); l != 1 {
  1181  			return false, fmt.Errorf("Got %d evals; want 1", l)
  1182  		}
  1183  
  1184  		return true, nil
  1185  	}, func(err error) {
  1186  		t.Fatal(err)
  1187  	})
  1188  
  1189  	// Update the allocs health to unhealthy which will cause attempting a rollback,
  1190  	// fail in that step, do status update and eval
  1191  	req2 := &structs.ApplyDeploymentAllocHealthRequest{
  1192  		DeploymentAllocHealthRequest: structs.DeploymentAllocHealthRequest{
  1193  			DeploymentID:           d.ID,
  1194  			UnhealthyAllocationIDs: []string{a.ID},
  1195  		},
  1196  	}
  1197  	assert.Nil(m.state.UpdateDeploymentAllocHealth(m.nextIndex(), req2), "UpsertDeploymentAllocHealth")
  1198  
  1199  	// Wait for there to be one eval
  1200  	testutil.WaitForResult(func() (bool, error) {
  1201  		ws := memdb.NewWatchSet()
  1202  		evals, err := m.state.EvalsByJob(ws, j.Namespace, j.ID)
  1203  		if err != nil {
  1204  			return false, err
  1205  		}
  1206  
  1207  		if l := len(evals); l != 2 {
  1208  			return false, fmt.Errorf("Got %d evals; want 1", l)
  1209  		}
  1210  
  1211  		return true, nil
  1212  	}, func(err error) {
  1213  		t.Fatal(err)
  1214  	})
  1215  
  1216  	m.AssertCalled(t, "UpdateAllocDesiredTransition", mocker.MatchedBy(m1))
  1217  
  1218  	// verify that the job version hasn't changed after upsert
  1219  	m.state.JobByID(nil, structs.DefaultNamespace, j.ID)
  1220  	assert.Equal(uint64(0), j.Version, "Expected job version 0 but got ", j.Version)
  1221  }
  1222  
  1223  // Test allocation updates and evaluation creation is batched between watchers
  1224  func TestWatcher_BatchAllocUpdates(t *testing.T) {
  1225  	t.Parallel()
  1226  	assert := assert.New(t)
  1227  	w, m := testDeploymentWatcher(t, 1000.0, 1*time.Second)
  1228  
  1229  	// Create a job, alloc, for two deployments
  1230  	j1 := mock.Job()
  1231  	j1.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
  1232  	j1.TaskGroups[0].Update.ProgressDeadline = 0
  1233  	d1 := mock.Deployment()
  1234  	d1.JobID = j1.ID
  1235  	a1 := mock.Alloc()
  1236  	a1.Job = j1
  1237  	a1.JobID = j1.ID
  1238  	a1.DeploymentID = d1.ID
  1239  
  1240  	j2 := mock.Job()
  1241  	j2.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
  1242  	j2.TaskGroups[0].Update.ProgressDeadline = 0
  1243  	d2 := mock.Deployment()
  1244  	d2.JobID = j2.ID
  1245  	a2 := mock.Alloc()
  1246  	a2.Job = j2
  1247  	a2.JobID = j2.ID
  1248  	a2.DeploymentID = d2.ID
  1249  
  1250  	assert.Nil(m.state.UpsertJob(m.nextIndex(), j1), "UpsertJob")
  1251  	assert.Nil(m.state.UpsertJob(m.nextIndex(), j2), "UpsertJob")
  1252  	assert.Nil(m.state.UpsertDeployment(m.nextIndex(), d1), "UpsertDeployment")
  1253  	assert.Nil(m.state.UpsertDeployment(m.nextIndex(), d2), "UpsertDeployment")
  1254  	assert.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a1}), "UpsertAllocs")
  1255  	assert.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a2}), "UpsertAllocs")
  1256  
  1257  	// Assert that we will get a createEvaluation call only once and it contains
  1258  	// both deployments. This will verify that the watcher is batching
  1259  	// allocation changes
  1260  	m1 := matchUpdateAllocDesiredTransitions([]string{d1.ID, d2.ID})
  1261  	m.On("UpdateAllocDesiredTransition", mocker.MatchedBy(m1)).Return(nil).Once()
  1262  
  1263  	w.SetEnabled(true, m.state)
  1264  	testutil.WaitForResult(func() (bool, error) { return 2 == len(w.watchers), nil },
  1265  		func(err error) { assert.Equal(2, len(w.watchers), "Should have 2 deployment") })
  1266  
  1267  	// Update the allocs health to healthy which should create an evaluation
  1268  	req := &structs.ApplyDeploymentAllocHealthRequest{
  1269  		DeploymentAllocHealthRequest: structs.DeploymentAllocHealthRequest{
  1270  			DeploymentID:         d1.ID,
  1271  			HealthyAllocationIDs: []string{a1.ID},
  1272  		},
  1273  	}
  1274  	assert.Nil(m.state.UpdateDeploymentAllocHealth(m.nextIndex(), req), "UpsertDeploymentAllocHealth")
  1275  
  1276  	req2 := &structs.ApplyDeploymentAllocHealthRequest{
  1277  		DeploymentAllocHealthRequest: structs.DeploymentAllocHealthRequest{
  1278  			DeploymentID:         d2.ID,
  1279  			HealthyAllocationIDs: []string{a2.ID},
  1280  		},
  1281  	}
  1282  	assert.Nil(m.state.UpdateDeploymentAllocHealth(m.nextIndex(), req2), "UpsertDeploymentAllocHealth")
  1283  
  1284  	// Wait for there to be one eval for each job
  1285  	testutil.WaitForResult(func() (bool, error) {
  1286  		ws := memdb.NewWatchSet()
  1287  		evals1, err := m.state.EvalsByJob(ws, j1.Namespace, j1.ID)
  1288  		if err != nil {
  1289  			return false, err
  1290  		}
  1291  
  1292  		evals2, err := m.state.EvalsByJob(ws, j2.Namespace, j2.ID)
  1293  		if err != nil {
  1294  			return false, err
  1295  		}
  1296  
  1297  		if l := len(evals1); l != 1 {
  1298  			return false, fmt.Errorf("Got %d evals for job %v; want 1", l, j1.ID)
  1299  		}
  1300  
  1301  		if l := len(evals2); l != 1 {
  1302  			return false, fmt.Errorf("Got %d evals for job 2; want 1", l)
  1303  		}
  1304  
  1305  		return true, nil
  1306  	}, func(err error) {
  1307  		t.Fatal(err)
  1308  	})
  1309  
  1310  	m.AssertCalled(t, "UpdateAllocDesiredTransition", mocker.MatchedBy(m1))
  1311  	testutil.WaitForResult(func() (bool, error) { return 2 == len(w.watchers), nil },
  1312  		func(err error) { assert.Equal(2, len(w.watchers), "Should have 2 deployment") })
  1313  }