github.com/zhizhiboom/nomad@v0.8.5-0.20180907175415-f28fd3a1a056/nomad/deploymentwatcher/deployments_watcher_test.go

github.com/zhizhiboom/nomad@v0.8.5-0.20180907175415-f28fd3a1a056/nomad/deploymentwatcher/deployments_watcher_test.go (about)

     1  package deploymentwatcher
     2  
     3  import (
     4  	"fmt"
     5  	"testing"
     6  	"time"
     7  
     8  	memdb "github.com/hashicorp/go-memdb"
     9  	"github.com/hashicorp/nomad/helper"
    10  	"github.com/hashicorp/nomad/helper/testlog"
    11  	"github.com/hashicorp/nomad/helper/uuid"
    12  	"github.com/hashicorp/nomad/nomad/mock"
    13  	"github.com/hashicorp/nomad/nomad/structs"
    14  	"github.com/hashicorp/nomad/testutil"
    15  	"github.com/stretchr/testify/assert"
    16  	mocker "github.com/stretchr/testify/mock"
    17  	"github.com/stretchr/testify/require"
    18  )
    19  
    20  func testDeploymentWatcher(t *testing.T, qps float64, batchDur time.Duration) (*Watcher, *mockBackend) {
    21  	m := newMockBackend(t)
    22  	w := NewDeploymentsWatcher(testlog.Logger(t), m, qps, batchDur)
    23  	return w, m
    24  }
    25  
    26  func defaultTestDeploymentWatcher(t *testing.T) (*Watcher, *mockBackend) {
    27  	return testDeploymentWatcher(t, LimitStateQueriesPerSecond, CrossDeploymentUpdateBatchDuration)
    28  }
    29  
    30  // Tests that the watcher properly watches for deployments and reconciles them
    31  func TestWatcher_WatchDeployments(t *testing.T) {
    32  	t.Parallel()
    33  	assert := assert.New(t)
    34  	w, m := defaultTestDeploymentWatcher(t)
    35  
    36  	// Create three jobs
    37  	j1, j2, j3 := mock.Job(), mock.Job(), mock.Job()
    38  	assert.Nil(m.state.UpsertJob(100, j1))
    39  	assert.Nil(m.state.UpsertJob(101, j2))
    40  	assert.Nil(m.state.UpsertJob(102, j3))
    41  
    42  	// Create three deployments all running
    43  	d1, d2, d3 := mock.Deployment(), mock.Deployment(), mock.Deployment()
    44  	d1.JobID = j1.ID
    45  	d2.JobID = j2.ID
    46  	d3.JobID = j3.ID
    47  
    48  	// Upsert the first deployment
    49  	assert.Nil(m.state.UpsertDeployment(103, d1))
    50  
    51  	// Next list 3
    52  	block1 := make(chan time.Time)
    53  	go func() {
    54  		<-block1
    55  		assert.Nil(m.state.UpsertDeployment(104, d2))
    56  		assert.Nil(m.state.UpsertDeployment(105, d3))
    57  	}()
    58  
    59  	//// Next list 3 but have one be terminal
    60  	block2 := make(chan time.Time)
    61  	d3terminal := d3.Copy()
    62  	d3terminal.Status = structs.DeploymentStatusFailed
    63  	go func() {
    64  		<-block2
    65  		assert.Nil(m.state.UpsertDeployment(106, d3terminal))
    66  	}()
    67  
    68  	w.SetEnabled(true, m.state)
    69  	testutil.WaitForResult(func() (bool, error) { return 1 == len(w.watchers), nil },
    70  		func(err error) { assert.Equal(1, len(w.watchers), "1 deployment returned") })
    71  
    72  	close(block1)
    73  	testutil.WaitForResult(func() (bool, error) { return 3 == len(w.watchers), nil },
    74  		func(err error) { assert.Equal(3, len(w.watchers), "3 deployment returned") })
    75  
    76  	close(block2)
    77  	testutil.WaitForResult(func() (bool, error) { return 2 == len(w.watchers), nil },
    78  		func(err error) { assert.Equal(3, len(w.watchers), "3 deployment returned - 1 terminal") })
    79  }
    80  
    81  // Tests that calls against an unknown deployment fail
    82  func TestWatcher_UnknownDeployment(t *testing.T) {
    83  	t.Parallel()
    84  	assert := assert.New(t)
    85  	w, m := defaultTestDeploymentWatcher(t)
    86  	w.SetEnabled(true, m.state)
    87  
    88  	// The expected error is that it should be an unknown deployment
    89  	dID := uuid.Generate()
    90  	expected := fmt.Sprintf("unknown deployment %q", dID)
    91  
    92  	// Request setting the health against an unknown deployment
    93  	req := &structs.DeploymentAllocHealthRequest{
    94  		DeploymentID:         dID,
    95  		HealthyAllocationIDs: []string{uuid.Generate()},
    96  	}
    97  	var resp structs.DeploymentUpdateResponse
    98  	err := w.SetAllocHealth(req, &resp)
    99  	if assert.NotNil(err, "should have error for unknown deployment") {
   100  		assert.Contains(err.Error(), expected)
   101  	}
   102  
   103  	// Request promoting against an unknown deployment
   104  	req2 := &structs.DeploymentPromoteRequest{
   105  		DeploymentID: dID,
   106  		All:          true,
   107  	}
   108  	err = w.PromoteDeployment(req2, &resp)
   109  	if assert.NotNil(err, "should have error for unknown deployment") {
   110  		assert.Contains(err.Error(), expected)
   111  	}
   112  
   113  	// Request pausing against an unknown deployment
   114  	req3 := &structs.DeploymentPauseRequest{
   115  		DeploymentID: dID,
   116  		Pause:        true,
   117  	}
   118  	err = w.PauseDeployment(req3, &resp)
   119  	if assert.NotNil(err, "should have error for unknown deployment") {
   120  		assert.Contains(err.Error(), expected)
   121  	}
   122  
   123  	// Request failing against an unknown deployment
   124  	req4 := &structs.DeploymentFailRequest{
   125  		DeploymentID: dID,
   126  	}
   127  	err = w.FailDeployment(req4, &resp)
   128  	if assert.NotNil(err, "should have error for unknown deployment") {
   129  		assert.Contains(err.Error(), expected)
   130  	}
   131  }
   132  
   133  // Test setting an unknown allocation's health
   134  func TestWatcher_SetAllocHealth_Unknown(t *testing.T) {
   135  	t.Parallel()
   136  	assert := assert.New(t)
   137  	w, m := defaultTestDeploymentWatcher(t)
   138  
   139  	// Create a job, and a deployment
   140  	j := mock.Job()
   141  	d := mock.Deployment()
   142  	d.JobID = j.ID
   143  	assert.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob")
   144  	assert.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
   145  
   146  	// Assert that we get a call to UpsertDeploymentAllocHealth
   147  	a := mock.Alloc()
   148  	matchConfig := &matchDeploymentAllocHealthRequestConfig{
   149  		DeploymentID: d.ID,
   150  		Healthy:      []string{a.ID},
   151  		Eval:         true,
   152  	}
   153  	matcher := matchDeploymentAllocHealthRequest(matchConfig)
   154  	m.On("UpdateDeploymentAllocHealth", mocker.MatchedBy(matcher)).Return(nil)
   155  
   156  	w.SetEnabled(true, m.state)
   157  	testutil.WaitForResult(func() (bool, error) { return 1 == len(w.watchers), nil },
   158  		func(err error) { assert.Equal(1, len(w.watchers), "Should have 1 deployment") })
   159  
   160  	// Call SetAllocHealth
   161  	req := &structs.DeploymentAllocHealthRequest{
   162  		DeploymentID:         d.ID,
   163  		HealthyAllocationIDs: []string{a.ID},
   164  	}
   165  	var resp structs.DeploymentUpdateResponse
   166  	err := w.SetAllocHealth(req, &resp)
   167  	if assert.NotNil(err, "Set health of unknown allocation") {
   168  		assert.Contains(err.Error(), "unknown")
   169  	}
   170  	assert.Equal(1, len(w.watchers), "Deployment should still be active")
   171  }
   172  
   173  // Test setting allocation health
   174  func TestWatcher_SetAllocHealth_Healthy(t *testing.T) {
   175  	t.Parallel()
   176  	assert := assert.New(t)
   177  	w, m := defaultTestDeploymentWatcher(t)
   178  
   179  	// Create a job, alloc, and a deployment
   180  	j := mock.Job()
   181  	d := mock.Deployment()
   182  	d.JobID = j.ID
   183  	a := mock.Alloc()
   184  	a.DeploymentID = d.ID
   185  	assert.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob")
   186  	assert.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
   187  	assert.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs")
   188  
   189  	// Assert that we get a call to UpsertDeploymentAllocHealth
   190  	matchConfig := &matchDeploymentAllocHealthRequestConfig{
   191  		DeploymentID: d.ID,
   192  		Healthy:      []string{a.ID},
   193  		Eval:         true,
   194  	}
   195  	matcher := matchDeploymentAllocHealthRequest(matchConfig)
   196  	m.On("UpdateDeploymentAllocHealth", mocker.MatchedBy(matcher)).Return(nil)
   197  
   198  	w.SetEnabled(true, m.state)
   199  	testutil.WaitForResult(func() (bool, error) { return 1 == len(w.watchers), nil },
   200  		func(err error) { assert.Equal(1, len(w.watchers), "Should have 1 deployment") })
   201  
   202  	// Call SetAllocHealth
   203  	req := &structs.DeploymentAllocHealthRequest{
   204  		DeploymentID:         d.ID,
   205  		HealthyAllocationIDs: []string{a.ID},
   206  	}
   207  	var resp structs.DeploymentUpdateResponse
   208  	err := w.SetAllocHealth(req, &resp)
   209  	assert.Nil(err, "SetAllocHealth")
   210  	assert.Equal(1, len(w.watchers), "Deployment should still be active")
   211  	m.AssertCalled(t, "UpdateDeploymentAllocHealth", mocker.MatchedBy(matcher))
   212  }
   213  
   214  // Test setting allocation unhealthy
   215  func TestWatcher_SetAllocHealth_Unhealthy(t *testing.T) {
   216  	t.Parallel()
   217  	assert := assert.New(t)
   218  	w, m := defaultTestDeploymentWatcher(t)
   219  
   220  	// Create a job, alloc, and a deployment
   221  	j := mock.Job()
   222  	d := mock.Deployment()
   223  	d.JobID = j.ID
   224  	a := mock.Alloc()
   225  	a.DeploymentID = d.ID
   226  	assert.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob")
   227  	assert.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
   228  	assert.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs")
   229  
   230  	// Assert that we get a call to UpsertDeploymentAllocHealth
   231  	matchConfig := &matchDeploymentAllocHealthRequestConfig{
   232  		DeploymentID: d.ID,
   233  		Unhealthy:    []string{a.ID},
   234  		Eval:         true,
   235  		DeploymentUpdate: &structs.DeploymentStatusUpdate{
   236  			DeploymentID:      d.ID,
   237  			Status:            structs.DeploymentStatusFailed,
   238  			StatusDescription: structs.DeploymentStatusDescriptionFailedAllocations,
   239  		},
   240  	}
   241  	matcher := matchDeploymentAllocHealthRequest(matchConfig)
   242  	m.On("UpdateDeploymentAllocHealth", mocker.MatchedBy(matcher)).Return(nil)
   243  
   244  	w.SetEnabled(true, m.state)
   245  	testutil.WaitForResult(func() (bool, error) { return 1 == len(w.watchers), nil },
   246  		func(err error) { assert.Equal(1, len(w.watchers), "Should have 1 deployment") })
   247  
   248  	// Call SetAllocHealth
   249  	req := &structs.DeploymentAllocHealthRequest{
   250  		DeploymentID:           d.ID,
   251  		UnhealthyAllocationIDs: []string{a.ID},
   252  	}
   253  	var resp structs.DeploymentUpdateResponse
   254  	err := w.SetAllocHealth(req, &resp)
   255  	assert.Nil(err, "SetAllocHealth")
   256  
   257  	testutil.WaitForResult(func() (bool, error) { return 0 == len(w.watchers), nil },
   258  		func(err error) { assert.Equal(0, len(w.watchers), "Should have no deployment") })
   259  	m.AssertNumberOfCalls(t, "UpdateDeploymentAllocHealth", 1)
   260  }
   261  
   262  // Test setting allocation unhealthy and that there should be a rollback
   263  func TestWatcher_SetAllocHealth_Unhealthy_Rollback(t *testing.T) {
   264  	t.Parallel()
   265  	assert := assert.New(t)
   266  	w, m := defaultTestDeploymentWatcher(t)
   267  
   268  	// Create a job, alloc, and a deployment
   269  	j := mock.Job()
   270  	j.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
   271  	j.TaskGroups[0].Update.MaxParallel = 2
   272  	j.TaskGroups[0].Update.AutoRevert = true
   273  	j.TaskGroups[0].Update.ProgressDeadline = 0
   274  	j.Stable = true
   275  	d := mock.Deployment()
   276  	d.JobID = j.ID
   277  	d.TaskGroups["web"].AutoRevert = true
   278  	a := mock.Alloc()
   279  	a.DeploymentID = d.ID
   280  	assert.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob")
   281  	assert.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
   282  	assert.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs")
   283  
   284  	// Upsert the job again to get a new version
   285  	j2 := j.Copy()
   286  	j2.Stable = false
   287  	// Modify the job to make its specification different
   288  	j2.Meta["foo"] = "bar"
   289  
   290  	assert.Nil(m.state.UpsertJob(m.nextIndex(), j2), "UpsertJob2")
   291  
   292  	// Assert that we get a call to UpsertDeploymentAllocHealth
   293  	matchConfig := &matchDeploymentAllocHealthRequestConfig{
   294  		DeploymentID: d.ID,
   295  		Unhealthy:    []string{a.ID},
   296  		Eval:         true,
   297  		DeploymentUpdate: &structs.DeploymentStatusUpdate{
   298  			DeploymentID:      d.ID,
   299  			Status:            structs.DeploymentStatusFailed,
   300  			StatusDescription: structs.DeploymentStatusDescriptionFailedAllocations,
   301  		},
   302  		JobVersion: helper.Uint64ToPtr(0),
   303  	}
   304  	matcher := matchDeploymentAllocHealthRequest(matchConfig)
   305  	m.On("UpdateDeploymentAllocHealth", mocker.MatchedBy(matcher)).Return(nil)
   306  
   307  	w.SetEnabled(true, m.state)
   308  	testutil.WaitForResult(func() (bool, error) { return 1 == len(w.watchers), nil },
   309  		func(err error) { assert.Equal(1, len(w.watchers), "Should have 1 deployment") })
   310  
   311  	// Call SetAllocHealth
   312  	req := &structs.DeploymentAllocHealthRequest{
   313  		DeploymentID:           d.ID,
   314  		UnhealthyAllocationIDs: []string{a.ID},
   315  	}
   316  	var resp structs.DeploymentUpdateResponse
   317  	err := w.SetAllocHealth(req, &resp)
   318  	assert.Nil(err, "SetAllocHealth")
   319  
   320  	testutil.WaitForResult(func() (bool, error) { return 0 == len(w.watchers), nil },
   321  		func(err error) { assert.Equal(0, len(w.watchers), "Should have no deployment") })
   322  	m.AssertNumberOfCalls(t, "UpdateDeploymentAllocHealth", 1)
   323  }
   324  
   325  // Test setting allocation unhealthy on job with identical spec and there should be no rollback
   326  func TestWatcher_SetAllocHealth_Unhealthy_NoRollback(t *testing.T) {
   327  	t.Parallel()
   328  	assert := assert.New(t)
   329  	w, m := defaultTestDeploymentWatcher(t)
   330  
   331  	// Create a job, alloc, and a deployment
   332  	j := mock.Job()
   333  	j.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
   334  	j.TaskGroups[0].Update.MaxParallel = 2
   335  	j.TaskGroups[0].Update.AutoRevert = true
   336  	j.TaskGroups[0].Update.ProgressDeadline = 0
   337  	j.Stable = true
   338  	d := mock.Deployment()
   339  	d.JobID = j.ID
   340  	d.TaskGroups["web"].AutoRevert = true
   341  	a := mock.Alloc()
   342  	a.DeploymentID = d.ID
   343  	assert.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob")
   344  	assert.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
   345  	assert.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs")
   346  
   347  	// Upsert the job again to get a new version
   348  	j2 := j.Copy()
   349  	j2.Stable = false
   350  
   351  	assert.Nil(m.state.UpsertJob(m.nextIndex(), j2), "UpsertJob2")
   352  
   353  	// Assert that we get a call to UpsertDeploymentAllocHealth
   354  	matchConfig := &matchDeploymentAllocHealthRequestConfig{
   355  		DeploymentID: d.ID,
   356  		Unhealthy:    []string{a.ID},
   357  		Eval:         true,
   358  		DeploymentUpdate: &structs.DeploymentStatusUpdate{
   359  			DeploymentID:      d.ID,
   360  			Status:            structs.DeploymentStatusFailed,
   361  			StatusDescription: structs.DeploymentStatusDescriptionFailedAllocations,
   362  		},
   363  		JobVersion: nil,
   364  	}
   365  	matcher := matchDeploymentAllocHealthRequest(matchConfig)
   366  	m.On("UpdateDeploymentAllocHealth", mocker.MatchedBy(matcher)).Return(nil)
   367  
   368  	w.SetEnabled(true, m.state)
   369  	testutil.WaitForResult(func() (bool, error) { return 1 == len(w.watchers), nil },
   370  		func(err error) { assert.Equal(1, len(w.watchers), "Should have 1 deployment") })
   371  
   372  	// Call SetAllocHealth
   373  	req := &structs.DeploymentAllocHealthRequest{
   374  		DeploymentID:           d.ID,
   375  		UnhealthyAllocationIDs: []string{a.ID},
   376  	}
   377  	var resp structs.DeploymentUpdateResponse
   378  	err := w.SetAllocHealth(req, &resp)
   379  	assert.Nil(err, "SetAllocHealth")
   380  
   381  	testutil.WaitForResult(func() (bool, error) { return 0 == len(w.watchers), nil },
   382  		func(err error) { assert.Equal(0, len(w.watchers), "Should have no deployment") })
   383  	m.AssertNumberOfCalls(t, "UpdateDeploymentAllocHealth", 1)
   384  }
   385  
   386  // Test promoting a deployment
   387  func TestWatcher_PromoteDeployment_HealthyCanaries(t *testing.T) {
   388  	t.Parallel()
   389  	assert := assert.New(t)
   390  	w, m := defaultTestDeploymentWatcher(t)
   391  
   392  	// Create a job, canary alloc, and a deployment
   393  	j := mock.Job()
   394  	j.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
   395  	j.TaskGroups[0].Update.MaxParallel = 2
   396  	j.TaskGroups[0].Update.Canary = 1
   397  	j.TaskGroups[0].Update.ProgressDeadline = 0
   398  	d := mock.Deployment()
   399  	d.JobID = j.ID
   400  	a := mock.Alloc()
   401  	d.TaskGroups[a.TaskGroup].DesiredCanaries = 1
   402  	d.TaskGroups[a.TaskGroup].PlacedCanaries = []string{a.ID}
   403  	a.DeploymentStatus = &structs.AllocDeploymentStatus{
   404  		Healthy: helper.BoolToPtr(true),
   405  	}
   406  	a.DeploymentID = d.ID
   407  	assert.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob")
   408  	assert.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
   409  	assert.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs")
   410  
   411  	// Assert that we get a call to UpsertDeploymentPromotion
   412  	matchConfig := &matchDeploymentPromoteRequestConfig{
   413  		Promotion: &structs.DeploymentPromoteRequest{
   414  			DeploymentID: d.ID,
   415  			All:          true,
   416  		},
   417  		Eval: true,
   418  	}
   419  	matcher := matchDeploymentPromoteRequest(matchConfig)
   420  	m.On("UpdateDeploymentPromotion", mocker.MatchedBy(matcher)).Return(nil)
   421  
   422  	// We may get an update for the desired transition.
   423  	m1 := matchUpdateAllocDesiredTransitions([]string{d.ID})
   424  	m.On("UpdateAllocDesiredTransition", mocker.MatchedBy(m1)).Return(nil).Once()
   425  
   426  	w.SetEnabled(true, m.state)
   427  	testutil.WaitForResult(func() (bool, error) { return 1 == len(w.watchers), nil },
   428  		func(err error) { assert.Equal(1, len(w.watchers), "Should have 1 deployment") })
   429  
   430  	// Call PromoteDeployment
   431  	req := &structs.DeploymentPromoteRequest{
   432  		DeploymentID: d.ID,
   433  		All:          true,
   434  	}
   435  	var resp structs.DeploymentUpdateResponse
   436  	err := w.PromoteDeployment(req, &resp)
   437  	assert.Nil(err, "PromoteDeployment")
   438  	assert.Equal(1, len(w.watchers), "Deployment should still be active")
   439  	m.AssertCalled(t, "UpdateDeploymentPromotion", mocker.MatchedBy(matcher))
   440  }
   441  
   442  // Test promoting a deployment with unhealthy canaries
   443  func TestWatcher_PromoteDeployment_UnhealthyCanaries(t *testing.T) {
   444  	t.Parallel()
   445  	assert := assert.New(t)
   446  	w, m := defaultTestDeploymentWatcher(t)
   447  
   448  	// Create a job, canary alloc, and a deployment
   449  	j := mock.Job()
   450  	j.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
   451  	j.TaskGroups[0].Update.MaxParallel = 2
   452  	j.TaskGroups[0].Update.Canary = 2
   453  	j.TaskGroups[0].Update.ProgressDeadline = 0
   454  	d := mock.Deployment()
   455  	d.JobID = j.ID
   456  	a := mock.Alloc()
   457  	d.TaskGroups[a.TaskGroup].PlacedCanaries = []string{a.ID}
   458  	d.TaskGroups[a.TaskGroup].DesiredCanaries = 2
   459  	a.DeploymentID = d.ID
   460  	assert.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob")
   461  	assert.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
   462  	assert.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs")
   463  
   464  	// Assert that we get a call to UpsertDeploymentPromotion
   465  	matchConfig := &matchDeploymentPromoteRequestConfig{
   466  		Promotion: &structs.DeploymentPromoteRequest{
   467  			DeploymentID: d.ID,
   468  			All:          true,
   469  		},
   470  		Eval: true,
   471  	}
   472  	matcher := matchDeploymentPromoteRequest(matchConfig)
   473  	m.On("UpdateDeploymentPromotion", mocker.MatchedBy(matcher)).Return(nil)
   474  
   475  	w.SetEnabled(true, m.state)
   476  	testutil.WaitForResult(func() (bool, error) { return 1 == len(w.watchers), nil },
   477  		func(err error) { assert.Equal(1, len(w.watchers), "Should have 1 deployment") })
   478  
   479  	// Call SetAllocHealth
   480  	req := &structs.DeploymentPromoteRequest{
   481  		DeploymentID: d.ID,
   482  		All:          true,
   483  	}
   484  	var resp structs.DeploymentUpdateResponse
   485  	err := w.PromoteDeployment(req, &resp)
   486  	if assert.NotNil(err, "PromoteDeployment") {
   487  		assert.Contains(err.Error(), `Task group "web" has 0/2 healthy allocations`, "Should error because canary isn't marked healthy")
   488  	}
   489  
   490  	assert.Equal(1, len(w.watchers), "Deployment should still be active")
   491  	m.AssertCalled(t, "UpdateDeploymentPromotion", mocker.MatchedBy(matcher))
   492  }
   493  
   494  // Test pausing a deployment that is running
   495  func TestWatcher_PauseDeployment_Pause_Running(t *testing.T) {
   496  	t.Parallel()
   497  	assert := assert.New(t)
   498  	w, m := defaultTestDeploymentWatcher(t)
   499  
   500  	// Create a job and a deployment
   501  	j := mock.Job()
   502  	d := mock.Deployment()
   503  	d.JobID = j.ID
   504  	assert.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob")
   505  	assert.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
   506  
   507  	// Assert that we get a call to UpsertDeploymentStatusUpdate
   508  	matchConfig := &matchDeploymentStatusUpdateConfig{
   509  		DeploymentID:      d.ID,
   510  		Status:            structs.DeploymentStatusPaused,
   511  		StatusDescription: structs.DeploymentStatusDescriptionPaused,
   512  	}
   513  	matcher := matchDeploymentStatusUpdateRequest(matchConfig)
   514  	m.On("UpdateDeploymentStatus", mocker.MatchedBy(matcher)).Return(nil)
   515  
   516  	w.SetEnabled(true, m.state)
   517  	testutil.WaitForResult(func() (bool, error) { return 1 == len(w.watchers), nil },
   518  		func(err error) { assert.Equal(1, len(w.watchers), "Should have 1 deployment") })
   519  
   520  	// Call PauseDeployment
   521  	req := &structs.DeploymentPauseRequest{
   522  		DeploymentID: d.ID,
   523  		Pause:        true,
   524  	}
   525  	var resp structs.DeploymentUpdateResponse
   526  	err := w.PauseDeployment(req, &resp)
   527  	assert.Nil(err, "PauseDeployment")
   528  
   529  	assert.Equal(1, len(w.watchers), "Deployment should still be active")
   530  	m.AssertCalled(t, "UpdateDeploymentStatus", mocker.MatchedBy(matcher))
   531  }
   532  
   533  // Test pausing a deployment that is paused
   534  func TestWatcher_PauseDeployment_Pause_Paused(t *testing.T) {
   535  	t.Parallel()
   536  	assert := assert.New(t)
   537  	w, m := defaultTestDeploymentWatcher(t)
   538  
   539  	// Create a job and a deployment
   540  	j := mock.Job()
   541  	d := mock.Deployment()
   542  	d.JobID = j.ID
   543  	d.Status = structs.DeploymentStatusPaused
   544  	assert.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob")
   545  	assert.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
   546  
   547  	// Assert that we get a call to UpsertDeploymentStatusUpdate
   548  	matchConfig := &matchDeploymentStatusUpdateConfig{
   549  		DeploymentID:      d.ID,
   550  		Status:            structs.DeploymentStatusPaused,
   551  		StatusDescription: structs.DeploymentStatusDescriptionPaused,
   552  	}
   553  	matcher := matchDeploymentStatusUpdateRequest(matchConfig)
   554  	m.On("UpdateDeploymentStatus", mocker.MatchedBy(matcher)).Return(nil)
   555  
   556  	w.SetEnabled(true, m.state)
   557  	testutil.WaitForResult(func() (bool, error) { return 1 == len(w.watchers), nil },
   558  		func(err error) { assert.Equal(1, len(w.watchers), "Should have 1 deployment") })
   559  
   560  	// Call PauseDeployment
   561  	req := &structs.DeploymentPauseRequest{
   562  		DeploymentID: d.ID,
   563  		Pause:        true,
   564  	}
   565  	var resp structs.DeploymentUpdateResponse
   566  	err := w.PauseDeployment(req, &resp)
   567  	assert.Nil(err, "PauseDeployment")
   568  
   569  	assert.Equal(1, len(w.watchers), "Deployment should still be active")
   570  	m.AssertCalled(t, "UpdateDeploymentStatus", mocker.MatchedBy(matcher))
   571  }
   572  
   573  // Test unpausing a deployment that is paused
   574  func TestWatcher_PauseDeployment_Unpause_Paused(t *testing.T) {
   575  	t.Parallel()
   576  	assert := assert.New(t)
   577  	w, m := defaultTestDeploymentWatcher(t)
   578  
   579  	// Create a job and a deployment
   580  	j := mock.Job()
   581  	d := mock.Deployment()
   582  	d.JobID = j.ID
   583  	d.Status = structs.DeploymentStatusPaused
   584  	assert.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob")
   585  	assert.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
   586  
   587  	// Assert that we get a call to UpsertDeploymentStatusUpdate
   588  	matchConfig := &matchDeploymentStatusUpdateConfig{
   589  		DeploymentID:      d.ID,
   590  		Status:            structs.DeploymentStatusRunning,
   591  		StatusDescription: structs.DeploymentStatusDescriptionRunning,
   592  		Eval:              true,
   593  	}
   594  	matcher := matchDeploymentStatusUpdateRequest(matchConfig)
   595  	m.On("UpdateDeploymentStatus", mocker.MatchedBy(matcher)).Return(nil)
   596  
   597  	w.SetEnabled(true, m.state)
   598  	testutil.WaitForResult(func() (bool, error) { return 1 == len(w.watchers), nil },
   599  		func(err error) { assert.Equal(1, len(w.watchers), "Should have 1 deployment") })
   600  
   601  	// Call PauseDeployment
   602  	req := &structs.DeploymentPauseRequest{
   603  		DeploymentID: d.ID,
   604  		Pause:        false,
   605  	}
   606  	var resp structs.DeploymentUpdateResponse
   607  	err := w.PauseDeployment(req, &resp)
   608  	assert.Nil(err, "PauseDeployment")
   609  
   610  	assert.Equal(1, len(w.watchers), "Deployment should still be active")
   611  	m.AssertCalled(t, "UpdateDeploymentStatus", mocker.MatchedBy(matcher))
   612  }
   613  
   614  // Test unpausing a deployment that is running
   615  func TestWatcher_PauseDeployment_Unpause_Running(t *testing.T) {
   616  	t.Parallel()
   617  	assert := assert.New(t)
   618  	w, m := defaultTestDeploymentWatcher(t)
   619  
   620  	// Create a job and a deployment
   621  	j := mock.Job()
   622  	d := mock.Deployment()
   623  	d.JobID = j.ID
   624  	assert.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob")
   625  	assert.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
   626  
   627  	// Assert that we get a call to UpsertDeploymentStatusUpdate
   628  	matchConfig := &matchDeploymentStatusUpdateConfig{
   629  		DeploymentID:      d.ID,
   630  		Status:            structs.DeploymentStatusRunning,
   631  		StatusDescription: structs.DeploymentStatusDescriptionRunning,
   632  		Eval:              true,
   633  	}
   634  	matcher := matchDeploymentStatusUpdateRequest(matchConfig)
   635  	m.On("UpdateDeploymentStatus", mocker.MatchedBy(matcher)).Return(nil)
   636  
   637  	w.SetEnabled(true, m.state)
   638  	testutil.WaitForResult(func() (bool, error) { return 1 == len(w.watchers), nil },
   639  		func(err error) { assert.Equal(1, len(w.watchers), "Should have 1 deployment") })
   640  
   641  	// Call PauseDeployment
   642  	req := &structs.DeploymentPauseRequest{
   643  		DeploymentID: d.ID,
   644  		Pause:        false,
   645  	}
   646  	var resp structs.DeploymentUpdateResponse
   647  	err := w.PauseDeployment(req, &resp)
   648  	assert.Nil(err, "PauseDeployment")
   649  
   650  	assert.Equal(1, len(w.watchers), "Deployment should still be active")
   651  	m.AssertCalled(t, "UpdateDeploymentStatus", mocker.MatchedBy(matcher))
   652  }
   653  
   654  // Test failing a deployment that is running
   655  func TestWatcher_FailDeployment_Running(t *testing.T) {
   656  	t.Parallel()
   657  	assert := assert.New(t)
   658  	w, m := defaultTestDeploymentWatcher(t)
   659  
   660  	// Create a job and a deployment
   661  	j := mock.Job()
   662  	d := mock.Deployment()
   663  	d.JobID = j.ID
   664  	assert.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob")
   665  	assert.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
   666  
   667  	// Assert that we get a call to UpsertDeploymentStatusUpdate
   668  	matchConfig := &matchDeploymentStatusUpdateConfig{
   669  		DeploymentID:      d.ID,
   670  		Status:            structs.DeploymentStatusFailed,
   671  		StatusDescription: structs.DeploymentStatusDescriptionFailedByUser,
   672  		Eval:              true,
   673  	}
   674  	matcher := matchDeploymentStatusUpdateRequest(matchConfig)
   675  	m.On("UpdateDeploymentStatus", mocker.MatchedBy(matcher)).Return(nil)
   676  
   677  	w.SetEnabled(true, m.state)
   678  	testutil.WaitForResult(func() (bool, error) { return 1 == len(w.watchers), nil },
   679  		func(err error) { assert.Equal(1, len(w.watchers), "Should have 1 deployment") })
   680  
   681  	// Call PauseDeployment
   682  	req := &structs.DeploymentFailRequest{
   683  		DeploymentID: d.ID,
   684  	}
   685  	var resp structs.DeploymentUpdateResponse
   686  	err := w.FailDeployment(req, &resp)
   687  	assert.Nil(err, "FailDeployment")
   688  
   689  	assert.Equal(1, len(w.watchers), "Deployment should still be active")
   690  	m.AssertCalled(t, "UpdateDeploymentStatus", mocker.MatchedBy(matcher))
   691  }
   692  
   693  // Tests that the watcher properly watches for allocation changes and takes the
   694  // proper actions
   695  func TestDeploymentWatcher_Watch_NoProgressDeadline(t *testing.T) {
   696  	t.Parallel()
   697  	assert := assert.New(t)
   698  	w, m := testDeploymentWatcher(t, 1000.0, 1*time.Millisecond)
   699  
   700  	// Create a job, alloc, and a deployment
   701  	j := mock.Job()
   702  	j.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
   703  	j.TaskGroups[0].Update.MaxParallel = 2
   704  	j.TaskGroups[0].Update.AutoRevert = true
   705  	j.TaskGroups[0].Update.ProgressDeadline = 0
   706  	j.Stable = true
   707  	d := mock.Deployment()
   708  	d.JobID = j.ID
   709  	d.TaskGroups["web"].AutoRevert = true
   710  	a := mock.Alloc()
   711  	a.DeploymentID = d.ID
   712  	assert.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob")
   713  	assert.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
   714  	assert.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs")
   715  
   716  	// Upsert the job again to get a new version
   717  	j2 := j.Copy()
   718  	// Modify the job to make its specification different
   719  	j2.Meta["foo"] = "bar"
   720  	j2.Stable = false
   721  	assert.Nil(m.state.UpsertJob(m.nextIndex(), j2), "UpsertJob2")
   722  
   723  	// Assert that we will get a update allocation call only once. This will
   724  	// verify that the watcher is batching allocation changes
   725  	m1 := matchUpdateAllocDesiredTransitions([]string{d.ID})
   726  	m.On("UpdateAllocDesiredTransition", mocker.MatchedBy(m1)).Return(nil).Once()
   727  
   728  	// Assert that we get a call to UpsertDeploymentStatusUpdate
   729  	c := &matchDeploymentStatusUpdateConfig{
   730  		DeploymentID:      d.ID,
   731  		Status:            structs.DeploymentStatusFailed,
   732  		StatusDescription: structs.DeploymentStatusDescriptionRollback(structs.DeploymentStatusDescriptionFailedAllocations, 0),
   733  		JobVersion:        helper.Uint64ToPtr(0),
   734  		Eval:              true,
   735  	}
   736  	m2 := matchDeploymentStatusUpdateRequest(c)
   737  	m.On("UpdateDeploymentStatus", mocker.MatchedBy(m2)).Return(nil)
   738  
   739  	w.SetEnabled(true, m.state)
   740  	testutil.WaitForResult(func() (bool, error) { return 1 == len(w.watchers), nil },
   741  		func(err error) { assert.Equal(1, len(w.watchers), "Should have 1 deployment") })
   742  
   743  	// Update the allocs health to healthy which should create an evaluation
   744  	for i := 0; i < 5; i++ {
   745  		req := &structs.ApplyDeploymentAllocHealthRequest{
   746  			DeploymentAllocHealthRequest: structs.DeploymentAllocHealthRequest{
   747  				DeploymentID:         d.ID,
   748  				HealthyAllocationIDs: []string{a.ID},
   749  			},
   750  		}
   751  		assert.Nil(m.state.UpdateDeploymentAllocHealth(m.nextIndex(), req), "UpsertDeploymentAllocHealth")
   752  	}
   753  
   754  	// Wait for there to be one eval
   755  	testutil.WaitForResult(func() (bool, error) {
   756  		ws := memdb.NewWatchSet()
   757  		evals, err := m.state.EvalsByJob(ws, j.Namespace, j.ID)
   758  		if err != nil {
   759  			return false, err
   760  		}
   761  
   762  		if l := len(evals); l != 1 {
   763  			return false, fmt.Errorf("Got %d evals; want 1", l)
   764  		}
   765  
   766  		return true, nil
   767  	}, func(err error) {
   768  		t.Fatal(err)
   769  	})
   770  
   771  	// Update the allocs health to unhealthy which should create a job rollback,
   772  	// status update and eval
   773  	req2 := &structs.ApplyDeploymentAllocHealthRequest{
   774  		DeploymentAllocHealthRequest: structs.DeploymentAllocHealthRequest{
   775  			DeploymentID:           d.ID,
   776  			UnhealthyAllocationIDs: []string{a.ID},
   777  		},
   778  	}
   779  	assert.Nil(m.state.UpdateDeploymentAllocHealth(m.nextIndex(), req2), "UpsertDeploymentAllocHealth")
   780  
   781  	// Wait for there to be one eval
   782  	testutil.WaitForResult(func() (bool, error) {
   783  		ws := memdb.NewWatchSet()
   784  		evals, err := m.state.EvalsByJob(ws, j.Namespace, j.ID)
   785  		if err != nil {
   786  			return false, err
   787  		}
   788  
   789  		if l := len(evals); l != 2 {
   790  			return false, fmt.Errorf("Got %d evals; want 1", l)
   791  		}
   792  
   793  		return true, nil
   794  	}, func(err error) {
   795  		t.Fatal(err)
   796  	})
   797  
   798  	m.AssertCalled(t, "UpdateAllocDesiredTransition", mocker.MatchedBy(m1))
   799  
   800  	// After we upsert the job version will go to 2. So use this to assert the
   801  	// original call happened.
   802  	c2 := &matchDeploymentStatusUpdateConfig{
   803  		DeploymentID:      d.ID,
   804  		Status:            structs.DeploymentStatusFailed,
   805  		StatusDescription: structs.DeploymentStatusDescriptionRollback(structs.DeploymentStatusDescriptionFailedAllocations, 0),
   806  		JobVersion:        helper.Uint64ToPtr(2),
   807  		Eval:              true,
   808  	}
   809  	m3 := matchDeploymentStatusUpdateRequest(c2)
   810  	m.AssertCalled(t, "UpdateDeploymentStatus", mocker.MatchedBy(m3))
   811  	testutil.WaitForResult(func() (bool, error) { return 0 == len(w.watchers), nil },
   812  		func(err error) { assert.Equal(0, len(w.watchers), "Should have no deployment") })
   813  }
   814  
   815  func TestDeploymentWatcher_Watch_ProgressDeadline(t *testing.T) {
   816  	t.Parallel()
   817  	assert := assert.New(t)
   818  	w, m := testDeploymentWatcher(t, 1000.0, 1*time.Millisecond)
   819  
   820  	// Create a job, alloc, and a deployment
   821  	j := mock.Job()
   822  	j.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
   823  	j.TaskGroups[0].Update.MaxParallel = 2
   824  	j.TaskGroups[0].Update.ProgressDeadline = 500 * time.Millisecond
   825  	j.Stable = true
   826  	d := mock.Deployment()
   827  	d.JobID = j.ID
   828  	d.TaskGroups["web"].ProgressDeadline = 500 * time.Millisecond
   829  	a := mock.Alloc()
   830  	now := time.Now()
   831  	a.CreateTime = now.UnixNano()
   832  	a.ModifyTime = now.UnixNano()
   833  	a.DeploymentID = d.ID
   834  	assert.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob")
   835  	assert.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
   836  	assert.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs")
   837  
   838  	// Assert that we get a call to UpsertDeploymentStatusUpdate
   839  	c := &matchDeploymentStatusUpdateConfig{
   840  		DeploymentID:      d.ID,
   841  		Status:            structs.DeploymentStatusFailed,
   842  		StatusDescription: structs.DeploymentStatusDescriptionProgressDeadline,
   843  		Eval:              true,
   844  	}
   845  	m2 := matchDeploymentStatusUpdateRequest(c)
   846  	m.On("UpdateDeploymentStatus", mocker.MatchedBy(m2)).Return(nil)
   847  
   848  	w.SetEnabled(true, m.state)
   849  	testutil.WaitForResult(func() (bool, error) { return 1 == len(w.watchers), nil },
   850  		func(err error) { assert.Equal(1, len(w.watchers), "Should have 1 deployment") })
   851  
   852  	// Update the alloc to be unhealthy and assert that nothing happens.
   853  	a2 := a.Copy()
   854  	a2.DeploymentStatus = &structs.AllocDeploymentStatus{
   855  		Healthy:   helper.BoolToPtr(false),
   856  		Timestamp: now,
   857  	}
   858  	assert.Nil(m.state.UpdateAllocsFromClient(100, []*structs.Allocation{a2}))
   859  
   860  	// Wait for the deployment to be failed
   861  	testutil.WaitForResult(func() (bool, error) {
   862  		d, err := m.state.DeploymentByID(nil, d.ID)
   863  		if err != nil {
   864  			return false, err
   865  		}
   866  
   867  		return d.Status == structs.DeploymentStatusFailed, fmt.Errorf("bad status %q", d.Status)
   868  	}, func(err error) {
   869  		t.Fatal(err)
   870  	})
   871  
   872  	// Assert there are is only one evaluation
   873  	testutil.WaitForResult(func() (bool, error) {
   874  		ws := memdb.NewWatchSet()
   875  		evals, err := m.state.EvalsByJob(ws, j.Namespace, j.ID)
   876  		if err != nil {
   877  			return false, err
   878  		}
   879  
   880  		if l := len(evals); l != 1 {
   881  			return false, fmt.Errorf("Got %d evals; want 1", l)
   882  		}
   883  
   884  		return true, nil
   885  	}, func(err error) {
   886  		t.Fatal(err)
   887  	})
   888  }
   889  
   890  // Test that we will allow the progress deadline to be reached when the canaries
   891  // are healthy but we haven't promoted
   892  func TestDeploymentWatcher_Watch_ProgressDeadline_Canaries(t *testing.T) {
   893  	t.Parallel()
   894  	require := require.New(t)
   895  	w, m := testDeploymentWatcher(t, 1000.0, 1*time.Millisecond)
   896  
   897  	// Create a job, alloc, and a deployment
   898  	j := mock.Job()
   899  	j.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
   900  	j.TaskGroups[0].Update.Canary = 1
   901  	j.TaskGroups[0].Update.MaxParallel = 1
   902  	j.TaskGroups[0].Update.ProgressDeadline = 500 * time.Millisecond
   903  	j.Stable = true
   904  	d := mock.Deployment()
   905  	d.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion
   906  	d.JobID = j.ID
   907  	d.TaskGroups["web"].ProgressDeadline = 500 * time.Millisecond
   908  	d.TaskGroups["web"].DesiredCanaries = 1
   909  	a := mock.Alloc()
   910  	now := time.Now()
   911  	a.CreateTime = now.UnixNano()
   912  	a.ModifyTime = now.UnixNano()
   913  	a.DeploymentID = d.ID
   914  	require.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob")
   915  	require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
   916  	require.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs")
   917  
   918  	// Assert that we will get a createEvaluation call only once. This will
   919  	// verify that the watcher is batching allocation changes
   920  	m1 := matchUpdateAllocDesiredTransitions([]string{d.ID})
   921  	m.On("UpdateAllocDesiredTransition", mocker.MatchedBy(m1)).Return(nil).Once()
   922  
   923  	w.SetEnabled(true, m.state)
   924  	testutil.WaitForResult(func() (bool, error) { return 1 == len(w.watchers), nil },
   925  		func(err error) { require.Equal(1, len(w.watchers), "Should have 1 deployment") })
   926  
   927  	// Update the alloc to be unhealthy and require that nothing happens.
   928  	a2 := a.Copy()
   929  	a2.DeploymentStatus = &structs.AllocDeploymentStatus{
   930  		Healthy:   helper.BoolToPtr(true),
   931  		Timestamp: now,
   932  	}
   933  	require.Nil(m.state.UpdateAllocsFromClient(m.nextIndex(), []*structs.Allocation{a2}))
   934  
   935  	// Wait for the deployment to cross the deadline
   936  	dout, err := m.state.DeploymentByID(nil, d.ID)
   937  	require.NoError(err)
   938  	require.NotNil(dout)
   939  	state := dout.TaskGroups["web"]
   940  	require.NotNil(state)
   941  	time.Sleep(state.RequireProgressBy.Add(time.Second).Sub(now))
   942  
   943  	// Require the deployment is still running
   944  	dout, err = m.state.DeploymentByID(nil, d.ID)
   945  	require.NoError(err)
   946  	require.NotNil(dout)
   947  	require.Equal(structs.DeploymentStatusRunning, dout.Status)
   948  	require.Equal(structs.DeploymentStatusDescriptionRunningNeedsPromotion, dout.StatusDescription)
   949  
   950  	// require there are is only one evaluation
   951  	testutil.WaitForResult(func() (bool, error) {
   952  		ws := memdb.NewWatchSet()
   953  		evals, err := m.state.EvalsByJob(ws, j.Namespace, j.ID)
   954  		if err != nil {
   955  			return false, err
   956  		}
   957  
   958  		if l := len(evals); l != 1 {
   959  			return false, fmt.Errorf("Got %d evals; want 1", l)
   960  		}
   961  
   962  		return true, nil
   963  	}, func(err error) {
   964  		t.Fatal(err)
   965  	})
   966  }
   967  
   968  // Test that a promoted deployment with alloc healthy updates create
   969  // evals to move the deployment forward
   970  func TestDeploymentWatcher_PromotedCanary_UpdatedAllocs(t *testing.T) {
   971  	t.Parallel()
   972  	require := require.New(t)
   973  	w, m := testDeploymentWatcher(t, 1000.0, 1*time.Millisecond)
   974  
   975  	// Create a job, alloc, and a deployment
   976  	j := mock.Job()
   977  	j.TaskGroups[0].Count = 2
   978  	j.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
   979  	j.TaskGroups[0].Update.Canary = 1
   980  	j.TaskGroups[0].Update.MaxParallel = 1
   981  	j.TaskGroups[0].Update.ProgressDeadline = 50 * time.Millisecond
   982  	j.Stable = true
   983  
   984  	d := mock.Deployment()
   985  	d.TaskGroups["web"].DesiredTotal = 2
   986  	d.TaskGroups["web"].DesiredCanaries = 1
   987  	d.TaskGroups["web"].HealthyAllocs = 1
   988  	d.StatusDescription = structs.DeploymentStatusDescriptionRunning
   989  	d.JobID = j.ID
   990  	d.TaskGroups["web"].ProgressDeadline = 50 * time.Millisecond
   991  	d.TaskGroups["web"].RequireProgressBy = time.Now().Add(50 * time.Millisecond)
   992  
   993  	a := mock.Alloc()
   994  	now := time.Now()
   995  	a.CreateTime = now.UnixNano()
   996  	a.ModifyTime = now.UnixNano()
   997  	a.DeploymentID = d.ID
   998  	a.DeploymentStatus = &structs.AllocDeploymentStatus{
   999  		Healthy:   helper.BoolToPtr(true),
  1000  		Timestamp: now,
  1001  	}
  1002  	require.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob")
  1003  	require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
  1004  	require.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs")
  1005  
  1006  	w.SetEnabled(true, m.state)
  1007  	testutil.WaitForResult(func() (bool, error) { return 1 == len(w.watchers), nil },
  1008  		func(err error) { require.Equal(1, len(w.watchers), "Should have 1 deployment") })
  1009  
  1010  	m1 := matchUpdateAllocDesiredTransitions([]string{d.ID})
  1011  	m.On("UpdateAllocDesiredTransition", mocker.MatchedBy(m1)).Return(nil).Twice()
  1012  
  1013  	// Create another alloc
  1014  	a2 := a.Copy()
  1015  	a2.ID = uuid.Generate()
  1016  	now = time.Now()
  1017  	a2.CreateTime = now.UnixNano()
  1018  	a2.ModifyTime = now.UnixNano()
  1019  	a2.DeploymentStatus = &structs.AllocDeploymentStatus{
  1020  		Healthy:   helper.BoolToPtr(true),
  1021  		Timestamp: now,
  1022  	}
  1023  	d.TaskGroups["web"].RequireProgressBy = time.Now().Add(2 * time.Second)
  1024  	require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
  1025  	// Wait until batch eval period passes before updating another alloc
  1026  	time.Sleep(1 * time.Second)
  1027  	require.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a2}), "UpsertAllocs")
  1028  
  1029  	// Wait for the deployment to cross the deadline
  1030  	dout, err := m.state.DeploymentByID(nil, d.ID)
  1031  	require.NoError(err)
  1032  	require.NotNil(dout)
  1033  	state := dout.TaskGroups["web"]
  1034  	require.NotNil(state)
  1035  	time.Sleep(state.RequireProgressBy.Add(time.Second).Sub(now))
  1036  
  1037  	// There should be two evals
  1038  	testutil.WaitForResult(func() (bool, error) {
  1039  		ws := memdb.NewWatchSet()
  1040  		evals, err := m.state.EvalsByJob(ws, j.Namespace, j.ID)
  1041  		if err != nil {
  1042  			return false, err
  1043  		}
  1044  
  1045  		if l := len(evals); l != 2 {
  1046  			return false, fmt.Errorf("Got %d evals; want 2", l)
  1047  		}
  1048  
  1049  		return true, nil
  1050  	}, func(err error) {
  1051  		t.Fatal(err)
  1052  	})
  1053  }
  1054  
  1055  // Test scenario where deployment initially has no progress deadline
  1056  // After the deployment is updated, a failed alloc's DesiredTransition should be set
  1057  func TestDeploymentWatcher_Watch_StartWithoutProgressDeadline(t *testing.T) {
  1058  	t.Parallel()
  1059  	assert := assert.New(t)
  1060  	w, m := testDeploymentWatcher(t, 1000.0, 1*time.Millisecond)
  1061  
  1062  	// Create a job, and a deployment
  1063  	j := mock.Job()
  1064  	j.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
  1065  	j.TaskGroups[0].Update.MaxParallel = 2
  1066  	j.TaskGroups[0].Update.ProgressDeadline = 500 * time.Millisecond
  1067  	j.Stable = true
  1068  	d := mock.Deployment()
  1069  	d.JobID = j.ID
  1070  
  1071  	assert.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob")
  1072  	assert.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
  1073  
  1074  	a := mock.Alloc()
  1075  	a.CreateTime = time.Now().UnixNano()
  1076  	a.DeploymentID = d.ID
  1077  
  1078  	assert.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs")
  1079  
  1080  	d.TaskGroups["web"].ProgressDeadline = 500 * time.Millisecond
  1081  	// Update the deployment with a progress deadline
  1082  	assert.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
  1083  
  1084  	// Match on DesiredTransition set to Reschedule for the failed alloc
  1085  	m1 := matchUpdateAllocDesiredTransitionReschedule([]string{a.ID})
  1086  	m.On("UpdateAllocDesiredTransition", mocker.MatchedBy(m1)).Return(nil).Once()
  1087  
  1088  	w.SetEnabled(true, m.state)
  1089  	testutil.WaitForResult(func() (bool, error) { return 1 == len(w.watchers), nil },
  1090  		func(err error) { assert.Equal(1, len(w.watchers), "Should have 1 deployment") })
  1091  
  1092  	// Update the alloc to be unhealthy
  1093  	a2 := a.Copy()
  1094  	a2.DeploymentStatus = &structs.AllocDeploymentStatus{
  1095  		Healthy:   helper.BoolToPtr(false),
  1096  		Timestamp: time.Now(),
  1097  	}
  1098  	assert.Nil(m.state.UpdateAllocsFromClient(m.nextIndex(), []*structs.Allocation{a2}))
  1099  
  1100  	// Wait for the alloc's DesiredState to set reschedule
  1101  	testutil.WaitForResult(func() (bool, error) {
  1102  		a, err := m.state.AllocByID(nil, a.ID)
  1103  		if err != nil {
  1104  			return false, err
  1105  		}
  1106  		dt := a.DesiredTransition
  1107  		shouldReschedule := dt.Reschedule != nil && *dt.Reschedule
  1108  		return shouldReschedule, fmt.Errorf("Desired Transition Reschedule should be set but got %v", shouldReschedule)
  1109  	}, func(err error) {
  1110  		t.Fatal(err)
  1111  	})
  1112  }
  1113  
  1114  // Tests that the watcher fails rollback when the spec hasn't changed
  1115  func TestDeploymentWatcher_RollbackFailed(t *testing.T) {
  1116  	t.Parallel()
  1117  	assert := assert.New(t)
  1118  	w, m := testDeploymentWatcher(t, 1000.0, 1*time.Millisecond)
  1119  
  1120  	// Create a job, alloc, and a deployment
  1121  	j := mock.Job()
  1122  	j.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
  1123  	j.TaskGroups[0].Update.MaxParallel = 2
  1124  	j.TaskGroups[0].Update.AutoRevert = true
  1125  	j.TaskGroups[0].Update.ProgressDeadline = 0
  1126  	j.Stable = true
  1127  	d := mock.Deployment()
  1128  	d.JobID = j.ID
  1129  	d.TaskGroups["web"].AutoRevert = true
  1130  	a := mock.Alloc()
  1131  	a.DeploymentID = d.ID
  1132  	assert.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob")
  1133  	assert.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
  1134  	assert.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs")
  1135  
  1136  	// Upsert the job again to get a new version
  1137  	j2 := j.Copy()
  1138  	// Modify the job to make its specification different
  1139  	j2.Stable = false
  1140  	assert.Nil(m.state.UpsertJob(m.nextIndex(), j2), "UpsertJob2")
  1141  
  1142  	// Assert that we will get a createEvaluation call only once. This will
  1143  	// verify that the watcher is batching allocation changes
  1144  	m1 := matchUpdateAllocDesiredTransitions([]string{d.ID})
  1145  	m.On("UpdateAllocDesiredTransition", mocker.MatchedBy(m1)).Return(nil).Once()
  1146  
  1147  	// Assert that we get a call to UpsertDeploymentStatusUpdate with roll back failed as the status
  1148  	c := &matchDeploymentStatusUpdateConfig{
  1149  		DeploymentID:      d.ID,
  1150  		Status:            structs.DeploymentStatusFailed,
  1151  		StatusDescription: structs.DeploymentStatusDescriptionRollbackNoop(structs.DeploymentStatusDescriptionFailedAllocations, 0),
  1152  		JobVersion:        nil,
  1153  		Eval:              true,
  1154  	}
  1155  	m2 := matchDeploymentStatusUpdateRequest(c)
  1156  	m.On("UpdateDeploymentStatus", mocker.MatchedBy(m2)).Return(nil)
  1157  
  1158  	w.SetEnabled(true, m.state)
  1159  	testutil.WaitForResult(func() (bool, error) { return 1 == len(w.watchers), nil },
  1160  		func(err error) { assert.Equal(1, len(w.watchers), "Should have 1 deployment") })
  1161  
  1162  	// Update the allocs health to healthy which should create an evaluation
  1163  	for i := 0; i < 5; i++ {
  1164  		req := &structs.ApplyDeploymentAllocHealthRequest{
  1165  			DeploymentAllocHealthRequest: structs.DeploymentAllocHealthRequest{
  1166  				DeploymentID:         d.ID,
  1167  				HealthyAllocationIDs: []string{a.ID},
  1168  			},
  1169  		}
  1170  		assert.Nil(m.state.UpdateDeploymentAllocHealth(m.nextIndex(), req), "UpsertDeploymentAllocHealth")
  1171  	}
  1172  
  1173  	// Wait for there to be one eval
  1174  	testutil.WaitForResult(func() (bool, error) {
  1175  		ws := memdb.NewWatchSet()
  1176  		evals, err := m.state.EvalsByJob(ws, j.Namespace, j.ID)
  1177  		if err != nil {
  1178  			return false, err
  1179  		}
  1180  
  1181  		if l := len(evals); l != 1 {
  1182  			return false, fmt.Errorf("Got %d evals; want 1", l)
  1183  		}
  1184  
  1185  		return true, nil
  1186  	}, func(err error) {
  1187  		t.Fatal(err)
  1188  	})
  1189  
  1190  	// Update the allocs health to unhealthy which will cause attempting a rollback,
  1191  	// fail in that step, do status update and eval
  1192  	req2 := &structs.ApplyDeploymentAllocHealthRequest{
  1193  		DeploymentAllocHealthRequest: structs.DeploymentAllocHealthRequest{
  1194  			DeploymentID:           d.ID,
  1195  			UnhealthyAllocationIDs: []string{a.ID},
  1196  		},
  1197  	}
  1198  	assert.Nil(m.state.UpdateDeploymentAllocHealth(m.nextIndex(), req2), "UpsertDeploymentAllocHealth")
  1199  
  1200  	// Wait for there to be one eval
  1201  	testutil.WaitForResult(func() (bool, error) {
  1202  		ws := memdb.NewWatchSet()
  1203  		evals, err := m.state.EvalsByJob(ws, j.Namespace, j.ID)
  1204  		if err != nil {
  1205  			return false, err
  1206  		}
  1207  
  1208  		if l := len(evals); l != 2 {
  1209  			return false, fmt.Errorf("Got %d evals; want 1", l)
  1210  		}
  1211  
  1212  		return true, nil
  1213  	}, func(err error) {
  1214  		t.Fatal(err)
  1215  	})
  1216  
  1217  	m.AssertCalled(t, "UpdateAllocDesiredTransition", mocker.MatchedBy(m1))
  1218  
  1219  	// verify that the job version hasn't changed after upsert
  1220  	m.state.JobByID(nil, structs.DefaultNamespace, j.ID)
  1221  	assert.Equal(uint64(0), j.Version, "Expected job version 0 but got ", j.Version)
  1222  }
  1223  
  1224  // Test allocation updates and evaluation creation is batched between watchers
  1225  func TestWatcher_BatchAllocUpdates(t *testing.T) {
  1226  	t.Parallel()
  1227  	assert := assert.New(t)
  1228  	w, m := testDeploymentWatcher(t, 1000.0, 1*time.Second)
  1229  
  1230  	// Create a job, alloc, for two deployments
  1231  	j1 := mock.Job()
  1232  	j1.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
  1233  	j1.TaskGroups[0].Update.ProgressDeadline = 0
  1234  	d1 := mock.Deployment()
  1235  	d1.JobID = j1.ID
  1236  	a1 := mock.Alloc()
  1237  	a1.Job = j1
  1238  	a1.JobID = j1.ID
  1239  	a1.DeploymentID = d1.ID
  1240  
  1241  	j2 := mock.Job()
  1242  	j2.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
  1243  	j2.TaskGroups[0].Update.ProgressDeadline = 0
  1244  	d2 := mock.Deployment()
  1245  	d2.JobID = j2.ID
  1246  	a2 := mock.Alloc()
  1247  	a2.Job = j2
  1248  	a2.JobID = j2.ID
  1249  	a2.DeploymentID = d2.ID
  1250  
  1251  	assert.Nil(m.state.UpsertJob(m.nextIndex(), j1), "UpsertJob")
  1252  	assert.Nil(m.state.UpsertJob(m.nextIndex(), j2), "UpsertJob")
  1253  	assert.Nil(m.state.UpsertDeployment(m.nextIndex(), d1), "UpsertDeployment")
  1254  	assert.Nil(m.state.UpsertDeployment(m.nextIndex(), d2), "UpsertDeployment")
  1255  	assert.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a1}), "UpsertAllocs")
  1256  	assert.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a2}), "UpsertAllocs")
  1257  
  1258  	// Assert that we will get a createEvaluation call only once and it contains
  1259  	// both deployments. This will verify that the watcher is batching
  1260  	// allocation changes
  1261  	m1 := matchUpdateAllocDesiredTransitions([]string{d1.ID, d2.ID})
  1262  	m.On("UpdateAllocDesiredTransition", mocker.MatchedBy(m1)).Return(nil).Once()
  1263  
  1264  	w.SetEnabled(true, m.state)
  1265  	testutil.WaitForResult(func() (bool, error) { return 2 == len(w.watchers), nil },
  1266  		func(err error) { assert.Equal(2, len(w.watchers), "Should have 2 deployment") })
  1267  
  1268  	// Update the allocs health to healthy which should create an evaluation
  1269  	req := &structs.ApplyDeploymentAllocHealthRequest{
  1270  		DeploymentAllocHealthRequest: structs.DeploymentAllocHealthRequest{
  1271  			DeploymentID:         d1.ID,
  1272  			HealthyAllocationIDs: []string{a1.ID},
  1273  		},
  1274  	}
  1275  	assert.Nil(m.state.UpdateDeploymentAllocHealth(m.nextIndex(), req), "UpsertDeploymentAllocHealth")
  1276  
  1277  	req2 := &structs.ApplyDeploymentAllocHealthRequest{
  1278  		DeploymentAllocHealthRequest: structs.DeploymentAllocHealthRequest{
  1279  			DeploymentID:         d2.ID,
  1280  			HealthyAllocationIDs: []string{a2.ID},
  1281  		},
  1282  	}
  1283  	assert.Nil(m.state.UpdateDeploymentAllocHealth(m.nextIndex(), req2), "UpsertDeploymentAllocHealth")
  1284  
  1285  	// Wait for there to be one eval for each job
  1286  	testutil.WaitForResult(func() (bool, error) {
  1287  		ws := memdb.NewWatchSet()
  1288  		evals1, err := m.state.EvalsByJob(ws, j1.Namespace, j1.ID)
  1289  		if err != nil {
  1290  			return false, err
  1291  		}
  1292  
  1293  		evals2, err := m.state.EvalsByJob(ws, j2.Namespace, j2.ID)
  1294  		if err != nil {
  1295  			return false, err
  1296  		}
  1297  
  1298  		if l := len(evals1); l != 1 {
  1299  			return false, fmt.Errorf("Got %d evals for job %v; want 1", l, j1.ID)
  1300  		}
  1301  
  1302  		if l := len(evals2); l != 1 {
  1303  			return false, fmt.Errorf("Got %d evals for job 2; want 1", l)
  1304  		}
  1305  
  1306  		return true, nil
  1307  	}, func(err error) {
  1308  		t.Fatal(err)
  1309  	})
  1310  
  1311  	m.AssertCalled(t, "UpdateAllocDesiredTransition", mocker.MatchedBy(m1))
  1312  	testutil.WaitForResult(func() (bool, error) { return 2 == len(w.watchers), nil },
  1313  		func(err error) { assert.Equal(2, len(w.watchers), "Should have 2 deployment") })
  1314  }