github.com/smithx10/nomad@v0.9.1-rc1/nomad/deploymentwatcher/deployments_watcher_test.go (about) 1 package deploymentwatcher 2 3 import ( 4 "fmt" 5 "testing" 6 "time" 7 8 memdb "github.com/hashicorp/go-memdb" 9 "github.com/hashicorp/nomad/helper" 10 "github.com/hashicorp/nomad/helper/testlog" 11 "github.com/hashicorp/nomad/helper/uuid" 12 "github.com/hashicorp/nomad/nomad/mock" 13 "github.com/hashicorp/nomad/nomad/structs" 14 "github.com/hashicorp/nomad/testutil" 15 "github.com/stretchr/testify/assert" 16 mocker "github.com/stretchr/testify/mock" 17 "github.com/stretchr/testify/require" 18 ) 19 20 func testDeploymentWatcher(t *testing.T, qps float64, batchDur time.Duration) (*Watcher, *mockBackend) { 21 m := newMockBackend(t) 22 w := NewDeploymentsWatcher(testlog.HCLogger(t), m, qps, batchDur) 23 return w, m 24 } 25 26 func defaultTestDeploymentWatcher(t *testing.T) (*Watcher, *mockBackend) { 27 return testDeploymentWatcher(t, LimitStateQueriesPerSecond, CrossDeploymentUpdateBatchDuration) 28 } 29 30 // Tests that the watcher properly watches for deployments and reconciles them 31 func TestWatcher_WatchDeployments(t *testing.T) { 32 t.Parallel() 33 require := require.New(t) 34 w, m := defaultTestDeploymentWatcher(t) 35 36 // Create three jobs 37 j1, j2, j3 := mock.Job(), mock.Job(), mock.Job() 38 require.Nil(m.state.UpsertJob(100, j1)) 39 require.Nil(m.state.UpsertJob(101, j2)) 40 require.Nil(m.state.UpsertJob(102, j3)) 41 42 // Create three deployments all running 43 d1, d2, d3 := mock.Deployment(), mock.Deployment(), mock.Deployment() 44 d1.JobID = j1.ID 45 d2.JobID = j2.ID 46 d3.JobID = j3.ID 47 48 // Upsert the first deployment 49 require.Nil(m.state.UpsertDeployment(103, d1)) 50 51 // Next list 3 52 block1 := make(chan time.Time) 53 go func() { 54 <-block1 55 require.Nil(m.state.UpsertDeployment(104, d2)) 56 require.Nil(m.state.UpsertDeployment(105, d3)) 57 }() 58 59 //// Next list 3 but have one be terminal 60 block2 := make(chan time.Time) 61 d3terminal := 
d3.Copy() 62 d3terminal.Status = structs.DeploymentStatusFailed 63 go func() { 64 <-block2 65 require.Nil(m.state.UpsertDeployment(106, d3terminal)) 66 }() 67 68 w.SetEnabled(true, m.state) 69 testutil.WaitForResult(func() (bool, error) { return 1 == len(w.watchers), nil }, 70 func(err error) { require.Equal(1, len(w.watchers), "1 deployment returned") }) 71 72 close(block1) 73 testutil.WaitForResult(func() (bool, error) { return 3 == len(w.watchers), nil }, 74 func(err error) { require.Equal(3, len(w.watchers), "3 deployment returned") }) 75 76 close(block2) 77 testutil.WaitForResult(func() (bool, error) { return 2 == len(w.watchers), nil }, 78 func(err error) { require.Equal(3, len(w.watchers), "3 deployment returned - 1 terminal") }) 79 } 80 81 // Tests that calls against an unknown deployment fail 82 func TestWatcher_UnknownDeployment(t *testing.T) { 83 t.Parallel() 84 assert := assert.New(t) 85 require := require.New(t) 86 w, m := defaultTestDeploymentWatcher(t) 87 w.SetEnabled(true, m.state) 88 89 // The expected error is that it should be an unknown deployment 90 dID := uuid.Generate() 91 expected := fmt.Sprintf("unknown deployment %q", dID) 92 93 // Request setting the health against an unknown deployment 94 req := &structs.DeploymentAllocHealthRequest{ 95 DeploymentID: dID, 96 HealthyAllocationIDs: []string{uuid.Generate()}, 97 } 98 var resp structs.DeploymentUpdateResponse 99 err := w.SetAllocHealth(req, &resp) 100 if assert.NotNil(err, "should have error for unknown deployment") { 101 require.Contains(err.Error(), expected) 102 } 103 104 // Request promoting against an unknown deployment 105 req2 := &structs.DeploymentPromoteRequest{ 106 DeploymentID: dID, 107 All: true, 108 } 109 err = w.PromoteDeployment(req2, &resp) 110 if assert.NotNil(err, "should have error for unknown deployment") { 111 require.Contains(err.Error(), expected) 112 } 113 114 // Request pausing against an unknown deployment 115 req3 := &structs.DeploymentPauseRequest{ 116 
DeploymentID: dID, 117 Pause: true, 118 } 119 err = w.PauseDeployment(req3, &resp) 120 if assert.NotNil(err, "should have error for unknown deployment") { 121 require.Contains(err.Error(), expected) 122 } 123 124 // Request failing against an unknown deployment 125 req4 := &structs.DeploymentFailRequest{ 126 DeploymentID: dID, 127 } 128 err = w.FailDeployment(req4, &resp) 129 if assert.NotNil(err, "should have error for unknown deployment") { 130 require.Contains(err.Error(), expected) 131 } 132 } 133 134 // Test setting an unknown allocation's health 135 func TestWatcher_SetAllocHealth_Unknown(t *testing.T) { 136 t.Parallel() 137 assert := assert.New(t) 138 require := require.New(t) 139 w, m := defaultTestDeploymentWatcher(t) 140 141 // Create a job, and a deployment 142 j := mock.Job() 143 d := mock.Deployment() 144 d.JobID = j.ID 145 require.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob") 146 require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment") 147 148 // require that we get a call to UpsertDeploymentAllocHealth 149 a := mock.Alloc() 150 matchConfig := &matchDeploymentAllocHealthRequestConfig{ 151 DeploymentID: d.ID, 152 Healthy: []string{a.ID}, 153 Eval: true, 154 } 155 matcher := matchDeploymentAllocHealthRequest(matchConfig) 156 m.On("UpdateDeploymentAllocHealth", mocker.MatchedBy(matcher)).Return(nil) 157 158 w.SetEnabled(true, m.state) 159 testutil.WaitForResult(func() (bool, error) { return 1 == len(w.watchers), nil }, 160 func(err error) { require.Equal(1, len(w.watchers), "Should have 1 deployment") }) 161 162 // Call SetAllocHealth 163 req := &structs.DeploymentAllocHealthRequest{ 164 DeploymentID: d.ID, 165 HealthyAllocationIDs: []string{a.ID}, 166 } 167 var resp structs.DeploymentUpdateResponse 168 err := w.SetAllocHealth(req, &resp) 169 if assert.NotNil(err, "Set health of unknown allocation") { 170 require.Contains(err.Error(), "unknown") 171 } 172 require.Equal(1, len(w.watchers), "Deployment should still be active") 
173 } 174 175 // Test setting allocation health 176 func TestWatcher_SetAllocHealth_Healthy(t *testing.T) { 177 t.Parallel() 178 require := require.New(t) 179 w, m := defaultTestDeploymentWatcher(t) 180 181 // Create a job, alloc, and a deployment 182 j := mock.Job() 183 d := mock.Deployment() 184 d.JobID = j.ID 185 a := mock.Alloc() 186 a.DeploymentID = d.ID 187 require.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob") 188 require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment") 189 require.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs") 190 191 // require that we get a call to UpsertDeploymentAllocHealth 192 matchConfig := &matchDeploymentAllocHealthRequestConfig{ 193 DeploymentID: d.ID, 194 Healthy: []string{a.ID}, 195 Eval: true, 196 } 197 matcher := matchDeploymentAllocHealthRequest(matchConfig) 198 m.On("UpdateDeploymentAllocHealth", mocker.MatchedBy(matcher)).Return(nil) 199 200 w.SetEnabled(true, m.state) 201 testutil.WaitForResult(func() (bool, error) { return 1 == len(w.watchers), nil }, 202 func(err error) { require.Equal(1, len(w.watchers), "Should have 1 deployment") }) 203 204 // Call SetAllocHealth 205 req := &structs.DeploymentAllocHealthRequest{ 206 DeploymentID: d.ID, 207 HealthyAllocationIDs: []string{a.ID}, 208 } 209 var resp structs.DeploymentUpdateResponse 210 err := w.SetAllocHealth(req, &resp) 211 require.Nil(err, "SetAllocHealth") 212 require.Equal(1, len(w.watchers), "Deployment should still be active") 213 m.AssertCalled(t, "UpdateDeploymentAllocHealth", mocker.MatchedBy(matcher)) 214 } 215 216 // Test setting allocation unhealthy 217 func TestWatcher_SetAllocHealth_Unhealthy(t *testing.T) { 218 t.Parallel() 219 require := require.New(t) 220 w, m := defaultTestDeploymentWatcher(t) 221 222 // Create a job, alloc, and a deployment 223 j := mock.Job() 224 d := mock.Deployment() 225 d.JobID = j.ID 226 a := mock.Alloc() 227 a.DeploymentID = d.ID 228 
require.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob") 229 require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment") 230 require.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs") 231 232 // require that we get a call to UpsertDeploymentAllocHealth 233 matchConfig := &matchDeploymentAllocHealthRequestConfig{ 234 DeploymentID: d.ID, 235 Unhealthy: []string{a.ID}, 236 Eval: true, 237 DeploymentUpdate: &structs.DeploymentStatusUpdate{ 238 DeploymentID: d.ID, 239 Status: structs.DeploymentStatusFailed, 240 StatusDescription: structs.DeploymentStatusDescriptionFailedAllocations, 241 }, 242 } 243 matcher := matchDeploymentAllocHealthRequest(matchConfig) 244 m.On("UpdateDeploymentAllocHealth", mocker.MatchedBy(matcher)).Return(nil) 245 246 w.SetEnabled(true, m.state) 247 testutil.WaitForResult(func() (bool, error) { return 1 == len(w.watchers), nil }, 248 func(err error) { require.Equal(1, len(w.watchers), "Should have 1 deployment") }) 249 250 // Call SetAllocHealth 251 req := &structs.DeploymentAllocHealthRequest{ 252 DeploymentID: d.ID, 253 UnhealthyAllocationIDs: []string{a.ID}, 254 } 255 var resp structs.DeploymentUpdateResponse 256 err := w.SetAllocHealth(req, &resp) 257 require.Nil(err, "SetAllocHealth") 258 259 testutil.WaitForResult(func() (bool, error) { return 0 == len(w.watchers), nil }, 260 func(err error) { require.Equal(0, len(w.watchers), "Should have no deployment") }) 261 m.AssertNumberOfCalls(t, "UpdateDeploymentAllocHealth", 1) 262 } 263 264 // Test setting allocation unhealthy and that there should be a rollback 265 func TestWatcher_SetAllocHealth_Unhealthy_Rollback(t *testing.T) { 266 t.Parallel() 267 require := require.New(t) 268 w, m := defaultTestDeploymentWatcher(t) 269 270 // Create a job, alloc, and a deployment 271 j := mock.Job() 272 j.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy() 273 j.TaskGroups[0].Update.MaxParallel = 2 274 j.TaskGroups[0].Update.AutoRevert = true 275 
j.TaskGroups[0].Update.ProgressDeadline = 0 276 j.Stable = true 277 d := mock.Deployment() 278 d.JobID = j.ID 279 d.TaskGroups["web"].AutoRevert = true 280 a := mock.Alloc() 281 a.DeploymentID = d.ID 282 require.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob") 283 require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment") 284 require.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs") 285 286 // Upsert the job again to get a new version 287 j2 := j.Copy() 288 j2.Stable = false 289 // Modify the job to make its specification different 290 j2.Meta["foo"] = "bar" 291 292 require.Nil(m.state.UpsertJob(m.nextIndex(), j2), "UpsertJob2") 293 294 // require that we get a call to UpsertDeploymentAllocHealth 295 matchConfig := &matchDeploymentAllocHealthRequestConfig{ 296 DeploymentID: d.ID, 297 Unhealthy: []string{a.ID}, 298 Eval: true, 299 DeploymentUpdate: &structs.DeploymentStatusUpdate{ 300 DeploymentID: d.ID, 301 Status: structs.DeploymentStatusFailed, 302 StatusDescription: structs.DeploymentStatusDescriptionFailedAllocations, 303 }, 304 JobVersion: helper.Uint64ToPtr(0), 305 } 306 matcher := matchDeploymentAllocHealthRequest(matchConfig) 307 m.On("UpdateDeploymentAllocHealth", mocker.MatchedBy(matcher)).Return(nil) 308 309 w.SetEnabled(true, m.state) 310 testutil.WaitForResult(func() (bool, error) { return 1 == len(w.watchers), nil }, 311 func(err error) { require.Equal(1, len(w.watchers), "Should have 1 deployment") }) 312 313 // Call SetAllocHealth 314 req := &structs.DeploymentAllocHealthRequest{ 315 DeploymentID: d.ID, 316 UnhealthyAllocationIDs: []string{a.ID}, 317 } 318 var resp structs.DeploymentUpdateResponse 319 err := w.SetAllocHealth(req, &resp) 320 require.Nil(err, "SetAllocHealth") 321 322 testutil.WaitForResult(func() (bool, error) { return 0 == len(w.watchers), nil }, 323 func(err error) { require.Equal(0, len(w.watchers), "Should have no deployment") }) 324 m.AssertNumberOfCalls(t, 
"UpdateDeploymentAllocHealth", 1) 325 } 326 327 // Test setting allocation unhealthy on job with identical spec and there should be no rollback 328 func TestWatcher_SetAllocHealth_Unhealthy_NoRollback(t *testing.T) { 329 t.Parallel() 330 require := require.New(t) 331 w, m := defaultTestDeploymentWatcher(t) 332 333 // Create a job, alloc, and a deployment 334 j := mock.Job() 335 j.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy() 336 j.TaskGroups[0].Update.MaxParallel = 2 337 j.TaskGroups[0].Update.AutoRevert = true 338 j.TaskGroups[0].Update.ProgressDeadline = 0 339 j.Stable = true 340 d := mock.Deployment() 341 d.JobID = j.ID 342 d.TaskGroups["web"].AutoRevert = true 343 a := mock.Alloc() 344 a.DeploymentID = d.ID 345 require.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob") 346 require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment") 347 require.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs") 348 349 // Upsert the job again to get a new version 350 j2 := j.Copy() 351 j2.Stable = false 352 353 require.Nil(m.state.UpsertJob(m.nextIndex(), j2), "UpsertJob2") 354 355 // require that we get a call to UpsertDeploymentAllocHealth 356 matchConfig := &matchDeploymentAllocHealthRequestConfig{ 357 DeploymentID: d.ID, 358 Unhealthy: []string{a.ID}, 359 Eval: true, 360 DeploymentUpdate: &structs.DeploymentStatusUpdate{ 361 DeploymentID: d.ID, 362 Status: structs.DeploymentStatusFailed, 363 StatusDescription: structs.DeploymentStatusDescriptionFailedAllocations, 364 }, 365 JobVersion: nil, 366 } 367 matcher := matchDeploymentAllocHealthRequest(matchConfig) 368 m.On("UpdateDeploymentAllocHealth", mocker.MatchedBy(matcher)).Return(nil) 369 370 w.SetEnabled(true, m.state) 371 testutil.WaitForResult(func() (bool, error) { return 1 == len(w.watchers), nil }, 372 func(err error) { require.Equal(1, len(w.watchers), "Should have 1 deployment") }) 373 374 // Call SetAllocHealth 375 req := 
&structs.DeploymentAllocHealthRequest{ 376 DeploymentID: d.ID, 377 UnhealthyAllocationIDs: []string{a.ID}, 378 } 379 var resp structs.DeploymentUpdateResponse 380 err := w.SetAllocHealth(req, &resp) 381 require.Nil(err, "SetAllocHealth") 382 383 testutil.WaitForResult(func() (bool, error) { return 0 == len(w.watchers), nil }, 384 func(err error) { require.Equal(0, len(w.watchers), "Should have no deployment") }) 385 m.AssertNumberOfCalls(t, "UpdateDeploymentAllocHealth", 1) 386 } 387 388 // Test promoting a deployment 389 func TestWatcher_PromoteDeployment_HealthyCanaries(t *testing.T) { 390 t.Parallel() 391 require := require.New(t) 392 w, m := defaultTestDeploymentWatcher(t) 393 394 // Create a job, canary alloc, and a deployment 395 j := mock.Job() 396 j.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy() 397 j.TaskGroups[0].Update.MaxParallel = 2 398 j.TaskGroups[0].Update.Canary = 1 399 j.TaskGroups[0].Update.ProgressDeadline = 0 400 d := mock.Deployment() 401 d.JobID = j.ID 402 a := mock.Alloc() 403 d.TaskGroups[a.TaskGroup].DesiredCanaries = 1 404 d.TaskGroups[a.TaskGroup].PlacedCanaries = []string{a.ID} 405 a.DeploymentStatus = &structs.AllocDeploymentStatus{ 406 Healthy: helper.BoolToPtr(true), 407 } 408 a.DeploymentID = d.ID 409 require.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob") 410 require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment") 411 require.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs") 412 413 // require that we get a call to UpsertDeploymentPromotion 414 matchConfig := &matchDeploymentPromoteRequestConfig{ 415 Promotion: &structs.DeploymentPromoteRequest{ 416 DeploymentID: d.ID, 417 All: true, 418 }, 419 Eval: true, 420 } 421 matcher := matchDeploymentPromoteRequest(matchConfig) 422 m.On("UpdateDeploymentPromotion", mocker.MatchedBy(matcher)).Return(nil) 423 424 // We may get an update for the desired transition. 
425 m1 := matchUpdateAllocDesiredTransitions([]string{d.ID}) 426 m.On("UpdateAllocDesiredTransition", mocker.MatchedBy(m1)).Return(nil).Once() 427 428 w.SetEnabled(true, m.state) 429 testutil.WaitForResult(func() (bool, error) { return 1 == len(w.watchers), nil }, 430 func(err error) { require.Equal(1, len(w.watchers), "Should have 1 deployment") }) 431 432 // Call PromoteDeployment 433 req := &structs.DeploymentPromoteRequest{ 434 DeploymentID: d.ID, 435 All: true, 436 } 437 var resp structs.DeploymentUpdateResponse 438 err := w.PromoteDeployment(req, &resp) 439 require.Nil(err, "PromoteDeployment") 440 require.Equal(1, len(w.watchers), "Deployment should still be active") 441 m.AssertCalled(t, "UpdateDeploymentPromotion", mocker.MatchedBy(matcher)) 442 } 443 444 // Test promoting a deployment with unhealthy canaries 445 func TestWatcher_PromoteDeployment_UnhealthyCanaries(t *testing.T) { 446 t.Parallel() 447 require := require.New(t) 448 w, m := defaultTestDeploymentWatcher(t) 449 450 // Create a job, canary alloc, and a deployment 451 j := mock.Job() 452 j.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy() 453 j.TaskGroups[0].Update.MaxParallel = 2 454 j.TaskGroups[0].Update.Canary = 2 455 j.TaskGroups[0].Update.ProgressDeadline = 0 456 d := mock.Deployment() 457 d.JobID = j.ID 458 a := mock.Alloc() 459 d.TaskGroups[a.TaskGroup].PlacedCanaries = []string{a.ID} 460 d.TaskGroups[a.TaskGroup].DesiredCanaries = 2 461 a.DeploymentID = d.ID 462 require.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob") 463 require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment") 464 require.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs") 465 466 // require that we get a call to UpsertDeploymentPromotion 467 matchConfig := &matchDeploymentPromoteRequestConfig{ 468 Promotion: &structs.DeploymentPromoteRequest{ 469 DeploymentID: d.ID, 470 All: true, 471 }, 472 Eval: true, 473 } 474 matcher := 
matchDeploymentPromoteRequest(matchConfig) 475 m.On("UpdateDeploymentPromotion", mocker.MatchedBy(matcher)).Return(nil) 476 477 w.SetEnabled(true, m.state) 478 testutil.WaitForResult(func() (bool, error) { return 1 == len(w.watchers), nil }, 479 func(err error) { require.Equal(1, len(w.watchers), "Should have 1 deployment") }) 480 481 // Call SetAllocHealth 482 req := &structs.DeploymentPromoteRequest{ 483 DeploymentID: d.ID, 484 All: true, 485 } 486 var resp structs.DeploymentUpdateResponse 487 err := w.PromoteDeployment(req, &resp) 488 if assert.NotNil(t, err, "PromoteDeployment") { 489 require.Contains(err.Error(), `Task group "web" has 0/2 healthy allocations`, "Should error because canary isn't marked healthy") 490 } 491 492 require.Equal(1, len(w.watchers), "Deployment should still be active") 493 m.AssertCalled(t, "UpdateDeploymentPromotion", mocker.MatchedBy(matcher)) 494 } 495 496 // Test pausing a deployment that is running 497 func TestWatcher_PauseDeployment_Pause_Running(t *testing.T) { 498 t.Parallel() 499 require := require.New(t) 500 w, m := defaultTestDeploymentWatcher(t) 501 502 // Create a job and a deployment 503 j := mock.Job() 504 d := mock.Deployment() 505 d.JobID = j.ID 506 require.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob") 507 require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment") 508 509 // require that we get a call to UpsertDeploymentStatusUpdate 510 matchConfig := &matchDeploymentStatusUpdateConfig{ 511 DeploymentID: d.ID, 512 Status: structs.DeploymentStatusPaused, 513 StatusDescription: structs.DeploymentStatusDescriptionPaused, 514 } 515 matcher := matchDeploymentStatusUpdateRequest(matchConfig) 516 m.On("UpdateDeploymentStatus", mocker.MatchedBy(matcher)).Return(nil) 517 518 w.SetEnabled(true, m.state) 519 testutil.WaitForResult(func() (bool, error) { return 1 == len(w.watchers), nil }, 520 func(err error) { require.Equal(1, len(w.watchers), "Should have 1 deployment") }) 521 522 // Call 
PauseDeployment 523 req := &structs.DeploymentPauseRequest{ 524 DeploymentID: d.ID, 525 Pause: true, 526 } 527 var resp structs.DeploymentUpdateResponse 528 err := w.PauseDeployment(req, &resp) 529 require.Nil(err, "PauseDeployment") 530 531 require.Equal(1, len(w.watchers), "Deployment should still be active") 532 m.AssertCalled(t, "UpdateDeploymentStatus", mocker.MatchedBy(matcher)) 533 } 534 535 // Test pausing a deployment that is paused 536 func TestWatcher_PauseDeployment_Pause_Paused(t *testing.T) { 537 t.Parallel() 538 require := require.New(t) 539 w, m := defaultTestDeploymentWatcher(t) 540 541 // Create a job and a deployment 542 j := mock.Job() 543 d := mock.Deployment() 544 d.JobID = j.ID 545 d.Status = structs.DeploymentStatusPaused 546 require.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob") 547 require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment") 548 549 // require that we get a call to UpsertDeploymentStatusUpdate 550 matchConfig := &matchDeploymentStatusUpdateConfig{ 551 DeploymentID: d.ID, 552 Status: structs.DeploymentStatusPaused, 553 StatusDescription: structs.DeploymentStatusDescriptionPaused, 554 } 555 matcher := matchDeploymentStatusUpdateRequest(matchConfig) 556 m.On("UpdateDeploymentStatus", mocker.MatchedBy(matcher)).Return(nil) 557 558 w.SetEnabled(true, m.state) 559 testutil.WaitForResult(func() (bool, error) { return 1 == len(w.watchers), nil }, 560 func(err error) { require.Equal(1, len(w.watchers), "Should have 1 deployment") }) 561 562 // Call PauseDeployment 563 req := &structs.DeploymentPauseRequest{ 564 DeploymentID: d.ID, 565 Pause: true, 566 } 567 var resp structs.DeploymentUpdateResponse 568 err := w.PauseDeployment(req, &resp) 569 require.Nil(err, "PauseDeployment") 570 571 require.Equal(1, len(w.watchers), "Deployment should still be active") 572 m.AssertCalled(t, "UpdateDeploymentStatus", mocker.MatchedBy(matcher)) 573 } 574 575 // Test unpausing a deployment that is paused 576 func 
TestWatcher_PauseDeployment_Unpause_Paused(t *testing.T) { 577 t.Parallel() 578 require := require.New(t) 579 w, m := defaultTestDeploymentWatcher(t) 580 581 // Create a job and a deployment 582 j := mock.Job() 583 d := mock.Deployment() 584 d.JobID = j.ID 585 d.Status = structs.DeploymentStatusPaused 586 require.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob") 587 require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment") 588 589 // require that we get a call to UpsertDeploymentStatusUpdate 590 matchConfig := &matchDeploymentStatusUpdateConfig{ 591 DeploymentID: d.ID, 592 Status: structs.DeploymentStatusRunning, 593 StatusDescription: structs.DeploymentStatusDescriptionRunning, 594 Eval: true, 595 } 596 matcher := matchDeploymentStatusUpdateRequest(matchConfig) 597 m.On("UpdateDeploymentStatus", mocker.MatchedBy(matcher)).Return(nil) 598 599 w.SetEnabled(true, m.state) 600 testutil.WaitForResult(func() (bool, error) { return 1 == len(w.watchers), nil }, 601 func(err error) { require.Equal(1, len(w.watchers), "Should have 1 deployment") }) 602 603 // Call PauseDeployment 604 req := &structs.DeploymentPauseRequest{ 605 DeploymentID: d.ID, 606 Pause: false, 607 } 608 var resp structs.DeploymentUpdateResponse 609 err := w.PauseDeployment(req, &resp) 610 require.Nil(err, "PauseDeployment") 611 612 require.Equal(1, len(w.watchers), "Deployment should still be active") 613 m.AssertCalled(t, "UpdateDeploymentStatus", mocker.MatchedBy(matcher)) 614 } 615 616 // Test unpausing a deployment that is running 617 func TestWatcher_PauseDeployment_Unpause_Running(t *testing.T) { 618 t.Parallel() 619 require := require.New(t) 620 w, m := defaultTestDeploymentWatcher(t) 621 622 // Create a job and a deployment 623 j := mock.Job() 624 d := mock.Deployment() 625 d.JobID = j.ID 626 require.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob") 627 require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment") 628 629 // require that we get a call to 
UpsertDeploymentStatusUpdate 630 matchConfig := &matchDeploymentStatusUpdateConfig{ 631 DeploymentID: d.ID, 632 Status: structs.DeploymentStatusRunning, 633 StatusDescription: structs.DeploymentStatusDescriptionRunning, 634 Eval: true, 635 } 636 matcher := matchDeploymentStatusUpdateRequest(matchConfig) 637 m.On("UpdateDeploymentStatus", mocker.MatchedBy(matcher)).Return(nil) 638 639 w.SetEnabled(true, m.state) 640 testutil.WaitForResult(func() (bool, error) { return 1 == len(w.watchers), nil }, 641 func(err error) { require.Equal(1, len(w.watchers), "Should have 1 deployment") }) 642 643 // Call PauseDeployment 644 req := &structs.DeploymentPauseRequest{ 645 DeploymentID: d.ID, 646 Pause: false, 647 } 648 var resp structs.DeploymentUpdateResponse 649 err := w.PauseDeployment(req, &resp) 650 require.Nil(err, "PauseDeployment") 651 652 require.Equal(1, len(w.watchers), "Deployment should still be active") 653 m.AssertCalled(t, "UpdateDeploymentStatus", mocker.MatchedBy(matcher)) 654 } 655 656 // Test failing a deployment that is running 657 func TestWatcher_FailDeployment_Running(t *testing.T) { 658 t.Parallel() 659 require := require.New(t) 660 w, m := defaultTestDeploymentWatcher(t) 661 662 // Create a job and a deployment 663 j := mock.Job() 664 d := mock.Deployment() 665 d.JobID = j.ID 666 require.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob") 667 require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment") 668 669 // require that we get a call to UpsertDeploymentStatusUpdate 670 matchConfig := &matchDeploymentStatusUpdateConfig{ 671 DeploymentID: d.ID, 672 Status: structs.DeploymentStatusFailed, 673 StatusDescription: structs.DeploymentStatusDescriptionFailedByUser, 674 Eval: true, 675 } 676 matcher := matchDeploymentStatusUpdateRequest(matchConfig) 677 m.On("UpdateDeploymentStatus", mocker.MatchedBy(matcher)).Return(nil) 678 679 w.SetEnabled(true, m.state) 680 testutil.WaitForResult(func() (bool, error) { return 1 == len(w.watchers), nil 
}, 681 func(err error) { require.Equal(1, len(w.watchers), "Should have 1 deployment") }) 682 683 // Call PauseDeployment 684 req := &structs.DeploymentFailRequest{ 685 DeploymentID: d.ID, 686 } 687 var resp structs.DeploymentUpdateResponse 688 err := w.FailDeployment(req, &resp) 689 require.Nil(err, "FailDeployment") 690 691 require.Equal(1, len(w.watchers), "Deployment should still be active") 692 m.AssertCalled(t, "UpdateDeploymentStatus", mocker.MatchedBy(matcher)) 693 } 694 695 // Tests that the watcher properly watches for allocation changes and takes the 696 // proper actions 697 func TestDeploymentWatcher_Watch_NoProgressDeadline(t *testing.T) { 698 t.Parallel() 699 require := require.New(t) 700 w, m := testDeploymentWatcher(t, 1000.0, 1*time.Millisecond) 701 702 // Create a job, alloc, and a deployment 703 j := mock.Job() 704 j.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy() 705 j.TaskGroups[0].Update.MaxParallel = 2 706 j.TaskGroups[0].Update.AutoRevert = true 707 j.TaskGroups[0].Update.ProgressDeadline = 0 708 j.Stable = true 709 d := mock.Deployment() 710 d.JobID = j.ID 711 d.TaskGroups["web"].AutoRevert = true 712 a := mock.Alloc() 713 a.DeploymentID = d.ID 714 require.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob") 715 require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment") 716 require.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs") 717 718 // Upsert the job again to get a new version 719 j2 := j.Copy() 720 // Modify the job to make its specification different 721 j2.Meta["foo"] = "bar" 722 j2.Stable = false 723 require.Nil(m.state.UpsertJob(m.nextIndex(), j2), "UpsertJob2") 724 725 // require that we will get a update allocation call only once. 
This will 726 // verify that the watcher is batching allocation changes 727 m1 := matchUpdateAllocDesiredTransitions([]string{d.ID}) 728 m.On("UpdateAllocDesiredTransition", mocker.MatchedBy(m1)).Return(nil).Once() 729 730 // require that we get a call to UpsertDeploymentStatusUpdate 731 c := &matchDeploymentStatusUpdateConfig{ 732 DeploymentID: d.ID, 733 Status: structs.DeploymentStatusFailed, 734 StatusDescription: structs.DeploymentStatusDescriptionRollback(structs.DeploymentStatusDescriptionFailedAllocations, 0), 735 JobVersion: helper.Uint64ToPtr(0), 736 Eval: true, 737 } 738 m2 := matchDeploymentStatusUpdateRequest(c) 739 m.On("UpdateDeploymentStatus", mocker.MatchedBy(m2)).Return(nil) 740 741 w.SetEnabled(true, m.state) 742 testutil.WaitForResult(func() (bool, error) { return 1 == len(w.watchers), nil }, 743 func(err error) { require.Equal(1, len(w.watchers), "Should have 1 deployment") }) 744 745 // Update the allocs health to healthy which should create an evaluation 746 for i := 0; i < 5; i++ { 747 req := &structs.ApplyDeploymentAllocHealthRequest{ 748 DeploymentAllocHealthRequest: structs.DeploymentAllocHealthRequest{ 749 DeploymentID: d.ID, 750 HealthyAllocationIDs: []string{a.ID}, 751 }, 752 } 753 require.Nil(m.state.UpdateDeploymentAllocHealth(m.nextIndex(), req), "UpsertDeploymentAllocHealth") 754 } 755 756 // Wait for there to be one eval 757 testutil.WaitForResult(func() (bool, error) { 758 ws := memdb.NewWatchSet() 759 evals, err := m.state.EvalsByJob(ws, j.Namespace, j.ID) 760 if err != nil { 761 return false, err 762 } 763 764 if l := len(evals); l != 1 { 765 return false, fmt.Errorf("Got %d evals; want 1", l) 766 } 767 768 return true, nil 769 }, func(err error) { 770 t.Fatal(err) 771 }) 772 773 // Update the allocs health to unhealthy which should create a job rollback, 774 // status update and eval 775 req2 := &structs.ApplyDeploymentAllocHealthRequest{ 776 DeploymentAllocHealthRequest: structs.DeploymentAllocHealthRequest{ 777 DeploymentID: 
d.ID, 778 UnhealthyAllocationIDs: []string{a.ID}, 779 }, 780 } 781 require.Nil(m.state.UpdateDeploymentAllocHealth(m.nextIndex(), req2), "UpsertDeploymentAllocHealth") 782 783 // Wait for there to be one eval 784 testutil.WaitForResult(func() (bool, error) { 785 ws := memdb.NewWatchSet() 786 evals, err := m.state.EvalsByJob(ws, j.Namespace, j.ID) 787 if err != nil { 788 return false, err 789 } 790 791 if l := len(evals); l != 2 { 792 return false, fmt.Errorf("Got %d evals; want 1", l) 793 } 794 795 return true, nil 796 }, func(err error) { 797 t.Fatal(err) 798 }) 799 800 m.AssertCalled(t, "UpdateAllocDesiredTransition", mocker.MatchedBy(m1)) 801 802 // After we upsert the job version will go to 2. So use this to require the 803 // original call happened. 804 c2 := &matchDeploymentStatusUpdateConfig{ 805 DeploymentID: d.ID, 806 Status: structs.DeploymentStatusFailed, 807 StatusDescription: structs.DeploymentStatusDescriptionRollback(structs.DeploymentStatusDescriptionFailedAllocations, 0), 808 JobVersion: helper.Uint64ToPtr(2), 809 Eval: true, 810 } 811 m3 := matchDeploymentStatusUpdateRequest(c2) 812 m.AssertCalled(t, "UpdateDeploymentStatus", mocker.MatchedBy(m3)) 813 testutil.WaitForResult(func() (bool, error) { return 0 == len(w.watchers), nil }, 814 func(err error) { require.Equal(0, len(w.watchers), "Should have no deployment") }) 815 } 816 817 func TestDeploymentWatcher_Watch_ProgressDeadline(t *testing.T) { 818 t.Parallel() 819 require := require.New(t) 820 w, m := testDeploymentWatcher(t, 1000.0, 1*time.Millisecond) 821 822 // Create a job, alloc, and a deployment 823 j := mock.Job() 824 j.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy() 825 j.TaskGroups[0].Update.MaxParallel = 2 826 j.TaskGroups[0].Update.ProgressDeadline = 500 * time.Millisecond 827 j.Stable = true 828 d := mock.Deployment() 829 d.JobID = j.ID 830 d.TaskGroups["web"].ProgressDeadline = 500 * time.Millisecond 831 a := mock.Alloc() 832 now := time.Now() 833 a.CreateTime = 
now.UnixNano() 834 a.ModifyTime = now.UnixNano() 835 a.DeploymentID = d.ID 836 require.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob") 837 require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment") 838 require.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs") 839 840 // require that we get a call to UpsertDeploymentStatusUpdate 841 c := &matchDeploymentStatusUpdateConfig{ 842 DeploymentID: d.ID, 843 Status: structs.DeploymentStatusFailed, 844 StatusDescription: structs.DeploymentStatusDescriptionProgressDeadline, 845 Eval: true, 846 } 847 m2 := matchDeploymentStatusUpdateRequest(c) 848 m.On("UpdateDeploymentStatus", mocker.MatchedBy(m2)).Return(nil) 849 850 w.SetEnabled(true, m.state) 851 testutil.WaitForResult(func() (bool, error) { return 1 == len(w.watchers), nil }, 852 func(err error) { require.Equal(1, len(w.watchers), "Should have 1 deployment") }) 853 854 // Update the alloc to be unhealthy and require that nothing happens. 855 a2 := a.Copy() 856 a2.DeploymentStatus = &structs.AllocDeploymentStatus{ 857 Healthy: helper.BoolToPtr(false), 858 Timestamp: now, 859 } 860 require.Nil(m.state.UpdateAllocsFromClient(100, []*structs.Allocation{a2})) 861 862 // Wait for the deployment to be failed 863 testutil.WaitForResult(func() (bool, error) { 864 d, err := m.state.DeploymentByID(nil, d.ID) 865 if err != nil { 866 return false, err 867 } 868 869 return d.Status == structs.DeploymentStatusFailed, fmt.Errorf("bad status %q", d.Status) 870 }, func(err error) { 871 t.Fatal(err) 872 }) 873 874 // require there are is only one evaluation 875 testutil.WaitForResult(func() (bool, error) { 876 ws := memdb.NewWatchSet() 877 evals, err := m.state.EvalsByJob(ws, j.Namespace, j.ID) 878 if err != nil { 879 return false, err 880 } 881 882 if l := len(evals); l != 1 { 883 return false, fmt.Errorf("Got %d evals; want 1", l) 884 } 885 886 return true, nil 887 }, func(err error) { 888 t.Fatal(err) 889 }) 890 } 891 892 // Test 
that progress deadline handling works when there are multiple groups 893 func TestDeploymentWatcher_ProgressCutoff(t *testing.T) { 894 t.Parallel() 895 require := require.New(t) 896 w, m := testDeploymentWatcher(t, 1000.0, 1*time.Millisecond) 897 898 // Create a job, alloc, and a deployment 899 j := mock.Job() 900 j.TaskGroups[0].Count = 1 901 j.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy() 902 j.TaskGroups[0].Update.ProgressDeadline = 500 * time.Millisecond 903 j.TaskGroups = append(j.TaskGroups, j.TaskGroups[0].Copy()) 904 j.TaskGroups[1].Name = "foo" 905 j.TaskGroups[1].Update.ProgressDeadline = 1 * time.Second 906 j.Stable = true 907 908 d := mock.Deployment() 909 d.JobID = j.ID 910 d.TaskGroups["web"].DesiredTotal = 1 911 d.TaskGroups["foo"] = d.TaskGroups["web"].Copy() 912 d.TaskGroups["web"].ProgressDeadline = 500 * time.Millisecond 913 d.TaskGroups["foo"].ProgressDeadline = 1 * time.Second 914 915 a := mock.Alloc() 916 now := time.Now() 917 a.CreateTime = now.UnixNano() 918 a.ModifyTime = now.UnixNano() 919 a.DeploymentID = d.ID 920 921 a2 := mock.Alloc() 922 a2.TaskGroup = "foo" 923 a2.CreateTime = now.UnixNano() 924 a2.ModifyTime = now.UnixNano() 925 a2.DeploymentID = d.ID 926 927 require.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob") 928 require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment") 929 require.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a, a2}), "UpsertAllocs") 930 931 // We may get an update for the desired transition. 
932 m1 := matchUpdateAllocDesiredTransitions([]string{d.ID}) 933 m.On("UpdateAllocDesiredTransition", mocker.MatchedBy(m1)).Return(nil).Once() 934 935 w.SetEnabled(true, m.state) 936 testutil.WaitForResult(func() (bool, error) { return 1 == len(w.watchers), nil }, 937 func(err error) { require.Equal(1, len(w.watchers), "Should have 1 deployment") }) 938 939 watcher, err := w.getOrCreateWatcher(d.ID) 940 require.NoError(err) 941 require.NotNil(watcher) 942 943 d1, err := m.state.DeploymentByID(nil, d.ID) 944 require.NoError(err) 945 946 done := watcher.doneGroups(d1) 947 require.Contains(done, "web") 948 require.False(done["web"]) 949 require.Contains(done, "foo") 950 require.False(done["foo"]) 951 952 cutoff1 := watcher.getDeploymentProgressCutoff(d1) 953 require.False(cutoff1.IsZero()) 954 955 // Update the first allocation to be healthy 956 a3 := a.Copy() 957 a3.DeploymentStatus = &structs.AllocDeploymentStatus{Healthy: helper.BoolToPtr(true)} 958 require.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a3}), "UpsertAllocs") 959 960 // Get the updated deployment 961 d2, err := m.state.DeploymentByID(nil, d.ID) 962 require.NoError(err) 963 964 done = watcher.doneGroups(d2) 965 require.Contains(done, "web") 966 require.True(done["web"]) 967 require.Contains(done, "foo") 968 require.False(done["foo"]) 969 970 cutoff2 := watcher.getDeploymentProgressCutoff(d2) 971 require.False(cutoff2.IsZero()) 972 require.True(cutoff1.UnixNano() < cutoff2.UnixNano()) 973 974 // Update the second allocation to be healthy 975 a4 := a2.Copy() 976 a4.DeploymentStatus = &structs.AllocDeploymentStatus{Healthy: helper.BoolToPtr(true)} 977 require.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a4}), "UpsertAllocs") 978 979 // Get the updated deployment 980 d3, err := m.state.DeploymentByID(nil, d.ID) 981 require.NoError(err) 982 983 done = watcher.doneGroups(d3) 984 require.Contains(done, "web") 985 require.True(done["web"]) 986 require.Contains(done, "foo") 987 
require.True(done["foo"]) 988 989 cutoff3 := watcher.getDeploymentProgressCutoff(d2) 990 require.True(cutoff3.IsZero()) 991 } 992 993 // Test that we will allow the progress deadline to be reached when the canaries 994 // are healthy but we haven't promoted 995 func TestDeploymentWatcher_Watch_ProgressDeadline_Canaries(t *testing.T) { 996 t.Parallel() 997 require := require.New(t) 998 w, m := testDeploymentWatcher(t, 1000.0, 1*time.Millisecond) 999 1000 // Create a job, alloc, and a deployment 1001 j := mock.Job() 1002 j.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy() 1003 j.TaskGroups[0].Update.Canary = 1 1004 j.TaskGroups[0].Update.MaxParallel = 1 1005 j.TaskGroups[0].Update.ProgressDeadline = 500 * time.Millisecond 1006 j.Stable = true 1007 d := mock.Deployment() 1008 d.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion 1009 d.JobID = j.ID 1010 d.TaskGroups["web"].ProgressDeadline = 500 * time.Millisecond 1011 d.TaskGroups["web"].DesiredCanaries = 1 1012 a := mock.Alloc() 1013 now := time.Now() 1014 a.CreateTime = now.UnixNano() 1015 a.ModifyTime = now.UnixNano() 1016 a.DeploymentID = d.ID 1017 require.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob") 1018 require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment") 1019 require.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs") 1020 1021 // require that we will get a createEvaluation call only once. This will 1022 // verify that the watcher is batching allocation changes 1023 m1 := matchUpdateAllocDesiredTransitions([]string{d.ID}) 1024 m.On("UpdateAllocDesiredTransition", mocker.MatchedBy(m1)).Return(nil).Once() 1025 1026 w.SetEnabled(true, m.state) 1027 testutil.WaitForResult(func() (bool, error) { return 1 == len(w.watchers), nil }, 1028 func(err error) { require.Equal(1, len(w.watchers), "Should have 1 deployment") }) 1029 1030 // Update the alloc to be unhealthy and require that nothing happens. 
1031 a2 := a.Copy() 1032 a2.DeploymentStatus = &structs.AllocDeploymentStatus{ 1033 Healthy: helper.BoolToPtr(true), 1034 Timestamp: now, 1035 } 1036 require.Nil(m.state.UpdateAllocsFromClient(m.nextIndex(), []*structs.Allocation{a2})) 1037 1038 // Wait for the deployment to cross the deadline 1039 dout, err := m.state.DeploymentByID(nil, d.ID) 1040 require.NoError(err) 1041 require.NotNil(dout) 1042 state := dout.TaskGroups["web"] 1043 require.NotNil(state) 1044 time.Sleep(state.RequireProgressBy.Add(time.Second).Sub(now)) 1045 1046 // Require the deployment is still running 1047 dout, err = m.state.DeploymentByID(nil, d.ID) 1048 require.NoError(err) 1049 require.NotNil(dout) 1050 require.Equal(structs.DeploymentStatusRunning, dout.Status) 1051 require.Equal(structs.DeploymentStatusDescriptionRunningNeedsPromotion, dout.StatusDescription) 1052 1053 // require there are is only one evaluation 1054 testutil.WaitForResult(func() (bool, error) { 1055 ws := memdb.NewWatchSet() 1056 evals, err := m.state.EvalsByJob(ws, j.Namespace, j.ID) 1057 if err != nil { 1058 return false, err 1059 } 1060 1061 if l := len(evals); l != 1 { 1062 return false, fmt.Errorf("Got %d evals; want 1", l) 1063 } 1064 1065 return true, nil 1066 }, func(err error) { 1067 t.Fatal(err) 1068 }) 1069 } 1070 1071 // Test that a promoted deployment with alloc healthy updates create 1072 // evals to move the deployment forward 1073 func TestDeploymentWatcher_PromotedCanary_UpdatedAllocs(t *testing.T) { 1074 t.Parallel() 1075 require := require.New(t) 1076 w, m := testDeploymentWatcher(t, 1000.0, 1*time.Millisecond) 1077 1078 // Create a job, alloc, and a deployment 1079 j := mock.Job() 1080 j.TaskGroups[0].Count = 2 1081 j.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy() 1082 j.TaskGroups[0].Update.Canary = 1 1083 j.TaskGroups[0].Update.MaxParallel = 1 1084 j.TaskGroups[0].Update.ProgressDeadline = 50 * time.Millisecond 1085 j.Stable = true 1086 1087 d := mock.Deployment() 1088 
d.TaskGroups["web"].DesiredTotal = 2 1089 d.TaskGroups["web"].DesiredCanaries = 1 1090 d.TaskGroups["web"].HealthyAllocs = 1 1091 d.StatusDescription = structs.DeploymentStatusDescriptionRunning 1092 d.JobID = j.ID 1093 d.TaskGroups["web"].ProgressDeadline = 50 * time.Millisecond 1094 d.TaskGroups["web"].RequireProgressBy = time.Now().Add(50 * time.Millisecond) 1095 1096 a := mock.Alloc() 1097 now := time.Now() 1098 a.CreateTime = now.UnixNano() 1099 a.ModifyTime = now.UnixNano() 1100 a.DeploymentID = d.ID 1101 a.DeploymentStatus = &structs.AllocDeploymentStatus{ 1102 Healthy: helper.BoolToPtr(true), 1103 Timestamp: now, 1104 } 1105 require.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob") 1106 require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment") 1107 require.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs") 1108 1109 w.SetEnabled(true, m.state) 1110 testutil.WaitForResult(func() (bool, error) { return 1 == len(w.watchers), nil }, 1111 func(err error) { require.Equal(1, len(w.watchers), "Should have 1 deployment") }) 1112 1113 m1 := matchUpdateAllocDesiredTransitions([]string{d.ID}) 1114 m.On("UpdateAllocDesiredTransition", mocker.MatchedBy(m1)).Return(nil).Twice() 1115 1116 // Create another alloc 1117 a2 := a.Copy() 1118 a2.ID = uuid.Generate() 1119 now = time.Now() 1120 a2.CreateTime = now.UnixNano() 1121 a2.ModifyTime = now.UnixNano() 1122 a2.DeploymentStatus = &structs.AllocDeploymentStatus{ 1123 Healthy: helper.BoolToPtr(true), 1124 Timestamp: now, 1125 } 1126 d.TaskGroups["web"].RequireProgressBy = time.Now().Add(2 * time.Second) 1127 require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment") 1128 // Wait until batch eval period passes before updating another alloc 1129 time.Sleep(1 * time.Second) 1130 require.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a2}), "UpsertAllocs") 1131 1132 // Wait for the deployment to cross the deadline 1133 dout, err := 
m.state.DeploymentByID(nil, d.ID) 1134 require.NoError(err) 1135 require.NotNil(dout) 1136 state := dout.TaskGroups["web"] 1137 require.NotNil(state) 1138 time.Sleep(state.RequireProgressBy.Add(time.Second).Sub(now)) 1139 1140 // There should be two evals 1141 testutil.WaitForResult(func() (bool, error) { 1142 ws := memdb.NewWatchSet() 1143 evals, err := m.state.EvalsByJob(ws, j.Namespace, j.ID) 1144 if err != nil { 1145 return false, err 1146 } 1147 1148 if l := len(evals); l != 2 { 1149 return false, fmt.Errorf("Got %d evals; want 2", l) 1150 } 1151 1152 return true, nil 1153 }, func(err error) { 1154 t.Fatal(err) 1155 }) 1156 } 1157 1158 // Test scenario where deployment initially has no progress deadline 1159 // After the deployment is updated, a failed alloc's DesiredTransition should be set 1160 func TestDeploymentWatcher_Watch_StartWithoutProgressDeadline(t *testing.T) { 1161 t.Parallel() 1162 require := require.New(t) 1163 w, m := testDeploymentWatcher(t, 1000.0, 1*time.Millisecond) 1164 1165 // Create a job, and a deployment 1166 j := mock.Job() 1167 j.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy() 1168 j.TaskGroups[0].Update.MaxParallel = 2 1169 j.TaskGroups[0].Update.ProgressDeadline = 500 * time.Millisecond 1170 j.Stable = true 1171 d := mock.Deployment() 1172 d.JobID = j.ID 1173 1174 require.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob") 1175 require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment") 1176 1177 a := mock.Alloc() 1178 a.CreateTime = time.Now().UnixNano() 1179 a.DeploymentID = d.ID 1180 1181 require.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs") 1182 1183 d.TaskGroups["web"].ProgressDeadline = 500 * time.Millisecond 1184 // Update the deployment with a progress deadline 1185 require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment") 1186 1187 // Match on DesiredTransition set to Reschedule for the failed alloc 1188 m1 := 
matchUpdateAllocDesiredTransitionReschedule([]string{a.ID}) 1189 m.On("UpdateAllocDesiredTransition", mocker.MatchedBy(m1)).Return(nil).Once() 1190 1191 w.SetEnabled(true, m.state) 1192 testutil.WaitForResult(func() (bool, error) { return 1 == len(w.watchers), nil }, 1193 func(err error) { require.Equal(1, len(w.watchers), "Should have 1 deployment") }) 1194 1195 // Update the alloc to be unhealthy 1196 a2 := a.Copy() 1197 a2.DeploymentStatus = &structs.AllocDeploymentStatus{ 1198 Healthy: helper.BoolToPtr(false), 1199 Timestamp: time.Now(), 1200 } 1201 require.Nil(m.state.UpdateAllocsFromClient(m.nextIndex(), []*structs.Allocation{a2})) 1202 1203 // Wait for the alloc's DesiredState to set reschedule 1204 testutil.WaitForResult(func() (bool, error) { 1205 a, err := m.state.AllocByID(nil, a.ID) 1206 if err != nil { 1207 return false, err 1208 } 1209 dt := a.DesiredTransition 1210 shouldReschedule := dt.Reschedule != nil && *dt.Reschedule 1211 return shouldReschedule, fmt.Errorf("Desired Transition Reschedule should be set but got %v", shouldReschedule) 1212 }, func(err error) { 1213 t.Fatal(err) 1214 }) 1215 }

// Tests that the watcher fails rollback when the spec hasn't changed.
//
// The job is stored twice (the second copy only flips Stable), the deployment
// uses auto-revert with no progress deadline, and the allocation is reported
// healthy several times and then unhealthy. The mock is set up to expect a
// status update that fails the deployment with the "rollback no-op"
// description and no job version, and the test waits for one evaluation after
// the healthy updates and two after the unhealthy one.
func TestDeploymentWatcher_RollbackFailed(t *testing.T) {
	t.Parallel()
	require := require.New(t)
	w, m := testDeploymentWatcher(t, 1000.0, 1*time.Millisecond)

	// Create a job, alloc, and a deployment
	j := mock.Job()
	j.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
	j.TaskGroups[0].Update.MaxParallel = 2
	j.TaskGroups[0].Update.AutoRevert = true
	j.TaskGroups[0].Update.ProgressDeadline = 0
	j.Stable = true
	d := mock.Deployment()
	d.JobID = j.ID
	d.TaskGroups["web"].AutoRevert = true
	a := mock.Alloc()
	a.DeploymentID = d.ID
	require.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob")
	require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
	require.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs")

	// Upsert the job again to get a new version. Only the Stable flag is
	// flipped on the copy; everything else is left as it was.
	j2 := j.Copy()
	j2.Stable = false
	require.Nil(m.state.UpsertJob(m.nextIndex(), j2), "UpsertJob2")

	// require that we will get a createEvaluation call only once. This will
	// verify that the watcher is batching allocation changes
	m1 := matchUpdateAllocDesiredTransitions([]string{d.ID})
	m.On("UpdateAllocDesiredTransition", mocker.MatchedBy(m1)).Return(nil).Once()

	// require that we get a call to UpsertDeploymentStatusUpdate with roll back failed as the status
	c := &matchDeploymentStatusUpdateConfig{
		DeploymentID:      d.ID,
		Status:            structs.DeploymentStatusFailed,
		StatusDescription: structs.DeploymentStatusDescriptionRollbackNoop(structs.DeploymentStatusDescriptionFailedAllocations, 0),
		JobVersion:        nil,
		Eval:              true,
	}
	m2 := matchDeploymentStatusUpdateRequest(c)
	m.On("UpdateDeploymentStatus", mocker.MatchedBy(m2)).Return(nil)

	w.SetEnabled(true, m.state)
	testutil.WaitForResult(func() (bool, error) { return 1 == len(w.watchers), nil },
		func(err error) { require.Equal(1, len(w.watchers), "Should have 1 deployment") })

	// Update the allocs health to healthy which should create an evaluation.
	// The same update is applied five times; only one evaluation is expected
	// afterwards (checked below).
	for i := 0; i < 5; i++ {
		req := &structs.ApplyDeploymentAllocHealthRequest{
			DeploymentAllocHealthRequest: structs.DeploymentAllocHealthRequest{
				DeploymentID:         d.ID,
				HealthyAllocationIDs: []string{a.ID},
			},
		}
		require.Nil(m.state.UpdateDeploymentAllocHealth(m.nextIndex(), req), "UpsertDeploymentAllocHealth")
	}

	// Wait for there to be one eval
	testutil.WaitForResult(func() (bool, error) {
		ws := memdb.NewWatchSet()
		evals, err := m.state.EvalsByJob(ws, j.Namespace, j.ID)
		if err != nil {
			return false, err
		}

		if l := len(evals); l != 1 {
			return false, fmt.Errorf("Got %d evals; want 1", l)
		}

		return true, nil
	}, func(err error) {
		t.Fatal(err)
	})

	// Update the allocs health to unhealthy which will cause attempting a rollback,
	// fail in that step, do status update and eval
	req2 := &structs.ApplyDeploymentAllocHealthRequest{
		DeploymentAllocHealthRequest: structs.DeploymentAllocHealthRequest{
			DeploymentID:           d.ID,
			UnhealthyAllocationIDs: []string{a.ID},
		},
	}
	require.Nil(m.state.UpdateDeploymentAllocHealth(m.nextIndex(), req2), "UpsertDeploymentAllocHealth")

	// Wait for there to be two evals: the earlier one plus the one created
	// by the failed rollback.
	testutil.WaitForResult(func() (bool, error) {
		ws := memdb.NewWatchSet()
		evals, err := m.state.EvalsByJob(ws, j.Namespace, j.ID)
		if err != nil {
			return false, err
		}

		if l := len(evals); l != 2 {
			return false, fmt.Errorf("Got %d evals; want 2", l)
		}

		return true, nil
	}, func(err error) {
		t.Fatal(err)
	})

	m.AssertCalled(t, "UpdateAllocDesiredTransition", mocker.MatchedBy(m1))

	// verify that the job version hasn't changed after upsert. The lookup
	// result is checked instead of being silently discarded.
	// NOTE(review): the version assertion below still inspects the local j,
	// not the job read back from state; consider asserting on jout.Version
	// once the expected stored version is confirmed.
	jout, err := m.state.JobByID(nil, structs.DefaultNamespace, j.ID)
	require.NoError(err)
	require.NotNil(jout)
	require.Equal(uint64(0), j.Version, "Expected job version 0 but got %d", j.Version)
}

1327 // Test allocation updates and evaluation creation is batched between watchers 1328 func TestWatcher_BatchAllocUpdates(t *testing.T) { 1329 t.Parallel() 1330 require := require.New(t) 1331 w, m := testDeploymentWatcher(t, 1000.0, 1*time.Second) 1332 1333 // Create a job, alloc, for two deployments 1334 j1 := mock.Job() 1335 j1.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy() 1336 j1.TaskGroups[0].Update.ProgressDeadline = 0 1337 d1 := mock.Deployment() 1338 d1.JobID = j1.ID 1339 a1 := mock.Alloc() 1340 a1.Job = j1 1341 a1.JobID = j1.ID 1342 a1.DeploymentID = d1.ID 1343 1344 j2 :=
mock.Job() 1345 j2.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy() 1346 j2.TaskGroups[0].Update.ProgressDeadline = 0 1347 d2 := mock.Deployment() 1348 d2.JobID = j2.ID 1349 a2 := mock.Alloc() 1350 a2.Job = j2 1351 a2.JobID = j2.ID 1352 a2.DeploymentID = d2.ID 1353 1354 require.Nil(m.state.UpsertJob(m.nextIndex(), j1), "UpsertJob") 1355 require.Nil(m.state.UpsertJob(m.nextIndex(), j2), "UpsertJob") 1356 require.Nil(m.state.UpsertDeployment(m.nextIndex(), d1), "UpsertDeployment") 1357 require.Nil(m.state.UpsertDeployment(m.nextIndex(), d2), "UpsertDeployment") 1358 require.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a1}), "UpsertAllocs") 1359 require.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a2}), "UpsertAllocs") 1360 1361 // require that we will get a createEvaluation call only once and it contains 1362 // both deployments. This will verify that the watcher is batching 1363 // allocation changes 1364 m1 := matchUpdateAllocDesiredTransitions([]string{d1.ID, d2.ID}) 1365 m.On("UpdateAllocDesiredTransition", mocker.MatchedBy(m1)).Return(nil).Once() 1366 1367 w.SetEnabled(true, m.state) 1368 testutil.WaitForResult(func() (bool, error) { return 2 == len(w.watchers), nil }, 1369 func(err error) { require.Equal(2, len(w.watchers), "Should have 2 deployment") }) 1370 1371 // Update the allocs health to healthy which should create an evaluation 1372 req := &structs.ApplyDeploymentAllocHealthRequest{ 1373 DeploymentAllocHealthRequest: structs.DeploymentAllocHealthRequest{ 1374 DeploymentID: d1.ID, 1375 HealthyAllocationIDs: []string{a1.ID}, 1376 }, 1377 } 1378 require.Nil(m.state.UpdateDeploymentAllocHealth(m.nextIndex(), req), "UpsertDeploymentAllocHealth") 1379 1380 req2 := &structs.ApplyDeploymentAllocHealthRequest{ 1381 DeploymentAllocHealthRequest: structs.DeploymentAllocHealthRequest{ 1382 DeploymentID: d2.ID, 1383 HealthyAllocationIDs: []string{a2.ID}, 1384 }, 1385 } 1386 
require.Nil(m.state.UpdateDeploymentAllocHealth(m.nextIndex(), req2), "UpsertDeploymentAllocHealth") 1387 1388 // Wait for there to be one eval for each job 1389 testutil.WaitForResult(func() (bool, error) { 1390 ws := memdb.NewWatchSet() 1391 evals1, err := m.state.EvalsByJob(ws, j1.Namespace, j1.ID) 1392 if err != nil { 1393 return false, err 1394 } 1395 1396 evals2, err := m.state.EvalsByJob(ws, j2.Namespace, j2.ID) 1397 if err != nil { 1398 return false, err 1399 } 1400 1401 if l := len(evals1); l != 1 { 1402 return false, fmt.Errorf("Got %d evals for job %v; want 1", l, j1.ID) 1403 } 1404 1405 if l := len(evals2); l != 1 { 1406 return false, fmt.Errorf("Got %d evals for job 2; want 1", l) 1407 } 1408 1409 return true, nil 1410 }, func(err error) { 1411 t.Fatal(err) 1412 }) 1413 1414 m.AssertCalled(t, "UpdateAllocDesiredTransition", mocker.MatchedBy(m1)) 1415 testutil.WaitForResult(func() (bool, error) { return 2 == len(w.watchers), nil }, 1416 func(err error) { require.Equal(2, len(w.watchers), "Should have 2 deployment") }) 1417 }