github.com/manicqin/nomad@v0.9.5/nomad/deploymentwatcher/deployments_watcher_test.go (about) 1 package deploymentwatcher 2 3 import ( 4 "fmt" 5 "testing" 6 "time" 7 8 memdb "github.com/hashicorp/go-memdb" 9 "github.com/hashicorp/nomad/helper" 10 "github.com/hashicorp/nomad/helper/testlog" 11 "github.com/hashicorp/nomad/helper/uuid" 12 "github.com/hashicorp/nomad/nomad/mock" 13 "github.com/hashicorp/nomad/nomad/structs" 14 "github.com/hashicorp/nomad/testutil" 15 "github.com/stretchr/testify/assert" 16 mocker "github.com/stretchr/testify/mock" 17 "github.com/stretchr/testify/require" 18 ) 19 20 func testDeploymentWatcher(t *testing.T, qps float64, batchDur time.Duration) (*Watcher, *mockBackend) { 21 m := newMockBackend(t) 22 w := NewDeploymentsWatcher(testlog.HCLogger(t), m, qps, batchDur) 23 return w, m 24 } 25 26 func defaultTestDeploymentWatcher(t *testing.T) (*Watcher, *mockBackend) { 27 return testDeploymentWatcher(t, LimitStateQueriesPerSecond, CrossDeploymentUpdateBatchDuration) 28 } 29 30 // Tests that the watcher properly watches for deployments and reconciles them 31 func TestWatcher_WatchDeployments(t *testing.T) { 32 t.Parallel() 33 require := require.New(t) 34 w, m := defaultTestDeploymentWatcher(t) 35 36 // Create three jobs 37 j1, j2, j3 := mock.Job(), mock.Job(), mock.Job() 38 require.Nil(m.state.UpsertJob(100, j1)) 39 require.Nil(m.state.UpsertJob(101, j2)) 40 require.Nil(m.state.UpsertJob(102, j3)) 41 42 // Create three deployments all running 43 d1, d2, d3 := mock.Deployment(), mock.Deployment(), mock.Deployment() 44 d1.JobID = j1.ID 45 d2.JobID = j2.ID 46 d3.JobID = j3.ID 47 48 // Upsert the first deployment 49 require.Nil(m.state.UpsertDeployment(103, d1)) 50 51 // Next list 3 52 block1 := make(chan time.Time) 53 go func() { 54 <-block1 55 require.Nil(m.state.UpsertDeployment(104, d2)) 56 require.Nil(m.state.UpsertDeployment(105, d3)) 57 }() 58 59 //// Next list 3 but have one be terminal 60 block2 := make(chan time.Time) 61 d3terminal := d3.Copy() 62 d3terminal.Status = structs.DeploymentStatusFailed 63 go func() { 64 <-block2 65 require.Nil(m.state.UpsertDeployment(106, d3terminal)) 66 }() 67 68 w.SetEnabled(true, m.state) 69 testutil.WaitForResult(func() (bool, error) { return 1 == watchersCount(w), nil }, 70 func(err error) { require.Equal(1, watchersCount(w), "1 deployment returned") }) 71 72 close(block1) 73 testutil.WaitForResult(func() (bool, error) { return 3 == watchersCount(w), nil }, 74 func(err error) { require.Equal(3, watchersCount(w), "3 deployment returned") }) 75 76 close(block2) 77 testutil.WaitForResult(func() (bool, error) { return 2 == watchersCount(w), nil }, 78 func(err error) { require.Equal(3, watchersCount(w), "3 deployment returned - 1 terminal") }) 79 } 80 81 // Tests that calls against an unknown deployment fail 82 func TestWatcher_UnknownDeployment(t *testing.T) { 83 t.Parallel() 84 assert := assert.New(t) 85 require := require.New(t) 86 w, m := defaultTestDeploymentWatcher(t) 87 w.SetEnabled(true, m.state) 88 89 // The expected error is that it should be an unknown deployment 90 dID := uuid.Generate() 91 expected := fmt.Sprintf("unknown deployment %q", dID) 92 93 // Request setting the health against an unknown deployment 94 req := &structs.DeploymentAllocHealthRequest{ 95 DeploymentID: dID, 96 HealthyAllocationIDs: []string{uuid.Generate()}, 97 } 98 var resp structs.DeploymentUpdateResponse 99 err := w.SetAllocHealth(req, &resp) 100 if assert.NotNil(err, "should have error for unknown deployment") { 101 require.Contains(err.Error(), expected) 102 } 103 104 // Request promoting against an unknown deployment 105 req2 := &structs.DeploymentPromoteRequest{ 106 DeploymentID: dID, 107 All: true, 108 } 109 err = w.PromoteDeployment(req2, &resp) 110 if assert.NotNil(err, "should have error for unknown deployment") { 111 require.Contains(err.Error(), expected) 112 } 113 114 // Request pausing against an unknown deployment 115 req3 := &structs.DeploymentPauseRequest{ 116 DeploymentID: dID, 117 Pause: true, 118 } 119 err = w.PauseDeployment(req3, &resp) 120 if assert.NotNil(err, "should have error for unknown deployment") { 121 require.Contains(err.Error(), expected) 122 } 123 124 // Request failing against an unknown deployment 125 req4 := &structs.DeploymentFailRequest{ 126 DeploymentID: dID, 127 } 128 err = w.FailDeployment(req4, &resp) 129 if assert.NotNil(err, "should have error for unknown deployment") { 130 require.Contains(err.Error(), expected) 131 } 132 } 133 134 // Test setting an unknown allocation's health 135 func TestWatcher_SetAllocHealth_Unknown(t *testing.T) { 136 t.Parallel() 137 assert := assert.New(t) 138 require := require.New(t) 139 w, m := defaultTestDeploymentWatcher(t) 140 141 // Create a job, and a deployment 142 j := mock.Job() 143 d := mock.Deployment() 144 d.JobID = j.ID 145 require.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob") 146 require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment") 147 148 // require that we get a call to UpsertDeploymentAllocHealth 149 a := mock.Alloc() 150 matchConfig := &matchDeploymentAllocHealthRequestConfig{ 151 DeploymentID: d.ID, 152 Healthy: []string{a.ID}, 153 Eval: true, 154 } 155 matcher := matchDeploymentAllocHealthRequest(matchConfig) 156 m.On("UpdateDeploymentAllocHealth", mocker.MatchedBy(matcher)).Return(nil) 157 158 w.SetEnabled(true, m.state) 159 testutil.WaitForResult(func() (bool, error) { return 1 == watchersCount(w), nil }, 160 func(err error) { require.Equal(1, watchersCount(w), "Should have 1 deployment") }) 161 162 // Call SetAllocHealth 163 req := &structs.DeploymentAllocHealthRequest{ 164 DeploymentID: d.ID, 165 HealthyAllocationIDs: []string{a.ID}, 166 } 167 var resp structs.DeploymentUpdateResponse 168 err := w.SetAllocHealth(req, &resp) 169 if assert.NotNil(err, "Set health of unknown allocation") { 170 require.Contains(err.Error(), "unknown") 171 } 172 require.Equal(1, watchersCount(w), "Deployment should still be active") 173 } 174 175 // Test setting allocation health 176 func TestWatcher_SetAllocHealth_Healthy(t *testing.T) { 177 t.Parallel() 178 require := require.New(t) 179 w, m := defaultTestDeploymentWatcher(t) 180 181 // Create a job, alloc, and a deployment 182 j := mock.Job() 183 d := mock.Deployment() 184 d.JobID = j.ID 185 a := mock.Alloc() 186 a.DeploymentID = d.ID 187 require.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob") 188 require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment") 189 require.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs") 190 191 // require that we get a call to UpsertDeploymentAllocHealth 192 matchConfig := &matchDeploymentAllocHealthRequestConfig{ 193 DeploymentID: d.ID, 194 Healthy: []string{a.ID}, 195 Eval: true, 196 } 197 matcher := matchDeploymentAllocHealthRequest(matchConfig) 198 m.On("UpdateDeploymentAllocHealth", mocker.MatchedBy(matcher)).Return(nil) 199 200 w.SetEnabled(true, m.state) 201 testutil.WaitForResult(func() (bool, error) { return 1 == watchersCount(w), nil }, 202 func(err error) { require.Equal(1, watchersCount(w), "Should have 1 deployment") }) 203 204 // Call SetAllocHealth 205 req := &structs.DeploymentAllocHealthRequest{ 206 DeploymentID: d.ID, 207 HealthyAllocationIDs: []string{a.ID}, 208 } 209 var resp structs.DeploymentUpdateResponse 210 err := w.SetAllocHealth(req, &resp) 211 require.Nil(err, "SetAllocHealth") 212 require.Equal(1, watchersCount(w), "Deployment should still be active") 213 m.AssertCalled(t, "UpdateDeploymentAllocHealth", mocker.MatchedBy(matcher)) 214 } 215 216 // Test setting allocation unhealthy 217 func TestWatcher_SetAllocHealth_Unhealthy(t *testing.T) { 218 t.Parallel() 219 require := require.New(t) 220 w, m := defaultTestDeploymentWatcher(t) 221 222 // Create a job, alloc, and a deployment 223 j := mock.Job() 224 d := mock.Deployment() 225 d.JobID = j.ID 226 a := mock.Alloc() 227 a.DeploymentID = d.ID 228 require.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob") 229 require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment") 230 require.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs") 231 232 // require that we get a call to UpsertDeploymentAllocHealth 233 matchConfig := &matchDeploymentAllocHealthRequestConfig{ 234 DeploymentID: d.ID, 235 Unhealthy: []string{a.ID}, 236 Eval: true, 237 DeploymentUpdate: &structs.DeploymentStatusUpdate{ 238 DeploymentID: d.ID, 239 Status: structs.DeploymentStatusFailed, 240 StatusDescription: structs.DeploymentStatusDescriptionFailedAllocations, 241 }, 242 } 243 matcher := matchDeploymentAllocHealthRequest(matchConfig) 244 m.On("UpdateDeploymentAllocHealth", mocker.MatchedBy(matcher)).Return(nil) 245 246 w.SetEnabled(true, m.state) 247 testutil.WaitForResult(func() (bool, error) { return 1 == watchersCount(w), nil }, 248 func(err error) { require.Equal(1, watchersCount(w), "Should have 1 deployment") }) 249 250 // Call SetAllocHealth 251 req := &structs.DeploymentAllocHealthRequest{ 252 DeploymentID: d.ID, 253 UnhealthyAllocationIDs: []string{a.ID}, 254 } 255 var resp structs.DeploymentUpdateResponse 256 err := w.SetAllocHealth(req, &resp) 257 require.Nil(err, "SetAllocHealth") 258 259 testutil.WaitForResult(func() (bool, error) { return 0 == watchersCount(w), nil }, 260 func(err error) { require.Equal(0, watchersCount(w), "Should have no deployment") }) 261 m.AssertNumberOfCalls(t, "UpdateDeploymentAllocHealth", 1) 262 } 263 264 // Test setting allocation unhealthy and that there should be a rollback 265 func TestWatcher_SetAllocHealth_Unhealthy_Rollback(t *testing.T) { 266 t.Parallel() 267 require := require.New(t) 268 w, m := defaultTestDeploymentWatcher(t) 269 270 // Create a job, alloc, and a deployment 271 j := mock.Job() 272 j.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy() 273 j.TaskGroups[0].Update.MaxParallel = 2 274 j.TaskGroups[0].Update.AutoRevert = true 275 j.TaskGroups[0].Update.ProgressDeadline = 0 276 j.Stable = true 277 d := mock.Deployment() 278 d.JobID = j.ID 279 d.TaskGroups["web"].AutoRevert = true 280 a := mock.Alloc() 281 a.DeploymentID = d.ID 282 require.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob") 283 require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment") 284 require.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs") 285 286 // Upsert the job again to get a new version 287 j2 := j.Copy() 288 j2.Stable = false 289 // Modify the job to make its specification different 290 j2.Meta["foo"] = "bar" 291 292 require.Nil(m.state.UpsertJob(m.nextIndex(), j2), "UpsertJob2") 293 294 // require that we get a call to UpsertDeploymentAllocHealth 295 matchConfig := &matchDeploymentAllocHealthRequestConfig{ 296 DeploymentID: d.ID, 297 Unhealthy: []string{a.ID}, 298 Eval: true, 299 DeploymentUpdate: &structs.DeploymentStatusUpdate{ 300 DeploymentID: d.ID, 301 Status: structs.DeploymentStatusFailed, 302 StatusDescription: structs.DeploymentStatusDescriptionFailedAllocations, 303 }, 304 JobVersion: helper.Uint64ToPtr(0), 305 } 306 matcher := matchDeploymentAllocHealthRequest(matchConfig) 307 m.On("UpdateDeploymentAllocHealth", mocker.MatchedBy(matcher)).Return(nil) 308 309 w.SetEnabled(true, m.state) 310 testutil.WaitForResult(func() (bool, error) { return 1 == watchersCount(w), nil }, 311 func(err error) { require.Equal(1, watchersCount(w), "Should have 1 deployment") }) 312 313 // Call SetAllocHealth 314 req := &structs.DeploymentAllocHealthRequest{ 315 DeploymentID: d.ID, 316 UnhealthyAllocationIDs: []string{a.ID}, 317 } 318 var resp structs.DeploymentUpdateResponse 319 err := w.SetAllocHealth(req, &resp) 320 require.Nil(err, "SetAllocHealth") 321 322 testutil.WaitForResult(func() (bool, error) { return 0 == watchersCount(w), nil }, 323 func(err error) { require.Equal(0, watchersCount(w), "Should have no deployment") }) 324 m.AssertNumberOfCalls(t, "UpdateDeploymentAllocHealth", 1) 325 } 326 327 // Test setting allocation unhealthy on job with identical spec and there should be no rollback 328 func TestWatcher_SetAllocHealth_Unhealthy_NoRollback(t *testing.T) { 329 t.Parallel() 330 require := require.New(t) 331 w, m := defaultTestDeploymentWatcher(t) 332 333 // Create a job, alloc, and a deployment 334 j := mock.Job() 335 j.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy() 336 j.TaskGroups[0].Update.MaxParallel = 2 337 j.TaskGroups[0].Update.AutoRevert = true 338 j.TaskGroups[0].Update.ProgressDeadline = 0 339 j.Stable = true 340 d := mock.Deployment() 341 d.JobID = j.ID 342 d.TaskGroups["web"].AutoRevert = true 343 a := mock.Alloc() 344 a.DeploymentID = d.ID 345 require.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob") 346 require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment") 347 require.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs") 348 349 // Upsert the job again to get a new version 350 j2 := j.Copy() 351 j2.Stable = false 352 353 require.Nil(m.state.UpsertJob(m.nextIndex(), j2), "UpsertJob2") 354 355 // require that we get a call to UpsertDeploymentAllocHealth 356 matchConfig := &matchDeploymentAllocHealthRequestConfig{ 357 DeploymentID: d.ID, 358 Unhealthy: []string{a.ID}, 359 Eval: true, 360 DeploymentUpdate: &structs.DeploymentStatusUpdate{ 361 DeploymentID: d.ID, 362 Status: structs.DeploymentStatusFailed, 363 StatusDescription: structs.DeploymentStatusDescriptionFailedAllocations, 364 }, 365 JobVersion: nil, 366 } 367 matcher := matchDeploymentAllocHealthRequest(matchConfig) 368 m.On("UpdateDeploymentAllocHealth", mocker.MatchedBy(matcher)).Return(nil) 369 370 w.SetEnabled(true, m.state) 371 testutil.WaitForResult(func() (bool, error) { return 1 == watchersCount(w), nil }, 372 func(err error) { require.Equal(1, watchersCount(w), "Should have 1 deployment") }) 373 374 // Call SetAllocHealth 375 req := &structs.DeploymentAllocHealthRequest{ 376 DeploymentID: d.ID, 377 UnhealthyAllocationIDs: []string{a.ID}, 378 } 379 var resp structs.DeploymentUpdateResponse 380 err := w.SetAllocHealth(req, &resp) 381 require.Nil(err, "SetAllocHealth") 382 383 testutil.WaitForResult(func() (bool, error) { return 0 == watchersCount(w), nil }, 384 func(err error) { require.Equal(0, watchersCount(w), "Should have no deployment") }) 385 m.AssertNumberOfCalls(t, "UpdateDeploymentAllocHealth", 1) 386 } 387 388 // Test promoting a deployment 389 func TestWatcher_PromoteDeployment_HealthyCanaries(t *testing.T) { 390 t.Parallel() 391 require := require.New(t) 392 w, m := defaultTestDeploymentWatcher(t) 393 394 // Create a job, canary alloc, and a deployment 395 j := mock.Job() 396 j.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy() 397 j.TaskGroups[0].Update.MaxParallel = 2 398 j.TaskGroups[0].Update.Canary = 1 399 j.TaskGroups[0].Update.ProgressDeadline = 0 400 d := mock.Deployment() 401 d.JobID = j.ID 402 a := mock.Alloc() 403 d.TaskGroups[a.TaskGroup].DesiredCanaries = 1 404 d.TaskGroups[a.TaskGroup].PlacedCanaries = []string{a.ID} 405 a.DeploymentStatus = &structs.AllocDeploymentStatus{ 406 Healthy: helper.BoolToPtr(true), 407 } 408 a.DeploymentID = d.ID 409 require.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob") 410 require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment") 411 require.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs") 412 413 // require that we get a call to UpsertDeploymentPromotion 414 matchConfig := &matchDeploymentPromoteRequestConfig{ 415 Promotion: &structs.DeploymentPromoteRequest{ 416 DeploymentID: d.ID, 417 All: true, 418 }, 419 Eval: true, 420 } 421 matcher := matchDeploymentPromoteRequest(matchConfig) 422 m.On("UpdateDeploymentPromotion", mocker.MatchedBy(matcher)).Return(nil) 423 424 // We may get an update for the desired transition. 425 m1 := matchUpdateAllocDesiredTransitions([]string{d.ID}) 426 m.On("UpdateAllocDesiredTransition", mocker.MatchedBy(m1)).Return(nil).Once() 427 428 w.SetEnabled(true, m.state) 429 testutil.WaitForResult(func() (bool, error) { return 1 == watchersCount(w), nil }, 430 func(err error) { require.Equal(1, watchersCount(w), "Should have 1 deployment") }) 431 432 // Call PromoteDeployment 433 req := &structs.DeploymentPromoteRequest{ 434 DeploymentID: d.ID, 435 All: true, 436 } 437 var resp structs.DeploymentUpdateResponse 438 err := w.PromoteDeployment(req, &resp) 439 require.Nil(err, "PromoteDeployment") 440 require.Equal(1, watchersCount(w), "Deployment should still be active") 441 m.AssertCalled(t, "UpdateDeploymentPromotion", mocker.MatchedBy(matcher)) 442 } 443 444 // Test promoting a deployment with unhealthy canaries 445 func TestWatcher_PromoteDeployment_UnhealthyCanaries(t *testing.T) { 446 t.Parallel() 447 require := require.New(t) 448 w, m := defaultTestDeploymentWatcher(t) 449 450 // Create a job, canary alloc, and a deployment 451 j := mock.Job() 452 j.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy() 453 j.TaskGroups[0].Update.MaxParallel = 2 454 j.TaskGroups[0].Update.Canary = 2 455 j.TaskGroups[0].Update.ProgressDeadline = 0 456 d := mock.Deployment() 457 d.JobID = j.ID 458 a := mock.Alloc() 459 d.TaskGroups[a.TaskGroup].PlacedCanaries = []string{a.ID} 460 d.TaskGroups[a.TaskGroup].DesiredCanaries = 2 461 a.DeploymentID = d.ID 462 require.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob") 463 require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment") 464 require.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs") 465 466 // require that we get a call to UpsertDeploymentPromotion 467 matchConfig := &matchDeploymentPromoteRequestConfig{ 468 Promotion: &structs.DeploymentPromoteRequest{ 469 DeploymentID: d.ID, 470 All: true, 471 }, 472 Eval: true, 473 } 474 matcher := matchDeploymentPromoteRequest(matchConfig) 475 m.On("UpdateDeploymentPromotion", mocker.MatchedBy(matcher)).Return(nil) 476 477 w.SetEnabled(true, m.state) 478 testutil.WaitForResult(func() (bool, error) { return 1 == watchersCount(w), nil }, 479 func(err error) { require.Equal(1, watchersCount(w), "Should have 1 deployment") }) 480 481 // Call SetAllocHealth 482 req := &structs.DeploymentPromoteRequest{ 483 DeploymentID: d.ID, 484 All: true, 485 } 486 var resp structs.DeploymentUpdateResponse 487 err := w.PromoteDeployment(req, &resp) 488 if assert.NotNil(t, err, "PromoteDeployment") { 489 // 0/2 because the old version has been stopped but the canary isn't marked healthy yet 490 require.Contains(err.Error(), `Task group "web" has 0/2 healthy allocations`, "Should error because canary isn't marked healthy") 491 } 492 493 require.Equal(1, watchersCount(w), "Deployment should still be active") 494 m.AssertCalled(t, "UpdateDeploymentPromotion", mocker.MatchedBy(matcher)) 495 } 496 497 func TestWatcher_AutoPromoteDeployment(t *testing.T) { 498 t.Parallel() 499 w, m := defaultTestDeploymentWatcher(t) 500 now := time.Now() 501 502 // Create 1 UpdateStrategy, 1 job (1 TaskGroup), 2 canaries, and 1 deployment 503 upd := structs.DefaultUpdateStrategy.Copy() 504 upd.AutoPromote = true 505 upd.MaxParallel = 2 506 upd.Canary = 2 507 upd.ProgressDeadline = 5 * time.Second 508 509 j := mock.Job() 510 j.TaskGroups[0].Update = upd 511 512 d := mock.Deployment() 513 d.JobID = j.ID 514 // This is created in scheduler.computeGroup at runtime, where properties from the 515 // UpdateStrategy are copied in 516 d.TaskGroups = map[string]*structs.DeploymentState{ 517 "web": { 518 AutoPromote: upd.AutoPromote, 519 AutoRevert: upd.AutoRevert, 520 ProgressDeadline: upd.ProgressDeadline, 521 DesiredTotal: 2, 522 }, 523 } 524 525 alloc := func() *structs.Allocation { 526 a := mock.Alloc() 527 a.DeploymentID = d.ID 528 a.CreateTime = now.UnixNano() 529 a.ModifyTime = now.UnixNano() 530 a.DeploymentStatus = &structs.AllocDeploymentStatus{ 531 Canary: true, 532 } 533 return a 534 } 535 536 a := alloc() 537 b := alloc() 538 539 d.TaskGroups[a.TaskGroup].PlacedCanaries = []string{a.ID, b.ID} 540 d.TaskGroups[a.TaskGroup].DesiredCanaries = 2 541 require.NoError(t, m.state.UpsertJob(m.nextIndex(), j), "UpsertJob") 542 require.NoError(t, m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment") 543 require.NoError(t, m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a, b}), "UpsertAllocs") 544 545 // ============================================================= 546 // Support method calls 547 matchConfig0 := &matchDeploymentStatusUpdateConfig{ 548 DeploymentID: d.ID, 549 Status: structs.DeploymentStatusFailed, 550 StatusDescription: structs.DeploymentStatusDescriptionProgressDeadline, 551 Eval: true, 552 } 553 matcher0 := matchDeploymentStatusUpdateRequest(matchConfig0) 554 m.On("UpdateDeploymentStatus", mocker.MatchedBy(matcher0)).Return(nil) 555 556 matchConfig1 := &matchDeploymentAllocHealthRequestConfig{ 557 DeploymentID: d.ID, 558 Healthy: []string{a.ID, b.ID}, 559 Eval: true, 560 } 561 matcher1 := matchDeploymentAllocHealthRequest(matchConfig1) 562 m.On("UpdateDeploymentAllocHealth", mocker.MatchedBy(matcher1)).Return(nil) 563 564 matchConfig2 := &matchDeploymentPromoteRequestConfig{ 565 Promotion: &structs.DeploymentPromoteRequest{ 566 DeploymentID: d.ID, 567 All: true, 568 }, 569 Eval: true, 570 } 571 matcher2 := matchDeploymentPromoteRequest(matchConfig2) 572 m.On("UpdateDeploymentPromotion", mocker.MatchedBy(matcher2)).Return(nil) 573 // ============================================================= 574 575 // Start the deployment 576 w.SetEnabled(true, m.state) 577 testutil.WaitForResult(func() (bool, error) { return 1 == len(w.watchers), nil }, 578 func(err error) { require.Equal(t, 1, len(w.watchers), "Should have 1 deployment") }) 579 580 // Mark the canaries healthy 581 req := &structs.DeploymentAllocHealthRequest{ 582 DeploymentID: d.ID, 583 HealthyAllocationIDs: []string{a.ID, b.ID}, 584 } 585 var resp structs.DeploymentUpdateResponse 586 // Calls w.raft.UpdateDeploymentAllocHealth, which is implemented by StateStore in 587 // state.UpdateDeploymentAllocHealth via a raft shim? 588 err := w.SetAllocHealth(req, &resp) 589 require.NoError(t, err) 590 591 ws := memdb.NewWatchSet() 592 593 testutil.WaitForResult( 594 func() (bool, error) { 595 ds, _ := m.state.DeploymentsByJobID(ws, j.Namespace, j.ID, true) 596 d = ds[0] 597 return 2 == d.TaskGroups["web"].HealthyAllocs, nil 598 }, 599 func(err error) { require.NoError(t, err) }, 600 ) 601 602 require.Equal(t, 1, len(w.watchers), "Deployment should still be active") 603 m.AssertCalled(t, "UpdateDeploymentPromotion", mocker.MatchedBy(matcher2)) 604 605 require.Equal(t, "running", d.Status) 606 require.True(t, d.TaskGroups["web"].Promoted) 607 608 a1, _ := m.state.AllocByID(ws, a.ID) 609 require.False(t, a1.DeploymentStatus.Canary) 610 require.Equal(t, "pending", a1.ClientStatus) 611 require.Equal(t, "run", a1.DesiredStatus) 612 613 b1, _ := m.state.AllocByID(ws, b.ID) 614 require.False(t, b1.DeploymentStatus.Canary) 615 } 616 617 // Test pausing a deployment that is running 618 func TestWatcher_PauseDeployment_Pause_Running(t *testing.T) { 619 t.Parallel() 620 require := require.New(t) 621 w, m := defaultTestDeploymentWatcher(t) 622 623 // Create a job and a deployment 624 j := mock.Job() 625 d := mock.Deployment() 626 d.JobID = j.ID 627 require.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob") 628 require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment") 629 630 // require that we get a call to UpsertDeploymentStatusUpdate 631 matchConfig := &matchDeploymentStatusUpdateConfig{ 632 DeploymentID: d.ID, 633 Status: structs.DeploymentStatusPaused, 634 StatusDescription: structs.DeploymentStatusDescriptionPaused, 635 } 636 matcher := matchDeploymentStatusUpdateRequest(matchConfig) 637 m.On("UpdateDeploymentStatus", mocker.MatchedBy(matcher)).Return(nil) 638 639 w.SetEnabled(true, m.state) 640 testutil.WaitForResult(func() (bool, error) { return 1 == watchersCount(w), nil }, 641 func(err error) { require.Equal(1, watchersCount(w), "Should have 1 deployment") }) 642 643 // Call PauseDeployment 644 req := &structs.DeploymentPauseRequest{ 645 DeploymentID: d.ID, 646 Pause: true, 647 } 648 var resp structs.DeploymentUpdateResponse 649 err := w.PauseDeployment(req, &resp) 650 require.Nil(err, "PauseDeployment") 651 652 require.Equal(1, watchersCount(w), "Deployment should still be active") 653 m.AssertCalled(t, "UpdateDeploymentStatus", mocker.MatchedBy(matcher)) 654 } 655 656 // Test pausing a deployment that is paused 657 func TestWatcher_PauseDeployment_Pause_Paused(t *testing.T) { 658 t.Parallel() 659 require := require.New(t) 660 w, m := defaultTestDeploymentWatcher(t) 661 662 // Create a job and a deployment 663 j := mock.Job() 664 d := mock.Deployment() 665 d.JobID = j.ID 666 d.Status = structs.DeploymentStatusPaused 667 require.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob") 668 require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment") 669 670 // require that we get a call to UpsertDeploymentStatusUpdate 671 matchConfig := &matchDeploymentStatusUpdateConfig{ 672 DeploymentID: d.ID, 673 Status: structs.DeploymentStatusPaused, 674 StatusDescription: structs.DeploymentStatusDescriptionPaused, 675 } 676 matcher := matchDeploymentStatusUpdateRequest(matchConfig) 677 m.On("UpdateDeploymentStatus", mocker.MatchedBy(matcher)).Return(nil) 678 679 w.SetEnabled(true, m.state) 680 testutil.WaitForResult(func() (bool, error) { return 1 == watchersCount(w), nil }, 681 func(err error) { require.Equal(1, watchersCount(w), "Should have 1 deployment") }) 682 683 // Call PauseDeployment 684 req := &structs.DeploymentPauseRequest{ 685 DeploymentID: d.ID, 686 Pause: true, 687 } 688 var resp structs.DeploymentUpdateResponse 689 err := w.PauseDeployment(req, &resp) 690 require.Nil(err, "PauseDeployment") 691 692 require.Equal(1, watchersCount(w), "Deployment should still be active") 693 m.AssertCalled(t, "UpdateDeploymentStatus", mocker.MatchedBy(matcher)) 694 } 695 696 // Test unpausing a deployment that is paused 697 func TestWatcher_PauseDeployment_Unpause_Paused(t *testing.T) { 698 t.Parallel() 699 require := require.New(t) 700 w, m := defaultTestDeploymentWatcher(t) 701 702 // Create a job and a deployment 703 j := mock.Job() 704 d := mock.Deployment() 705 d.JobID = j.ID 706 d.Status = structs.DeploymentStatusPaused 707 require.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob") 708 require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment") 709 710 // require that we get a call to UpsertDeploymentStatusUpdate 711 matchConfig := &matchDeploymentStatusUpdateConfig{ 712 DeploymentID: d.ID, 713 Status: structs.DeploymentStatusRunning, 714 StatusDescription: structs.DeploymentStatusDescriptionRunning, 715 Eval: true, 716 } 717 matcher := matchDeploymentStatusUpdateRequest(matchConfig) 718 m.On("UpdateDeploymentStatus", mocker.MatchedBy(matcher)).Return(nil) 719 720 w.SetEnabled(true, m.state) 721 testutil.WaitForResult(func() (bool, error) { return 1 == watchersCount(w), nil }, 722 func(err error) { require.Equal(1, watchersCount(w), "Should have 1 deployment") }) 723 724 // Call PauseDeployment 725 req := &structs.DeploymentPauseRequest{ 726 DeploymentID: d.ID, 727 Pause: false, 728 } 729 var resp structs.DeploymentUpdateResponse 730 err := w.PauseDeployment(req, &resp) 731 require.Nil(err, "PauseDeployment") 732 733 require.Equal(1, watchersCount(w), "Deployment should still be active") 734 m.AssertCalled(t, "UpdateDeploymentStatus", mocker.MatchedBy(matcher)) 735 } 736 737 // Test unpausing a deployment that is running 738 func TestWatcher_PauseDeployment_Unpause_Running(t *testing.T) { 739 t.Parallel() 740 require := require.New(t) 741 w, m := defaultTestDeploymentWatcher(t) 742 743 // Create a job and a deployment 744 j := mock.Job() 745 d := mock.Deployment() 746 d.JobID = j.ID 747 require.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob") 748 require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment") 749 750 // require that we get a call to UpsertDeploymentStatusUpdate 751 matchConfig := &matchDeploymentStatusUpdateConfig{ 752 DeploymentID: d.ID, 753 Status: structs.DeploymentStatusRunning, 754 StatusDescription: structs.DeploymentStatusDescriptionRunning, 755 Eval: true, 756 } 757 matcher := matchDeploymentStatusUpdateRequest(matchConfig) 758 m.On("UpdateDeploymentStatus", mocker.MatchedBy(matcher)).Return(nil) 759 760 w.SetEnabled(true, m.state) 761 testutil.WaitForResult(func() (bool, error) { return 1 == watchersCount(w), nil }, 762 func(err error) { require.Equal(1, watchersCount(w), "Should have 1 deployment") }) 763 764 // Call PauseDeployment 765 req := &structs.DeploymentPauseRequest{ 766 DeploymentID: d.ID, 767 Pause: false, 768 } 769 var resp structs.DeploymentUpdateResponse 770 err := w.PauseDeployment(req, &resp) 771 require.Nil(err, "PauseDeployment") 772 773 require.Equal(1, watchersCount(w), "Deployment should still be active") 774 m.AssertCalled(t, "UpdateDeploymentStatus", mocker.MatchedBy(matcher)) 775 } 776 777 // Test failing a deployment that is running 778 func TestWatcher_FailDeployment_Running(t *testing.T) { 779 t.Parallel() 780 require := require.New(t) 781 w, m := defaultTestDeploymentWatcher(t) 782 783 // Create a job and a deployment 784 j := mock.Job() 785 d := mock.Deployment() 786 d.JobID = j.ID 787 require.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob") 788 require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment") 789 790 // require that we get a call to UpsertDeploymentStatusUpdate 791 matchConfig := &matchDeploymentStatusUpdateConfig{ 792 DeploymentID: d.ID, 793 Status: structs.DeploymentStatusFailed, 794 StatusDescription: structs.DeploymentStatusDescriptionFailedByUser, 795 Eval: true, 796 } 797 matcher := matchDeploymentStatusUpdateRequest(matchConfig) 798 m.On("UpdateDeploymentStatus", mocker.MatchedBy(matcher)).Return(nil) 799 800 w.SetEnabled(true, m.state) 801 testutil.WaitForResult(func() (bool, error) { return 1 == watchersCount(w), nil }, 802 func(err error) { require.Equal(1, watchersCount(w), "Should have 1 deployment") }) 803 804 // Call PauseDeployment 805 req := &structs.DeploymentFailRequest{ 806 DeploymentID: d.ID, 807 } 808 var resp structs.DeploymentUpdateResponse 809 err := w.FailDeployment(req, &resp) 810 require.Nil(err, "FailDeployment") 811 812 require.Equal(1, watchersCount(w), "Deployment should still be active") 813 m.AssertCalled(t, "UpdateDeploymentStatus", mocker.MatchedBy(matcher)) 814 } 815 816 // Tests that the watcher properly watches for allocation changes and takes the 817 // proper actions 818 func TestDeploymentWatcher_Watch_NoProgressDeadline(t *testing.T) { 819 t.Parallel() 820 require := require.New(t) 821 w, m := testDeploymentWatcher(t, 1000.0, 1*time.Millisecond) 822 823 // Create a job, alloc, and a deployment 824 j := mock.Job() 825 j.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy() 826 j.TaskGroups[0].Update.MaxParallel = 2 827 j.TaskGroups[0].Update.AutoRevert = true 828 j.TaskGroups[0].Update.ProgressDeadline = 0 829 j.Stable = true 830 d := mock.Deployment() 831 d.JobID = j.ID 832 d.TaskGroups["web"].AutoRevert = true 833 a := mock.Alloc() 834 a.DeploymentID = d.ID 835 require.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob") 836 require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment") 837 require.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs") 838 839 // Upsert the job again to get a new version 840 j2 := j.Copy() 841 // Modify the job to make its specification different 842 j2.Meta["foo"] = "bar" 843 j2.Stable = false 844 require.Nil(m.state.UpsertJob(m.nextIndex(), j2), "UpsertJob2") 845 846 // require that we will get a update allocation call only once. This will 847 // verify that the watcher is batching allocation changes 848 m1 := matchUpdateAllocDesiredTransitions([]string{d.ID}) 849 m.On("UpdateAllocDesiredTransition", mocker.MatchedBy(m1)).Return(nil).Once() 850 851 // require that we get a call to UpsertDeploymentStatusUpdate 852 c := &matchDeploymentStatusUpdateConfig{ 853 DeploymentID: d.ID, 854 Status: structs.DeploymentStatusFailed, 855 StatusDescription: structs.DeploymentStatusDescriptionRollback(structs.DeploymentStatusDescriptionFailedAllocations, 0), 856 JobVersion: helper.Uint64ToPtr(0), 857 Eval: true, 858 } 859 m2 := matchDeploymentStatusUpdateRequest(c) 860 m.On("UpdateDeploymentStatus", mocker.MatchedBy(m2)).Return(nil) 861 862 w.SetEnabled(true, m.state) 863 testutil.WaitForResult(func() (bool, error) { return 1 == watchersCount(w), nil }, 864 func(err error) { require.Equal(1, watchersCount(w), "Should have 1 deployment") }) 865 866 // Update the allocs health to healthy which should create an evaluation 867 for i := 0; i < 5; i++ { 868 req := &structs.ApplyDeploymentAllocHealthRequest{ 869 DeploymentAllocHealthRequest: structs.DeploymentAllocHealthRequest{ 870 DeploymentID: d.ID, 871 HealthyAllocationIDs: []string{a.ID}, 872 }, 873 } 874 require.Nil(m.state.UpdateDeploymentAllocHealth(m.nextIndex(), req), "UpsertDeploymentAllocHealth") 875 } 876 877 // Wait for there to be one eval 878 testutil.WaitForResult(func() (bool, error) { 879 ws := memdb.NewWatchSet() 880 evals, err := m.state.EvalsByJob(ws, j.Namespace, j.ID) 881 if err != nil { 882 return false, err 883 } 884 885 if l := len(evals); l != 1 { 886 return false, fmt.Errorf("Got %d evals; want 1", l) 887 } 888 889 return true, nil 890 }, func(err error) { 891 t.Fatal(err) 892 }) 893 894 // Update the allocs health to unhealthy which should create a job rollback, 895 // status update and eval 896 req2 := &structs.ApplyDeploymentAllocHealthRequest{ 897 DeploymentAllocHealthRequest: structs.DeploymentAllocHealthRequest{ 898 DeploymentID: d.ID, 899 UnhealthyAllocationIDs: []string{a.ID}, 900 }, 901 } 902 require.Nil(m.state.UpdateDeploymentAllocHealth(m.nextIndex(), req2), "UpsertDeploymentAllocHealth") 903 904 // Wait for there to be one eval 905 testutil.WaitForResult(func() (bool, error) { 906 ws := memdb.NewWatchSet() 907 evals, err := m.state.EvalsByJob(ws, j.Namespace, j.ID) 908 if err != nil { 909 return false, err 910 } 911 912 if l := len(evals); l != 2 { 913 return false, fmt.Errorf("Got %d evals; want 1", l) 914 } 915 916 return true, nil 917 }, func(err error) { 918 t.Fatal(err) 919 }) 920 921 m.AssertCalled(t, "UpdateAllocDesiredTransition", mocker.MatchedBy(m1)) 922 923 // After we upsert the job version will go to 2. So use this to require the 924 // original call happened. 925 c2 := &matchDeploymentStatusUpdateConfig{ 926 DeploymentID: d.ID, 927 Status: structs.DeploymentStatusFailed, 928 StatusDescription: structs.DeploymentStatusDescriptionRollback(structs.DeploymentStatusDescriptionFailedAllocations, 0), 929 JobVersion: helper.Uint64ToPtr(2), 930 Eval: true, 931 } 932 m3 := matchDeploymentStatusUpdateRequest(c2) 933 m.AssertCalled(t, "UpdateDeploymentStatus", mocker.MatchedBy(m3)) 934 testutil.WaitForResult(func() (bool, error) { return 0 == watchersCount(w), nil }, 935 func(err error) { require.Equal(0, watchersCount(w), "Should have no deployment") }) 936 } 937 938 func TestDeploymentWatcher_Watch_ProgressDeadline(t *testing.T) { 939 t.Parallel() 940 require := require.New(t) 941 w, m := testDeploymentWatcher(t, 1000.0, 1*time.Millisecond) 942 943 // Create a job, alloc, and a deployment 944 j := mock.Job() 945 j.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy() 946 j.TaskGroups[0].Update.MaxParallel = 2 947 j.TaskGroups[0].Update.ProgressDeadline = 500 * time.Millisecond 948 j.Stable = true 949 d := mock.Deployment() 950 d.JobID = j.ID 951 d.TaskGroups["web"].ProgressDeadline = 500 * time.Millisecond 952 a := mock.Alloc() 953 now := time.Now() 954 a.CreateTime = now.UnixNano() 955 a.ModifyTime = now.UnixNano() 956 a.DeploymentID = d.ID 957 require.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob") 958 require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment") 959 require.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs") 960 961 // require that we get a call to UpsertDeploymentStatusUpdate 962 c := &matchDeploymentStatusUpdateConfig{ 963 DeploymentID: d.ID, 964 Status: structs.DeploymentStatusFailed, 965 StatusDescription: structs.DeploymentStatusDescriptionProgressDeadline, 966 Eval: true, 967 } 968 m2 := matchDeploymentStatusUpdateRequest(c) 969 m.On("UpdateDeploymentStatus", mocker.MatchedBy(m2)).Return(nil) 970 971 w.SetEnabled(true, m.state) 972 testutil.WaitForResult(func() (bool, error) { return 1 == watchersCount(w), nil }, 973 func(err error) { require.Equal(1, watchersCount(w), "Should have 1 deployment") }) 974 975 // Update the alloc to be unhealthy and require that nothing happens. 976 a2 := a.Copy() 977 a2.DeploymentStatus = &structs.AllocDeploymentStatus{ 978 Healthy: helper.BoolToPtr(false), 979 Timestamp: now, 980 } 981 require.Nil(m.state.UpdateAllocsFromClient(100, []*structs.Allocation{a2})) 982 983 // Wait for the deployment to be failed 984 testutil.WaitForResult(func() (bool, error) { 985 d, err := m.state.DeploymentByID(nil, d.ID) 986 if err != nil { 987 return false, err 988 } 989 990 return d.Status == structs.DeploymentStatusFailed, fmt.Errorf("bad status %q", d.Status) 991 }, func(err error) { 992 t.Fatal(err) 993 }) 994 995 // require there are is only one evaluation 996 testutil.WaitForResult(func() (bool, error) { 997 ws := memdb.NewWatchSet() 998 evals, err := m.state.EvalsByJob(ws, j.Namespace, j.ID) 999 if err != nil { 1000 return false, err 1001 } 1002 1003 if l := len(evals); l != 1 { 1004 return false, fmt.Errorf("Got %d evals; want 1", l) 1005 } 1006 1007 return true, nil 1008 }, func(err error) { 1009 t.Fatal(err) 1010 }) 1011 } 1012 1013 // Test that progress deadline handling works when there are multiple groups 1014 func TestDeploymentWatcher_ProgressCutoff(t *testing.T) { 1015 t.Parallel() 1016 require := require.New(t) 1017 w, m := testDeploymentWatcher(t, 1000.0, 1*time.Millisecond) 1018 1019 // Create a job, alloc, and a deployment 1020 j := mock.Job() 1021 j.TaskGroups[0].Count = 1 1022 j.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy() 1023 j.TaskGroups[0].Update.ProgressDeadline = 500 * time.Millisecond 1024 j.TaskGroups = append(j.TaskGroups, j.TaskGroups[0].Copy()) 1025 j.TaskGroups[1].Name = "foo" 1026 j.TaskGroups[1].Update.ProgressDeadline = 1 * time.Second 1027 j.Stable = true 1028 1029 d := mock.Deployment() 1030 d.JobID = j.ID 1031 d.TaskGroups["web"].DesiredTotal = 1 1032 d.TaskGroups["foo"] = d.TaskGroups["web"].Copy() 1033 d.TaskGroups["web"].ProgressDeadline = 500 * time.Millisecond 1034 d.TaskGroups["foo"].ProgressDeadline = 1 * time.Second 1035 1036 a := mock.Alloc() 1037 now := time.Now() 1038 a.CreateTime = now.UnixNano() 1039 a.ModifyTime = now.UnixNano() 1040 a.DeploymentID = d.ID 1041 1042 a2 := mock.Alloc() 1043 a2.TaskGroup = "foo" 1044 a2.CreateTime = now.UnixNano() 1045 a2.ModifyTime = now.UnixNano() 1046 a2.DeploymentID = d.ID 1047 1048 require.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob") 1049 require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment") 1050 require.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a, a2}), "UpsertAllocs") 1051 1052 // We may get an update for the desired transition. 1053 m1 := matchUpdateAllocDesiredTransitions([]string{d.ID}) 1054 m.On("UpdateAllocDesiredTransition", mocker.MatchedBy(m1)).Return(nil).Once() 1055 1056 w.SetEnabled(true, m.state) 1057 testutil.WaitForResult(func() (bool, error) { return 1 == watchersCount(w), nil }, 1058 func(err error) { require.Equal(1, watchersCount(w), "Should have 1 deployment") }) 1059 1060 watcher, err := w.getOrCreateWatcher(d.ID) 1061 require.NoError(err) 1062 require.NotNil(watcher) 1063 1064 d1, err := m.state.DeploymentByID(nil, d.ID) 1065 require.NoError(err) 1066 1067 done := watcher.doneGroups(d1) 1068 require.Contains(done, "web") 1069 require.False(done["web"]) 1070 require.Contains(done, "foo") 1071 require.False(done["foo"]) 1072 1073 cutoff1 := watcher.getDeploymentProgressCutoff(d1) 1074 require.False(cutoff1.IsZero()) 1075 1076 // Update the first allocation to be healthy 1077 a3 := a.Copy() 1078 a3.DeploymentStatus = &structs.AllocDeploymentStatus{Healthy: helper.BoolToPtr(true)} 1079 require.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a3}), "UpsertAllocs") 1080 1081 // Get the updated deployment 1082 d2, err := m.state.DeploymentByID(nil, d.ID) 1083 require.NoError(err) 1084 1085 done = watcher.doneGroups(d2) 1086 require.Contains(done, "web") 1087 require.True(done["web"]) 1088 require.Contains(done, "foo") 1089 require.False(done["foo"]) 1090 1091 cutoff2 := watcher.getDeploymentProgressCutoff(d2) 1092 require.False(cutoff2.IsZero()) 1093 require.True(cutoff1.UnixNano() < cutoff2.UnixNano()) 1094 1095 // Update the second allocation to be healthy 1096 a4 := a2.Copy() 1097 a4.DeploymentStatus = &structs.AllocDeploymentStatus{Healthy: helper.BoolToPtr(true)} 1098 require.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a4}), "UpsertAllocs") 1099 1100 // Get the updated deployment 1101 d3, err := m.state.DeploymentByID(nil, d.ID) 1102 require.NoError(err) 1103 1104 done = watcher.doneGroups(d3) 1105 require.Contains(done, "web") 1106 require.True(done["web"]) 1107 require.Contains(done, "foo") 1108 require.True(done["foo"]) 1109 1110 cutoff3 := watcher.getDeploymentProgressCutoff(d2) 1111 require.True(cutoff3.IsZero()) 1112 } 1113 1114 // Test that we will allow the progress deadline to be reached when the canaries 1115 // are healthy but we haven't promoted 1116 func TestDeploymentWatcher_Watch_ProgressDeadline_Canaries(t *testing.T) { 1117 t.Parallel() 1118 require := require.New(t) 1119 w, m := testDeploymentWatcher(t, 1000.0, 1*time.Millisecond) 1120 1121 // Create a job, alloc, and a deployment 1122 j := mock.Job() 1123 j.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy() 1124 j.TaskGroups[0].Update.Canary = 1 1125 j.TaskGroups[0].Update.MaxParallel = 1 1126 j.TaskGroups[0].Update.ProgressDeadline = 500 * time.Millisecond 1127 j.Stable = true 1128 d := mock.Deployment() 1129 d.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion 1130 d.JobID = j.ID 1131 d.TaskGroups["web"].ProgressDeadline = 500 * time.Millisecond 1132 d.TaskGroups["web"].DesiredCanaries = 1 1133 a := mock.Alloc() 1134 now := time.Now() 1135 a.CreateTime = now.UnixNano() 1136 a.ModifyTime = now.UnixNano() 1137 a.DeploymentID = d.ID 1138 require.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob") 1139 require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment") 1140 require.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs") 1141 1142 // require that we will get a createEvaluation call only once. This will 1143 // verify that the watcher is batching allocation changes 1144 m1 := matchUpdateAllocDesiredTransitions([]string{d.ID}) 1145 m.On("UpdateAllocDesiredTransition", mocker.MatchedBy(m1)).Return(nil).Once() 1146 1147 w.SetEnabled(true, m.state) 1148 testutil.WaitForResult(func() (bool, error) { return 1 == watchersCount(w), nil }, 1149 func(err error) { require.Equal(1, watchersCount(w), "Should have 1 deployment") }) 1150 1151 // Update the alloc to be unhealthy and require that nothing happens. 1152 a2 := a.Copy() 1153 a2.DeploymentStatus = &structs.AllocDeploymentStatus{ 1154 Healthy: helper.BoolToPtr(true), 1155 Timestamp: now, 1156 } 1157 require.Nil(m.state.UpdateAllocsFromClient(m.nextIndex(), []*structs.Allocation{a2})) 1158 1159 // Wait for the deployment to cross the deadline 1160 dout, err := m.state.DeploymentByID(nil, d.ID) 1161 require.NoError(err) 1162 require.NotNil(dout) 1163 state := dout.TaskGroups["web"] 1164 require.NotNil(state) 1165 time.Sleep(state.RequireProgressBy.Add(time.Second).Sub(now)) 1166 1167 // Require the deployment is still running 1168 dout, err = m.state.DeploymentByID(nil, d.ID) 1169 require.NoError(err) 1170 require.NotNil(dout) 1171 require.Equal(structs.DeploymentStatusRunning, dout.Status) 1172 require.Equal(structs.DeploymentStatusDescriptionRunningNeedsPromotion, dout.StatusDescription) 1173 1174 // require there are is only one evaluation 1175 testutil.WaitForResult(func() (bool, error) { 1176 ws := memdb.NewWatchSet() 1177 evals, err := m.state.EvalsByJob(ws, j.Namespace, j.ID) 1178 if err != nil { 1179 return false, err 1180 } 1181 1182 if l := len(evals); l != 1 { 1183 return false, fmt.Errorf("Got %d evals; want 1", l) 1184 } 1185 1186 return true, nil 1187 }, func(err error) { 1188 t.Fatal(err) 1189 }) 1190 } 1191 1192 // Test that a promoted deployment with alloc healthy updates create 1193 // evals to move the deployment forward 1194 func TestDeploymentWatcher_PromotedCanary_UpdatedAllocs(t *testing.T) { 1195 t.Parallel() 1196 require := require.New(t) 1197 w, m := testDeploymentWatcher(t, 1000.0, 1*time.Millisecond) 1198 1199 // Create a job, alloc, and a deployment 1200 j := mock.Job() 1201 j.TaskGroups[0].Count = 2 1202 j.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy() 1203 j.TaskGroups[0].Update.Canary = 1 1204 j.TaskGroups[0].Update.MaxParallel = 1 1205 j.TaskGroups[0].Update.ProgressDeadline = 50 * time.Millisecond 1206 j.Stable = true 1207 1208 d := mock.Deployment() 1209 d.TaskGroups["web"].DesiredTotal = 2 1210 d.TaskGroups["web"].DesiredCanaries = 1 1211 d.TaskGroups["web"].HealthyAllocs = 1 1212 d.StatusDescription = structs.DeploymentStatusDescriptionRunning 1213 d.JobID = j.ID 1214 d.TaskGroups["web"].ProgressDeadline = 50 * time.Millisecond 1215 d.TaskGroups["web"].RequireProgressBy = time.Now().Add(50 * time.Millisecond) 1216 1217 a := mock.Alloc() 1218 now := time.Now() 1219 a.CreateTime = now.UnixNano() 1220 a.ModifyTime = now.UnixNano() 1221 a.DeploymentID = d.ID 1222 a.DeploymentStatus = &structs.AllocDeploymentStatus{ 1223 Healthy: helper.BoolToPtr(true), 1224 Timestamp: now, 1225 } 1226 require.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob") 1227 require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment") 1228 require.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs") 1229 1230 w.SetEnabled(true, m.state) 1231 testutil.WaitForResult(func() (bool, error) { return 1 == watchersCount(w), nil }, 1232 func(err error) { require.Equal(1, watchersCount(w), "Should have 1 deployment") }) 1233 1234 m1 := matchUpdateAllocDesiredTransitions([]string{d.ID}) 1235 m.On("UpdateAllocDesiredTransition", mocker.MatchedBy(m1)).Return(nil).Twice() 1236 1237 // Create another alloc 1238 a2 := a.Copy() 1239 a2.ID = uuid.Generate() 1240 now = time.Now() 1241 a2.CreateTime = now.UnixNano() 1242 a2.ModifyTime = now.UnixNano() 1243 a2.DeploymentStatus = &structs.AllocDeploymentStatus{ 1244 Healthy: helper.BoolToPtr(true), 1245 Timestamp: now, 1246 } 1247 d.TaskGroups["web"].RequireProgressBy = time.Now().Add(2 * time.Second) 1248 require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment") 1249 // Wait until batch eval period passes before updating another alloc 1250 time.Sleep(1 * time.Second) 1251 require.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a2}), "UpsertAllocs") 1252 1253 // Wait for the deployment to cross the deadline 1254 dout, err := m.state.DeploymentByID(nil, d.ID) 1255 require.NoError(err) 1256 require.NotNil(dout) 1257 state := dout.TaskGroups["web"] 1258 require.NotNil(state) 1259 time.Sleep(state.RequireProgressBy.Add(time.Second).Sub(now)) 1260 1261 // There should be two evals 1262 testutil.WaitForResult(func() (bool, error) { 1263 ws := memdb.NewWatchSet() 1264 evals, err := m.state.EvalsByJob(ws, j.Namespace, j.ID) 1265 if err != nil { 1266 return false, err 1267 } 1268 1269 if l := len(evals); l != 2 { 1270 return false, fmt.Errorf("Got %d evals; want 2", l) 1271 } 1272 1273 return true, nil 1274 }, func(err error) { 1275 t.Fatal(err) 1276 }) 1277 } 1278 1279 // Test scenario where deployment initially has no progress deadline 1280 // After the deployment is updated, a failed alloc's DesiredTransition should be set 1281 func TestDeploymentWatcher_Watch_StartWithoutProgressDeadline(t *testing.T) { 1282 t.Parallel() 1283 require := require.New(t) 1284 w, m := testDeploymentWatcher(t, 1000.0, 1*time.Millisecond) 1285 1286 // Create a job, and a deployment 1287 j := mock.Job() 1288 j.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy() 1289 j.TaskGroups[0].Update.MaxParallel = 2 1290 j.TaskGroups[0].Update.ProgressDeadline = 500 * time.Millisecond 1291 j.Stable = true 1292 d := mock.Deployment() 1293 d.JobID = j.ID 1294 1295 require.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob") 1296 require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment") 1297 1298 a := mock.Alloc() 1299 a.CreateTime = time.Now().UnixNano() 1300 a.DeploymentID = d.ID 1301 1302 require.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs") 1303 1304 d.TaskGroups["web"].ProgressDeadline = 500 * time.Millisecond 1305 // Update the deployment with a progress deadline 1306 require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment") 1307 1308 // Match on DesiredTransition set to Reschedule for the failed alloc 1309 m1 := matchUpdateAllocDesiredTransitionReschedule([]string{a.ID}) 1310 m.On("UpdateAllocDesiredTransition", mocker.MatchedBy(m1)).Return(nil).Once() 1311 1312 w.SetEnabled(true, m.state) 1313 testutil.WaitForResult(func() (bool, error) { return 1 == watchersCount(w), nil }, 1314 func(err error) { require.Equal(1, watchersCount(w), "Should have 1 deployment") }) 1315 1316 // Update the alloc to be unhealthy 1317 a2 := a.Copy() 1318 a2.DeploymentStatus = &structs.AllocDeploymentStatus{ 1319 Healthy: helper.BoolToPtr(false), 1320 Timestamp: time.Now(), 1321 } 1322 require.Nil(m.state.UpdateAllocsFromClient(m.nextIndex(), []*structs.Allocation{a2})) 1323 1324 // Wait for the alloc's DesiredState to set reschedule 1325 testutil.WaitForResult(func() (bool, error) { 1326 a, err := m.state.AllocByID(nil, a.ID) 1327 if err != nil { 1328 return false, err 1329 } 1330 dt := a.DesiredTransition 1331 shouldReschedule := dt.Reschedule != nil && *dt.Reschedule 1332 return shouldReschedule, fmt.Errorf("Desired Transition Reschedule should be set but got %v", shouldReschedule) 1333 }, func(err error) { 1334 t.Fatal(err) 1335 }) 1336 } 1337 1338 // Tests that the watcher fails rollback when the spec hasn't changed 1339 func TestDeploymentWatcher_RollbackFailed(t *testing.T) { 1340 t.Parallel() 1341 require := require.New(t) 1342 w, m := testDeploymentWatcher(t, 1000.0, 1*time.Millisecond) 1343 1344 // Create a job, alloc, and a deployment 1345 j := mock.Job() 1346 j.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy() 1347 j.TaskGroups[0].Update.MaxParallel = 2 1348 j.TaskGroups[0].Update.AutoRevert = true 1349 j.TaskGroups[0].Update.ProgressDeadline = 0 1350 j.Stable = true 1351 d := mock.Deployment() 1352 d.JobID = j.ID 1353 d.TaskGroups["web"].AutoRevert = true 1354 a := mock.Alloc() 1355 a.DeploymentID = d.ID 1356 require.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob") 1357 require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment") 1358 require.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs") 1359 1360 // Upsert the job again to get a new version 1361 j2 := j.Copy() 1362 // Modify the job to make its specification different 1363 j2.Stable = false 1364 require.Nil(m.state.UpsertJob(m.nextIndex(), j2), "UpsertJob2") 1365 1366 // require that we will get a createEvaluation call only once. This will 1367 // verify that the watcher is batching allocation changes 1368 m1 := matchUpdateAllocDesiredTransitions([]string{d.ID}) 1369 m.On("UpdateAllocDesiredTransition", mocker.MatchedBy(m1)).Return(nil).Once() 1370 1371 // require that we get a call to UpsertDeploymentStatusUpdate with roll back failed as the status 1372 c := &matchDeploymentStatusUpdateConfig{ 1373 DeploymentID: d.ID, 1374 Status: structs.DeploymentStatusFailed, 1375 StatusDescription: structs.DeploymentStatusDescriptionRollbackNoop(structs.DeploymentStatusDescriptionFailedAllocations, 0), 1376 JobVersion: nil, 1377 Eval: true, 1378 } 1379 m2 := matchDeploymentStatusUpdateRequest(c) 1380 m.On("UpdateDeploymentStatus", mocker.MatchedBy(m2)).Return(nil) 1381 1382 w.SetEnabled(true, m.state) 1383 testutil.WaitForResult(func() (bool, error) { return 1 == watchersCount(w), nil }, 1384 func(err error) { require.Equal(1, watchersCount(w), "Should have 1 deployment") }) 1385 1386 // Update the allocs health to healthy which should create an evaluation 1387 for i := 0; i < 5; i++ { 1388 req := &structs.ApplyDeploymentAllocHealthRequest{ 1389 DeploymentAllocHealthRequest: structs.DeploymentAllocHealthRequest{ 1390 DeploymentID: d.ID, 1391 HealthyAllocationIDs: []string{a.ID}, 1392 }, 1393 } 1394 require.Nil(m.state.UpdateDeploymentAllocHealth(m.nextIndex(), req), "UpsertDeploymentAllocHealth") 1395 } 1396 1397 // Wait for there to be one eval 1398 testutil.WaitForResult(func() (bool, error) { 1399 ws := memdb.NewWatchSet() 1400 evals, err := m.state.EvalsByJob(ws, j.Namespace, j.ID) 1401 if err != nil { 1402 return false, err 1403 } 1404 1405 if l := len(evals); l != 1 { 1406 return false, fmt.Errorf("Got %d evals; want 1", l) 1407 } 1408 1409 return true, nil 1410 }, func(err error) { 1411 t.Fatal(err) 1412 }) 1413 1414 // Update the allocs health to unhealthy which will cause attempting a rollback, 1415 // fail in that step, do status update and eval 1416 req2 := &structs.ApplyDeploymentAllocHealthRequest{ 1417 DeploymentAllocHealthRequest: structs.DeploymentAllocHealthRequest{ 1418 DeploymentID: d.ID, 1419 UnhealthyAllocationIDs: []string{a.ID}, 1420 }, 1421 } 1422 require.Nil(m.state.UpdateDeploymentAllocHealth(m.nextIndex(), req2), "UpsertDeploymentAllocHealth") 1423 1424 // Wait for there to be one eval 1425 testutil.WaitForResult(func() (bool, error) { 1426 ws := memdb.NewWatchSet() 1427 evals, err := m.state.EvalsByJob(ws, j.Namespace, j.ID) 1428 if err != nil { 1429 return false, err 1430 } 1431 1432 if l := len(evals); l != 2 { 1433 return false, fmt.Errorf("Got %d evals; want 1", l) 1434 } 1435 1436 return true, nil 1437 }, func(err error) { 1438 t.Fatal(err) 1439 }) 1440 1441 m.AssertCalled(t, "UpdateAllocDesiredTransition", mocker.MatchedBy(m1)) 1442 1443 // verify that the job version hasn't changed after upsert 1444 m.state.JobByID(nil, structs.DefaultNamespace, j.ID) 1445 require.Equal(uint64(0), j.Version, "Expected job version 0 but got ", j.Version) 1446 } 1447 1448 // Test allocation updates and evaluation creation is batched between watchers 1449 func TestWatcher_BatchAllocUpdates(t *testing.T) { 1450 t.Parallel() 1451 require := require.New(t) 1452 w, m := testDeploymentWatcher(t, 1000.0, 1*time.Second) 1453 1454 // Create a job, alloc, for two deployments 1455 j1 := mock.Job() 1456 j1.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy() 1457 j1.TaskGroups[0].Update.ProgressDeadline = 0 1458 d1 := mock.Deployment() 1459 d1.JobID = j1.ID 1460 a1 := mock.Alloc() 1461 a1.Job = j1 1462 a1.JobID = j1.ID 1463 a1.DeploymentID = d1.ID 1464 1465 j2 := mock.Job() 1466 j2.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy() 1467 j2.TaskGroups[0].Update.ProgressDeadline = 0 1468 d2 := mock.Deployment() 1469 d2.JobID = j2.ID 1470 a2 := mock.Alloc() 1471 a2.Job = j2 1472 a2.JobID = j2.ID 1473 a2.DeploymentID = d2.ID 1474 1475 require.Nil(m.state.UpsertJob(m.nextIndex(), j1), "UpsertJob") 1476 require.Nil(m.state.UpsertJob(m.nextIndex(), j2), "UpsertJob") 1477 require.Nil(m.state.UpsertDeployment(m.nextIndex(), d1), "UpsertDeployment") 1478 require.Nil(m.state.UpsertDeployment(m.nextIndex(), d2), "UpsertDeployment") 1479 require.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a1}), "UpsertAllocs") 1480 require.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a2}), "UpsertAllocs") 1481 1482 // require that we will get a createEvaluation call only once and it contains 1483 // both deployments. This will verify that the watcher is batching 1484 // allocation changes 1485 m1 := matchUpdateAllocDesiredTransitions([]string{d1.ID, d2.ID}) 1486 m.On("UpdateAllocDesiredTransition", mocker.MatchedBy(m1)).Return(nil).Once() 1487 1488 w.SetEnabled(true, m.state) 1489 testutil.WaitForResult(func() (bool, error) { return 2 == watchersCount(w), nil }, 1490 func(err error) { require.Equal(2, watchersCount(w), "Should have 2 deployment") }) 1491 1492 // Update the allocs health to healthy which should create an evaluation 1493 req := &structs.ApplyDeploymentAllocHealthRequest{ 1494 DeploymentAllocHealthRequest: structs.DeploymentAllocHealthRequest{ 1495 DeploymentID: d1.ID, 1496 HealthyAllocationIDs: []string{a1.ID}, 1497 }, 1498 } 1499 require.Nil(m.state.UpdateDeploymentAllocHealth(m.nextIndex(), req), "UpsertDeploymentAllocHealth") 1500 1501 req2 := &structs.ApplyDeploymentAllocHealthRequest{ 1502 DeploymentAllocHealthRequest: structs.DeploymentAllocHealthRequest{ 1503 DeploymentID: d2.ID, 1504 HealthyAllocationIDs: []string{a2.ID}, 1505 }, 1506 } 1507 require.Nil(m.state.UpdateDeploymentAllocHealth(m.nextIndex(), req2), "UpsertDeploymentAllocHealth") 1508 1509 // Wait for there to be one eval for each job 1510 testutil.WaitForResult(func() (bool, error) { 1511 ws := memdb.NewWatchSet() 1512 evals1, err := m.state.EvalsByJob(ws, j1.Namespace, j1.ID) 1513 if err != nil { 1514 return false, err 1515 } 1516 1517 evals2, err := m.state.EvalsByJob(ws, j2.Namespace, j2.ID) 1518 if err != nil { 1519 return false, err 1520 } 1521 1522 if l := len(evals1); l != 1 { 1523 return false, fmt.Errorf("Got %d evals for job %v; want 1", l, j1.ID) 1524 } 1525 1526 if l := len(evals2); l != 1 { 1527 return false, fmt.Errorf("Got %d evals for job 2; want 1", l) 1528 } 1529 1530 return true, nil 1531 }, func(err error) { 1532 t.Fatal(err) 1533 }) 1534 1535 m.AssertCalled(t, "UpdateAllocDesiredTransition", mocker.MatchedBy(m1)) 1536 testutil.WaitForResult(func() (bool, error) { return 2 == watchersCount(w), nil }, 1537 func(err error) { require.Equal(2, watchersCount(w), "Should have 2 deployment") }) 1538 } 1539 1540 func watchersCount(w *Watcher) int { 1541 w.l.Lock() 1542 defer w.l.Unlock() 1543 1544 return len(w.watchers) 1545 }