github.com/zhizhiboom/nomad@v0.8.5-0.20180907175415-f28fd3a1a056/client/allocrunner/alloc_runner_test.go (about) 1 package allocrunner 2 3 import ( 4 "fmt" 5 "io/ioutil" 6 "os" 7 "path/filepath" 8 "strings" 9 "testing" 10 "time" 11 12 "github.com/boltdb/bolt" 13 "github.com/hashicorp/consul/api" 14 "github.com/hashicorp/nomad/command/agent/consul" 15 "github.com/hashicorp/nomad/helper/testlog" 16 "github.com/hashicorp/nomad/helper/uuid" 17 "github.com/hashicorp/nomad/nomad/mock" 18 "github.com/hashicorp/nomad/nomad/structs" 19 "github.com/hashicorp/nomad/testutil" 20 "github.com/stretchr/testify/assert" 21 22 "github.com/hashicorp/nomad/client/allocrunner/taskrunner" 23 consulApi "github.com/hashicorp/nomad/client/consul" 24 "github.com/hashicorp/nomad/client/state" 25 "github.com/stretchr/testify/require" 26 ) 27 28 // allocationBucketExists checks if the allocation bucket was created. 29 func allocationBucketExists(tx *bolt.Tx, allocID string) bool { 30 bucket, err := state.GetAllocationBucket(tx, allocID) 31 return err == nil && bucket != nil 32 } 33 34 func TestAllocRunner_SimpleRun(t *testing.T) { 35 t.Parallel() 36 upd, ar := TestAllocRunner(t, false) 37 go ar.Run() 38 defer ar.Destroy() 39 40 testutil.WaitForResult(func() (bool, error) { 41 last := upd.Last() 42 if last == nil { 43 return false, fmt.Errorf("No updates") 44 } 45 if last.ClientStatus != structs.AllocClientStatusComplete { 46 return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete) 47 } 48 return true, nil 49 }, func(err error) { 50 t.Fatalf("err: %v", err) 51 }) 52 } 53 54 // Test that FinisheAt is set when the alloc is in a terminal state 55 func TestAllocRunner_FinishedAtSet(t *testing.T) { 56 t.Parallel() 57 require := require.New(t) 58 _, ar := TestAllocRunner(t, false) 59 ar.allocClientStatus = structs.AllocClientStatusFailed 60 alloc := ar.Alloc() 61 taskFinishedAt := make(map[string]time.Time) 62 require.NotEmpty(alloc.TaskStates) 63 for name, s := range alloc.TaskStates { 64 require.False(s.FinishedAt.IsZero()) 65 taskFinishedAt[name] = s.FinishedAt 66 } 67 68 // Verify that calling again should not mutate finishedAt 69 alloc2 := ar.Alloc() 70 for name, s := range alloc2.TaskStates { 71 require.Equal(taskFinishedAt[name], s.FinishedAt) 72 } 73 74 } 75 76 // Test that FinisheAt is set when the alloc is in a terminal state 77 func TestAllocRunner_FinishedAtSet_TaskEvents(t *testing.T) { 78 t.Parallel() 79 require := require.New(t) 80 _, ar := TestAllocRunner(t, false) 81 ar.taskStates[ar.alloc.Job.TaskGroups[0].Tasks[0].Name] = &structs.TaskState{State: structs.TaskStateDead, Failed: true} 82 83 alloc := ar.Alloc() 84 taskFinishedAt := make(map[string]time.Time) 85 require.NotEmpty(alloc.TaskStates) 86 for name, s := range alloc.TaskStates { 87 require.False(s.FinishedAt.IsZero()) 88 taskFinishedAt[name] = s.FinishedAt 89 } 90 91 // Verify that calling again should not mutate finishedAt 92 alloc2 := ar.Alloc() 93 for name, s := range alloc2.TaskStates { 94 require.Equal(taskFinishedAt[name], s.FinishedAt) 95 } 96 97 } 98 99 // Test that the watcher will mark the allocation as unhealthy. 100 func TestAllocRunner_DeploymentHealth_Unhealthy_BadStart(t *testing.T) { 101 t.Parallel() 102 assert := assert.New(t) 103 104 // Ensure the task fails and restarts 105 upd, ar := TestAllocRunner(t, true) 106 107 // Make the task fail 108 task := ar.alloc.Job.TaskGroups[0].Tasks[0] 109 task.Driver = "mock_driver" 110 task.Config["start_error"] = "test error" 111 112 // Make the alloc be part of a deployment 113 ar.alloc.DeploymentID = uuid.Generate() 114 ar.alloc.Job.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy() 115 ar.alloc.Job.TaskGroups[0].Update.HealthCheck = structs.UpdateStrategyHealthCheck_TaskStates 116 ar.alloc.Job.TaskGroups[0].Update.MaxParallel = 1 117 118 go ar.Run() 119 defer ar.Destroy() 120 121 testutil.WaitForResult(func() (bool, error) { 122 last := upd.Last() 123 if last == nil { 124 return false, fmt.Errorf("No updates") 125 } 126 if !last.DeploymentStatus.HasHealth() { 127 return false, fmt.Errorf("want deployment status unhealthy; got unset") 128 } else if *last.DeploymentStatus.Healthy { 129 return false, fmt.Errorf("want deployment status unhealthy; got healthy") 130 } 131 return true, nil 132 }, func(err error) { 133 t.Fatalf("err: %v", err) 134 }) 135 136 // Assert that we have an event explaining why we are unhealthy. 137 assert.Len(ar.taskStates, 1) 138 state := ar.taskStates[task.Name] 139 assert.NotNil(state) 140 assert.NotEmpty(state.Events) 141 last := state.Events[len(state.Events)-1] 142 assert.Equal(allocHealthEventSource, last.Type) 143 assert.Contains(last.Message, "failed task") 144 } 145 146 // Test that the watcher will mark the allocation as unhealthy if it hits its 147 // deadline. 148 func TestAllocRunner_DeploymentHealth_Unhealthy_Deadline(t *testing.T) { 149 t.Parallel() 150 151 // Don't restart but force service job type 152 upd, ar := TestAllocRunner(t, false) 153 ar.alloc.Job.Type = structs.JobTypeService 154 155 // Make the task block 156 task := ar.alloc.Job.TaskGroups[0].Tasks[0] 157 task.Driver = "mock_driver" 158 task.Config["start_block_for"] = "4s" 159 task.Config["run_for"] = "10s" 160 161 // Make the alloc be part of a deployment 162 ar.alloc.DeploymentID = uuid.Generate() 163 ar.alloc.Job.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy() 164 ar.alloc.Job.TaskGroups[0].Update.HealthCheck = structs.UpdateStrategyHealthCheck_TaskStates 165 ar.alloc.Job.TaskGroups[0].Update.MaxParallel = 1 166 ar.alloc.Job.TaskGroups[0].Update.HealthyDeadline = 100 * time.Millisecond 167 168 go ar.Run() 169 defer ar.Destroy() 170 171 testutil.WaitForResult(func() (bool, error) { 172 last := upd.Last() 173 if last == nil { 174 return false, fmt.Errorf("No updates") 175 } 176 177 // Assert alloc is unhealthy 178 if !last.DeploymentStatus.HasHealth() { 179 return false, fmt.Errorf("want deployment status unhealthy; got unset") 180 } else if *last.DeploymentStatus.Healthy { 181 return false, fmt.Errorf("want deployment status unhealthy; got healthy") 182 } 183 184 // Assert there is a task event explaining why we are unhealthy. 185 state, ok := last.TaskStates[task.Name] 186 if !ok { 187 return false, fmt.Errorf("missing state for task %s", task.Name) 188 } 189 n := len(state.Events) 190 if n == 0 { 191 return false, fmt.Errorf("no task events") 192 } 193 lastEvent := state.Events[n-1] 194 if lastEvent.Type != allocHealthEventSource { 195 return false, fmt.Errorf("expected %q; found %q", allocHealthEventSource, lastEvent.Type) 196 } 197 if !strings.Contains(lastEvent.Message, "not running by deadline") { 198 return false, fmt.Errorf(`expected "not running by deadline" but found: %s`, lastEvent.Message) 199 } 200 201 return true, nil 202 }, func(err error) { 203 t.Fatalf("err: %v", err) 204 }) 205 } 206 207 // Test that the watcher will mark the allocation as healthy. 208 func TestAllocRunner_DeploymentHealth_Healthy_NoChecks(t *testing.T) { 209 t.Parallel() 210 211 // Ensure the task fails and restarts 212 upd, ar := TestAllocRunner(t, true) 213 214 // Make the task run healthy 215 task := ar.alloc.Job.TaskGroups[0].Tasks[0] 216 task.Driver = "mock_driver" 217 task.Config["run_for"] = "10s" 218 219 // Create a task that takes longer to become healthy 220 ar.alloc.Job.TaskGroups[0].Tasks = append(ar.alloc.Job.TaskGroups[0].Tasks, task.Copy()) 221 task2 := ar.alloc.Job.TaskGroups[0].Tasks[1] 222 task2.Name = "task 2" 223 task2.Config["start_block_for"] = "500ms" 224 225 // Make the alloc be part of a deployment 226 ar.alloc.DeploymentID = uuid.Generate() 227 ar.alloc.Job.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy() 228 ar.alloc.Job.TaskGroups[0].Update.HealthCheck = structs.UpdateStrategyHealthCheck_TaskStates 229 ar.alloc.Job.TaskGroups[0].Update.MaxParallel = 1 230 ar.alloc.Job.TaskGroups[0].Update.MinHealthyTime = 100 * time.Millisecond 231 232 start := time.Now() 233 go ar.Run() 234 defer ar.Destroy() 235 236 testutil.WaitForResult(func() (bool, error) { 237 last := upd.Last() 238 if last == nil { 239 return false, fmt.Errorf("No updates") 240 } 241 if !last.DeploymentStatus.HasHealth() { 242 return false, fmt.Errorf("want deployment status unhealthy; got unset") 243 } else if !*last.DeploymentStatus.Healthy { 244 return false, fmt.Errorf("want deployment status healthy; got unhealthy") 245 } 246 return true, nil 247 }, func(err error) { 248 t.Fatalf("err: %v", err) 249 }) 250 if d := time.Now().Sub(start); d < 500*time.Millisecond { 251 t.Fatalf("didn't wait for second task group. Only took %v", d) 252 } 253 } 254 255 // Test that the watcher will mark the allocation as healthy with checks 256 func TestAllocRunner_DeploymentHealth_Healthy_Checks(t *testing.T) { 257 t.Parallel() 258 259 // Ensure the task fails and restarts 260 upd, ar := TestAllocRunner(t, true) 261 262 // Make the task fail 263 task := ar.alloc.Job.TaskGroups[0].Tasks[0] 264 task.Driver = "mock_driver" 265 task.Config["run_for"] = "10s" 266 267 // Create a task that has no checks 268 ar.alloc.Job.TaskGroups[0].Tasks = append(ar.alloc.Job.TaskGroups[0].Tasks, task.Copy()) 269 task2 := ar.alloc.Job.TaskGroups[0].Tasks[1] 270 task2.Name = "task 2" 271 task2.Services = nil 272 273 // Make the alloc be part of a deployment 274 ar.alloc.DeploymentID = uuid.Generate() 275 ar.alloc.Job.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy() 276 ar.alloc.Job.TaskGroups[0].Update.HealthCheck = structs.UpdateStrategyHealthCheck_Checks 277 ar.alloc.Job.TaskGroups[0].Update.MaxParallel = 1 278 ar.alloc.Job.TaskGroups[0].Update.MinHealthyTime = 100 * time.Millisecond 279 280 checkHealthy := &api.AgentCheck{ 281 CheckID: uuid.Generate(), 282 Status: api.HealthPassing, 283 } 284 checkUnhealthy := &api.AgentCheck{ 285 CheckID: checkHealthy.CheckID, 286 Status: api.HealthWarning, 287 } 288 289 // Only return the check as healthy after a duration 290 trigger := time.After(500 * time.Millisecond) 291 ar.consulClient.(*consulApi.MockConsulServiceClient).AllocRegistrationsFn = func(allocID string) (*consul.AllocRegistration, error) { 292 select { 293 case <-trigger: 294 return &consul.AllocRegistration{ 295 Tasks: map[string]*consul.TaskRegistration{ 296 task.Name: { 297 Services: map[string]*consul.ServiceRegistration{ 298 "123": { 299 Service: &api.AgentService{Service: "foo"}, 300 Checks: []*api.AgentCheck{checkHealthy}, 301 }, 302 }, 303 }, 304 }, 305 }, nil 306 default: 307 return &consul.AllocRegistration{ 308 Tasks: map[string]*consul.TaskRegistration{ 309 task.Name: { 310 Services: map[string]*consul.ServiceRegistration{ 311 "123": { 312 Service: &api.AgentService{Service: "foo"}, 313 Checks: []*api.AgentCheck{checkUnhealthy}, 314 }, 315 }, 316 }, 317 }, 318 }, nil 319 } 320 } 321 322 start := time.Now() 323 go ar.Run() 324 defer ar.Destroy() 325 326 testutil.WaitForResult(func() (bool, error) { 327 last := upd.Last() 328 if last == nil { 329 return false, fmt.Errorf("No updates") 330 } 331 if !last.DeploymentStatus.HasHealth() { 332 return false, fmt.Errorf("want deployment status unhealthy; got unset") 333 } else if !*last.DeploymentStatus.Healthy { 334 return false, fmt.Errorf("want deployment status healthy; got unhealthy") 335 } 336 return true, nil 337 }, func(err error) { 338 t.Fatalf("err: %v", err) 339 }) 340 341 if d := time.Now().Sub(start); d < 500*time.Millisecond { 342 t.Fatalf("didn't wait for second task group. Only took %v", d) 343 } 344 } 345 346 // Test that the watcher will mark the allocation as unhealthy with failing 347 // checks 348 func TestAllocRunner_DeploymentHealth_Unhealthy_Checks(t *testing.T) { 349 t.Parallel() 350 assert := assert.New(t) 351 352 // Ensure the task fails and restarts 353 upd, ar := TestAllocRunner(t, true) 354 355 // Make the task fail 356 task := ar.alloc.Job.TaskGroups[0].Tasks[0] 357 task.Driver = "mock_driver" 358 task.Config["run_for"] = "10s" 359 360 // Make the alloc be part of a deployment 361 ar.alloc.DeploymentID = uuid.Generate() 362 ar.alloc.Job.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy() 363 ar.alloc.Job.TaskGroups[0].Update.HealthCheck = structs.UpdateStrategyHealthCheck_Checks 364 ar.alloc.Job.TaskGroups[0].Update.MaxParallel = 1 365 ar.alloc.Job.TaskGroups[0].Update.MinHealthyTime = 100 * time.Millisecond 366 ar.alloc.Job.TaskGroups[0].Update.HealthyDeadline = 1 * time.Second 367 368 checkUnhealthy := &api.AgentCheck{ 369 CheckID: uuid.Generate(), 370 Status: api.HealthWarning, 371 } 372 373 // Only return the check as healthy after a duration 374 ar.consulClient.(*consulApi.MockConsulServiceClient).AllocRegistrationsFn = func(allocID string) (*consul.AllocRegistration, error) { 375 return &consul.AllocRegistration{ 376 Tasks: map[string]*consul.TaskRegistration{ 377 task.Name: { 378 Services: map[string]*consul.ServiceRegistration{ 379 "123": { 380 Service: &api.AgentService{Service: "foo"}, 381 Checks: []*api.AgentCheck{checkUnhealthy}, 382 }, 383 }, 384 }, 385 }, 386 }, nil 387 } 388 389 go ar.Run() 390 defer ar.Destroy() 391 392 testutil.WaitForResult(func() (bool, error) { 393 last := upd.Last() 394 if last == nil { 395 return false, fmt.Errorf("No updates") 396 } 397 if !last.DeploymentStatus.HasHealth() { 398 return false, fmt.Errorf("want deployment status unhealthy; got unset") 399 } else if *last.DeploymentStatus.Healthy { 400 return false, fmt.Errorf("want deployment status unhealthy; got healthy") 401 } 402 return true, nil 403 }, func(err error) { 404 t.Fatalf("err: %v", err) 405 }) 406 407 // Assert that we have an event explaining why we are unhealthy. 408 assert.Len(ar.taskStates, 1) 409 state := ar.taskStates[task.Name] 410 assert.NotNil(state) 411 assert.NotEmpty(state.Events) 412 last := state.Events[len(state.Events)-1] 413 assert.Equal(allocHealthEventSource, last.Type) 414 assert.Contains(last.Message, "Services not healthy by deadline") 415 } 416 417 // Test that the watcher will mark the allocation as healthy. 418 func TestAllocRunner_DeploymentHealth_Healthy_UpdatedDeployment(t *testing.T) { 419 t.Parallel() 420 421 // Ensure the task fails and restarts 422 upd, ar := TestAllocRunner(t, true) 423 424 // Make the task run healthy 425 task := ar.alloc.Job.TaskGroups[0].Tasks[0] 426 task.Driver = "mock_driver" 427 task.Config["run_for"] = "30s" 428 429 // Make the alloc be part of a deployment 430 ar.alloc.DeploymentID = uuid.Generate() 431 ar.alloc.Job.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy() 432 ar.alloc.Job.TaskGroups[0].Update.HealthCheck = structs.UpdateStrategyHealthCheck_TaskStates 433 ar.alloc.Job.TaskGroups[0].Update.MaxParallel = 1 434 ar.alloc.Job.TaskGroups[0].Update.MinHealthyTime = 100 * time.Millisecond 435 436 go ar.Run() 437 defer ar.Destroy() 438 439 testutil.WaitForResult(func() (bool, error) { 440 last := upd.Last() 441 if last == nil { 442 return false, fmt.Errorf("No updates") 443 } 444 if !last.DeploymentStatus.HasHealth() { 445 return false, fmt.Errorf("want deployment status unhealthy; got unset") 446 } else if !*last.DeploymentStatus.Healthy { 447 return false, fmt.Errorf("want deployment status healthy; got unhealthy") 448 } 449 return true, nil 450 }, func(err error) { 451 t.Fatalf("err: %v", err) 452 }) 453 454 // Mimick an update to a new deployment id 455 last := upd.Last() 456 last.DeploymentStatus = nil 457 last.DeploymentID = uuid.Generate() 458 ar.Update(last) 459 460 testutil.WaitForResult(func() (bool, error) { 461 last := upd.Last() 462 if !last.DeploymentStatus.HasHealth() { 463 return false, fmt.Errorf("want deployment status unhealthy; got unset") 464 } else if !*last.DeploymentStatus.Healthy { 465 return false, fmt.Errorf("want deployment status healthy; got unhealthy") 466 } 467 return true, nil 468 }, func(err error) { 469 t.Fatalf("err: %v", err) 470 }) 471 } 472 473 // Test that health is reported for services that got migrated; not just part 474 // of deployments. 475 func TestAllocRunner_DeploymentHealth_Healthy_Migration(t *testing.T) { 476 t.Parallel() 477 478 // Ensure the task fails and restarts 479 upd, ar := TestAllocRunner(t, true) 480 481 // Make the task run healthy 482 tg := ar.alloc.Job.TaskGroups[0] 483 task := tg.Tasks[0] 484 task.Driver = "mock_driver" 485 task.Config["run_for"] = "30s" 486 487 // Shorten the default migration healthy time 488 tg.Migrate = structs.DefaultMigrateStrategy() 489 tg.Migrate.MinHealthyTime = 100 * time.Millisecond 490 tg.Migrate.HealthCheck = structs.MigrateStrategyHealthStates 491 492 // Ensure the alloc is *not* part of a deployment 493 ar.alloc.DeploymentID = "" 494 495 go ar.Run() 496 defer ar.Destroy() 497 498 testutil.WaitForResult(func() (bool, error) { 499 last := upd.Last() 500 if last == nil { 501 return false, fmt.Errorf("No updates") 502 } 503 if !last.DeploymentStatus.HasHealth() { 504 return false, fmt.Errorf("want deployment status unhealthy; got unset") 505 } else if !*last.DeploymentStatus.Healthy { 506 return false, fmt.Errorf("want deployment status healthy; got unhealthy") 507 } 508 return true, nil 509 }, func(err error) { 510 t.Fatalf("err: %v", err) 511 }) 512 } 513 514 // Test that health is *not* reported for batch jobs 515 func TestAllocRunner_DeploymentHealth_BatchDisabled(t *testing.T) { 516 t.Parallel() 517 518 // Ensure the task fails and restarts 519 alloc := mock.BatchAlloc() 520 tg := alloc.Job.TaskGroups[0] 521 522 // This should not be possile as validation should prevent batch jobs 523 // from having a migration stanza! 524 tg.Migrate = structs.DefaultMigrateStrategy() 525 tg.Migrate.MinHealthyTime = 1 * time.Millisecond 526 tg.Migrate.HealthyDeadline = 2 * time.Millisecond 527 tg.Migrate.HealthCheck = structs.MigrateStrategyHealthStates 528 529 task := tg.Tasks[0] 530 task.Driver = "mock_driver" 531 task.Config["run_for"] = "5s" 532 upd, ar := TestAllocRunnerFromAlloc(t, alloc, false) 533 534 go ar.Run() 535 defer ar.Destroy() 536 537 testutil.WaitForResult(func() (bool, error) { 538 last := upd.Last() 539 if last == nil { 540 return false, fmt.Errorf("No updates") 541 } 542 if last.DeploymentStatus != nil { 543 return false, fmt.Errorf("unexpected deployment health set: %v", last.DeploymentStatus.Healthy) 544 } 545 return true, nil 546 }, func(err error) { 547 t.Fatalf("err: %v", err) 548 }) 549 } 550 551 // TestAllocRuner_RetryArtifact ensures that if one task in a task group is 552 // retrying fetching an artifact, other tasks in the group should be able 553 // to proceed. 554 func TestAllocRunner_RetryArtifact(t *testing.T) { 555 t.Parallel() 556 557 alloc := mock.Alloc() 558 alloc.Job.Type = structs.JobTypeBatch 559 alloc.Job.TaskGroups[0].RestartPolicy.Mode = structs.RestartPolicyModeFail 560 alloc.Job.TaskGroups[0].RestartPolicy.Attempts = 1 561 alloc.Job.TaskGroups[0].RestartPolicy.Delay = time.Duration(4*testutil.TestMultiplier()) * time.Second 562 563 task := alloc.Job.TaskGroups[0].Tasks[0] 564 task.Driver = "mock_driver" 565 task.Config = map[string]interface{}{ 566 "exit_code": "0", 567 "run_for": "1s", 568 } 569 570 // Create a new task with a bad artifact 571 badtask := alloc.Job.TaskGroups[0].Tasks[0].Copy() 572 badtask.Name = "bad" 573 badtask.Artifacts = []*structs.TaskArtifact{ 574 {GetterSource: "http://127.0.0.1:0/foo/bar/baz"}, 575 } 576 577 alloc.Job.TaskGroups[0].Tasks = append(alloc.Job.TaskGroups[0].Tasks, badtask) 578 upd, ar := TestAllocRunnerFromAlloc(t, alloc, true) 579 go ar.Run() 580 defer ar.Destroy() 581 582 testutil.WaitForResult(func() (bool, error) { 583 last := upd.Last() 584 if last == nil { 585 return false, fmt.Errorf("No updates") 586 } 587 588 // web task should have completed successfully while bad task 589 // retries artifact fetching 590 webstate, ok := last.TaskStates["web"] 591 if !ok { 592 return false, fmt.Errorf("no task state for web") 593 } 594 if webstate.State != structs.TaskStateDead { 595 return false, fmt.Errorf("expected web to be dead but found %q", last.TaskStates["web"].State) 596 } 597 if !webstate.Successful() { 598 return false, fmt.Errorf("expected web to have exited successfully") 599 } 600 601 // bad task should have failed 602 badstate := last.TaskStates["bad"] 603 if badstate.State != structs.TaskStateDead { 604 return false, fmt.Errorf("expected bad to be dead but found %q", badstate.State) 605 } 606 if !badstate.Failed { 607 return false, fmt.Errorf("expected bad to have failed: %#v", badstate.Events) 608 } 609 return true, nil 610 }, func(err error) { 611 t.Fatalf("err: %v", err) 612 }) 613 } 614 615 func TestAllocRunner_TerminalUpdate_Destroy(t *testing.T) { 616 t.Parallel() 617 upd, ar := TestAllocRunner(t, false) 618 619 // Ensure task takes some time 620 task := ar.alloc.Job.TaskGroups[0].Tasks[0] 621 task.Driver = "mock_driver" 622 task.Config["run_for"] = "10s" 623 go ar.Run() 624 625 testutil.WaitForResult(func() (bool, error) { 626 last := upd.Last() 627 if last == nil { 628 return false, fmt.Errorf("No updates") 629 } 630 if last.ClientStatus != structs.AllocClientStatusRunning { 631 return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusRunning) 632 } 633 return true, nil 634 }, func(err error) { 635 t.Fatalf("err: %v", err) 636 }) 637 638 // Update the alloc to be terminal which should cause the alloc runner to 639 // stop the tasks and wait for a destroy. 640 update := ar.alloc.Copy() 641 update.DesiredStatus = structs.AllocDesiredStatusStop 642 ar.Update(update) 643 644 testutil.WaitForResult(func() (bool, error) { 645 last := upd.Last() 646 if last == nil { 647 return false, fmt.Errorf("No updates") 648 } 649 650 // Check the status has changed. 651 if last.ClientStatus != structs.AllocClientStatusComplete { 652 return false, fmt.Errorf("got client status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete) 653 } 654 655 // Check the allocation state still exists 656 if err := ar.stateDB.View(func(tx *bolt.Tx) error { 657 if !allocationBucketExists(tx, ar.Alloc().ID) { 658 return fmt.Errorf("no bucket for alloc") 659 } 660 661 return nil 662 }); err != nil { 663 return false, fmt.Errorf("state destroyed") 664 } 665 666 // Check the alloc directory still exists 667 if _, err := os.Stat(ar.allocDir.AllocDir); err != nil { 668 return false, fmt.Errorf("alloc dir destroyed: %v", ar.allocDir.AllocDir) 669 } 670 671 return true, nil 672 }, func(err error) { 673 t.Fatalf("err: %v", err) 674 }) 675 676 // Send the destroy signal and ensure the AllocRunner cleans up. 677 ar.Destroy() 678 679 testutil.WaitForResult(func() (bool, error) { 680 last := upd.Last() 681 if last == nil { 682 return false, fmt.Errorf("No updates") 683 } 684 685 // Check the status has changed. 686 if last.ClientStatus != structs.AllocClientStatusComplete { 687 return false, fmt.Errorf("got client status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete) 688 } 689 690 // Check the state was cleaned 691 if err := ar.stateDB.View(func(tx *bolt.Tx) error { 692 if allocationBucketExists(tx, ar.Alloc().ID) { 693 return fmt.Errorf("bucket for alloc exists") 694 } 695 696 return nil 697 }); err != nil { 698 return false, fmt.Errorf("state not destroyed") 699 } 700 701 // Check the alloc directory was cleaned 702 if _, err := os.Stat(ar.allocDir.AllocDir); err == nil { 703 return false, fmt.Errorf("alloc dir still exists: %v", ar.allocDir.AllocDir) 704 } else if !os.IsNotExist(err) { 705 return false, fmt.Errorf("stat err: %v", err) 706 } 707 708 return true, nil 709 }, func(err error) { 710 t.Fatalf("err: %v", err) 711 }) 712 } 713 714 func TestAllocRunner_Destroy(t *testing.T) { 715 t.Parallel() 716 upd, ar := TestAllocRunner(t, false) 717 718 // Ensure task takes some time 719 task := ar.alloc.Job.TaskGroups[0].Tasks[0] 720 task.Driver = "mock_driver" 721 task.Config["run_for"] = "10s" 722 go ar.Run() 723 start := time.Now() 724 725 // Begin the tear down 726 go func() { 727 time.Sleep(1 * time.Second) 728 ar.Destroy() 729 }() 730 731 testutil.WaitForResult(func() (bool, error) { 732 last := upd.Last() 733 if last == nil { 734 return false, fmt.Errorf("No updates") 735 } 736 737 // Check the status has changed. 738 if last.ClientStatus != structs.AllocClientStatusComplete { 739 return false, fmt.Errorf("got client status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete) 740 } 741 742 // Check the state was cleaned 743 if err := ar.stateDB.View(func(tx *bolt.Tx) error { 744 if allocationBucketExists(tx, ar.Alloc().ID) { 745 return fmt.Errorf("bucket for alloc exists") 746 } 747 748 return nil 749 }); err != nil { 750 return false, fmt.Errorf("state not destroyed: %v", err) 751 } 752 753 // Check the alloc directory was cleaned 754 if _, err := os.Stat(ar.allocDir.AllocDir); err == nil { 755 return false, fmt.Errorf("alloc dir still exists: %v", ar.allocDir.AllocDir) 756 } else if !os.IsNotExist(err) { 757 return false, fmt.Errorf("stat err: %v", err) 758 } 759 760 return true, nil 761 }, func(err error) { 762 t.Fatalf("err: %v", err) 763 }) 764 765 if elapsed := time.Since(start); elapsed > 20*time.Second { 766 t.Fatalf("took too long to terminate: %s", elapsed) 767 } 768 } 769 770 func TestAllocRunner_Update(t *testing.T) { 771 t.Parallel() 772 _, ar := TestAllocRunner(t, false) 773 774 // Deep copy the alloc to avoid races when updating 775 newAlloc := ar.Alloc().Copy() 776 777 // Ensure task takes some time 778 task := ar.alloc.Job.TaskGroups[0].Tasks[0] 779 task.Driver = "mock_driver" 780 task.Config["run_for"] = "10s" 781 go ar.Run() 782 defer ar.Destroy() 783 784 // Update the alloc definition 785 newAlloc.Name = "FOO" 786 newAlloc.AllocModifyIndex++ 787 ar.Update(newAlloc) 788 789 // Check the alloc runner stores the update allocation. 790 testutil.WaitForResult(func() (bool, error) { 791 return ar.Alloc().Name == "FOO", nil 792 }, func(err error) { 793 t.Fatalf("err: %v %#v", err, ar.Alloc()) 794 }) 795 } 796 797 func TestAllocRunner_SaveRestoreState(t *testing.T) { 798 t.Parallel() 799 alloc := mock.Alloc() 800 task := alloc.Job.TaskGroups[0].Tasks[0] 801 task.Driver = "mock_driver" 802 task.Config = map[string]interface{}{ 803 "exit_code": "0", 804 "run_for": "10s", 805 } 806 807 upd, ar := TestAllocRunnerFromAlloc(t, alloc, false) 808 go ar.Run() 809 defer ar.Destroy() 810 811 // Snapshot state 812 testutil.WaitForResult(func() (bool, error) { 813 ar.taskLock.RLock() 814 defer ar.taskLock.RUnlock() 815 return len(ar.tasks) == 1, nil 816 }, func(err error) { 817 t.Fatalf("task never started: %v", err) 818 }) 819 820 err := ar.SaveState() 821 if err != nil { 822 t.Fatalf("err: %v", err) 823 } 824 825 // Create a new alloc runner 826 l2 := testlog.WithPrefix(t, "----- ar2: ") 827 alloc2 := &structs.Allocation{ID: ar.alloc.ID} 828 prevAlloc := NewAllocWatcher(alloc2, ar, nil, ar.config, l2, "") 829 ar2 := NewAllocRunner(l2, ar.config, ar.stateDB, upd.Update, 830 alloc2, ar.vaultClient, ar.consulClient, prevAlloc) 831 err = ar2.RestoreState() 832 if err != nil { 833 t.Fatalf("err: %v", err) 834 } 835 go ar2.Run() 836 837 testutil.WaitForResult(func() (bool, error) { 838 if len(ar2.tasks) != 1 { 839 return false, fmt.Errorf("Incorrect number of tasks") 840 } 841 842 last := upd.Last() 843 if last == nil { 844 return false, nil 845 } 846 847 return last.ClientStatus == structs.AllocClientStatusRunning, nil 848 }, func(err error) { 849 last := upd.Last() 850 t.Fatalf("err: %v %#v %#v", err, last, last.TaskStates["web"]) 851 }) 852 853 // Destroy and wait 854 ar2.Destroy() 855 start := time.Now() 856 857 testutil.WaitForResult(func() (bool, error) { 858 alloc := ar2.Alloc() 859 if alloc.ClientStatus != structs.AllocClientStatusComplete { 860 return false, fmt.Errorf("Bad client status; got %v; want %v", alloc.ClientStatus, structs.AllocClientStatusComplete) 861 } 862 return true, nil 863 }, func(err error) { 864 last := upd.Last() 865 t.Fatalf("err: %v %#v %#v", err, last, last.TaskStates) 866 }) 867 868 if time.Since(start) > time.Duration(testutil.TestMultiplier()*5)*time.Second { 869 t.Fatalf("took too long to terminate") 870 } 871 } 872 873 func TestAllocRunner_SaveRestoreState_TerminalAlloc(t *testing.T) { 874 t.Parallel() 875 upd, ar := TestAllocRunner(t, false) 876 ar.logger = testlog.WithPrefix(t, "ar1: ") 877 878 // Ensure task takes some time 879 ar.alloc.Job.TaskGroups[0].Tasks[0].Driver = "mock_driver" 880 task := ar.alloc.Job.TaskGroups[0].Tasks[0] 881 task.Config["run_for"] = "10s" 882 go ar.Run() 883 defer ar.Destroy() 884 885 testutil.WaitForResult(func() (bool, error) { 886 last := upd.Last() 887 if last == nil { 888 return false, fmt.Errorf("No updates") 889 } 890 891 if last.ClientStatus != structs.AllocClientStatusRunning { 892 return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusRunning) 893 } 894 return true, nil 895 }, func(err error) { 896 t.Fatalf("err: %v", err) 897 }) 898 899 // Update the alloc to be terminal which should cause the alloc runner to 900 // stop the tasks and wait for a destroy. 901 update := ar.alloc.Copy() 902 update.DesiredStatus = structs.AllocDesiredStatusStop 903 ar.Update(update) 904 905 testutil.WaitForResult(func() (bool, error) { 906 return ar.Alloc().DesiredStatus == structs.AllocDesiredStatusStop, nil 907 }, func(err error) { 908 t.Fatalf("err: %v", err) 909 }) 910 911 err := ar.SaveState() 912 if err != nil { 913 t.Fatalf("err: %v", err) 914 } 915 916 // Ensure ar1 doesn't recreate the state file 917 ar.allocLock.Lock() 918 defer ar.allocLock.Unlock() 919 920 // Create a new alloc runner 921 l2 := testlog.WithPrefix(t, "ar2: ") 922 alloc2 := &structs.Allocation{ID: ar.alloc.ID} 923 prevAlloc := NewAllocWatcher(alloc2, ar, nil, ar.config, l2, "") 924 ar2 := NewAllocRunner(l2, ar.config, ar.stateDB, upd.Update, 925 alloc2, ar.vaultClient, ar.consulClient, prevAlloc) 926 err = ar2.RestoreState() 927 if err != nil { 928 t.Fatalf("err: %v", err) 929 } 930 ar2.logger.Println("[TESTING] running second alloc runner") 931 go ar2.Run() 932 defer ar2.Destroy() // Just-in-case of failure before Destroy below 933 934 testutil.WaitForResult(func() (bool, error) { 935 // Check the state still exists 936 if err := ar.stateDB.View(func(tx *bolt.Tx) error { 937 if !allocationBucketExists(tx, ar2.Alloc().ID) { 938 return fmt.Errorf("no bucket for alloc") 939 } 940 941 return nil 942 }); err != nil { 943 return false, fmt.Errorf("state destroyed") 944 } 945 946 // Check the alloc directory still exists 947 if _, err := os.Stat(ar.allocDir.AllocDir); err != nil { 948 return false, fmt.Errorf("alloc dir destroyed: %v", ar.allocDir.AllocDir) 949 } 950 951 return true, nil 952 }, func(err error) { 953 last := upd.Last() 954 t.Fatalf("err: %v %#v %#v", err, last, last.TaskStates) 955 }) 956 957 // Send the destroy signal and ensure the AllocRunner cleans up. 958 ar2.logger.Println("[TESTING] destroying second alloc runner") 959 ar2.Destroy() 960 961 testutil.WaitForResult(func() (bool, error) { 962 last := upd.Last() 963 if last == nil { 964 return false, fmt.Errorf("No updates") 965 } 966 967 // Check the status has changed. 968 if last.ClientStatus != structs.AllocClientStatusComplete { 969 return false, fmt.Errorf("got client status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete) 970 } 971 972 // Check the state was cleaned 973 if err := ar.stateDB.View(func(tx *bolt.Tx) error { 974 if allocationBucketExists(tx, ar2.Alloc().ID) { 975 return fmt.Errorf("bucket for alloc exists") 976 } 977 978 return nil 979 }); err != nil { 980 return false, fmt.Errorf("state not destroyed") 981 } 982 983 // Check the alloc directory was cleaned 984 if _, err := os.Stat(ar.allocDir.AllocDir); err == nil { 985 return false, fmt.Errorf("alloc dir still exists: %v", ar.allocDir.AllocDir) 986 } else if !os.IsNotExist(err) { 987 return false, fmt.Errorf("stat err: %v", err) 988 } 989 990 return true, nil 991 }, func(err error) { 992 t.Fatalf("err: %v", err) 993 }) 994 } 995 996 func TestAllocRunner_TaskFailed_KillTG(t *testing.T) { 997 t.Parallel() 998 upd, ar := TestAllocRunner(t, false) 999 1000 // Create two tasks in the task group 1001 task := ar.alloc.Job.TaskGroups[0].Tasks[0] 1002 task.Driver = "mock_driver" 1003 task.KillTimeout = 10 * time.Millisecond 1004 task.Config = map[string]interface{}{ 1005 "run_for": "10s", 1006 } 1007 1008 task2 := ar.alloc.Job.TaskGroups[0].Tasks[0].Copy() 1009 task2.Name = "task 2" 1010 task2.Driver = "mock_driver" 1011 task2.Config = map[string]interface{}{ 1012 "start_error": "fail task please", 1013 } 1014 ar.alloc.Job.TaskGroups[0].Tasks = append(ar.alloc.Job.TaskGroups[0].Tasks, task2) 1015 ar.alloc.TaskResources[task2.Name] = task2.Resources 1016 go ar.Run() 1017 defer ar.Destroy() 1018 1019 testutil.WaitForResult(func() (bool, error) { 1020 last := upd.Last() 1021 if last == nil { 1022 return false, fmt.Errorf("No updates") 1023 } 1024 if last.ClientStatus != structs.AllocClientStatusFailed { 1025 return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusFailed) 1026 } 1027 1028 // Task One should be killed 1029 state1 := last.TaskStates[task.Name] 1030 if state1.State != structs.TaskStateDead { 1031 return false, fmt.Errorf("got state %v; want %v", state1.State, structs.TaskStateDead) 1032 } 1033 if len(state1.Events) < 2 { 1034 // At least have a received and destroyed 1035 return false, fmt.Errorf("Unexpected number of events") 1036 } 1037 1038 found := false 1039 for _, e := range state1.Events { 1040 if e.Type != structs.TaskSiblingFailed { 1041 found = true 1042 } 1043 } 1044 1045 if !found { 1046 return false, fmt.Errorf("Did not find event %v", structs.TaskSiblingFailed) 1047 } 1048 1049 // Task Two should be failed 1050 state2 := last.TaskStates[task2.Name] 1051 if state2.State != structs.TaskStateDead { 1052 return false, fmt.Errorf("got state %v; want %v", state2.State, structs.TaskStateDead) 1053 } 1054 if !state2.Failed { 1055 return false, fmt.Errorf("task2 should have failed") 1056 } 1057 1058 return true, nil 1059 }, func(err error) { 1060 t.Fatalf("err: %v", err) 1061 }) 1062 } 1063 1064 func TestAllocRunner_TaskLeader_KillTG(t *testing.T) { 1065 t.Parallel() 1066 upd, ar := TestAllocRunner(t, false) 1067 1068 // Create two tasks in the task group 1069 task := ar.alloc.Job.TaskGroups[0].Tasks[0] 1070 task.Driver = "mock_driver" 1071 task.KillTimeout = 10 * time.Millisecond 1072 task.Config = map[string]interface{}{ 1073 "run_for": "10s", 1074 } 1075 1076 task2 := ar.alloc.Job.TaskGroups[0].Tasks[0].Copy() 1077 task2.Name = "task 2" 1078 task2.Driver = "mock_driver" 1079 task2.Leader = true 1080 task2.Config = map[string]interface{}{ 1081 "run_for": "1s", 1082 } 1083 ar.alloc.Job.TaskGroups[0].Tasks = append(ar.alloc.Job.TaskGroups[0].Tasks, task2) 1084 ar.alloc.TaskResources[task2.Name] = task2.Resources 1085 go ar.Run() 1086 defer ar.Destroy() 1087 1088 testutil.WaitForResult(func() (bool, error) { 1089 last := upd.Last() 1090 if last == nil { 1091 return false, fmt.Errorf("No updates") 1092 } 1093 if last.ClientStatus != structs.AllocClientStatusComplete { 1094 return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete) 1095 } 1096 1097 // Task One should be killed 1098 state1 := last.TaskStates[task.Name] 1099 if state1.State != structs.TaskStateDead { 1100 return false, fmt.Errorf("got state %v; want %v", state1.State, structs.TaskStateDead) 1101 } 1102 if state1.FinishedAt.IsZero() || state1.StartedAt.IsZero() { 1103 return false, fmt.Errorf("expected to have a start and finish time") 1104 } 1105 if len(state1.Events) < 2 { 1106 // At least have a received and destroyed 1107 return false, fmt.Errorf("Unexpected number of events") 1108 } 1109 1110 found := false 1111 for _, e := range state1.Events { 1112 if e.Type != structs.TaskLeaderDead { 1113 found = true 1114 } 1115 } 1116 1117 if !found { 1118 return false, fmt.Errorf("Did not find event %v", structs.TaskLeaderDead) 1119 } 1120 1121 // Task Two should be dead 1122 state2 := last.TaskStates[task2.Name] 1123 if state2.State != structs.TaskStateDead { 1124 return false, fmt.Errorf("got state %v; want %v", state2.State, structs.TaskStateDead) 1125 } 1126 if state2.FinishedAt.IsZero() || state2.StartedAt.IsZero() { 1127 return false, fmt.Errorf("expected to have a start and finish time") 1128 } 1129 1130 return true, nil 1131 }, func(err error) { 1132 t.Fatalf("err: %v", err) 1133 }) 1134 } 1135 1136 // TestAllocRunner_TaskLeader_StopTG asserts that when stopping a task group 1137 // with a leader the leader is stopped before other tasks. 1138 func TestAllocRunner_TaskLeader_StopTG(t *testing.T) { 1139 t.Parallel() 1140 upd, ar := TestAllocRunner(t, false) 1141 1142 // Create 3 tasks in the task group 1143 task := ar.alloc.Job.TaskGroups[0].Tasks[0] 1144 task.Name = "follower1" 1145 task.Driver = "mock_driver" 1146 task.KillTimeout = 10 * time.Millisecond 1147 task.Config = map[string]interface{}{ 1148 "run_for": "10s", 1149 } 1150 1151 task2 := ar.alloc.Job.TaskGroups[0].Tasks[0].Copy() 1152 task2.Name = "leader" 1153 task2.Driver = "mock_driver" 1154 task2.Leader = true 1155 task2.KillTimeout = 10 * time.Millisecond 1156 task2.Config = map[string]interface{}{ 1157 "run_for": "10s", 1158 } 1159 1160 task3 := ar.alloc.Job.TaskGroups[0].Tasks[0].Copy() 1161 task3.Name = "follower2" 1162 task3.Driver = "mock_driver" 1163 task3.KillTimeout = 10 * time.Millisecond 1164 task3.Config = map[string]interface{}{ 1165 "run_for": "10s", 1166 } 1167 ar.alloc.Job.TaskGroups[0].Tasks = append(ar.alloc.Job.TaskGroups[0].Tasks, task2, task3) 1168 ar.alloc.TaskResources[task2.Name] = task2.Resources 1169 defer ar.Destroy() 1170 1171 go ar.Run() 1172 1173 // Wait for tasks to start 1174 last := upd.Last() 1175 testutil.WaitForResult(func() (bool, error) { 1176 last = upd.Last() 1177 if last == nil { 1178 return false, fmt.Errorf("No updates") 1179 } 1180 if n := len(last.TaskStates); n != 3 { 1181 return false, fmt.Errorf("Not enough task states (want: 3; found %d)", n) 1182 } 1183 for name, state := range last.TaskStates { 1184 if state.State != structs.TaskStateRunning { 1185 return false, fmt.Errorf("Task %q is not running yet (it's %q)", name, state.State) 1186 } 1187 } 1188 return true, nil 1189 }, func(err error) { 1190 t.Fatalf("err: %v", err) 1191 }) 1192 1193 // Reset updates 1194 upd.mu.Lock() 1195 upd.Allocs = upd.Allocs[:0] 1196 upd.mu.Unlock() 1197 1198 // Stop alloc 1199 update := ar.Alloc() 1200 update.DesiredStatus = structs.AllocDesiredStatusStop 1201 ar.Update(update) 1202 1203 // Wait for tasks to stop 1204 testutil.WaitForResult(func() (bool, error) { 1205 last := upd.Last() 1206 if last == nil { 1207 return false, fmt.Errorf("No updates") 1208 } 1209 if last.TaskStates["leader"].FinishedAt.UnixNano() >= last.TaskStates["follower1"].FinishedAt.UnixNano() { 1210 return false, fmt.Errorf("expected leader to finish before follower1: %s >= %s", 1211 last.TaskStates["leader"].FinishedAt, last.TaskStates["follower1"].FinishedAt) 1212 } 1213 if last.TaskStates["leader"].FinishedAt.UnixNano() >= last.TaskStates["follower2"].FinishedAt.UnixNano() { 1214 return false, fmt.Errorf("expected leader to finish before follower2: %s >= %s", 1215 last.TaskStates["leader"].FinishedAt, last.TaskStates["follower2"].FinishedAt) 1216 } 1217 return true, nil 1218 }, func(err error) { 1219 last := upd.Last() 1220 for name, state := range last.TaskStates { 1221 t.Logf("%s: %s", name, state.State) 1222 } 1223 t.Fatalf("err: %v", err) 1224 }) 1225 } 1226 1227 // TestAllocRunner_TaskLeader_StopRestoredTG asserts that when stopping a 1228 // restored task group with a leader that failed before restoring the leader is 1229 // not stopped as it does not exist. 1230 // See https://github.com/hashicorp/nomad/issues/3420#issuecomment-341666932 1231 func TestAllocRunner_TaskLeader_StopRestoredTG(t *testing.T) { 1232 t.Skip("Skipping because the functionality being tested doesn't exist") 1233 t.Parallel() 1234 _, ar := TestAllocRunner(t, false) 1235 defer ar.Destroy() 1236 1237 // Create a leader and follower task in the task group 1238 task := ar.alloc.Job.TaskGroups[0].Tasks[0] 1239 task.Name = "follower1" 1240 task.Driver = "mock_driver" 1241 task.KillTimeout = 10 * time.Second 1242 task.Config = map[string]interface{}{ 1243 "run_for": "10s", 1244 } 1245 1246 task2 := ar.alloc.Job.TaskGroups[0].Tasks[0].Copy() 1247 task2.Name = "leader" 1248 task2.Driver = "mock_driver" 1249 task2.Leader = true 1250 task2.KillTimeout = 10 * time.Millisecond 1251 task2.Config = map[string]interface{}{ 1252 "run_for": "0s", 1253 } 1254 1255 ar.alloc.Job.TaskGroups[0].Tasks = append(ar.alloc.Job.TaskGroups[0].Tasks, task2) 1256 ar.alloc.TaskResources[task2.Name] = task2.Resources 1257 1258 // Mimic Nomad exiting before the leader stopping is able to stop other tasks. 1259 ar.tasks = map[string]*taskrunner.TaskRunner{ 1260 "leader": taskrunner.NewTaskRunner(ar.logger, ar.config, ar.stateDB, ar.setTaskState, 1261 ar.allocDir.NewTaskDir(task2.Name), ar.Alloc(), task2.Copy(), 1262 ar.vaultClient, ar.consulClient), 1263 "follower1": taskrunner.NewTaskRunner(ar.logger, ar.config, ar.stateDB, ar.setTaskState, 1264 ar.allocDir.NewTaskDir(task.Name), ar.Alloc(), task.Copy(), 1265 ar.vaultClient, ar.consulClient), 1266 } 1267 ar.taskStates = map[string]*structs.TaskState{ 1268 "leader": {State: structs.TaskStateDead}, 1269 "follower1": {State: structs.TaskStateRunning}, 1270 } 1271 if err := ar.SaveState(); err != nil { 1272 t.Fatalf("error saving state: %v", err) 1273 } 1274 1275 // Create a new AllocRunner to test RestoreState and Run 1276 upd2 := &MockAllocStateUpdater{} 1277 ar2 := NewAllocRunner(ar.logger, ar.config, ar.stateDB, upd2.Update, ar.alloc, 1278 ar.vaultClient, ar.consulClient, ar.prevAlloc) 1279 defer ar2.Destroy() 1280 1281 if err := ar2.RestoreState(); err != nil { 1282 t.Fatalf("error restoring state: %v", err) 1283 } 1284 go ar2.Run() 1285 1286 // Wait for tasks to be stopped because leader is dead 1287 testutil.WaitForResult(func() (bool, error) { 1288 alloc := ar2.Alloc() 1289 for task, state := range alloc.TaskStates { 1290 if state.State != structs.TaskStateDead { 1291 return false, fmt.Errorf("Task %q should be dead: %v", task, state.State) 1292 } 1293 } 1294 return true, nil 1295 }, func(err error) { 1296 t.Fatalf("err: %v", err) 1297 }) 1298 1299 // Make sure it GCs properly 1300 ar2.Destroy() 1301 1302 select { 1303 case <-ar2.WaitCh(): 1304 // exited as expected 1305 case <-time.After(10 * time.Second): 1306 t.Fatalf("timed out waiting for AR to GC") 1307 } 1308 } 1309 1310 // TestAllocRunner_MoveAllocDir asserts that a file written to an alloc's 1311 // local/ dir will be moved to a replacement alloc's local/ dir if sticky 1312 // volumes is on. 1313 func TestAllocRunner_MoveAllocDir(t *testing.T) { 1314 t.Parallel() 1315 // Create an alloc runner 1316 alloc := mock.Alloc() 1317 task := alloc.Job.TaskGroups[0].Tasks[0] 1318 task.Driver = "mock_driver" 1319 task.Config = map[string]interface{}{ 1320 "run_for": "1s", 1321 } 1322 upd, ar := TestAllocRunnerFromAlloc(t, alloc, false) 1323 go ar.Run() 1324 defer ar.Destroy() 1325 1326 testutil.WaitForResult(func() (bool, error) { 1327 last := upd.Last() 1328 if last == nil { 1329 return false, fmt.Errorf("No updates") 1330 } 1331 if last.ClientStatus != structs.AllocClientStatusComplete { 1332 return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete) 1333 } 1334 return true, nil 1335 }, func(err error) { 1336 t.Fatalf("err: %v", err) 1337 }) 1338 1339 // Write some data in data dir and task dir of the alloc 1340 dataFile := filepath.Join(ar.allocDir.SharedDir, "data", "data_file") 1341 ioutil.WriteFile(dataFile, []byte("hello world"), os.ModePerm) 1342 taskDir := ar.allocDir.TaskDirs[task.Name] 1343 taskLocalFile := filepath.Join(taskDir.LocalDir, "local_file") 1344 ioutil.WriteFile(taskLocalFile, []byte("good bye world"), os.ModePerm) 1345 1346 // Create another alloc runner 1347 alloc2 := mock.Alloc() 1348 alloc2.PreviousAllocation = ar.allocID 1349 alloc2.Job.TaskGroups[0].EphemeralDisk.Sticky = true 1350 task = alloc2.Job.TaskGroups[0].Tasks[0] 1351 task.Driver = "mock_driver" 1352 task.Config = map[string]interface{}{ 1353 "run_for": "1s", 1354 } 1355 upd2, ar2 := TestAllocRunnerFromAlloc(t, alloc2, false) 1356 1357 // Set prevAlloc like Client does 1358 ar2.prevAlloc = NewAllocWatcher(alloc2, ar, nil, ar2.config, ar2.logger, "") 1359 1360 go ar2.Run() 1361 defer ar2.Destroy() 1362 1363 testutil.WaitForResult(func() (bool, error) { 1364 last := upd2.Last() 1365 if last == nil { 1366 return false, fmt.Errorf("No updates") 1367 } 1368 if last.ClientStatus != structs.AllocClientStatusComplete { 1369 return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete) 1370 } 1371 return true, nil 1372 }, func(err error) { 1373 t.Fatalf("err: %v", err) 1374 }) 1375 1376 // Ensure that data from ar was moved to ar2 1377 taskDir = ar2.allocDir.TaskDirs[task.Name] 1378 taskLocalFile = filepath.Join(taskDir.LocalDir, "local_file") 1379 if fileInfo, _ := os.Stat(taskLocalFile); fileInfo == nil { 1380 t.Fatalf("file %v not found", taskLocalFile) 1381 } 1382 1383 dataFile = filepath.Join(ar2.allocDir.SharedDir, "data", "data_file") 1384 if fileInfo, _ := os.Stat(dataFile); fileInfo == nil { 1385 t.Fatalf("file %v not found", dataFile) 1386 } 1387 }