package allocrunner

import (
	"errors"
	"fmt"
	"io/ioutil"
	"os"
	"path/filepath"
	"testing"
	"time"

	"github.com/hashicorp/consul/api"
	multierror "github.com/hashicorp/go-multierror"
	"github.com/hashicorp/nomad/ci"
	"github.com/hashicorp/nomad/client/allochealth"
	"github.com/hashicorp/nomad/client/allocrunner/tasklifecycle"
	"github.com/hashicorp/nomad/client/allocrunner/taskrunner"
	"github.com/hashicorp/nomad/client/allocwatcher"
	"github.com/hashicorp/nomad/client/serviceregistration"
	regMock "github.com/hashicorp/nomad/client/serviceregistration/mock"
	"github.com/hashicorp/nomad/client/state"
	"github.com/hashicorp/nomad/helper/uuid"
	"github.com/hashicorp/nomad/nomad/mock"
	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/hashicorp/nomad/testutil"
	"github.com/stretchr/testify/require"
)

// destroy does a blocking destroy on an alloc runner
func destroy(ar *allocRunner) {
	ar.Destroy()
	<-ar.DestroyCh()
}

// TestAllocRunner_AllocState_Initialized asserts that TaskStates returned by
// AllocState() are initialized even before the AllocRunner has run.
func TestAllocRunner_AllocState_Initialized(t *testing.T) {
	ci.Parallel(t)

	alloc := mock.Alloc()
	alloc.Job.TaskGroups[0].Tasks[0].Driver = "mock_driver"
	conf, cleanup := testAllocRunnerConfig(t, alloc)
	defer cleanup()

	ar, err := NewAllocRunner(conf)
	require.NoError(t, err)

	allocState := ar.AllocState()

	require.NotNil(t, allocState)
	require.NotNil(t, allocState.TaskStates[conf.Alloc.Job.TaskGroups[0].Tasks[0].Name])
}

// TestAllocRunner_TaskLeader_KillTG asserts that when a leader task dies the
// entire task group is killed.
func TestAllocRunner_TaskLeader_KillTG(t *testing.T) {
	ci.Parallel(t)

	alloc := mock.BatchAlloc()
	tr := alloc.AllocatedResources.Tasks[alloc.Job.TaskGroups[0].Tasks[0].Name]
	alloc.Job.TaskGroups[0].RestartPolicy.Attempts = 0
	alloc.Job.TaskGroups[0].Tasks[0].RestartPolicy.Attempts = 0

	// Create two tasks in the task group
	task := alloc.Job.TaskGroups[0].Tasks[0]
	task.Name = "task1"
	task.Driver = "mock_driver"
	task.KillTimeout = 10 * time.Millisecond
	task.Config = map[string]interface{}{
		"run_for": "10s",
	}

	task2 := alloc.Job.TaskGroups[0].Tasks[0].Copy()
	task2.Name = "task2"
	task2.Driver = "mock_driver"
	task2.Leader = true
	task2.Config = map[string]interface{}{
		"run_for": "1s",
	}
	alloc.Job.TaskGroups[0].Tasks = append(alloc.Job.TaskGroups[0].Tasks, task2)
	alloc.AllocatedResources.Tasks[task.Name] = tr
	alloc.AllocatedResources.Tasks[task2.Name] = tr

	conf, cleanup := testAllocRunnerConfig(t, alloc)
	defer cleanup()
	ar, err := NewAllocRunner(conf)
	require.NoError(t, err)
	defer destroy(ar)
	go ar.Run()

	// Wait for all tasks to be killed
	upd := conf.StateUpdater.(*MockStateUpdater)
	testutil.WaitForResult(func() (bool, error) {
		last := upd.Last()
		if last == nil {
			return false, fmt.Errorf("No updates")
		}
		if last.ClientStatus != structs.AllocClientStatusComplete {
			return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete)
		}

		// Task1 should be killed because Task2 exited
		state1 := last.TaskStates[task.Name]
		if state1.State != structs.TaskStateDead {
			return false, fmt.Errorf("got state %v; want %v", state1.State, structs.TaskStateDead)
		}
		if state1.FinishedAt.IsZero() || state1.StartedAt.IsZero() {
			return false, fmt.Errorf("expected to have a start and finish time")
		}
		if len(state1.Events) < 2 {
			// At least have a received and destroyed
			return false, fmt.Errorf("Unexpected number of events")
		}

		found := false
		killingMsg := ""
		for _, e := range state1.Events {
			if e.Type == structs.TaskLeaderDead {
				found = true
			}
			if e.Type == structs.TaskKilling {
				killingMsg = e.DisplayMessage
			}
		}

		if !found {
			return false, fmt.Errorf("Did not find event %v", structs.TaskLeaderDead)
		}

		expectedKillingMsg := "Sent interrupt. Waiting 10ms before force killing"
		if killingMsg != expectedKillingMsg {
			return false, fmt.Errorf("Unexpected task event message - wanted %q. got %q", expectedKillingMsg, killingMsg)
		}

		// Task Two should be dead
		state2 := last.TaskStates[task2.Name]
		if state2.State != structs.TaskStateDead {
			return false, fmt.Errorf("got state %v; want %v", state2.State, structs.TaskStateDead)
		}
		if state2.FinishedAt.IsZero() || state2.StartedAt.IsZero() {
			return false, fmt.Errorf("expected to have a start and finish time")
		}

		return true, nil
	}, func(err error) {
		t.Fatalf("err: %v", err)
	})
}

// TestAllocRunner_Lifecycle_Poststart asserts that a service job with 2
// poststart lifecycle hooks (1 sidecar, 1 ephemeral) starts all 3 tasks, only
// the ephemeral one finishes, and the other 2 exit when the alloc is stopped.
func TestAllocRunner_Lifecycle_Poststart(t *testing.T) {
	alloc := mock.LifecycleAlloc()

	alloc.Job.Type = structs.JobTypeService
	mainTask := alloc.Job.TaskGroups[0].Tasks[0]
	mainTask.Config["run_for"] = "100s"

	sidecarTask := alloc.Job.TaskGroups[0].Tasks[1]
	sidecarTask.Lifecycle.Hook = structs.TaskLifecycleHookPoststart
	sidecarTask.Config["run_for"] = "100s"

	ephemeralTask := alloc.Job.TaskGroups[0].Tasks[2]
	ephemeralTask.Lifecycle.Hook = structs.TaskLifecycleHookPoststart

	conf, cleanup := testAllocRunnerConfig(t, alloc)
	defer cleanup()
	ar, err := NewAllocRunner(conf)
	require.NoError(t, err)
	defer destroy(ar)
	go ar.Run()

	upd := conf.StateUpdater.(*MockStateUpdater)

	// Wait for main and sidecar tasks to be running, and that the
	// ephemeral task ran and exited.
	testutil.WaitForResult(func() (bool, error) {
		last := upd.Last()
		if last == nil {
			return false, fmt.Errorf("No updates")
		}

		if last.ClientStatus != structs.AllocClientStatusRunning {
			return false, fmt.Errorf("expected alloc to be running not %s", last.ClientStatus)
		}

		if s := last.TaskStates[mainTask.Name].State; s != structs.TaskStateRunning {
			return false, fmt.Errorf("expected main task to be running not %s", s)
		}

		if s := last.TaskStates[sidecarTask.Name].State; s != structs.TaskStateRunning {
			return false, fmt.Errorf("expected sidecar task to be running not %s", s)
		}

		if s := last.TaskStates[ephemeralTask.Name].State; s != structs.TaskStateDead {
			return false, fmt.Errorf("expected ephemeral task to be dead not %s", s)
		}

		if last.TaskStates[ephemeralTask.Name].Failed {
			return false, fmt.Errorf("expected ephemeral task to be successful not failed")
		}

		return true, nil
	}, func(err error) {
		t.Fatalf("error waiting for initial state:\n%v", err)
	})

	// Tell the alloc to stop
	stopAlloc := alloc.Copy()
	stopAlloc.DesiredStatus = structs.AllocDesiredStatusStop
	ar.Update(stopAlloc)

	// Wait for main and sidecar tasks to stop.
	testutil.WaitForResult(func() (bool, error) {
		last := upd.Last()

		if last.ClientStatus != structs.AllocClientStatusComplete {
			return false, fmt.Errorf("expected alloc to be complete not %s", last.ClientStatus)
		}

		if s := last.TaskStates[mainTask.Name].State; s != structs.TaskStateDead {
			return false, fmt.Errorf("expected main task to be dead not %s", s)
		}

		if last.TaskStates[mainTask.Name].Failed {
			return false, fmt.Errorf("expected main task to be successful not failed")
		}

		if s := last.TaskStates[sidecarTask.Name].State; s != structs.TaskStateDead {
			return false, fmt.Errorf("expected sidecar task to be dead not %s", s)
		}

		if last.TaskStates[sidecarTask.Name].Failed {
			return false, fmt.Errorf("expected sidecar task to be successful not failed")
		}

		return true, nil
	}, func(err error) {
		t.Fatalf("error waiting for final state:\n%v", err)
	})
}

// TestAllocRunner_TaskMain_KillTG asserts that when main tasks die the
// entire task group is killed.
func TestAllocRunner_TaskMain_KillTG(t *testing.T) {
	ci.Parallel(t)

	alloc := mock.BatchAlloc()
	tr := alloc.AllocatedResources.Tasks[alloc.Job.TaskGroups[0].Tasks[0].Name]
	alloc.Job.TaskGroups[0].RestartPolicy.Attempts = 0
	alloc.Job.TaskGroups[0].Tasks[0].RestartPolicy.Attempts = 0

	// Create four tasks in the task group
	prestart := alloc.Job.TaskGroups[0].Tasks[0].Copy()
	prestart.Name = "prestart-sidecar"
	prestart.Driver = "mock_driver"
	prestart.KillTimeout = 10 * time.Millisecond
	prestart.Lifecycle = &structs.TaskLifecycleConfig{
		Hook:    structs.TaskLifecycleHookPrestart,
		Sidecar: true,
	}

	prestart.Config = map[string]interface{}{
		"run_for": "100s",
	}

	poststart := alloc.Job.TaskGroups[0].Tasks[0].Copy()
	poststart.Name = "poststart-sidecar"
	poststart.Driver = "mock_driver"
	poststart.KillTimeout = 10 * time.Millisecond
	poststart.Lifecycle = &structs.TaskLifecycleConfig{
		Hook:    structs.TaskLifecycleHookPoststart,
		Sidecar: true,
	}

	poststart.Config = map[string]interface{}{
		"run_for": "100s",
	}

	// these two main tasks have the same name, is that ok?
	main1 := alloc.Job.TaskGroups[0].Tasks[0].Copy()
	main1.Name = "task2"
	main1.Driver = "mock_driver"
	main1.Config = map[string]interface{}{
		"run_for": "1s",
	}

	main2 := alloc.Job.TaskGroups[0].Tasks[0].Copy()
	main2.Name = "task2"
	main2.Driver = "mock_driver"
	main2.Config = map[string]interface{}{
		"run_for": "2s",
	}

	alloc.Job.TaskGroups[0].Tasks = []*structs.Task{prestart, poststart, main1, main2}
	alloc.AllocatedResources.Tasks = map[string]*structs.AllocatedTaskResources{
		prestart.Name:  tr,
		poststart.Name: tr,
		main1.Name:     tr,
		main2.Name:     tr,
	}

	conf, cleanup := testAllocRunnerConfig(t, alloc)
	defer cleanup()
	ar, err := NewAllocRunner(conf)
	require.NoError(t, err)
	defer destroy(ar)
	go ar.Run()

	hasTaskMainEvent := func(state *structs.TaskState) bool {
		for _, e := range state.Events {
			if e.Type == structs.TaskMainDead {
				return true
			}
		}

		return false
	}

	// Wait for all tasks to be killed
	upd := conf.StateUpdater.(*MockStateUpdater)
	testutil.WaitForResult(func() (bool, error) {
		last := upd.Last()
		if last == nil {
			return false, fmt.Errorf("No updates")
		}
		if last.ClientStatus != structs.AllocClientStatusComplete {
			return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete)
		}

		var state *structs.TaskState

		// both sidecars should be killed because Task2 exited
		state = last.TaskStates[prestart.Name]
		if state == nil {
			return false, fmt.Errorf("could not find state for task %s", prestart.Name)
		}
		if state.State != structs.TaskStateDead {
			return false, fmt.Errorf("got state %v; want %v", state.State, structs.TaskStateDead)
		}
		if state.FinishedAt.IsZero() || state.StartedAt.IsZero() {
			return false, fmt.Errorf("expected to have a start and finish time")
		}
		if len(state.Events) < 2 {
			// At least have a received and destroyed
			return false, fmt.Errorf("Unexpected number of events")
		}

		if !hasTaskMainEvent(state) {
			return false, fmt.Errorf("Did not find event %v: %#+v", structs.TaskMainDead, state.Events)
		}

		state = last.TaskStates[poststart.Name]
		if state == nil {
			return false, fmt.Errorf("could not find state for task %s", poststart.Name)
		}
		if state.State != structs.TaskStateDead {
			return false, fmt.Errorf("got state %v; want %v", state.State, structs.TaskStateDead)
		}
		if state.FinishedAt.IsZero() || state.StartedAt.IsZero() {
			return false, fmt.Errorf("expected to have a start and finish time")
		}
		if len(state.Events) < 2 {
			// At least have a received and destroyed
			return false, fmt.Errorf("Unexpected number of events")
		}

		if !hasTaskMainEvent(state) {
			return false, fmt.Errorf("Did not find event %v: %#+v", structs.TaskMainDead, state.Events)
		}

		// main tasks should die naturally
		state = last.TaskStates[main1.Name]
		if state.State != structs.TaskStateDead {
			return false, fmt.Errorf("got state %v; want %v", state.State, structs.TaskStateDead)
		}
		if state.FinishedAt.IsZero() || state.StartedAt.IsZero() {
			return false, fmt.Errorf("expected to have a start and finish time")
		}
		if hasTaskMainEvent(state) {
			return false, fmt.Errorf("unexpected event %v in %#+v", structs.TaskMainDead, state.Events)
		}

		state = last.TaskStates[main2.Name]
		if state.State != structs.TaskStateDead {
			return false, fmt.Errorf("got state %v; want %v", state.State, structs.TaskStateDead)
		}
		if state.FinishedAt.IsZero() || state.StartedAt.IsZero() {
			return false, fmt.Errorf("expected to have a start and finish time")
		}
		if hasTaskMainEvent(state) {
			return false, fmt.Errorf("unexpected event %v in %#+v", structs.TaskMainDead, state.Events)
		}

		return true, nil
	}, func(err error) {
		t.Fatalf("err: %v", err)
	})
}

// TestAllocRunner_Lifecycle_Poststop asserts that a service job with a
// poststop lifecycle hook keeps the poststop task pending while the main task
// runs, and only runs the poststop task once the alloc is stopped.
func TestAllocRunner_Lifecycle_Poststop(t *testing.T) {
	ci.Parallel(t)

	alloc := mock.LifecycleAlloc()
	tr := alloc.AllocatedResources.Tasks[alloc.Job.TaskGroups[0].Tasks[0].Name]

	alloc.Job.Type = structs.JobTypeService
	mainTask := alloc.Job.TaskGroups[0].Tasks[0]
	mainTask.Config["run_for"] = "100s"

	ephemeralTask := alloc.Job.TaskGroups[0].Tasks[1]
	ephemeralTask.Name = "quit"
	ephemeralTask.Lifecycle.Hook = structs.TaskLifecycleHookPoststop
	ephemeralTask.Config["run_for"] = "10s"

	alloc.Job.TaskGroups[0].Tasks = []*structs.Task{mainTask, ephemeralTask}
	alloc.AllocatedResources.Tasks = map[string]*structs.AllocatedTaskResources{
		mainTask.Name:      tr,
		ephemeralTask.Name: tr,
	}

	conf, cleanup := testAllocRunnerConfig(t, alloc)
	defer cleanup()
	ar, err := NewAllocRunner(conf)
	require.NoError(t, err)
	defer destroy(ar)
	go ar.Run()

	upd := conf.StateUpdater.(*MockStateUpdater)

	// Wait for main task to be running
	testutil.WaitForResult(func() (bool, error) {
		last := upd.Last()
		if last == nil {
			return false, fmt.Errorf("No updates")
		}

		if last.ClientStatus != structs.AllocClientStatusRunning {
			return false, fmt.Errorf("expected alloc to be running not %s", last.ClientStatus)
		}

		if s := last.TaskStates[mainTask.Name].State; s != structs.TaskStateRunning {
			return false, fmt.Errorf("expected main task to be running not %s", s)
		}

		if s := last.TaskStates[ephemeralTask.Name].State; s != structs.TaskStatePending {
			return false, fmt.Errorf("expected ephemeral task to be pending not %s", s)
		}

		return true, nil
	}, func(err error) {
		t.Fatalf("error waiting for initial state:\n%v", err)
	})

	// Tell the alloc to stop
	stopAlloc := alloc.Copy()
	stopAlloc.DesiredStatus = structs.AllocDesiredStatusStop
	ar.Update(stopAlloc)

	// Wait for main task to die & poststop task to run.
	testutil.WaitForResult(func() (bool, error) {
		last := upd.Last()

		if last.ClientStatus != structs.AllocClientStatusRunning {
			return false, fmt.Errorf("expected alloc to be running not %s", last.ClientStatus)
		}

		if s := last.TaskStates[mainTask.Name].State; s != structs.TaskStateDead {
			return false, fmt.Errorf("expected main task to be dead not %s", s)
		}

		if s := last.TaskStates[ephemeralTask.Name].State; s != structs.TaskStateRunning {
			return false, fmt.Errorf("expected poststop task to be running not %s", s)
		}

		return true, nil
	}, func(err error) {
		t.Fatalf("error waiting for final state:\n%v", err)
	})

}

func TestAllocRunner_Lifecycle_Restart(t *testing.T) {
	ci.Parallel(t)

	// test cases can use this default or override w/ taskDefs param
	alloc := mock.LifecycleAllocFromTasks([]mock.LifecycleTaskDef{
		{Name: "main", RunFor: "100s", ExitCode: 0, Hook: "", IsSidecar: false},
		{Name: "prestart-oneshot", RunFor: "1s", ExitCode: 0, Hook: "prestart", IsSidecar: false},
		{Name: "prestart-sidecar", RunFor: "100s", ExitCode: 0, Hook: "prestart", IsSidecar: true},
		{Name: "poststart-oneshot", RunFor: "1s", ExitCode: 0, Hook: "poststart", IsSidecar: false},
		{Name: "poststart-sidecar", RunFor: "100s", ExitCode: 0, Hook: "poststart", IsSidecar: true},
		{Name: "poststop", RunFor: "1s", ExitCode: 0, Hook: "poststop", IsSidecar: false},
	})
	alloc.Job.Type = structs.JobTypeService
	rp := &structs.RestartPolicy{
		Attempts: 1,
		Interval: 10 * time.Minute,
		Delay:    1 * time.Nanosecond,
		Mode:     structs.RestartPolicyModeFail,
	}

	ev := &structs.TaskEvent{Type: structs.TaskRestartSignal}

	testCases := []struct {
		name          string
		taskDefs      []mock.LifecycleTaskDef
		isBatch       bool
		hasLeader     bool
		action        func(*allocRunner, *structs.Allocation) error
		expectedErr   string
		expectedAfter map[string]structs.TaskState
	}{
		{
			name: "restart entire allocation",
			action: func(ar *allocRunner, alloc *structs.Allocation) error {
				return ar.RestartAll(ev)
			},
			expectedAfter: map[string]structs.TaskState{
				"main":              structs.TaskState{State: "running", Restarts: 1},
				"prestart-oneshot":  structs.TaskState{State: "dead", Restarts: 1},
				"prestart-sidecar":  structs.TaskState{State: "running", Restarts: 1},
				"poststart-oneshot": structs.TaskState{State: "dead", Restarts: 1},
				"poststart-sidecar": structs.TaskState{State: "running", Restarts: 1},
				"poststop":          structs.TaskState{State: "pending", Restarts: 0},
			},
		},
		{
			name: "restart only running tasks",
			action: func(ar *allocRunner, alloc *structs.Allocation) error {
				return ar.RestartRunning(ev)
			},
			expectedAfter: map[string]structs.TaskState{
				"main":              structs.TaskState{State: "running", Restarts: 1},
				"prestart-oneshot":  structs.TaskState{State: "dead", Restarts: 0},
				"prestart-sidecar":  structs.TaskState{State: "running", Restarts: 1},
				"poststart-oneshot": structs.TaskState{State: "dead", Restarts: 0},
				"poststart-sidecar": structs.TaskState{State: "running", Restarts: 1},
				"poststop":          structs.TaskState{State: "pending", Restarts: 0},
			},
		},
		{
			name: "batch job restart entire allocation",
			taskDefs: []mock.LifecycleTaskDef{
				{Name: "main", RunFor: "100s", ExitCode: 1, Hook: "", IsSidecar: false},
				{Name: "prestart-oneshot", RunFor: "1s", ExitCode: 0, Hook: "prestart", IsSidecar: false},
				{Name: "prestart-sidecar", RunFor: "100s", ExitCode: 0, Hook: "prestart", IsSidecar: true},
"prestart-sidecar", RunFor: "100s", ExitCode: 0, Hook: "prestart", IsSidecar: true}, 552 {Name: "poststart-oneshot", RunFor: "1s", ExitCode: 0, Hook: "poststart", IsSidecar: false}, 553 {Name: "poststart-sidecar", RunFor: "100s", ExitCode: 0, Hook: "poststart", IsSidecar: true}, 554 {Name: "poststop", RunFor: "1s", ExitCode: 0, Hook: "poststop", IsSidecar: false}, 555 }, 556 isBatch: true, 557 action: func(ar *allocRunner, alloc *structs.Allocation) error { 558 return ar.RestartAll(ev) 559 }, 560 expectedAfter: map[string]structs.TaskState{ 561 "main": structs.TaskState{State: "running", Restarts: 1}, 562 "prestart-oneshot": structs.TaskState{State: "dead", Restarts: 1}, 563 "prestart-sidecar": structs.TaskState{State: "running", Restarts: 1}, 564 "poststart-oneshot": structs.TaskState{State: "dead", Restarts: 1}, 565 "poststart-sidecar": structs.TaskState{State: "running", Restarts: 1}, 566 "poststop": structs.TaskState{State: "pending", Restarts: 0}, 567 }, 568 }, 569 { 570 name: "batch job restart only running tasks ", 571 taskDefs: []mock.LifecycleTaskDef{ 572 {Name: "main", RunFor: "100s", ExitCode: 1, Hook: "", IsSidecar: false}, 573 {Name: "prestart-oneshot", RunFor: "1s", ExitCode: 0, Hook: "prestart", IsSidecar: false}, 574 {Name: "prestart-sidecar", RunFor: "100s", ExitCode: 0, Hook: "prestart", IsSidecar: true}, 575 {Name: "poststart-oneshot", RunFor: "1s", ExitCode: 0, Hook: "poststart", IsSidecar: false}, 576 {Name: "poststart-sidecar", RunFor: "100s", ExitCode: 0, Hook: "poststart", IsSidecar: true}, 577 {Name: "poststop", RunFor: "1s", ExitCode: 0, Hook: "poststop", IsSidecar: false}, 578 }, 579 isBatch: true, 580 action: func(ar *allocRunner, alloc *structs.Allocation) error { 581 return ar.RestartRunning(ev) 582 }, 583 expectedAfter: map[string]structs.TaskState{ 584 "main": structs.TaskState{State: "running", Restarts: 1}, 585 "prestart-oneshot": structs.TaskState{State: "dead", Restarts: 0}, 586 "prestart-sidecar": structs.TaskState{State: "running", Restarts: 1}, 587 "poststart-oneshot": structs.TaskState{State: "dead", Restarts: 0}, 588 "poststart-sidecar": structs.TaskState{State: "running", Restarts: 1}, 589 "poststop": structs.TaskState{State: "pending", Restarts: 0}, 590 }, 591 }, 592 { 593 name: "restart entire allocation with leader", 594 hasLeader: true, 595 action: func(ar *allocRunner, alloc *structs.Allocation) error { 596 return ar.RestartAll(ev) 597 }, 598 expectedAfter: map[string]structs.TaskState{ 599 "main": structs.TaskState{State: "running", Restarts: 1}, 600 "prestart-oneshot": structs.TaskState{State: "dead", Restarts: 1}, 601 "prestart-sidecar": structs.TaskState{State: "running", Restarts: 1}, 602 "poststart-oneshot": structs.TaskState{State: "dead", Restarts: 1}, 603 "poststart-sidecar": structs.TaskState{State: "running", Restarts: 1}, 604 "poststop": structs.TaskState{State: "pending", Restarts: 0}, 605 }, 606 }, 607 { 608 name: "stop from server", 609 action: func(ar *allocRunner, alloc *structs.Allocation) error { 610 stopAlloc := alloc.Copy() 611 stopAlloc.DesiredStatus = structs.AllocDesiredStatusStop 612 ar.Update(stopAlloc) 613 return nil 614 }, 615 expectedAfter: map[string]structs.TaskState{ 616 "main": structs.TaskState{State: "dead", Restarts: 0}, 617 "prestart-oneshot": structs.TaskState{State: "dead", Restarts: 0}, 618 "prestart-sidecar": structs.TaskState{State: "dead", Restarts: 0}, 619 "poststart-oneshot": structs.TaskState{State: "dead", Restarts: 0}, 620 "poststart-sidecar": structs.TaskState{State: "dead", Restarts: 0}, 621 
"poststop": structs.TaskState{State: "dead", Restarts: 0}, 622 }, 623 }, 624 { 625 name: "restart main task", 626 action: func(ar *allocRunner, alloc *structs.Allocation) error { 627 return ar.RestartTask("main", ev) 628 }, 629 expectedAfter: map[string]structs.TaskState{ 630 "main": structs.TaskState{State: "running", Restarts: 1}, 631 "prestart-oneshot": structs.TaskState{State: "dead", Restarts: 0}, 632 "prestart-sidecar": structs.TaskState{State: "running", Restarts: 0}, 633 "poststart-oneshot": structs.TaskState{State: "dead", Restarts: 0}, 634 "poststart-sidecar": structs.TaskState{State: "running", Restarts: 0}, 635 "poststop": structs.TaskState{State: "pending", Restarts: 0}, 636 }, 637 }, 638 { 639 name: "restart leader main task", 640 hasLeader: true, 641 action: func(ar *allocRunner, alloc *structs.Allocation) error { 642 return ar.RestartTask("main", ev) 643 }, 644 expectedAfter: map[string]structs.TaskState{ 645 "main": structs.TaskState{State: "running", Restarts: 1}, 646 "prestart-oneshot": structs.TaskState{State: "dead", Restarts: 0}, 647 "prestart-sidecar": structs.TaskState{State: "running", Restarts: 0}, 648 "poststart-oneshot": structs.TaskState{State: "dead", Restarts: 0}, 649 "poststart-sidecar": structs.TaskState{State: "running", Restarts: 0}, 650 "poststop": structs.TaskState{State: "pending", Restarts: 0}, 651 }, 652 }, 653 { 654 name: "main task fails and restarts once", 655 taskDefs: []mock.LifecycleTaskDef{ 656 {Name: "main", RunFor: "2s", ExitCode: 1, Hook: "", IsSidecar: false}, 657 {Name: "prestart-oneshot", RunFor: "1s", ExitCode: 0, Hook: "prestart", IsSidecar: false}, 658 {Name: "prestart-sidecar", RunFor: "100s", ExitCode: 0, Hook: "prestart", IsSidecar: true}, 659 {Name: "poststart-oneshot", RunFor: "1s", ExitCode: 0, Hook: "poststart", IsSidecar: false}, 660 {Name: "poststart-sidecar", RunFor: "100s", ExitCode: 0, Hook: "poststart", IsSidecar: true}, 661 {Name: "poststop", RunFor: "1s", ExitCode: 0, Hook: "poststop", IsSidecar: false}, 662 }, 663 action: func(ar *allocRunner, alloc *structs.Allocation) error { 664 time.Sleep(3 * time.Second) // make sure main task has exited 665 return nil 666 }, 667 expectedAfter: map[string]structs.TaskState{ 668 "main": structs.TaskState{State: "dead", Restarts: 1}, 669 "prestart-oneshot": structs.TaskState{State: "dead", Restarts: 0}, 670 "prestart-sidecar": structs.TaskState{State: "dead", Restarts: 0}, 671 "poststart-oneshot": structs.TaskState{State: "dead", Restarts: 0}, 672 "poststart-sidecar": structs.TaskState{State: "dead", Restarts: 0}, 673 "poststop": structs.TaskState{State: "dead", Restarts: 0}, 674 }, 675 }, 676 { 677 name: "leader main task fails and restarts once", 678 taskDefs: []mock.LifecycleTaskDef{ 679 {Name: "main", RunFor: "2s", ExitCode: 1, Hook: "", IsSidecar: false}, 680 {Name: "prestart-oneshot", RunFor: "1s", ExitCode: 0, Hook: "prestart", IsSidecar: false}, 681 {Name: "prestart-sidecar", RunFor: "100s", ExitCode: 0, Hook: "prestart", IsSidecar: true}, 682 {Name: "poststart-oneshot", RunFor: "1s", ExitCode: 0, Hook: "poststart", IsSidecar: false}, 683 {Name: "poststart-sidecar", RunFor: "100s", ExitCode: 0, Hook: "poststart", IsSidecar: true}, 684 {Name: "poststop", RunFor: "1s", ExitCode: 0, Hook: "poststop", IsSidecar: false}, 685 }, 686 hasLeader: true, 687 action: func(ar *allocRunner, alloc *structs.Allocation) error { 688 time.Sleep(3 * time.Second) // make sure main task has exited 689 return nil 690 }, 691 expectedAfter: map[string]structs.TaskState{ 692 "main": 
structs.TaskState{State: "dead", Restarts: 1}, 693 "prestart-oneshot": structs.TaskState{State: "dead", Restarts: 0}, 694 "prestart-sidecar": structs.TaskState{State: "dead", Restarts: 0}, 695 "poststart-oneshot": structs.TaskState{State: "dead", Restarts: 0}, 696 "poststart-sidecar": structs.TaskState{State: "dead", Restarts: 0}, 697 "poststop": structs.TaskState{State: "dead", Restarts: 0}, 698 }, 699 }, 700 { 701 name: "main stopped unexpectedly and restarts once", 702 taskDefs: []mock.LifecycleTaskDef{ 703 {Name: "main", RunFor: "2s", ExitCode: 0, Hook: "", IsSidecar: false}, 704 {Name: "prestart-oneshot", RunFor: "1s", ExitCode: 0, Hook: "prestart", IsSidecar: false}, 705 {Name: "prestart-sidecar", RunFor: "100s", ExitCode: 0, Hook: "prestart", IsSidecar: true}, 706 {Name: "poststart-oneshot", RunFor: "1s", ExitCode: 0, Hook: "poststart", IsSidecar: false}, 707 {Name: "poststart-sidecar", RunFor: "100s", ExitCode: 0, Hook: "poststart", IsSidecar: true}, 708 {Name: "poststop", RunFor: "1s", ExitCode: 0, Hook: "poststop", IsSidecar: false}, 709 }, 710 action: func(ar *allocRunner, alloc *structs.Allocation) error { 711 time.Sleep(3 * time.Second) // make sure main task has exited 712 return nil 713 }, 714 expectedAfter: map[string]structs.TaskState{ 715 "main": structs.TaskState{State: "dead", Restarts: 1}, 716 "prestart-oneshot": structs.TaskState{State: "dead", Restarts: 0}, 717 "prestart-sidecar": structs.TaskState{State: "dead", Restarts: 0}, 718 "poststart-oneshot": structs.TaskState{State: "dead", Restarts: 0}, 719 "poststart-sidecar": structs.TaskState{State: "dead", Restarts: 0}, 720 "poststop": structs.TaskState{State: "dead", Restarts: 0}, 721 }, 722 }, 723 { 724 name: "leader main stopped unexpectedly and restarts once", 725 taskDefs: []mock.LifecycleTaskDef{ 726 {Name: "main", RunFor: "2s", ExitCode: 0, Hook: "", IsSidecar: false}, 727 {Name: "prestart-oneshot", RunFor: "1s", ExitCode: 0, Hook: "prestart", IsSidecar: false}, 728 {Name: "prestart-sidecar", RunFor: "100s", ExitCode: 0, Hook: "prestart", IsSidecar: true}, 729 {Name: "poststart-oneshot", RunFor: "1s", ExitCode: 0, Hook: "poststart", IsSidecar: false}, 730 {Name: "poststart-sidecar", RunFor: "100s", ExitCode: 0, Hook: "poststart", IsSidecar: true}, 731 {Name: "poststop", RunFor: "1s", ExitCode: 0, Hook: "poststop", IsSidecar: false}, 732 }, 733 action: func(ar *allocRunner, alloc *structs.Allocation) error { 734 time.Sleep(3 * time.Second) // make sure main task has exited 735 return nil 736 }, 737 expectedAfter: map[string]structs.TaskState{ 738 "main": structs.TaskState{State: "dead", Restarts: 1}, 739 "prestart-oneshot": structs.TaskState{State: "dead", Restarts: 0}, 740 "prestart-sidecar": structs.TaskState{State: "dead", Restarts: 0}, 741 "poststart-oneshot": structs.TaskState{State: "dead", Restarts: 0}, 742 "poststart-sidecar": structs.TaskState{State: "dead", Restarts: 0}, 743 "poststop": structs.TaskState{State: "dead", Restarts: 0}, 744 }, 745 }, 746 { 747 name: "failed main task cannot be restarted", 748 taskDefs: []mock.LifecycleTaskDef{ 749 {Name: "main", RunFor: "2s", ExitCode: 1, Hook: "", IsSidecar: false}, 750 {Name: "prestart-oneshot", RunFor: "1s", ExitCode: 0, Hook: "prestart", IsSidecar: false}, 751 {Name: "prestart-sidecar", RunFor: "100s", ExitCode: 0, Hook: "prestart", IsSidecar: true}, 752 {Name: "poststart-oneshot", RunFor: "1s", ExitCode: 0, Hook: "poststart", IsSidecar: false}, 753 {Name: "poststart-sidecar", RunFor: "100s", ExitCode: 0, Hook: "poststart", IsSidecar: true}, 754 {Name: 
"poststop", RunFor: "1s", ExitCode: 0, Hook: "poststop", IsSidecar: false}, 755 }, 756 action: func(ar *allocRunner, alloc *structs.Allocation) error { 757 // make sure main task has had a chance to restart once on its 758 // own and fail again before we try to manually restart it 759 time.Sleep(5 * time.Second) 760 return ar.RestartTask("main", ev) 761 }, 762 expectedErr: "Task not running", 763 expectedAfter: map[string]structs.TaskState{ 764 "main": structs.TaskState{State: "dead", Restarts: 1}, 765 "prestart-oneshot": structs.TaskState{State: "dead", Restarts: 0}, 766 "prestart-sidecar": structs.TaskState{State: "dead", Restarts: 0}, 767 "poststart-oneshot": structs.TaskState{State: "dead", Restarts: 0}, 768 "poststart-sidecar": structs.TaskState{State: "dead", Restarts: 0}, 769 "poststop": structs.TaskState{State: "dead", Restarts: 0}, 770 }, 771 }, 772 { 773 name: "restart prestart-sidecar task", 774 action: func(ar *allocRunner, alloc *structs.Allocation) error { 775 return ar.RestartTask("prestart-sidecar", ev) 776 }, 777 expectedAfter: map[string]structs.TaskState{ 778 "main": structs.TaskState{State: "running", Restarts: 0}, 779 "prestart-oneshot": structs.TaskState{State: "dead", Restarts: 0}, 780 "prestart-sidecar": structs.TaskState{State: "running", Restarts: 1}, 781 "poststart-oneshot": structs.TaskState{State: "dead", Restarts: 0}, 782 "poststart-sidecar": structs.TaskState{State: "running", Restarts: 0}, 783 "poststop": structs.TaskState{State: "pending", Restarts: 0}, 784 }, 785 }, 786 { 787 name: "restart poststart-sidecar task", 788 action: func(ar *allocRunner, alloc *structs.Allocation) error { 789 return ar.RestartTask("poststart-sidecar", ev) 790 }, 791 expectedAfter: map[string]structs.TaskState{ 792 "main": structs.TaskState{State: "running", Restarts: 0}, 793 "prestart-oneshot": structs.TaskState{State: "dead", Restarts: 0}, 794 "prestart-sidecar": structs.TaskState{State: "running", Restarts: 0}, 795 "poststart-oneshot": structs.TaskState{State: "dead", Restarts: 0}, 796 "poststart-sidecar": structs.TaskState{State: "running", Restarts: 1}, 797 "poststop": structs.TaskState{State: "pending", Restarts: 0}, 798 }, 799 }, 800 } 801 802 for _, tc := range testCases { 803 tc := tc 804 t.Run(tc.name, func(t *testing.T) { 805 ci.Parallel(t) 806 807 alloc := alloc.Copy() 808 alloc.Job.TaskGroups[0].RestartPolicy = rp 809 if tc.taskDefs != nil { 810 alloc = mock.LifecycleAllocFromTasks(tc.taskDefs) 811 alloc.Job.Type = structs.JobTypeService 812 } 813 for _, task := range alloc.Job.TaskGroups[0].Tasks { 814 task.RestartPolicy = rp // tasks inherit the group policy 815 } 816 if tc.hasLeader { 817 for _, task := range alloc.Job.TaskGroups[0].Tasks { 818 if task.Name == "main" { 819 task.Leader = true 820 } 821 } 822 } 823 if tc.isBatch { 824 alloc.Job.Type = structs.JobTypeBatch 825 } 826 827 conf, cleanup := testAllocRunnerConfig(t, alloc) 828 defer cleanup() 829 ar, err := NewAllocRunner(conf) 830 require.NoError(t, err) 831 defer destroy(ar) 832 go ar.Run() 833 834 upd := conf.StateUpdater.(*MockStateUpdater) 835 836 // assert our "before" states: 837 // - all one-shot tasks should be dead but not failed 838 // - all main tasks and sidecars should be running 839 // - no tasks should have restarted 840 testutil.WaitForResult(func() (bool, error) { 841 last := upd.Last() 842 if last == nil { 843 return false, fmt.Errorf("no update") 844 } 845 if last.ClientStatus != structs.AllocClientStatusRunning { 846 return false, fmt.Errorf( 847 "expected alloc to be running not %s", 
				}
				var errs *multierror.Error

				expectedBefore := map[string]string{
					"main":              "running",
					"prestart-oneshot":  "dead",
					"prestart-sidecar":  "running",
					"poststart-oneshot": "dead",
					"poststart-sidecar": "running",
					"poststop":          "pending",
				}

				for task, expected := range expectedBefore {
					got, ok := last.TaskStates[task]
					if !ok {
						continue
					}
					if got.State != expected {
						errs = multierror.Append(errs, fmt.Errorf(
							"expected initial state of task %q to be %q not %q",
							task, expected, got.State))
					}
					if got.Restarts != 0 {
						errs = multierror.Append(errs, fmt.Errorf(
							"expected no initial restarts of task %q, not %d",
							task, got.Restarts))
					}
					if expected == "dead" && got.Failed {
						errs = multierror.Append(errs, fmt.Errorf(
							"expected ephemeral task %q to be dead but not failed",
							task))
					}

				}
				if errs.ErrorOrNil() != nil {
					return false, errs.ErrorOrNil()
				}
				return true, nil
			}, func(err error) {
				require.NoError(t, err, "error waiting for initial state")
			})

			// perform the action
			err = tc.action(ar, alloc.Copy())
			if tc.expectedErr != "" {
				require.EqualError(t, err, tc.expectedErr)
			} else {
				require.NoError(t, err)
			}

			// assert our "after" states
			testutil.WaitForResult(func() (bool, error) {
				last := upd.Last()
				if last == nil {
					return false, fmt.Errorf("no update")
				}
				var errs *multierror.Error
				for task, expected := range tc.expectedAfter {
					got, ok := last.TaskStates[task]
					if !ok {
						errs = multierror.Append(errs, fmt.Errorf(
							"no final state found for task %q", task,
						))
						continue
					}
					if got.State != expected.State {
						errs = multierror.Append(errs, fmt.Errorf(
							"expected final state of task %q to be %q not %q",
							task, expected.State, got.State))
					}
					if expected.State == "dead" {
						if got.FinishedAt.IsZero() || got.StartedAt.IsZero() {
							errs = multierror.Append(errs, fmt.Errorf(
								"expected final state of task %q to have start and finish time", task))
						}
						if len(got.Events) < 2 {
							errs = multierror.Append(errs, fmt.Errorf(
								"expected final state of task %q to include at least 2 events", task))
						}
					}

					if got.Restarts != expected.Restarts {
						errs = multierror.Append(errs, fmt.Errorf(
							"expected final restarts of task %q to be %v not %v",
							task, expected.Restarts, got.Restarts))
					}
				}
				if errs.ErrorOrNil() != nil {
					return false, errs.ErrorOrNil()
				}
				return true, nil
			}, func(err error) {
				require.NoError(t, err, "error waiting for final state")
			})
		})
	}
}

func TestAllocRunner_TaskGroup_ShutdownDelay(t *testing.T) {
	ci.Parallel(t)

	alloc := mock.Alloc()
	tr := alloc.AllocatedResources.Tasks[alloc.Job.TaskGroups[0].Tasks[0].Name]
	alloc.Job.TaskGroups[0].RestartPolicy.Attempts = 0
	alloc.Job.TaskGroups[0].Tasks[0].RestartPolicy.Attempts = 0

	// Create a group service
	tg := alloc.Job.TaskGroups[0]
	tg.Services = []*structs.Service{
		{
			Name:     "shutdown_service",
			Provider: structs.ServiceProviderConsul,
		},
	}

	// Create two tasks in the group
	task := alloc.Job.TaskGroups[0].Tasks[0]
	task.Name = "follower1"
	task.Driver = "mock_driver"
	task.Config = map[string]interface{}{
		"run_for": "10s",
	}

	task2 := alloc.Job.TaskGroups[0].Tasks[0].Copy()
	task2.Name = "leader"
	task2.Driver = "mock_driver"
"mock_driver" 973 task2.Leader = true 974 task2.Config = map[string]interface{}{ 975 "run_for": "10s", 976 } 977 978 alloc.Job.TaskGroups[0].Tasks = append(alloc.Job.TaskGroups[0].Tasks, task2) 979 alloc.AllocatedResources.Tasks[task.Name] = tr 980 alloc.AllocatedResources.Tasks[task2.Name] = tr 981 982 // Set a shutdown delay 983 shutdownDelay := 1 * time.Second 984 alloc.Job.TaskGroups[0].ShutdownDelay = &shutdownDelay 985 986 conf, cleanup := testAllocRunnerConfig(t, alloc) 987 defer cleanup() 988 ar, err := NewAllocRunner(conf) 989 require.NoError(t, err) 990 defer destroy(ar) 991 go ar.Run() 992 993 // Wait for tasks to start 994 upd := conf.StateUpdater.(*MockStateUpdater) 995 last := upd.Last() 996 testutil.WaitForResult(func() (bool, error) { 997 last = upd.Last() 998 if last == nil { 999 return false, fmt.Errorf("No updates") 1000 } 1001 if n := len(last.TaskStates); n != 2 { 1002 return false, fmt.Errorf("Not enough task states (want: 2; found %d)", n) 1003 } 1004 for name, state := range last.TaskStates { 1005 if state.State != structs.TaskStateRunning { 1006 return false, fmt.Errorf("Task %q is not running yet (it's %q)", name, state.State) 1007 } 1008 } 1009 return true, nil 1010 }, func(err error) { 1011 t.Fatalf("err: %v", err) 1012 }) 1013 1014 // Reset updates 1015 upd.Reset() 1016 1017 // Stop alloc 1018 shutdownInit := time.Now() 1019 update := alloc.Copy() 1020 update.DesiredStatus = structs.AllocDesiredStatusStop 1021 ar.Update(update) 1022 1023 // Wait for tasks to stop 1024 testutil.WaitForResult(func() (bool, error) { 1025 last := upd.Last() 1026 if last == nil { 1027 return false, fmt.Errorf("No updates") 1028 } 1029 1030 fin := last.TaskStates["leader"].FinishedAt 1031 1032 if fin.IsZero() { 1033 return false, nil 1034 } 1035 1036 return true, nil 1037 }, func(err error) { 1038 last := upd.Last() 1039 for name, state := range last.TaskStates { 1040 t.Logf("%s: %s", name, state.State) 1041 } 1042 t.Fatalf("err: %v", err) 1043 }) 1044 1045 // Get consul client operations 1046 consulClient := conf.Consul.(*regMock.ServiceRegistrationHandler) 1047 consulOpts := consulClient.GetOps() 1048 var groupRemoveOp regMock.Operation 1049 for _, op := range consulOpts { 1050 // Grab the first deregistration request 1051 if op.Op == "remove" && op.Name == "group-web" { 1052 groupRemoveOp = op 1053 break 1054 } 1055 } 1056 1057 // Ensure remove operation is close to shutdown initiation 1058 require.True(t, groupRemoveOp.OccurredAt.Sub(shutdownInit) < 100*time.Millisecond) 1059 1060 last = upd.Last() 1061 minShutdown := shutdownInit.Add(task.ShutdownDelay) 1062 leaderFinished := last.TaskStates["leader"].FinishedAt 1063 followerFinished := last.TaskStates["follower1"].FinishedAt 1064 1065 // Check that both tasks shut down after min possible shutdown time 1066 require.Greater(t, leaderFinished.UnixNano(), minShutdown.UnixNano()) 1067 require.Greater(t, followerFinished.UnixNano(), minShutdown.UnixNano()) 1068 1069 // Check that there is at least shutdown_delay between consul 1070 // remove operation and task finished at time 1071 require.True(t, leaderFinished.Sub(groupRemoveOp.OccurredAt) > shutdownDelay) 1072 } 1073 1074 // TestAllocRunner_TaskLeader_StopTG asserts that when stopping an alloc with a 1075 // leader the leader is stopped before other tasks. 
func TestAllocRunner_TaskLeader_StopTG(t *testing.T) {
	ci.Parallel(t)

	alloc := mock.Alloc()
	tr := alloc.AllocatedResources.Tasks[alloc.Job.TaskGroups[0].Tasks[0].Name]
	alloc.Job.TaskGroups[0].RestartPolicy.Attempts = 0
	alloc.Job.TaskGroups[0].Tasks[0].RestartPolicy.Attempts = 0

	// Create 3 tasks in the task group
	task := alloc.Job.TaskGroups[0].Tasks[0]
	task.Name = "follower1"
	task.Driver = "mock_driver"
	task.Config = map[string]interface{}{
		"run_for": "10s",
	}

	task2 := alloc.Job.TaskGroups[0].Tasks[0].Copy()
	task2.Name = "leader"
	task2.Driver = "mock_driver"
	task2.Leader = true
	task2.Config = map[string]interface{}{
		"run_for": "10s",
	}

	task3 := alloc.Job.TaskGroups[0].Tasks[0].Copy()
	task3.Name = "follower2"
	task3.Driver = "mock_driver"
	task3.Config = map[string]interface{}{
		"run_for": "10s",
	}
	alloc.Job.TaskGroups[0].Tasks = append(alloc.Job.TaskGroups[0].Tasks, task2, task3)
	alloc.AllocatedResources.Tasks[task.Name] = tr
	alloc.AllocatedResources.Tasks[task2.Name] = tr
	alloc.AllocatedResources.Tasks[task3.Name] = tr

	conf, cleanup := testAllocRunnerConfig(t, alloc)
	defer cleanup()
	ar, err := NewAllocRunner(conf)
	require.NoError(t, err)
	defer destroy(ar)
	go ar.Run()

	// Wait for tasks to start
	upd := conf.StateUpdater.(*MockStateUpdater)
	last := upd.Last()
	testutil.WaitForResult(func() (bool, error) {
		last = upd.Last()
		if last == nil {
			return false, fmt.Errorf("No updates")
		}
		if n := len(last.TaskStates); n != 3 {
			return false, fmt.Errorf("Not enough task states (want: 3; found %d)", n)
		}
		for name, state := range last.TaskStates {
			if state.State != structs.TaskStateRunning {
				return false, fmt.Errorf("Task %q is not running yet (it's %q)", name, state.State)
			}
		}
		return true, nil
	}, func(err error) {
		t.Fatalf("err: %v", err)
	})

	// Reset updates
	upd.Reset()

	// Stop alloc
	update := alloc.Copy()
	update.DesiredStatus = structs.AllocDesiredStatusStop
	ar.Update(update)

	// Wait for tasks to stop
	testutil.WaitForResult(func() (bool, error) {
		last := upd.Last()
		if last == nil {
			return false, fmt.Errorf("No updates")
		}
		if last.TaskStates["leader"].FinishedAt.UnixNano() >= last.TaskStates["follower1"].FinishedAt.UnixNano() {
			return false, fmt.Errorf("expected leader to finish before follower1: %s >= %s",
				last.TaskStates["leader"].FinishedAt, last.TaskStates["follower1"].FinishedAt)
		}
		if last.TaskStates["leader"].FinishedAt.UnixNano() >= last.TaskStates["follower2"].FinishedAt.UnixNano() {
			return false, fmt.Errorf("expected leader to finish before follower2: %s >= %s",
				last.TaskStates["leader"].FinishedAt, last.TaskStates["follower2"].FinishedAt)
		}
		return true, nil
	}, func(err error) {
		last := upd.Last()
		for name, state := range last.TaskStates {
			t.Logf("%s: %s", name, state.State)
		}
		t.Fatalf("err: %v", err)
	})
}

// TestAllocRunner_TaskLeader_StopRestoredTG asserts that when stopping a
// restored task group with a leader that failed before the restore, the
// leader is not stopped since it no longer exists.
// See https://github.com/hashicorp/nomad/issues/3420#issuecomment-341666932
func TestAllocRunner_TaskLeader_StopRestoredTG(t *testing.T) {
	ci.Parallel(t)

	alloc := mock.Alloc()
	tr := alloc.AllocatedResources.Tasks[alloc.Job.TaskGroups[0].Tasks[0].Name]
	alloc.Job.TaskGroups[0].RestartPolicy.Attempts = 0
	alloc.Job.TaskGroups[0].Tasks[0].RestartPolicy.Attempts = 0

	// Create a leader and follower task in the task group
	task := alloc.Job.TaskGroups[0].Tasks[0]
	task.Name = "follower1"
	task.Driver = "mock_driver"
	task.KillTimeout = 10 * time.Second
	task.Config = map[string]interface{}{
		"run_for": "10s",
	}

	task2 := alloc.Job.TaskGroups[0].Tasks[0].Copy()
	task2.Name = "leader"
	task2.Driver = "mock_driver"
	task2.Leader = true
	task2.KillTimeout = 10 * time.Millisecond
	task2.Config = map[string]interface{}{
		"run_for": "10s",
	}

	alloc.Job.TaskGroups[0].Tasks = append(alloc.Job.TaskGroups[0].Tasks, task2)
	alloc.AllocatedResources.Tasks[task.Name] = tr
	alloc.AllocatedResources.Tasks[task2.Name] = tr

	conf, cleanup := testAllocRunnerConfig(t, alloc)
	defer cleanup()

	// Use a memory backed statedb
	conf.StateDB = state.NewMemDB(conf.Logger)

	ar, err := NewAllocRunner(conf)
	require.NoError(t, err)

	// Mimic Nomad exiting before the leader's death was able to stop the
	// other tasks.
	ar.tasks["leader"].UpdateState(structs.TaskStateDead, structs.NewTaskEvent(structs.TaskKilled))
	ar.tasks["follower1"].UpdateState(structs.TaskStateRunning, structs.NewTaskEvent(structs.TaskStarted))

	// Create a new AllocRunner to test RestoreState and Run
	ar2, err := NewAllocRunner(conf)
	require.NoError(t, err)
	defer destroy(ar2)

	if err := ar2.Restore(); err != nil {
		t.Fatalf("error restoring state: %v", err)
	}
	ar2.Run()

	// Wait for tasks to be stopped because leader is dead
	testutil.WaitForResult(func() (bool, error) {
		alloc := ar2.Alloc()
		// TODO: this test does not test anything!!! alloc.TaskStates is an empty map
		for task, state := range alloc.TaskStates {
			if state.State != structs.TaskStateDead {
				return false, fmt.Errorf("Task %q should be dead: %v", task, state.State)
			}
		}
		return true, nil
	}, func(err error) {
		t.Fatalf("err: %v", err)
	})

	// Make sure it GCs properly
	ar2.Destroy()

	select {
	case <-ar2.DestroyCh():
		// exited as expected
	case <-time.After(10 * time.Second):
		t.Fatalf("timed out waiting for AR to GC")
	}
}

func TestAllocRunner_Restore_LifecycleHooks(t *testing.T) {
	ci.Parallel(t)

	alloc := mock.LifecycleAlloc()

	conf, cleanup := testAllocRunnerConfig(t, alloc)
	defer cleanup()

	// Use a memory backed statedb
	conf.StateDB = state.NewMemDB(conf.Logger)

	ar, err := NewAllocRunner(conf)
	require.NoError(t, err)

	go ar.Run()
	defer destroy(ar)

	// Wait for the coordinator to transition from the "init" state.
	tasklifecycle.WaitNotInitUntil(ar.taskCoordinator, time.Second, func() {
		t.Fatalf("task coordinator didn't transition from init in time")
	})

	// All tasks with prestart hooks should be allowed to run, while the main
	// and poststart tasks remain blocked.
	tasklifecycle.RequireTaskAllowed(t, ar.taskCoordinator, ar.tasks["init"].Task())
	tasklifecycle.RequireTaskAllowed(t, ar.taskCoordinator, ar.tasks["side"].Task())
	tasklifecycle.RequireTaskBlocked(t, ar.taskCoordinator, ar.tasks["web"].Task())
	tasklifecycle.RequireTaskBlocked(t, ar.taskCoordinator, ar.tasks["poststart"].Task())

	// Mimic client dies while init task running, and client restarts after
	// init task finished and web is running.
	ar.tasks["init"].UpdateState(structs.TaskStateDead, structs.NewTaskEvent(structs.TaskTerminated))
	ar.tasks["side"].UpdateState(structs.TaskStateRunning, structs.NewTaskEvent(structs.TaskStarted))
	ar.tasks["web"].UpdateState(structs.TaskStateRunning, structs.NewTaskEvent(structs.TaskStarted))

	// Create a new AllocRunner to test Restore and Run.
	ar2, err := NewAllocRunner(conf)
	require.NoError(t, err)
	require.NoError(t, ar2.Restore())

	go ar2.Run()
	defer destroy(ar2)

	// Wait for the new coordinator to transition from the "init" state.
	tasklifecycle.WaitNotInitUntil(ar2.taskCoordinator, time.Second, func() {
		t.Fatalf("task coordinator didn't transition from init in time")
	})

	// Restore resumes execution with correct lifecycle ordering.
	tasklifecycle.RequireTaskBlocked(t, ar2.taskCoordinator, ar2.tasks["init"].Task())
	tasklifecycle.RequireTaskAllowed(t, ar2.taskCoordinator, ar2.tasks["side"].Task())
	tasklifecycle.RequireTaskAllowed(t, ar2.taskCoordinator, ar2.tasks["web"].Task())
	tasklifecycle.RequireTaskAllowed(t, ar2.taskCoordinator, ar2.tasks["poststart"].Task())
}

func TestAllocRunner_Update_Semantics(t *testing.T) {
	ci.Parallel(t)
	require := require.New(t)

	updatedAlloc := func(a *structs.Allocation) *structs.Allocation {
		upd := a.CopySkipJob()
		upd.AllocModifyIndex++

		return upd
	}

	alloc := mock.Alloc()
	alloc.Job.TaskGroups[0].Tasks[0].Driver = "mock_driver"
	conf, cleanup := testAllocRunnerConfig(t, alloc)
	defer cleanup()

	ar, err := NewAllocRunner(conf)
	require.NoError(err)

	upd1 := updatedAlloc(alloc)
	ar.Update(upd1)

	// Update was placed into a queue
	require.Len(ar.allocUpdatedCh, 1)

	upd2 := updatedAlloc(alloc)
	ar.Update(upd2)

	// Allocation was _replaced_

	require.Len(ar.allocUpdatedCh, 1)
	queuedAlloc := <-ar.allocUpdatedCh
	require.Equal(upd2, queuedAlloc)

	// Requeueing older alloc is skipped
	ar.Update(upd2)
	ar.Update(upd1)

	queuedAlloc = <-ar.allocUpdatedCh
	require.Equal(upd2, queuedAlloc)

	// Ignore after watch closed

	close(ar.waitCh)

	ar.Update(upd1)

	// Did not queue the update
	require.Len(ar.allocUpdatedCh, 0)
}

// TestAllocRunner_DeploymentHealth_Healthy_Migration asserts that health is
// reported for allocations that are migrated, not just those that are part of
// a deployment.
func TestAllocRunner_DeploymentHealth_Healthy_Migration(t *testing.T) {
	ci.Parallel(t)

	alloc := mock.Alloc()

	// Ensure the alloc is *not* part of a deployment
	alloc.DeploymentID = ""

	// Shorten the default migration healthy time
	tg := alloc.Job.TaskGroups[0]
	tg.Migrate = structs.DefaultMigrateStrategy()
	tg.Migrate.MinHealthyTime = 100 * time.Millisecond
	tg.Migrate.HealthCheck = structs.MigrateStrategyHealthStates

	task := tg.Tasks[0]
	task.Driver = "mock_driver"
	task.Config = map[string]interface{}{
		"run_for": "30s",
	}

	conf, cleanup := testAllocRunnerConfig(t, alloc)
	defer cleanup()

	ar, err := NewAllocRunner(conf)
	require.NoError(t, err)
	go ar.Run()
	defer destroy(ar)

	upd := conf.StateUpdater.(*MockStateUpdater)
	testutil.WaitForResult(func() (bool, error) {
		last := upd.Last()
		if last == nil {
			return false, fmt.Errorf("No updates")
		}
		if !last.DeploymentStatus.HasHealth() {
			return false, fmt.Errorf("want deployment status healthy; got unset")
		} else if !*last.DeploymentStatus.Healthy {
			// This is fatal
			t.Fatal("want deployment status healthy; got unhealthy")
		}
		return true, nil
	}, func(err error) {
		require.NoError(t, err)
	})
}

// TestAllocRunner_DeploymentHealth_Healthy_NoChecks asserts that the health
// watcher will mark the allocation as healthy based on task states alone.
func TestAllocRunner_DeploymentHealth_Healthy_NoChecks(t *testing.T) {
	ci.Parallel(t)

	alloc := mock.Alloc()

	task := alloc.Job.TaskGroups[0].Tasks[0]
	task.Driver = "mock_driver"
	task.Config = map[string]interface{}{
		"run_for": "10s",
	}

	// Create a task that takes longer to become healthy
	alloc.Job.TaskGroups[0].Tasks = append(alloc.Job.TaskGroups[0].Tasks, task.Copy())
	alloc.AllocatedResources.Tasks["task2"] = alloc.AllocatedResources.Tasks["web"].Copy()
	task2 := alloc.Job.TaskGroups[0].Tasks[1]
	task2.Name = "task2"
	task2.Config["start_block_for"] = "500ms"

	// Make the alloc be part of a deployment that uses task states for
	// health checks
	alloc.DeploymentID = uuid.Generate()
	alloc.Job.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
	alloc.Job.TaskGroups[0].Update.HealthCheck = structs.UpdateStrategyHealthCheck_TaskStates
	alloc.Job.TaskGroups[0].Update.MaxParallel = 1
	alloc.Job.TaskGroups[0].Update.MinHealthyTime = 100 * time.Millisecond

	conf, cleanup := testAllocRunnerConfig(t, alloc)
	defer cleanup()

	ar, err := NewAllocRunner(conf)
	require.NoError(t, err)

	start, done := time.Now(), time.Time{}
	go ar.Run()
	defer destroy(ar)

	upd := conf.StateUpdater.(*MockStateUpdater)
	testutil.WaitForResult(func() (bool, error) {
		last := upd.Last()
		if last == nil {
			return false, fmt.Errorf("No updates")
		}
		if !last.DeploymentStatus.HasHealth() {
			return false, fmt.Errorf("want deployment status healthy; got unset")
		} else if !*last.DeploymentStatus.Healthy {
			// This is fatal
			t.Fatal("want deployment status healthy; got unhealthy")
		}

		// Capture the done timestamp
		done = last.DeploymentStatus.Timestamp
		return true, nil
	}, func(err error) {
		require.NoError(t, err)
	})

	if d := done.Sub(start); d < 500*time.Millisecond {
		t.Fatalf("didn't wait for the second task. Only took %v", d)
	}
}

// TestAllocRunner_DeploymentHealth_Unhealthy_Checks asserts that the health
// watcher will mark the allocation as unhealthy with failing checks.
func TestAllocRunner_DeploymentHealth_Unhealthy_Checks(t *testing.T) {
	ci.Parallel(t)

	alloc := mock.Alloc()
	task := alloc.Job.TaskGroups[0].Tasks[0]
	task.Driver = "mock_driver"
	task.Config = map[string]interface{}{
		"run_for": "10s",
	}

	// Set a service with check
	task.Services = []*structs.Service{
		{
			Name:      "fakservice",
			PortLabel: "http",
			Checks: []*structs.ServiceCheck{
				{
					Name:     "fakecheck",
					Type:     structs.ServiceCheckScript,
					Command:  "true",
					Interval: 30 * time.Second,
					Timeout:  5 * time.Second,
				},
			},
		},
	}

	// Make the alloc be part of a deployment
	alloc.DeploymentID = uuid.Generate()
	alloc.Job.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
	alloc.Job.TaskGroups[0].Update.HealthCheck = structs.UpdateStrategyHealthCheck_Checks
	alloc.Job.TaskGroups[0].Update.MaxParallel = 1
	alloc.Job.TaskGroups[0].Update.MinHealthyTime = 100 * time.Millisecond
	alloc.Job.TaskGroups[0].Update.HealthyDeadline = 1 * time.Second

	checkUnhealthy := &api.AgentCheck{
		CheckID: uuid.Generate(),
		Status:  api.HealthWarning,
	}

	conf, cleanup := testAllocRunnerConfig(t, alloc)
	defer cleanup()

	// Only return an unhealthy check so the alloc never becomes healthy
	// before the healthy_deadline is reached.
	consulClient := conf.Consul.(*regMock.ServiceRegistrationHandler)
	consulClient.AllocRegistrationsFn = func(allocID string) (*serviceregistration.AllocRegistration, error) {
		return &serviceregistration.AllocRegistration{
			Tasks: map[string]*serviceregistration.ServiceRegistrations{
				task.Name: {
					Services: map[string]*serviceregistration.ServiceRegistration{
						"123": {
							Service: &api.AgentService{Service: "fakeservice"},
							Checks:  []*api.AgentCheck{checkUnhealthy},
						},
					},
				},
			},
		}, nil
	}

	ar, err := NewAllocRunner(conf)
	require.NoError(t, err)
	go ar.Run()
	defer destroy(ar)

	var lastUpdate *structs.Allocation
	upd := conf.StateUpdater.(*MockStateUpdater)
	testutil.WaitForResult(func() (bool, error) {
		lastUpdate = upd.Last()
		if lastUpdate == nil {
			return false, fmt.Errorf("No updates")
		}
		if !lastUpdate.DeploymentStatus.HasHealth() {
			return false, fmt.Errorf("want deployment status unhealthy; got unset")
		} else if *lastUpdate.DeploymentStatus.Healthy {
			// This is fatal
			t.Fatal("want deployment status unhealthy; got healthy")
		}
		return true, nil
	}, func(err error) {
		require.NoError(t, err)
	})

	// Assert that we have an event explaining why we are unhealthy.
	require.Len(t, lastUpdate.TaskStates, 1)
	taskState := lastUpdate.TaskStates[task.Name]
	require.NotNil(t, taskState)
	require.NotEmpty(t, taskState.Events)
	last := taskState.Events[len(taskState.Events)-1]
	require.Equal(t, allochealth.AllocHealthEventSource, last.Type)
	require.Contains(t, last.Message, "by healthy_deadline")
}

// TestAllocRunner_Destroy asserts that Destroy kills and cleans up a running
// alloc.
func TestAllocRunner_Destroy(t *testing.T) {
	ci.Parallel(t)

	// Ensure task takes some time
	alloc := mock.BatchAlloc()
	task := alloc.Job.TaskGroups[0].Tasks[0]
	task.Config["run_for"] = "10s"

	conf, cleanup := testAllocRunnerConfig(t, alloc)
	defer cleanup()

	// Use a MemDB to assert alloc state gets cleaned up
	conf.StateDB = state.NewMemDB(conf.Logger)

	ar, err := NewAllocRunner(conf)
	require.NoError(t, err)
	go ar.Run()

	// Wait for alloc to be running
	testutil.WaitForResult(func() (bool, error) {
		state := ar.AllocState()

		return state.ClientStatus == structs.AllocClientStatusRunning,
			fmt.Errorf("got client status %v; want running", state.ClientStatus)
	}, func(err error) {
		require.NoError(t, err)
	})

	// Assert state was stored
	ls, ts, err := conf.StateDB.GetTaskRunnerState(alloc.ID, task.Name)
	require.NoError(t, err)
	require.NotNil(t, ls)
	require.NotNil(t, ts)

	// Now destroy
	ar.Destroy()

	select {
	case <-ar.DestroyCh():
		// Destroyed properly!
	case <-time.After(10 * time.Second):
		require.Fail(t, "timed out waiting for alloc to be destroyed")
	}

	// Assert alloc is dead
	state := ar.AllocState()
	require.Equal(t, structs.AllocClientStatusComplete, state.ClientStatus)

	// Assert the state was cleaned
	ls, ts, err = conf.StateDB.GetTaskRunnerState(alloc.ID, task.Name)
	require.NoError(t, err)
	require.Nil(t, ls)
	require.Nil(t, ts)

	// Assert the alloc directory was cleaned
	if _, err := os.Stat(ar.allocDir.AllocDir); err == nil {
		require.Failf(t, "alloc dir still exists", "%v", ar.allocDir.AllocDir)
	} else if !os.IsNotExist(err) {
		require.Failf(t, "expected NotExist error", "found %v", err)
	}
}

func TestAllocRunner_SimpleRun(t *testing.T) {
	ci.Parallel(t)

	alloc := mock.BatchAlloc()

	conf, cleanup := testAllocRunnerConfig(t, alloc)
	defer cleanup()
	ar, err := NewAllocRunner(conf)
	require.NoError(t, err)
	go ar.Run()
	defer destroy(ar)

	// Wait for alloc to complete
	testutil.WaitForResult(func() (bool, error) {
		state := ar.AllocState()

		if state.ClientStatus != structs.AllocClientStatusComplete {
			return false, fmt.Errorf("got status %v; want %v", state.ClientStatus, structs.AllocClientStatusComplete)
		}

		for t, s := range state.TaskStates {
			if s.FinishedAt.IsZero() {
				return false, fmt.Errorf("task %q has zero FinishedAt value", t)
			}
		}

		return true, nil
	}, func(err error) {
		require.NoError(t, err)
	})
}

// TestAllocRunner_MoveAllocDir asserts that a rescheduled
// allocation copies ephemeral disk content from previous alloc run
func TestAllocRunner_MoveAllocDir(t *testing.T) {
	ci.Parallel(t)

	// Step 1: start and run a task
	alloc := mock.BatchAlloc()
	conf, cleanup := testAllocRunnerConfig(t, alloc)
	defer cleanup()
	ar, err := NewAllocRunner(conf)
	require.NoError(t, err)
	ar.Run()
	defer destroy(ar)

	WaitForClientState(t, ar, structs.AllocClientStatusComplete)

	// Step 2. Modify its directory
	task := alloc.Job.TaskGroups[0].Tasks[0]
	dataFile := filepath.Join(ar.allocDir.SharedDir, "data", "data_file")
	ioutil.WriteFile(dataFile, []byte("hello world"), os.ModePerm)
	taskDir := ar.allocDir.TaskDirs[task.Name]
	taskLocalFile := filepath.Join(taskDir.LocalDir, "local_file")
	ioutil.WriteFile(taskLocalFile, []byte("good bye world"), os.ModePerm)

	// Step 3. Start a new alloc
	alloc2 := mock.BatchAlloc()
	alloc2.PreviousAllocation = alloc.ID
	alloc2.Job.TaskGroups[0].EphemeralDisk.Sticky = true

	conf2, cleanup := testAllocRunnerConfig(t, alloc2)
	conf2.PrevAllocWatcher, conf2.PrevAllocMigrator = allocwatcher.NewAllocWatcher(allocwatcher.Config{
		Alloc:          alloc2,
		PreviousRunner: ar,
		Logger:         conf2.Logger,
	})
	defer cleanup()
	ar2, err := NewAllocRunner(conf2)
	require.NoError(t, err)

	ar2.Run()
	defer destroy(ar2)

	WaitForClientState(t, ar2, structs.AllocClientStatusComplete)

	// Ensure that data from ar was moved to ar2
	dataFile = filepath.Join(ar2.allocDir.SharedDir, "data", "data_file")
	fileInfo, _ := os.Stat(dataFile)
	require.NotNilf(t, fileInfo, "file %q not found", dataFile)

	taskDir = ar2.allocDir.TaskDirs[task.Name]
	taskLocalFile = filepath.Join(taskDir.LocalDir, "local_file")
	fileInfo, _ = os.Stat(taskLocalFile)
	require.NotNilf(t, fileInfo, "file %q not found", taskLocalFile)
}

// TestAllocRunner_HandlesArtifactFailure ensures that if one task in a task
// group is retrying an artifact fetch, the other tasks in the group are still
// able to proceed.
func TestAllocRunner_HandlesArtifactFailure(t *testing.T) {
	ci.Parallel(t)

	alloc := mock.BatchAlloc()
	rp := &structs.RestartPolicy{
		Mode:     structs.RestartPolicyModeFail,
		Attempts: 1,
		Delay:    time.Nanosecond,
		Interval: time.Hour,
	}
	alloc.Job.TaskGroups[0].RestartPolicy = rp
	alloc.Job.TaskGroups[0].Tasks[0].RestartPolicy = rp

	// Create a new task with a bad artifact
	badtask := alloc.Job.TaskGroups[0].Tasks[0].Copy()
	badtask.Name = "bad"
	badtask.Artifacts = []*structs.TaskArtifact{
		{GetterSource: "http://127.0.0.1:0/foo/bar/baz"},
	}

	alloc.Job.TaskGroups[0].Tasks = append(alloc.Job.TaskGroups[0].Tasks, badtask)
	alloc.AllocatedResources.Tasks["bad"] = &structs.AllocatedTaskResources{
		Cpu: structs.AllocatedCpuResources{
			CpuShares: 500,
		},
		Memory: structs.AllocatedMemoryResources{
			MemoryMB: 256,
		},
	}

	conf, cleanup := testAllocRunnerConfig(t, alloc)
	defer cleanup()
	ar, err := NewAllocRunner(conf)
	require.NoError(t, err)
	go ar.Run()
	defer destroy(ar)

	testutil.WaitForResult(func() (bool, error) {
		state := ar.AllocState()

		switch state.ClientStatus {
		case structs.AllocClientStatusComplete, structs.AllocClientStatusFailed:
			return true, nil
		default:
			return false, fmt.Errorf("got status %v but want terminal", state.ClientStatus)
		}
	}, func(err error) {
		require.NoError(t, err)
	})

	state := ar.AllocState()
	require.Equal(t, structs.AllocClientStatusFailed, state.ClientStatus)
	require.Equal(t, structs.TaskStateDead, state.TaskStates["web"].State)
	require.True(t, state.TaskStates["web"].Successful())
	require.Equal(t, structs.TaskStateDead, state.TaskStates["bad"].State)
	require.True(t, state.TaskStates["bad"].Failed)
}

// Test that alloc runner kills tasks in task group when another task fails
func TestAllocRunner_TaskFailed_KillTG(t *testing.T) {
	ci.Parallel(t)

	alloc := mock.Alloc()
	tr := alloc.AllocatedResources.Tasks[alloc.Job.TaskGroups[0].Tasks[0].Name]
	alloc.Job.TaskGroups[0].RestartPolicy.Attempts = 0
	alloc.Job.TaskGroups[0].Tasks[0].RestartPolicy.Attempts = 0

	// Create two tasks in the task group
	task := alloc.Job.TaskGroups[0].Tasks[0]
	task.Name = "task1"
	task.Driver = "mock_driver"
	task.KillTimeout = 10 * time.Millisecond
	task.Config = map[string]interface{}{
		"run_for": "10s",
	}
	// Set a service with check
	task.Services = []*structs.Service{
		{
			Name:      "fakeservice",
			PortLabel: "http",
			Provider:  structs.ServiceProviderConsul,
			Checks: []*structs.ServiceCheck{
				{
					Name:     "fakecheck",
					Type:     structs.ServiceCheckScript,
					Command:  "true",
					Interval: 30 * time.Second,
					Timeout:  5 * time.Second,
				},
			},
		},
	}

	task2 := alloc.Job.TaskGroups[0].Tasks[0].Copy()
	task2.Name = "task 2"
	task2.Driver = "mock_driver"
	task2.Config = map[string]interface{}{
		"start_error": "fail task please",
	}
	alloc.Job.TaskGroups[0].Tasks = append(alloc.Job.TaskGroups[0].Tasks, task2)
	alloc.AllocatedResources.Tasks[task.Name] = tr
	alloc.AllocatedResources.Tasks[task2.Name] = tr

	// Make the alloc be part of a deployment
	alloc.DeploymentID = uuid.Generate()
	alloc.Job.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
	alloc.Job.TaskGroups[0].Update.HealthCheck = structs.UpdateStrategyHealthCheck_Checks
	alloc.Job.TaskGroups[0].Update.MaxParallel = 1
	alloc.Job.TaskGroups[0].Update.MinHealthyTime = 10 * time.Millisecond
	alloc.Job.TaskGroups[0].Update.HealthyDeadline = 2 * time.Second

	checkHealthy := &api.AgentCheck{
		CheckID: uuid.Generate(),
		Status:  api.HealthPassing,
	}

	conf, cleanup := testAllocRunnerConfig(t, alloc)
	defer cleanup()

	consulClient := conf.Consul.(*regMock.ServiceRegistrationHandler)
	consulClient.AllocRegistrationsFn = func(allocID string) (*serviceregistration.AllocRegistration, error) {
		return &serviceregistration.AllocRegistration{
			Tasks: map[string]*serviceregistration.ServiceRegistrations{
				task.Name: {
					Services: map[string]*serviceregistration.ServiceRegistration{
						"123": {
							Service: &api.AgentService{Service: "fakeservice"},
							Checks:  []*api.AgentCheck{checkHealthy},
						},
					},
				},
			},
		}, nil
	}

	ar, err := NewAllocRunner(conf)
	require.NoError(t, err)
	defer destroy(ar)
	go ar.Run()
	upd := conf.StateUpdater.(*MockStateUpdater)

	testutil.WaitForResult(func() (bool, error) {
		last := upd.Last()
		if last == nil {
			return false, fmt.Errorf("No updates")
		}
		if last.ClientStatus != structs.AllocClientStatusFailed {
			return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusFailed)
		}

		// Task One should be killed
		state1 := last.TaskStates[task.Name]
		if state1.State != structs.TaskStateDead {
			return false, fmt.Errorf("got state %v; want %v", state1.State, structs.TaskStateDead)
		}
		if len(state1.Events) < 2 {
			// At least have a received and destroyed
			return false, fmt.Errorf("Unexpected number of events")
		}
fmt.Errorf("Unexpected number of events") 1879 } 1880 1881 found := false 1882 for _, e := range state1.Events { 1883 if e.Type != structs.TaskSiblingFailed { 1884 found = true 1885 } 1886 } 1887 1888 if !found { 1889 return false, fmt.Errorf("Did not find event %v", structs.TaskSiblingFailed) 1890 } 1891 1892 // Task Two should be failed 1893 state2 := last.TaskStates[task2.Name] 1894 if state2.State != structs.TaskStateDead { 1895 return false, fmt.Errorf("got state %v; want %v", state2.State, structs.TaskStateDead) 1896 } 1897 if !state2.Failed { 1898 return false, fmt.Errorf("task2 should have failed") 1899 } 1900 1901 if !last.DeploymentStatus.HasHealth() { 1902 return false, fmt.Errorf("Expected deployment health to be non nil") 1903 } 1904 1905 return true, nil 1906 }, func(err error) { 1907 require.Fail(t, "err: %v", err) 1908 }) 1909 } 1910 1911 // Test that alloc becoming terminal should destroy the alloc runner 1912 func TestAllocRunner_TerminalUpdate_Destroy(t *testing.T) { 1913 ci.Parallel(t) 1914 alloc := mock.BatchAlloc() 1915 tr := alloc.AllocatedResources.Tasks[alloc.Job.TaskGroups[0].Tasks[0].Name] 1916 alloc.Job.TaskGroups[0].RestartPolicy.Attempts = 0 1917 alloc.Job.TaskGroups[0].Tasks[0].RestartPolicy.Attempts = 0 1918 // Ensure task takes some time 1919 task := alloc.Job.TaskGroups[0].Tasks[0] 1920 task.Driver = "mock_driver" 1921 task.Config["run_for"] = "10s" 1922 alloc.AllocatedResources.Tasks[task.Name] = tr 1923 1924 conf, cleanup := testAllocRunnerConfig(t, alloc) 1925 defer cleanup() 1926 ar, err := NewAllocRunner(conf) 1927 require.NoError(t, err) 1928 defer destroy(ar) 1929 go ar.Run() 1930 upd := conf.StateUpdater.(*MockStateUpdater) 1931 1932 testutil.WaitForResult(func() (bool, error) { 1933 last := upd.Last() 1934 if last == nil { 1935 return false, fmt.Errorf("No updates") 1936 } 1937 if last.ClientStatus != structs.AllocClientStatusRunning { 1938 return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusRunning) 1939 } 1940 return true, nil 1941 }, func(err error) { 1942 require.Fail(t, "err: %v", err) 1943 }) 1944 1945 // Update the alloc to be terminal which should cause the alloc runner to 1946 // stop the tasks and wait for a destroy. 1947 update := ar.alloc.Copy() 1948 update.DesiredStatus = structs.AllocDesiredStatusStop 1949 ar.Update(update) 1950 1951 testutil.WaitForResult(func() (bool, error) { 1952 last := upd.Last() 1953 if last == nil { 1954 return false, fmt.Errorf("No updates") 1955 } 1956 1957 // Check the status has changed. 1958 if last.ClientStatus != structs.AllocClientStatusComplete { 1959 return false, fmt.Errorf("got client status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete) 1960 } 1961 1962 // Check the alloc directory still exists 1963 if _, err := os.Stat(ar.allocDir.AllocDir); err != nil { 1964 return false, fmt.Errorf("alloc dir destroyed: %v", ar.allocDir.AllocDir) 1965 } 1966 1967 return true, nil 1968 }, func(err error) { 1969 require.Fail(t, "err: %v", err) 1970 }) 1971 1972 // Send the destroy signal and ensure the AllocRunner cleans up. 1973 ar.Destroy() 1974 1975 testutil.WaitForResult(func() (bool, error) { 1976 last := upd.Last() 1977 if last == nil { 1978 return false, fmt.Errorf("No updates") 1979 } 1980 1981 // Check the status has changed. 
		if last.ClientStatus != structs.AllocClientStatusComplete {
			return false, fmt.Errorf("got client status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete)
		}

		// Check the alloc directory was cleaned
		if _, err := os.Stat(ar.allocDir.AllocDir); err == nil {
			return false, fmt.Errorf("alloc dir still exists: %v", ar.allocDir.AllocDir)
		} else if !os.IsNotExist(err) {
			return false, fmt.Errorf("stat err: %v", err)
		}

		return true, nil
	}, func(err error) {
		t.Fatalf("err: %v", err)
	})
}

// TestAllocRunner_PersistState_Destroyed asserts that destroyed allocs don't persist anymore
func TestAllocRunner_PersistState_Destroyed(t *testing.T) {
	ci.Parallel(t)

	alloc := mock.BatchAlloc()
	taskName := alloc.Job.LookupTaskGroup(alloc.TaskGroup).Tasks[0].Name

	conf, cleanup := testAllocRunnerConfig(t, alloc)
	conf.StateDB = state.NewMemDB(conf.Logger)

	defer cleanup()
	ar, err := NewAllocRunner(conf)
	require.NoError(t, err)
	defer destroy(ar)

	go ar.Run()

	select {
	case <-ar.WaitCh():
	case <-time.After(10 * time.Second):
		require.Fail(t, "timed out waiting for alloc to complete")
	}

	// test final persisted state upon completion
	require.NoError(t, ar.PersistState())
	allocs, _, err := conf.StateDB.GetAllAllocations()
	require.NoError(t, err)
	require.Len(t, allocs, 1)
	require.Equal(t, alloc.ID, allocs[0].ID)
	_, ts, err := conf.StateDB.GetTaskRunnerState(alloc.ID, taskName)
	require.NoError(t, err)
	require.Equal(t, structs.TaskStateDead, ts.State)

	// check that DB alloc is empty after destroying AR
	ar.Destroy()
	select {
	case <-ar.DestroyCh():
	case <-time.After(10 * time.Second):
		require.Fail(t, "timed out waiting for destruction")
	}

	allocs, _, err = conf.StateDB.GetAllAllocations()
	require.NoError(t, err)
	require.Empty(t, allocs)
	_, ts, err = conf.StateDB.GetTaskRunnerState(alloc.ID, taskName)
	require.NoError(t, err)
	require.Nil(t, ts)

	// check that DB alloc is empty after persisting state of destroyed AR
	ar.PersistState()
	allocs, _, err = conf.StateDB.GetAllAllocations()
	require.NoError(t, err)
	require.Empty(t, allocs)
	_, ts, err = conf.StateDB.GetTaskRunnerState(alloc.ID, taskName)
	require.NoError(t, err)
	require.Nil(t, ts)
}

func TestAllocRunner_Reconnect(t *testing.T) {
	ci.Parallel(t)

	type tcase struct {
		clientStatus string
		taskState    string
		taskEvent    *structs.TaskEvent
	}
	tcases := []tcase{
		{
			structs.AllocClientStatusRunning,
			structs.TaskStateRunning,
			structs.NewTaskEvent(structs.TaskStarted),
		},
		{
			structs.AllocClientStatusComplete,
			structs.TaskStateDead,
			structs.NewTaskEvent(structs.TaskTerminated),
		},
		{
			structs.AllocClientStatusFailed,
			structs.TaskStateDead,
			structs.NewTaskEvent(structs.TaskDriverFailure).SetFailsTask(),
		},
		{
			structs.AllocClientStatusPending,
			structs.TaskStatePending,
			structs.NewTaskEvent(structs.TaskReceived),
		},
	}

	for _, tc := range tcases {
		t.Run(tc.clientStatus, func(t *testing.T) {
			// create a running alloc
			alloc := mock.BatchAlloc()
			alloc.AllocModifyIndex = 10
			alloc.ModifyIndex = 10
			alloc.ModifyTime = time.Now().UnixNano()
			// Ensure task takes some time
			task := alloc.Job.TaskGroups[0].Tasks[0]
			task.Driver = "mock_driver"
			task.Config["run_for"] = "30s"

			original := alloc.Copy()

			conf, cleanup := testAllocRunnerConfig(t, alloc)
			defer cleanup()

			ar, err := NewAllocRunner(conf)
			require.NoError(t, err)
			defer destroy(ar)

			go ar.Run()

			for _, taskRunner := range ar.tasks {
				taskRunner.UpdateState(tc.taskState, tc.taskEvent)
			}

			update := ar.Alloc().Copy()

			update.ClientStatus = structs.AllocClientStatusUnknown
			update.AllocModifyIndex = original.AllocModifyIndex + 10
			update.ModifyIndex = original.ModifyIndex + 10
			update.ModifyTime = original.ModifyTime + 10

			err = ar.Reconnect(update)
			require.NoError(t, err)

			require.Equal(t, tc.clientStatus, ar.AllocState().ClientStatus)

			// Make sure the runner's alloc indexes match the update.
			require.Equal(t, update.AllocModifyIndex, ar.Alloc().AllocModifyIndex)
			require.Equal(t, update.ModifyIndex, ar.Alloc().ModifyIndex)
			require.Equal(t, update.ModifyTime, ar.Alloc().ModifyTime)

			found := false

			updater := conf.StateUpdater.(*MockStateUpdater)
			var last *structs.Allocation
			testutil.WaitForResult(func() (bool, error) {
				last = updater.Last()
				if last == nil {
					return false, errors.New("last update nil")
				}

				states := last.TaskStates
				for _, s := range states {
					for _, e := range s.Events {
						if e.Type == structs.TaskClientReconnected {
							found = true
							return true, nil
						}
					}
				}

				return false, errors.New("no reconnect event found")
			}, func(err error) {
				require.NoError(t, err)
			})

			require.True(t, found, "no reconnect event found")
		})
	}
}

// TestAllocRunner_Lifecycle_Shutdown_Order asserts that a service job with 3
// lifecycle hooks (1 sidecar, 1 ephemeral, 1 poststop) starts all 4 tasks, and shuts down
// the sidecar after main, but before poststop.
func TestAllocRunner_Lifecycle_Shutdown_Order(t *testing.T) {
	alloc := mock.LifecycleAllocWithPoststopDeploy()

	alloc.Job.Type = structs.JobTypeService

	mainTask := alloc.Job.TaskGroups[0].Tasks[0]
	mainTask.Config["run_for"] = "100s"

	sidecarTask := alloc.Job.TaskGroups[0].Tasks[1]
	sidecarTask.Lifecycle.Hook = structs.TaskLifecycleHookPoststart
	sidecarTask.Config["run_for"] = "100s"

	poststopTask := alloc.Job.TaskGroups[0].Tasks[2]
	ephemeralTask := alloc.Job.TaskGroups[0].Tasks[3]

	alloc.Job.TaskGroups[0].Tasks = []*structs.Task{mainTask, ephemeralTask, sidecarTask, poststopTask}

	conf, cleanup := testAllocRunnerConfig(t, alloc)
	defer cleanup()
	ar, err := NewAllocRunner(conf)
	require.NoError(t, err)
	defer destroy(ar)
	go ar.Run()

	upd := conf.StateUpdater.(*MockStateUpdater)

	// Wait for main and sidecar tasks to be running, and that the
	// ephemeral task ran and exited.
	testutil.WaitForResult(func() (bool, error) {
		last := upd.Last()
		if last == nil {
			return false, fmt.Errorf("No updates")
		}

		if last.ClientStatus != structs.AllocClientStatusRunning {
			return false, fmt.Errorf("expected alloc to be running not %s", last.ClientStatus)
		}

		if s := last.TaskStates[mainTask.Name].State; s != structs.TaskStateRunning {
			return false, fmt.Errorf("expected main task to be running not %s", s)
		}

		if s := last.TaskStates[sidecarTask.Name].State; s != structs.TaskStateRunning {
			return false, fmt.Errorf("expected sidecar task to be running not %s", s)
		}

		if s := last.TaskStates[ephemeralTask.Name].State; s != structs.TaskStateDead {
			return false, fmt.Errorf("expected ephemeral task to be dead not %s", s)
		}

		if last.TaskStates[ephemeralTask.Name].Failed {
			return false, fmt.Errorf("expected ephemeral task to be successful not failed")
		}

		return true, nil
	}, func(err error) {
		t.Fatalf("error waiting for initial state:\n%v", err)
	})

	// Tell the alloc to stop
	stopAlloc := alloc.Copy()
	stopAlloc.DesiredStatus = structs.AllocDesiredStatusStop
	ar.Update(stopAlloc)

	// Wait for tasks to stop.
	testutil.WaitForResult(func() (bool, error) {
		last := upd.Last()

		if s := last.TaskStates[ephemeralTask.Name].State; s != structs.TaskStateDead {
			return false, fmt.Errorf("expected ephemeral task to be dead not %s", s)
		}

		if last.TaskStates[ephemeralTask.Name].Failed {
			return false, fmt.Errorf("expected ephemeral task to be successful not failed")
		}

		if s := last.TaskStates[mainTask.Name].State; s != structs.TaskStateDead {
			return false, fmt.Errorf("expected main task to be dead not %s", s)
		}

		if last.TaskStates[mainTask.Name].Failed {
			return false, fmt.Errorf("expected main task to be successful not failed")
		}

		if s := last.TaskStates[sidecarTask.Name].State; s != structs.TaskStateDead {
			return false, fmt.Errorf("expected sidecar task to be dead not %s", s)
		}

		if last.TaskStates[sidecarTask.Name].Failed {
			return false, fmt.Errorf("expected sidecar task to be successful not failed")
		}

		if s := last.TaskStates[poststopTask.Name].State; s != structs.TaskStateRunning {
			return false, fmt.Errorf("expected poststop task to be running not %s", s)
		}

		return true, nil
	}, func(err error) {
		t.Fatalf("error waiting for kill state:\n%v", err)
	})

	last := upd.Last()
	require.Less(t, last.TaskStates[ephemeralTask.Name].FinishedAt, last.TaskStates[mainTask.Name].FinishedAt)
	require.Less(t, last.TaskStates[mainTask.Name].FinishedAt, last.TaskStates[sidecarTask.Name].FinishedAt)

	// Wait for poststop task to stop.
	testutil.WaitForResult(func() (bool, error) {
		last := upd.Last()

		if s := last.TaskStates[poststopTask.Name].State; s != structs.TaskStateDead {
			return false, fmt.Errorf("expected poststop task to be dead not %s", s)
		}

		if last.TaskStates[poststopTask.Name].Failed {
			return false, fmt.Errorf("expected poststop task to be successful not failed")
		}

		return true, nil
	}, func(err error) {
		t.Fatalf("error waiting for poststop state:\n%v", err)
	})

	last = upd.Last()
	require.Less(t, last.TaskStates[sidecarTask.Name].FinishedAt, last.TaskStates[poststopTask.Name].FinishedAt)
}

func TestHasSidecarTasks(t *testing.T) {
	ci.Parallel(t)

	testCases := []struct {
		name           string
		lifecycle      []*structs.TaskLifecycleConfig
		hasSidecars    bool
		hasNonsidecars bool
	}{
		{
			name: "all sidecar - one",
			lifecycle: []*structs.TaskLifecycleConfig{
				{
					Hook:    structs.TaskLifecycleHookPrestart,
					Sidecar: true,
				},
			},
			hasSidecars:    true,
			hasNonsidecars: false,
		},
		{
			name: "all sidecar - multiple",
			lifecycle: []*structs.TaskLifecycleConfig{
				{
					Hook:    structs.TaskLifecycleHookPrestart,
					Sidecar: true,
				},
				{
					Hook:    structs.TaskLifecycleHookPrestart,
					Sidecar: true,
				},
				{
					Hook:    structs.TaskLifecycleHookPrestart,
					Sidecar: true,
				},
			},
			hasSidecars:    true,
			hasNonsidecars: false,
		},
		{
			name: "some sidecars, some others",
			lifecycle: []*structs.TaskLifecycleConfig{
				nil,
				{
					Hook:    structs.TaskLifecycleHookPrestart,
					Sidecar: false,
				},
				{
					Hook:    structs.TaskLifecycleHookPrestart,
					Sidecar: true,
				},
			},
			hasSidecars:    true,
			hasNonsidecars: true,
		},
		{
			name: "no sidecars",
			lifecycle: []*structs.TaskLifecycleConfig{
				nil,
				{
					Hook:    structs.TaskLifecycleHookPrestart,
					Sidecar: false,
				},
				nil,
			},
			hasSidecars:    false,
			hasNonsidecars: true,
		},
	}

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			// Create alloc with the given task lifecycle configurations.
			alloc := mock.BatchAlloc()

			tasks := []*structs.Task{}
			resources := map[string]*structs.AllocatedTaskResources{}

			tr := alloc.AllocatedResources.Tasks[alloc.Job.TaskGroups[0].Tasks[0].Name]

			for i, lifecycle := range tc.lifecycle {
				task := alloc.Job.TaskGroups[0].Tasks[0].Copy()
				task.Name = fmt.Sprintf("task%d", i)
				task.Lifecycle = lifecycle
				tasks = append(tasks, task)
				resources[task.Name] = tr
			}

			alloc.Job.TaskGroups[0].Tasks = tasks
			alloc.AllocatedResources.Tasks = resources

			// Create alloc runner.
			arConf, cleanup := testAllocRunnerConfig(t, alloc)
			defer cleanup()

			ar, err := NewAllocRunner(arConf)
			require.NoError(t, err)

			require.Equal(t, tc.hasSidecars, hasSidecarTasks(ar.tasks), "sidecars")

			runners := []*taskrunner.TaskRunner{}
			for _, r := range ar.tasks {
				runners = append(runners, r)
			}
			require.Equal(t, tc.hasNonsidecars, hasNonSidecarTasks(runners), "non-sidecars")
		})
	}
}