github.com/maier/nomad@v0.4.1-0.20161110003312-a9e3d0b8549d/client/alloc_runner_test.go (about) 1 package client 2 3 import ( 4 "fmt" 5 "io/ioutil" 6 "os" 7 "path/filepath" 8 "testing" 9 "time" 10 11 "github.com/hashicorp/nomad/nomad/mock" 12 "github.com/hashicorp/nomad/nomad/structs" 13 "github.com/hashicorp/nomad/testutil" 14 15 "github.com/hashicorp/nomad/client/config" 16 ctestutil "github.com/hashicorp/nomad/client/testutil" 17 "github.com/hashicorp/nomad/client/vaultclient" 18 ) 19 20 type MockAllocStateUpdater struct { 21 Count int 22 Allocs []*structs.Allocation 23 } 24 25 func (m *MockAllocStateUpdater) Update(alloc *structs.Allocation) { 26 m.Count += 1 27 m.Allocs = append(m.Allocs, alloc) 28 } 29 30 func testAllocRunnerFromAlloc(alloc *structs.Allocation, restarts bool) (*MockAllocStateUpdater, *AllocRunner) { 31 logger := testLogger() 32 conf := config.DefaultConfig() 33 conf.StateDir = os.TempDir() 34 conf.AllocDir = os.TempDir() 35 upd := &MockAllocStateUpdater{} 36 if !restarts { 37 *alloc.Job.LookupTaskGroup(alloc.TaskGroup).RestartPolicy = structs.RestartPolicy{Attempts: 0} 38 alloc.Job.Type = structs.JobTypeBatch 39 } 40 vclient := vaultclient.NewMockVaultClient() 41 ar := NewAllocRunner(logger, conf, upd.Update, alloc, vclient) 42 return upd, ar 43 } 44 45 func testAllocRunner(restarts bool) (*MockAllocStateUpdater, *AllocRunner) { 46 return testAllocRunnerFromAlloc(mock.Alloc(), restarts) 47 } 48 49 func TestAllocRunner_SimpleRun(t *testing.T) { 50 ctestutil.ExecCompatible(t) 51 upd, ar := testAllocRunner(false) 52 go ar.Run() 53 defer ar.Destroy() 54 55 testutil.WaitForResult(func() (bool, error) { 56 if upd.Count == 0 { 57 return false, fmt.Errorf("No updates") 58 } 59 last := upd.Allocs[upd.Count-1] 60 if last.ClientStatus != structs.AllocClientStatusComplete { 61 return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete) 62 } 63 return true, nil 64 }, func(err error) { 65 t.Fatalf("err: %v", err) 66 }) 67 } 68 69 // TestAllocRuner_RetryArtifact ensures that if one task in a task group is 70 // retrying fetching an artifact, other tasks in the group should be able 71 // to proceed. 72 func TestAllocRunner_RetryArtifact(t *testing.T) { 73 ctestutil.ExecCompatible(t) 74 75 alloc := mock.Alloc() 76 alloc.Job.Type = structs.JobTypeBatch 77 alloc.Job.TaskGroups[0].RestartPolicy.Mode = structs.RestartPolicyModeFail 78 alloc.Job.TaskGroups[0].RestartPolicy.Attempts = 1 79 alloc.Job.TaskGroups[0].RestartPolicy.Delay = time.Duration(4*testutil.TestMultiplier()) * time.Second 80 81 task := alloc.Job.TaskGroups[0].Tasks[0] 82 task.Driver = "mock_driver" 83 task.Config = map[string]interface{}{ 84 "exit_code": "0", 85 "run_for": "1s", 86 } 87 88 // Create a new task with a bad artifact 89 badtask := alloc.Job.TaskGroups[0].Tasks[0].Copy() 90 badtask.Name = "bad" 91 badtask.Artifacts = []*structs.TaskArtifact{ 92 {GetterSource: "http://127.1.1.111:12315/foo/bar/baz"}, 93 } 94 95 alloc.Job.TaskGroups[0].Tasks = append(alloc.Job.TaskGroups[0].Tasks, badtask) 96 upd, ar := testAllocRunnerFromAlloc(alloc, true) 97 go ar.Run() 98 defer ar.Destroy() 99 100 testutil.WaitForResult(func() (bool, error) { 101 if upd.Count < 6 { 102 return false, fmt.Errorf("Not enough updates") 103 } 104 last := upd.Allocs[upd.Count-1] 105 106 // web task should have completed successfully while bad task 107 // retries artififact fetching 108 webstate := last.TaskStates["web"] 109 if webstate.State != structs.TaskStateDead { 110 return false, fmt.Errorf("expected web to be dead but found %q", last.TaskStates["web"].State) 111 } 112 if !webstate.Successful() { 113 return false, fmt.Errorf("expected web to have exited successfully") 114 } 115 116 // bad task should have failed 117 badstate := last.TaskStates["bad"] 118 if badstate.State != structs.TaskStateDead { 119 return false, fmt.Errorf("expected bad to be dead but found %q", badstate.State) 120 } 121 if !badstate.Failed { 122 return false, fmt.Errorf("expected bad to have failed: %#v", badstate.Events) 123 } 124 return true, nil 125 }, func(err error) { 126 t.Fatalf("err: %v", err) 127 }) 128 } 129 130 func TestAllocRunner_TerminalUpdate_Destroy(t *testing.T) { 131 ctestutil.ExecCompatible(t) 132 upd, ar := testAllocRunner(false) 133 134 // Ensure task takes some time 135 task := ar.alloc.Job.TaskGroups[0].Tasks[0] 136 task.Config["command"] = "/bin/sleep" 137 task.Config["args"] = []string{"10"} 138 go ar.Run() 139 140 testutil.WaitForResult(func() (bool, error) { 141 if upd.Count == 0 { 142 return false, fmt.Errorf("No updates") 143 } 144 last := upd.Allocs[upd.Count-1] 145 if last.ClientStatus != structs.AllocClientStatusRunning { 146 return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusRunning) 147 } 148 return true, nil 149 }, func(err error) { 150 t.Fatalf("err: %v", err) 151 }) 152 153 // Update the alloc to be terminal which should cause the alloc runner to 154 // stop the tasks and wait for a destroy. 155 update := ar.alloc.Copy() 156 update.DesiredStatus = structs.AllocDesiredStatusStop 157 ar.Update(update) 158 159 testutil.WaitForResult(func() (bool, error) { 160 if upd.Count == 0 { 161 return false, nil 162 } 163 164 // Check the status has changed. 165 last := upd.Allocs[upd.Count-1] 166 if last.ClientStatus != structs.AllocClientStatusComplete { 167 return false, fmt.Errorf("got client status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete) 168 } 169 170 // Check the state still exists 171 if _, err := os.Stat(ar.stateFilePath()); err != nil { 172 return false, fmt.Errorf("state file destroyed: %v", err) 173 } 174 175 // Check the alloc directory still exists 176 if _, err := os.Stat(ar.ctx.AllocDir.AllocDir); err != nil { 177 return false, fmt.Errorf("alloc dir destroyed: %v", ar.ctx.AllocDir.AllocDir) 178 } 179 180 return true, nil 181 }, func(err error) { 182 t.Fatalf("err: %v", err) 183 }) 184 185 // Send the destroy signal and ensure the AllocRunner cleans up. 186 ar.Destroy() 187 188 testutil.WaitForResult(func() (bool, error) { 189 if upd.Count == 0 { 190 return false, nil 191 } 192 193 // Check the status has changed. 194 last := upd.Allocs[upd.Count-1] 195 if last.ClientStatus != structs.AllocClientStatusComplete { 196 return false, fmt.Errorf("got client status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete) 197 } 198 199 // Check the state was cleaned 200 if _, err := os.Stat(ar.stateFilePath()); err == nil { 201 return false, fmt.Errorf("state file still exists: %v", ar.stateFilePath()) 202 } else if !os.IsNotExist(err) { 203 return false, fmt.Errorf("stat err: %v", err) 204 } 205 206 // Check the alloc directory was cleaned 207 if _, err := os.Stat(ar.ctx.AllocDir.AllocDir); err == nil { 208 return false, fmt.Errorf("alloc dir still exists: %v", ar.ctx.AllocDir.AllocDir) 209 } else if !os.IsNotExist(err) { 210 return false, fmt.Errorf("stat err: %v", err) 211 } 212 213 return true, nil 214 }, func(err error) { 215 t.Fatalf("err: %v", err) 216 }) 217 } 218 219 func TestAllocRunner_Destroy(t *testing.T) { 220 ctestutil.ExecCompatible(t) 221 upd, ar := testAllocRunner(false) 222 223 // Ensure task takes some time 224 task := ar.alloc.Job.TaskGroups[0].Tasks[0] 225 task.Config["command"] = "/bin/sleep" 226 task.Config["args"] = []string{"10"} 227 go ar.Run() 228 start := time.Now() 229 230 // Begin the tear down 231 go func() { 232 time.Sleep(1 * time.Second) 233 ar.Destroy() 234 }() 235 236 testutil.WaitForResult(func() (bool, error) { 237 if upd.Count == 0 { 238 return false, nil 239 } 240 241 // Check the status has changed. 242 last := upd.Allocs[upd.Count-1] 243 if last.ClientStatus != structs.AllocClientStatusComplete { 244 return false, fmt.Errorf("got client status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete) 245 } 246 247 // Check the state was cleaned 248 if _, err := os.Stat(ar.stateFilePath()); err == nil { 249 return false, fmt.Errorf("state file still exists: %v", ar.stateFilePath()) 250 } else if !os.IsNotExist(err) { 251 return false, fmt.Errorf("stat err: %v", err) 252 } 253 254 // Check the alloc directory was cleaned 255 if _, err := os.Stat(ar.ctx.AllocDir.AllocDir); err == nil { 256 return false, fmt.Errorf("alloc dir still exists: %v", ar.ctx.AllocDir.AllocDir) 257 } else if !os.IsNotExist(err) { 258 return false, fmt.Errorf("stat err: %v", err) 259 } 260 261 return true, nil 262 }, func(err error) { 263 t.Fatalf("err: %v", err) 264 }) 265 266 if time.Since(start) > 15*time.Second { 267 t.Fatalf("took too long to terminate") 268 } 269 } 270 271 func TestAllocRunner_Update(t *testing.T) { 272 ctestutil.ExecCompatible(t) 273 _, ar := testAllocRunner(false) 274 275 // Ensure task takes some time 276 task := ar.alloc.Job.TaskGroups[0].Tasks[0] 277 task.Config["command"] = "/bin/sleep" 278 task.Config["args"] = []string{"10"} 279 go ar.Run() 280 defer ar.Destroy() 281 282 // Update the alloc definition 283 newAlloc := new(structs.Allocation) 284 *newAlloc = *ar.alloc 285 newAlloc.Name = "FOO" 286 newAlloc.AllocModifyIndex++ 287 ar.Update(newAlloc) 288 289 // Check the alloc runner stores the update allocation. 290 testutil.WaitForResult(func() (bool, error) { 291 return ar.Alloc().Name == "FOO", nil 292 }, func(err error) { 293 t.Fatalf("err: %v %#v", err, ar.Alloc()) 294 }) 295 } 296 297 func TestAllocRunner_SaveRestoreState(t *testing.T) { 298 alloc := mock.Alloc() 299 task := alloc.Job.TaskGroups[0].Tasks[0] 300 task.Driver = "mock_driver" 301 task.Config = map[string]interface{}{ 302 "exit_code": "0", 303 "run_for": "10s", 304 } 305 306 upd, ar := testAllocRunnerFromAlloc(alloc, false) 307 go ar.Run() 308 309 // Snapshot state 310 testutil.WaitForResult(func() (bool, error) { 311 return len(ar.tasks) == 1, nil 312 }, func(err error) { 313 t.Fatalf("task never started: %v", err) 314 }) 315 316 err := ar.SaveState() 317 if err != nil { 318 t.Fatalf("err: %v", err) 319 } 320 321 // Create a new alloc runner 322 ar2 := NewAllocRunner(ar.logger, ar.config, upd.Update, 323 &structs.Allocation{ID: ar.alloc.ID}, ar.vaultClient) 324 err = ar2.RestoreState() 325 if err != nil { 326 t.Fatalf("err: %v", err) 327 } 328 go ar2.Run() 329 330 testutil.WaitForResult(func() (bool, error) { 331 if len(ar2.tasks) != 1 { 332 return false, fmt.Errorf("Incorrect number of tasks") 333 } 334 335 if upd.Count == 0 { 336 return false, nil 337 } 338 339 last := upd.Allocs[upd.Count-1] 340 return last.ClientStatus == structs.AllocClientStatusRunning, nil 341 }, func(err error) { 342 t.Fatalf("err: %v %#v %#v", err, upd.Allocs[0], ar.alloc.TaskStates) 343 }) 344 345 // Destroy and wait 346 ar2.Destroy() 347 start := time.Now() 348 349 testutil.WaitForResult(func() (bool, error) { 350 alloc := ar2.Alloc() 351 if alloc.ClientStatus != structs.AllocClientStatusComplete { 352 return false, fmt.Errorf("Bad client status; got %v; want %v", alloc.ClientStatus, structs.AllocClientStatusComplete) 353 } 354 return true, nil 355 }, func(err error) { 356 t.Fatalf("err: %v %#v %#v", err, upd.Allocs[0], ar.alloc.TaskStates) 357 }) 358 359 if time.Since(start) > time.Duration(testutil.TestMultiplier()*5)*time.Second { 360 t.Fatalf("took too long to terminate") 361 } 362 } 363 364 func TestAllocRunner_SaveRestoreState_TerminalAlloc(t *testing.T) { 365 ctestutil.ExecCompatible(t) 366 upd, ar := testAllocRunner(false) 367 ar.logger = prefixedTestLogger("ar1: ") 368 369 // Ensure task takes some time 370 371 ar.alloc.Job.TaskGroups[0].Tasks[0].Driver = "mock_driver" 372 task := ar.alloc.Job.TaskGroups[0].Tasks[0] 373 task.Config["run_for"] = "10s" 374 go ar.Run() 375 376 testutil.WaitForResult(func() (bool, error) { 377 if upd.Count == 0 { 378 return false, fmt.Errorf("No updates") 379 } 380 last := upd.Allocs[upd.Count-1] 381 if last.ClientStatus != structs.AllocClientStatusRunning { 382 return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusRunning) 383 } 384 return true, nil 385 }, func(err error) { 386 t.Fatalf("err: %v", err) 387 }) 388 389 // Update the alloc to be terminal which should cause the alloc runner to 390 // stop the tasks and wait for a destroy. 391 update := ar.alloc.Copy() 392 update.DesiredStatus = structs.AllocDesiredStatusStop 393 ar.Update(update) 394 395 testutil.WaitForResult(func() (bool, error) { 396 return ar.alloc.DesiredStatus == structs.AllocDesiredStatusStop, nil 397 }, func(err error) { 398 t.Fatalf("err: %v", err) 399 }) 400 401 err := ar.SaveState() 402 if err != nil { 403 t.Fatalf("err: %v", err) 404 } 405 406 // Ensure both alloc runners don't destroy 407 ar.destroy = true 408 409 // Create a new alloc runner 410 ar2 := NewAllocRunner(ar.logger, ar.config, upd.Update, 411 &structs.Allocation{ID: ar.alloc.ID}, ar.vaultClient) 412 ar2.logger = prefixedTestLogger("ar2: ") 413 err = ar2.RestoreState() 414 if err != nil { 415 t.Fatalf("err: %v", err) 416 } 417 go ar2.Run() 418 ar2.logger.Println("[TESTING] starting second alloc runner") 419 420 testutil.WaitForResult(func() (bool, error) { 421 // Check the state still exists 422 if _, err := os.Stat(ar.stateFilePath()); err != nil { 423 return false, fmt.Errorf("state file destroyed: %v", err) 424 } 425 426 // Check the alloc directory still exists 427 if _, err := os.Stat(ar.ctx.AllocDir.AllocDir); err != nil { 428 return false, fmt.Errorf("alloc dir destroyed: %v", ar.ctx.AllocDir.AllocDir) 429 } 430 431 return true, nil 432 }, func(err error) { 433 t.Fatalf("err: %v %#v %#v", err, upd.Allocs[0], ar.alloc.TaskStates) 434 }) 435 436 // Send the destroy signal and ensure the AllocRunner cleans up. 437 ar2.logger.Println("[TESTING] destroying second alloc runner") 438 ar2.Destroy() 439 440 testutil.WaitForResult(func() (bool, error) { 441 if upd.Count == 0 { 442 return false, nil 443 } 444 445 // Check the status has changed. 446 last := upd.Allocs[upd.Count-1] 447 if last.ClientStatus != structs.AllocClientStatusComplete { 448 return false, fmt.Errorf("got client status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete) 449 } 450 451 // Check the state was cleaned 452 if _, err := os.Stat(ar.stateFilePath()); err == nil { 453 return false, fmt.Errorf("state file still exists: %v", ar.stateFilePath()) 454 } else if !os.IsNotExist(err) { 455 return false, fmt.Errorf("stat err: %v", err) 456 } 457 458 // Check the alloc directory was cleaned 459 if _, err := os.Stat(ar.ctx.AllocDir.AllocDir); err == nil { 460 return false, fmt.Errorf("alloc dir still exists: %v", ar.ctx.AllocDir.AllocDir) 461 } else if !os.IsNotExist(err) { 462 return false, fmt.Errorf("stat err: %v", err) 463 } 464 465 return true, nil 466 }, func(err error) { 467 t.Fatalf("err: %v", err) 468 }) 469 } 470 471 func TestAllocRunner_TaskFailed_KillTG(t *testing.T) { 472 ctestutil.ExecCompatible(t) 473 upd, ar := testAllocRunner(false) 474 475 // Create two tasks in the task group 476 task := ar.alloc.Job.TaskGroups[0].Tasks[0] 477 task.Config["command"] = "/bin/sleep" 478 task.Config["args"] = []string{"1000"} 479 480 task2 := ar.alloc.Job.TaskGroups[0].Tasks[0].Copy() 481 task2.Name = "task 2" 482 task2.Config = map[string]interface{}{"command": "invalidBinaryToFail"} 483 ar.alloc.Job.TaskGroups[0].Tasks = append(ar.alloc.Job.TaskGroups[0].Tasks, task2) 484 ar.alloc.TaskResources[task2.Name] = task2.Resources 485 //t.Logf("%#v", ar.alloc.Job.TaskGroups[0]) 486 go ar.Run() 487 488 testutil.WaitForResult(func() (bool, error) { 489 if upd.Count == 0 { 490 return false, fmt.Errorf("No updates") 491 } 492 last := upd.Allocs[upd.Count-1] 493 if last.ClientStatus != structs.AllocClientStatusFailed { 494 return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusFailed) 495 } 496 497 // Task One should be killed 498 state1 := last.TaskStates[task.Name] 499 if state1.State != structs.TaskStateDead { 500 return false, fmt.Errorf("got state %v; want %v", state1.State, structs.TaskStateDead) 501 } 502 if len(state1.Events) < 3 { 503 return false, fmt.Errorf("Unexpected number of events") 504 } 505 if lastE := state1.Events[len(state1.Events)-3]; lastE.Type != structs.TaskSiblingFailed { 506 return false, fmt.Errorf("got last event %v; want %v", lastE.Type, structs.TaskSiblingFailed) 507 } 508 509 // Task Two should be failed 510 state2 := last.TaskStates[task2.Name] 511 if state2.State != structs.TaskStateDead { 512 return false, fmt.Errorf("got state %v; want %v", state2.State, structs.TaskStateDead) 513 } 514 if !state2.Failed { 515 return false, fmt.Errorf("task2 should have failed") 516 } 517 518 return true, nil 519 }, func(err error) { 520 t.Fatalf("err: %v", err) 521 }) 522 } 523 524 func TestAllocRunner_MoveAllocDir(t *testing.T) { 525 // Create an alloc runner 526 alloc := mock.Alloc() 527 task := alloc.Job.TaskGroups[0].Tasks[0] 528 task.Driver = "mock_driver" 529 task.Config = map[string]interface{}{ 530 "run_for": "1s", 531 } 532 upd, ar := testAllocRunnerFromAlloc(alloc, false) 533 go ar.Run() 534 535 testutil.WaitForResult(func() (bool, error) { 536 if upd.Count == 0 { 537 return false, fmt.Errorf("No updates") 538 } 539 last := upd.Allocs[upd.Count-1] 540 if last.ClientStatus != structs.AllocClientStatusComplete { 541 return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete) 542 } 543 return true, nil 544 }, func(err error) { 545 t.Fatalf("err: %v", err) 546 }) 547 548 // Write some data in data dir and task dir of the alloc 549 dataFile := filepath.Join(ar.ctx.AllocDir.SharedDir, "data", "data_file") 550 ioutil.WriteFile(dataFile, []byte("hello world"), os.ModePerm) 551 taskDir := ar.ctx.AllocDir.TaskDirs[task.Name] 552 taskLocalFile := filepath.Join(taskDir, "local", "local_file") 553 ioutil.WriteFile(taskLocalFile, []byte("good bye world"), os.ModePerm) 554 555 // Create another alloc runner 556 alloc1 := mock.Alloc() 557 task = alloc1.Job.TaskGroups[0].Tasks[0] 558 task.Driver = "mock_driver" 559 task.Config = map[string]interface{}{ 560 "run_for": "1s", 561 } 562 upd1, ar1 := testAllocRunnerFromAlloc(alloc1, false) 563 ar1.SetPreviousAllocDir(ar.ctx.AllocDir) 564 go ar1.Run() 565 566 testutil.WaitForResult(func() (bool, error) { 567 if upd1.Count == 0 { 568 return false, fmt.Errorf("No updates") 569 } 570 last := upd1.Allocs[upd1.Count-1] 571 if last.ClientStatus != structs.AllocClientStatusComplete { 572 return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete) 573 } 574 return true, nil 575 }, func(err error) { 576 t.Fatalf("err: %v", err) 577 }) 578 579 // Ensure that data from ar1 was moved to ar 580 taskDir = ar1.ctx.AllocDir.TaskDirs[task.Name] 581 taskLocalFile = filepath.Join(taskDir, "local", "local_file") 582 if fileInfo, _ := os.Stat(taskLocalFile); fileInfo == nil { 583 t.Fatalf("file %v not found", taskLocalFile) 584 } 585 586 dataFile = filepath.Join(ar1.ctx.AllocDir.SharedDir, "data", "data_file") 587 if fileInfo, _ := os.Stat(dataFile); fileInfo == nil { 588 t.Fatalf("file %v not found", dataFile) 589 } 590 }