github.com/manicqin/nomad@v0.9.5/client/allocrunner/alloc_runner_test.go

package allocrunner

import (
	"fmt"
	"io/ioutil"
	"os"
	"path/filepath"
	"testing"
	"time"

	"github.com/hashicorp/consul/api"
	"github.com/hashicorp/nomad/client/allochealth"
	"github.com/hashicorp/nomad/client/allocwatcher"
	cconsul "github.com/hashicorp/nomad/client/consul"
	"github.com/hashicorp/nomad/client/state"
	"github.com/hashicorp/nomad/command/agent/consul"
	"github.com/hashicorp/nomad/helper/uuid"
	"github.com/hashicorp/nomad/nomad/mock"
	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/hashicorp/nomad/testutil"
	"github.com/stretchr/testify/require"
)

// destroy does a blocking destroy on an alloc runner
func destroy(ar *allocRunner) {
	ar.Destroy()
	<-ar.DestroyCh()
}

// TestAllocRunner_AllocState_Initialized asserts that TaskStates returned by
// AllocState() are initialized even before the AllocRunner has run.
func TestAllocRunner_AllocState_Initialized(t *testing.T) {
	t.Parallel()

	alloc := mock.Alloc()
	alloc.Job.TaskGroups[0].Tasks[0].Driver = "mock_driver"
	conf, cleanup := testAllocRunnerConfig(t, alloc)
	defer cleanup()

	ar, err := NewAllocRunner(conf)
	require.NoError(t, err)

	allocState := ar.AllocState()

	require.NotNil(t, allocState)
	require.NotNil(t, allocState.TaskStates[conf.Alloc.Job.TaskGroups[0].Tasks[0].Name])
}

// TestAllocRunner_TaskLeader_KillTG asserts that when a leader task dies the
// entire task group is killed.
func TestAllocRunner_TaskLeader_KillTG(t *testing.T) {
	t.Parallel()

	alloc := mock.BatchAlloc()
	tr := alloc.AllocatedResources.Tasks[alloc.Job.TaskGroups[0].Tasks[0].Name]
	alloc.Job.TaskGroups[0].RestartPolicy.Attempts = 0

	// Create two tasks in the task group
	task := alloc.Job.TaskGroups[0].Tasks[0]
	task.Name = "task1"
	task.Driver = "mock_driver"
	task.KillTimeout = 10 * time.Millisecond
	task.Config = map[string]interface{}{
		"run_for": "10s",
	}

	task2 := alloc.Job.TaskGroups[0].Tasks[0].Copy()
	task2.Name = "task2"
	task2.Driver = "mock_driver"
	task2.Leader = true
	task2.Config = map[string]interface{}{
		"run_for": "1s",
	}
	alloc.Job.TaskGroups[0].Tasks = append(alloc.Job.TaskGroups[0].Tasks, task2)
	alloc.AllocatedResources.Tasks[task.Name] = tr
	alloc.AllocatedResources.Tasks[task2.Name] = tr

	conf, cleanup := testAllocRunnerConfig(t, alloc)
	defer cleanup()
	ar, err := NewAllocRunner(conf)
	require.NoError(t, err)
	defer destroy(ar)
	go ar.Run()

	// Wait for all tasks to be killed
	upd := conf.StateUpdater.(*MockStateUpdater)
	testutil.WaitForResult(func() (bool, error) {
		last := upd.Last()
		if last == nil {
			return false, fmt.Errorf("No updates")
		}
		if last.ClientStatus != structs.AllocClientStatusComplete {
			return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete)
		}

		// Task1 should be killed because Task2 exited
		state1 := last.TaskStates[task.Name]
		if state1.State != structs.TaskStateDead {
			return false, fmt.Errorf("got state %v; want %v", state1.State, structs.TaskStateDead)
		}
		if state1.FinishedAt.IsZero() || state1.StartedAt.IsZero() {
			return false, fmt.Errorf("expected to have a start and finish time")
		}
		if len(state1.Events) < 2 {
			// At least have a received and destroyed
			return false, fmt.Errorf("Unexpected number of events")
		}

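		// Scan the follower's events for the leader-dead notification and
		// capture the kill message asserted on below.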
		found := false
		killingMsg := ""
		for _, e := range state1.Events {
			if e.Type == structs.TaskLeaderDead {
				found = true
			}
			if e.Type == structs.TaskKilling {
				killingMsg = e.DisplayMessage
			}
		}

		if !found {
			return false, fmt.Errorf("Did not find event %v", structs.TaskLeaderDead)
		}

		expectedKillingMsg := "Sent interrupt. Waiting 10ms before force killing"
		if killingMsg != expectedKillingMsg {
			return false, fmt.Errorf("Unexpected task event message - wanted %q. got %q", expectedKillingMsg, killingMsg)
		}

		// Task Two should be dead
		state2 := last.TaskStates[task2.Name]
		if state2.State != structs.TaskStateDead {
			return false, fmt.Errorf("got state %v; want %v", state2.State, structs.TaskStateDead)
		}
		if state2.FinishedAt.IsZero() || state2.StartedAt.IsZero() {
			return false, fmt.Errorf("expected to have a start and finish time")
		}

		return true, nil
	}, func(err error) {
		t.Fatalf("err: %v", err)
	})
}

func TestAllocRunner_TaskGroup_ShutdownDelay(t *testing.T) {
	t.Parallel()

	alloc := mock.Alloc()
	tr := alloc.AllocatedResources.Tasks[alloc.Job.TaskGroups[0].Tasks[0].Name]
	alloc.Job.TaskGroups[0].RestartPolicy.Attempts = 0

	// Create a group service
	tg := alloc.Job.TaskGroups[0]
	tg.Services = []*structs.Service{
		{
			Name: "shutdown_service",
		},
	}

	// Create two tasks in the group
	task := alloc.Job.TaskGroups[0].Tasks[0]
	task.Name = "follower1"
	task.Driver = "mock_driver"
	task.Config = map[string]interface{}{
		"run_for": "10s",
	}

	task2 := alloc.Job.TaskGroups[0].Tasks[0].Copy()
	task2.Name = "leader"
	task2.Driver = "mock_driver"
	task2.Leader = true
	task2.Config = map[string]interface{}{
		"run_for": "10s",
	}

	alloc.Job.TaskGroups[0].Tasks = append(alloc.Job.TaskGroups[0].Tasks, task2)
	alloc.AllocatedResources.Tasks[task.Name] = tr
	alloc.AllocatedResources.Tasks[task2.Name] = tr

	// Set a shutdown delay
	shutdownDelay := 1 * time.Second
	alloc.Job.TaskGroups[0].ShutdownDelay = &shutdownDelay

	conf, cleanup := testAllocRunnerConfig(t, alloc)
	defer cleanup()
	ar, err := NewAllocRunner(conf)
	require.NoError(t, err)
	defer destroy(ar)
	go ar.Run()

	// Wait for tasks to start
	upd := conf.StateUpdater.(*MockStateUpdater)
	last := upd.Last()
	testutil.WaitForResult(func() (bool, error) {
		last = upd.Last()
		if last == nil {
			return false, fmt.Errorf("No updates")
		}
		if n := len(last.TaskStates); n != 2 {
			return false, fmt.Errorf("Not enough task states (want: 2; found %d)", n)
		}
		for name, state := range last.TaskStates {
			if state.State != structs.TaskStateRunning {
				return false, fmt.Errorf("Task %q is not running yet (it's %q)", name, state.State)
			}
		}
		return true, nil
	}, func(err error) {
		t.Fatalf("err: %v", err)
	})

	// Reset updates
	upd.Reset()

	// Stop alloc
	shutdownInit := time.Now()
	update := alloc.Copy()
	update.DesiredStatus = structs.AllocDesiredStatusStop
	ar.Update(update)

	// Wait for tasks to stop
	testutil.WaitForResult(func() (bool, error) {
		last := upd.Last()
		if last == nil {
			return false, fmt.Errorf("No updates")
		}

		fin := last.TaskStates["leader"].FinishedAt

		if fin.IsZero() {
			return false, nil
		}

		return true, nil
	}, func(err error) {
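		// On timeout, log the last observed task states so the failure is
		// easier to diagnose.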
		last := upd.Last()
		for name, state := range last.TaskStates {
			t.Logf("%s: %s", name, state.State)
		}
		t.Fatalf("err: %v", err)
	})

	// Get consul client operations
	consulClient := conf.Consul.(*cconsul.MockConsulServiceClient)
	consulOpts := consulClient.GetOps()
	var groupRemoveOp cconsul.MockConsulOp
	for _, op := range consulOpts {
		// Grab the first deregistration request
		if op.Op == "remove" && op.Name == "group-web" {
			groupRemoveOp = op
			break
		}
	}

	// Ensure remove operation is close to shutdown initiation
	require.True(t, groupRemoveOp.OccurredAt.Sub(shutdownInit) < 100*time.Millisecond)

	last = upd.Last()
	minShutdown := shutdownInit.Add(task.ShutdownDelay)
	leaderFinished := last.TaskStates["leader"].FinishedAt
	followerFinished := last.TaskStates["follower1"].FinishedAt

	// Check that both tasks shut down after min possible shutdown time
	require.Greater(t, leaderFinished.UnixNano(), minShutdown.UnixNano())
	require.Greater(t, followerFinished.UnixNano(), minShutdown.UnixNano())

	// Check that there is at least shutdown_delay between the consul
	// remove operation and the task's finish time
	require.True(t, leaderFinished.Sub(groupRemoveOp.OccurredAt) > shutdownDelay)
}

// TestAllocRunner_TaskLeader_StopTG asserts that when stopping an alloc with a
// leader the leader is stopped before other tasks.
func TestAllocRunner_TaskLeader_StopTG(t *testing.T) {
	t.Parallel()

	alloc := mock.Alloc()
	tr := alloc.AllocatedResources.Tasks[alloc.Job.TaskGroups[0].Tasks[0].Name]
	alloc.Job.TaskGroups[0].RestartPolicy.Attempts = 0

	// Create 3 tasks in the task group
	task := alloc.Job.TaskGroups[0].Tasks[0]
	task.Name = "follower1"
	task.Driver = "mock_driver"
	task.Config = map[string]interface{}{
		"run_for": "10s",
	}

	task2 := alloc.Job.TaskGroups[0].Tasks[0].Copy()
	task2.Name = "leader"
	task2.Driver = "mock_driver"
	task2.Leader = true
	task2.Config = map[string]interface{}{
		"run_for": "10s",
	}

	task3 := alloc.Job.TaskGroups[0].Tasks[0].Copy()
	task3.Name = "follower2"
	task3.Driver = "mock_driver"
	task3.Config = map[string]interface{}{
		"run_for": "10s",
	}
	alloc.Job.TaskGroups[0].Tasks = append(alloc.Job.TaskGroups[0].Tasks, task2, task3)
	alloc.AllocatedResources.Tasks[task.Name] = tr
	alloc.AllocatedResources.Tasks[task2.Name] = tr
	alloc.AllocatedResources.Tasks[task3.Name] = tr

	conf, cleanup := testAllocRunnerConfig(t, alloc)
	defer cleanup()
	ar, err := NewAllocRunner(conf)
	require.NoError(t, err)
	defer destroy(ar)
	go ar.Run()

	// Wait for tasks to start
	upd := conf.StateUpdater.(*MockStateUpdater)
	last := upd.Last()
	testutil.WaitForResult(func() (bool, error) {
		last = upd.Last()
		if last == nil {
			return false, fmt.Errorf("No updates")
		}
		if n := len(last.TaskStates); n != 3 {
			return false, fmt.Errorf("Not enough task states (want: 3; found %d)", n)
		}
		for name, state := range last.TaskStates {
			if state.State != structs.TaskStateRunning {
				return false, fmt.Errorf("Task %q is not running yet (it's %q)", name, state.State)
			}
		}
		return true, nil
	}, func(err error) {
		t.Fatalf("err: %v", err)
	})

	// Reset updates
	upd.Reset()

	// Stop alloc
	update := alloc.Copy()
	update.DesiredStatus = structs.AllocDesiredStatusStop
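	// Push the stop update; the leader should be killed before the followers.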
	ar.Update(update)

	// Wait for tasks to stop
	testutil.WaitForResult(func() (bool, error) {
		last := upd.Last()
		if last == nil {
			return false, fmt.Errorf("No updates")
		}
		if last.TaskStates["leader"].FinishedAt.UnixNano() >= last.TaskStates["follower1"].FinishedAt.UnixNano() {
			return false, fmt.Errorf("expected leader to finish before follower1: %s >= %s",
				last.TaskStates["leader"].FinishedAt, last.TaskStates["follower1"].FinishedAt)
		}
		if last.TaskStates["leader"].FinishedAt.UnixNano() >= last.TaskStates["follower2"].FinishedAt.UnixNano() {
			return false, fmt.Errorf("expected leader to finish before follower2: %s >= %s",
				last.TaskStates["leader"].FinishedAt, last.TaskStates["follower2"].FinishedAt)
		}
		return true, nil
	}, func(err error) {
		last := upd.Last()
		for name, state := range last.TaskStates {
			t.Logf("%s: %s", name, state.State)
		}
		t.Fatalf("err: %v", err)
	})
}

// TestAllocRunner_TaskLeader_StopRestoredTG asserts that when stopping a
// restored task group with a leader that failed before restoring, the leader
// is not stopped as it does not exist.
// See https://github.com/hashicorp/nomad/issues/3420#issuecomment-341666932
func TestAllocRunner_TaskLeader_StopRestoredTG(t *testing.T) {
	t.Parallel()

	alloc := mock.Alloc()
	tr := alloc.AllocatedResources.Tasks[alloc.Job.TaskGroups[0].Tasks[0].Name]
	alloc.Job.TaskGroups[0].RestartPolicy.Attempts = 0

	// Create a leader and follower task in the task group
	task := alloc.Job.TaskGroups[0].Tasks[0]
	task.Name = "follower1"
	task.Driver = "mock_driver"
	task.KillTimeout = 10 * time.Second
	task.Config = map[string]interface{}{
		"run_for": "10s",
	}

	task2 := alloc.Job.TaskGroups[0].Tasks[0].Copy()
	task2.Name = "leader"
	task2.Driver = "mock_driver"
	task2.Leader = true
	task2.KillTimeout = 10 * time.Millisecond
	task2.Config = map[string]interface{}{
		"run_for": "10s",
	}

	alloc.Job.TaskGroups[0].Tasks = append(alloc.Job.TaskGroups[0].Tasks, task2)
	alloc.AllocatedResources.Tasks[task.Name] = tr
	alloc.AllocatedResources.Tasks[task2.Name] = tr

	conf, cleanup := testAllocRunnerConfig(t, alloc)
	defer cleanup()

	// Use a memory backed statedb
	conf.StateDB = state.NewMemDB(conf.Logger)

	ar, err := NewAllocRunner(conf)
	require.NoError(t, err)

	// Mimic Nomad exiting before the leader stopping is able to stop other tasks.
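	// Record a dead leader and a running follower so that the restored
	// runner created below starts from that state.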
	ar.tasks["leader"].UpdateState(structs.TaskStateDead, structs.NewTaskEvent(structs.TaskKilled))
	ar.tasks["follower1"].UpdateState(structs.TaskStateRunning, structs.NewTaskEvent(structs.TaskStarted))

	// Create a new AllocRunner to test RestoreState and Run
	ar2, err := NewAllocRunner(conf)
	require.NoError(t, err)
	defer destroy(ar2)

	if err := ar2.Restore(); err != nil {
		t.Fatalf("error restoring state: %v", err)
	}
	ar2.Run()

	// Wait for tasks to be stopped because leader is dead
	testutil.WaitForResult(func() (bool, error) {
		alloc := ar2.Alloc()
		for task, state := range alloc.TaskStates {
			if state.State != structs.TaskStateDead {
				return false, fmt.Errorf("Task %q should be dead: %v", task, state.State)
			}
		}
		return true, nil
	}, func(err error) {
		t.Fatalf("err: %v", err)
	})

	// Make sure it GCs properly
	ar2.Destroy()

	select {
	case <-ar2.DestroyCh():
		// exited as expected
	case <-time.After(10 * time.Second):
		t.Fatalf("timed out waiting for AR to GC")
	}
}

func TestAllocRunner_Update_Semantics(t *testing.T) {
	t.Parallel()
	require := require.New(t)

	updatedAlloc := func(a *structs.Allocation) *structs.Allocation {
		upd := a.CopySkipJob()
		upd.AllocModifyIndex++

		return upd
	}

	alloc := mock.Alloc()
	alloc.Job.TaskGroups[0].Tasks[0].Driver = "mock_driver"
	conf, cleanup := testAllocRunnerConfig(t, alloc)
	defer cleanup()

	ar, err := NewAllocRunner(conf)
	require.NoError(err)

	upd1 := updatedAlloc(alloc)
	ar.Update(upd1)

	// Update was placed into a queue
	require.Len(ar.allocUpdatedCh, 1)

	upd2 := updatedAlloc(alloc)
	ar.Update(upd2)

	// Allocation was _replaced_

	require.Len(ar.allocUpdatedCh, 1)
	queuedAlloc := <-ar.allocUpdatedCh
	require.Equal(upd2, queuedAlloc)

	// Requeueing older alloc is skipped
	ar.Update(upd2)
	ar.Update(upd1)

	queuedAlloc = <-ar.allocUpdatedCh
	require.Equal(upd2, queuedAlloc)

	// Ignore after watch closed

	close(ar.waitCh)

	ar.Update(upd1)

	// Did not queue the update
	require.Len(ar.allocUpdatedCh, 0)
}

// TestAllocRunner_DeploymentHealth_Healthy_Migration asserts that health is
// reported for services that got migrated; not just part of deployments.
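// The alloc deliberately has no DeploymentID, so health can only come from the
// group's migrate stanza.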
func TestAllocRunner_DeploymentHealth_Healthy_Migration(t *testing.T) {
	t.Parallel()

	alloc := mock.Alloc()

	// Ensure the alloc is *not* part of a deployment
	alloc.DeploymentID = ""

	// Shorten the default migration healthy time
	tg := alloc.Job.TaskGroups[0]
	tg.Migrate = structs.DefaultMigrateStrategy()
	tg.Migrate.MinHealthyTime = 100 * time.Millisecond
	tg.Migrate.HealthCheck = structs.MigrateStrategyHealthStates

	task := tg.Tasks[0]
	task.Driver = "mock_driver"
	task.Config = map[string]interface{}{
		"run_for": "30s",
	}

	conf, cleanup := testAllocRunnerConfig(t, alloc)
	defer cleanup()

	ar, err := NewAllocRunner(conf)
	require.NoError(t, err)
	go ar.Run()
	defer destroy(ar)

	upd := conf.StateUpdater.(*MockStateUpdater)
	testutil.WaitForResult(func() (bool, error) {
		last := upd.Last()
		if last == nil {
			return false, fmt.Errorf("No updates")
		}
		if !last.DeploymentStatus.HasHealth() {
			return false, fmt.Errorf("want deployment status healthy; got unset")
		} else if !*last.DeploymentStatus.Healthy {
			// This is fatal
			t.Fatal("want deployment status healthy; got unhealthy")
		}
		return true, nil
	}, func(err error) {
		require.NoError(t, err)
	})
}

// TestAllocRunner_DeploymentHealth_Healthy_NoChecks asserts that the health
// watcher will mark the allocation as healthy based on task states alone.
func TestAllocRunner_DeploymentHealth_Healthy_NoChecks(t *testing.T) {
	t.Parallel()

	alloc := mock.Alloc()

	task := alloc.Job.TaskGroups[0].Tasks[0]
	task.Driver = "mock_driver"
	task.Config = map[string]interface{}{
		"run_for": "10s",
	}

	// Create a task that takes longer to become healthy
	alloc.Job.TaskGroups[0].Tasks = append(alloc.Job.TaskGroups[0].Tasks, task.Copy())
	alloc.AllocatedResources.Tasks["task2"] = alloc.AllocatedResources.Tasks["web"].Copy()
	task2 := alloc.Job.TaskGroups[0].Tasks[1]
	task2.Name = "task2"
	task2.Config["start_block_for"] = "500ms"

	// Make the alloc be part of a deployment that uses task states for
	// health checks
	alloc.DeploymentID = uuid.Generate()
	alloc.Job.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
	alloc.Job.TaskGroups[0].Update.HealthCheck = structs.UpdateStrategyHealthCheck_TaskStates
	alloc.Job.TaskGroups[0].Update.MaxParallel = 1
	alloc.Job.TaskGroups[0].Update.MinHealthyTime = 100 * time.Millisecond

	conf, cleanup := testAllocRunnerConfig(t, alloc)
	defer cleanup()

	ar, err := NewAllocRunner(conf)
	require.NoError(t, err)

	start, done := time.Now(), time.Time{}
	go ar.Run()
	defer destroy(ar)

	upd := conf.StateUpdater.(*MockStateUpdater)
	testutil.WaitForResult(func() (bool, error) {
		last := upd.Last()
		if last == nil {
			return false, fmt.Errorf("No updates")
		}
		if !last.DeploymentStatus.HasHealth() {
			return false, fmt.Errorf("want deployment status healthy; got unset")
		} else if !*last.DeploymentStatus.Healthy {
			// This is fatal
			t.Fatal("want deployment status healthy; got unhealthy")
		}

		// Capture the done timestamp
		done = last.DeploymentStatus.Timestamp
		return true, nil
	}, func(err error) {
		require.NoError(t, err)
	})

	if d := done.Sub(start); d < 500*time.Millisecond {
		t.Fatalf("didn't wait for second task group. Only took %v", d)
	}
}

// TestAllocRunner_DeploymentHealth_Unhealthy_Checks asserts that the health
// watcher will mark the allocation as unhealthy with failing checks.
func TestAllocRunner_DeploymentHealth_Unhealthy_Checks(t *testing.T) {
	t.Parallel()

	alloc := mock.Alloc()
	task := alloc.Job.TaskGroups[0].Tasks[0]
	task.Driver = "mock_driver"
	task.Config = map[string]interface{}{
		"run_for": "10s",
	}

	// Set a service with check
	task.Services = []*structs.Service{
		{
			Name:      "fakservice",
			PortLabel: "http",
			Checks: []*structs.ServiceCheck{
				{
					Name:     "fakecheck",
					Type:     structs.ServiceCheckScript,
					Command:  "true",
					Interval: 30 * time.Second,
					Timeout:  5 * time.Second,
				},
			},
		},
	}

	// Make the alloc be part of a deployment
	alloc.DeploymentID = uuid.Generate()
	alloc.Job.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
	alloc.Job.TaskGroups[0].Update.HealthCheck = structs.UpdateStrategyHealthCheck_Checks
	alloc.Job.TaskGroups[0].Update.MaxParallel = 1
	alloc.Job.TaskGroups[0].Update.MinHealthyTime = 100 * time.Millisecond
	alloc.Job.TaskGroups[0].Update.HealthyDeadline = 1 * time.Second

	checkUnhealthy := &api.AgentCheck{
		CheckID: uuid.Generate(),
		Status:  api.HealthWarning,
	}

	conf, cleanup := testAllocRunnerConfig(t, alloc)
	defer cleanup()

	// Mock Consul to always return a check that is not passing, so the
	// healthy deadline is exceeded
	consulClient := conf.Consul.(*cconsul.MockConsulServiceClient)
	consulClient.AllocRegistrationsFn = func(allocID string) (*consul.AllocRegistration, error) {
		return &consul.AllocRegistration{
			Tasks: map[string]*consul.ServiceRegistrations{
				task.Name: {
					Services: map[string]*consul.ServiceRegistration{
						"123": {
							Service: &api.AgentService{Service: "fakeservice"},
							Checks:  []*api.AgentCheck{checkUnhealthy},
						},
					},
				},
			},
		}, nil
	}

	ar, err := NewAllocRunner(conf)
	require.NoError(t, err)
	go ar.Run()
	defer destroy(ar)

	var lastUpdate *structs.Allocation
	upd := conf.StateUpdater.(*MockStateUpdater)
	testutil.WaitForResult(func() (bool, error) {
		lastUpdate = upd.Last()
		if lastUpdate == nil {
			return false, fmt.Errorf("No updates")
		}
		if !lastUpdate.DeploymentStatus.HasHealth() {
			return false, fmt.Errorf("want deployment status unhealthy; got unset")
		} else if *lastUpdate.DeploymentStatus.Healthy {
			// This is fatal
			t.Fatal("want deployment status unhealthy; got healthy")
		}
		return true, nil
	}, func(err error) {
		require.NoError(t, err)
	})

	// Assert that we have an event explaining why we are unhealthy.
	require.Len(t, lastUpdate.TaskStates, 1)
	state := lastUpdate.TaskStates[task.Name]
	require.NotNil(t, state)
	require.NotEmpty(t, state.Events)
	last := state.Events[len(state.Events)-1]
	require.Equal(t, allochealth.AllocHealthEventSource, last.Type)
	require.Contains(t, last.Message, "by deadline")
}

// TestAllocRunner_Destroy asserts that Destroy kills and cleans up a running
// alloc.
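// A memory-backed state DB is used so the test can also assert that persisted
// client state is removed on destroy.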
func TestAllocRunner_Destroy(t *testing.T) {
	t.Parallel()

	// Ensure task takes some time
	alloc := mock.BatchAlloc()
	task := alloc.Job.TaskGroups[0].Tasks[0]
	task.Config["run_for"] = "10s"

	conf, cleanup := testAllocRunnerConfig(t, alloc)
	defer cleanup()

	// Use a MemDB to assert alloc state gets cleaned up
	conf.StateDB = state.NewMemDB(conf.Logger)

	ar, err := NewAllocRunner(conf)
	require.NoError(t, err)
	go ar.Run()

	// Wait for alloc to be running
	testutil.WaitForResult(func() (bool, error) {
		state := ar.AllocState()

		return state.ClientStatus == structs.AllocClientStatusRunning,
			fmt.Errorf("got client status %v; want running", state.ClientStatus)
	}, func(err error) {
		require.NoError(t, err)
	})

	// Assert state was stored
	ls, ts, err := conf.StateDB.GetTaskRunnerState(alloc.ID, task.Name)
	require.NoError(t, err)
	require.NotNil(t, ls)
	require.NotNil(t, ts)

	// Now destroy
	ar.Destroy()

	select {
	case <-ar.DestroyCh():
		// Destroyed properly!
	case <-time.After(10 * time.Second):
		require.Fail(t, "timed out waiting for alloc to be destroyed")
	}

	// Assert alloc is dead
	state := ar.AllocState()
	require.Equal(t, structs.AllocClientStatusComplete, state.ClientStatus)

	// Assert the state was cleaned
	ls, ts, err = conf.StateDB.GetTaskRunnerState(alloc.ID, task.Name)
	require.NoError(t, err)
	require.Nil(t, ls)
	require.Nil(t, ts)

	// Assert the alloc directory was cleaned
	if _, err := os.Stat(ar.allocDir.AllocDir); err == nil {
		require.Fail(t, "alloc dir still exists: %v", ar.allocDir.AllocDir)
	} else if !os.IsNotExist(err) {
		require.Failf(t, "expected NotExist error", "found %v", err)
	}
}

func TestAllocRunner_SimpleRun(t *testing.T) {
	t.Parallel()

	alloc := mock.BatchAlloc()

	conf, cleanup := testAllocRunnerConfig(t, alloc)
	defer cleanup()
	ar, err := NewAllocRunner(conf)
	require.NoError(t, err)
	go ar.Run()
	defer destroy(ar)

	// Wait for alloc to be running
	testutil.WaitForResult(func() (bool, error) {
		state := ar.AllocState()

		if state.ClientStatus != structs.AllocClientStatusComplete {
			return false, fmt.Errorf("got status %v; want %v", state.ClientStatus, structs.AllocClientStatusComplete)
		}

		for t, s := range state.TaskStates {
			if s.FinishedAt.IsZero() {
				return false, fmt.Errorf("task %q has zero FinishedAt value", t)
			}
		}

		return true, nil
	}, func(err error) {
		require.NoError(t, err)
	})

}

// TestAllocRunner_MoveAllocDir asserts that a rescheduled
// allocation copies ephemeral disk content from the previous alloc run
func TestAllocRunner_MoveAllocDir(t *testing.T) {
	t.Parallel()

	// Step 1: start and run a task
	alloc := mock.BatchAlloc()
	conf, cleanup := testAllocRunnerConfig(t, alloc)
	defer cleanup()
	ar, err := NewAllocRunner(conf)
	require.NoError(t, err)
	ar.Run()
	defer destroy(ar)

	require.Equal(t, structs.AllocClientStatusComplete, ar.AllocState().ClientStatus)

	// Step 2. Modify its directory
	task := alloc.Job.TaskGroups[0].Tasks[0]
	dataFile := filepath.Join(ar.allocDir.SharedDir, "data", "data_file")
	ioutil.WriteFile(dataFile, []byte("hello world"), os.ModePerm)
	taskDir := ar.allocDir.TaskDirs[task.Name]
	taskLocalFile := filepath.Join(taskDir.LocalDir, "local_file")
	ioutil.WriteFile(taskLocalFile, []byte("good bye world"), os.ModePerm)

	// Step 3. Start a new alloc
	alloc2 := mock.BatchAlloc()
	alloc2.PreviousAllocation = alloc.ID
	alloc2.Job.TaskGroups[0].EphemeralDisk.Sticky = true

	conf2, cleanup := testAllocRunnerConfig(t, alloc2)
	conf2.PrevAllocWatcher, conf2.PrevAllocMigrator = allocwatcher.NewAllocWatcher(allocwatcher.Config{
		Alloc:          alloc2,
		PreviousRunner: ar,
		Logger:         conf2.Logger,
	})
	defer cleanup()
	ar2, err := NewAllocRunner(conf2)
	require.NoError(t, err)

	ar2.Run()
	defer destroy(ar2)

	require.Equal(t, structs.AllocClientStatusComplete, ar2.AllocState().ClientStatus)

	// Ensure that data from ar was moved to ar2
	dataFile = filepath.Join(ar2.allocDir.SharedDir, "data", "data_file")
	fileInfo, _ := os.Stat(dataFile)
	require.NotNilf(t, fileInfo, "file %q not found", dataFile)

	taskDir = ar2.allocDir.TaskDirs[task.Name]
	taskLocalFile = filepath.Join(taskDir.LocalDir, "local_file")
	fileInfo, _ = os.Stat(taskLocalFile)
	require.NotNilf(t, fileInfo, "file %q not found", taskLocalFile)

}

// TestAllocRunner_HandlesArtifactFailure ensures that if one task in a task
// group keeps retrying an artifact fetch, the other tasks in the group are
// still able to proceed.
func TestAllocRunner_HandlesArtifactFailure(t *testing.T) {
	t.Parallel()

	alloc := mock.BatchAlloc()
	alloc.Job.TaskGroups[0].RestartPolicy = &structs.RestartPolicy{
		Mode:     structs.RestartPolicyModeFail,
		Attempts: 1,
		Delay:    time.Nanosecond,
		Interval: time.Hour,
	}

	// Create a new task with a bad artifact
	badtask := alloc.Job.TaskGroups[0].Tasks[0].Copy()
	badtask.Name = "bad"
	badtask.Artifacts = []*structs.TaskArtifact{
		{GetterSource: "http://127.0.0.1:0/foo/bar/baz"},
	}

	alloc.Job.TaskGroups[0].Tasks = append(alloc.Job.TaskGroups[0].Tasks, badtask)
	alloc.AllocatedResources.Tasks["bad"] = &structs.AllocatedTaskResources{
		Cpu: structs.AllocatedCpuResources{
			CpuShares: 500,
		},
		Memory: structs.AllocatedMemoryResources{
			MemoryMB: 256,
		},
	}

	conf, cleanup := testAllocRunnerConfig(t, alloc)
	defer cleanup()
	ar, err := NewAllocRunner(conf)
	require.NoError(t, err)
	go ar.Run()
	defer destroy(ar)

	testutil.WaitForResult(func() (bool, error) {
		state := ar.AllocState()

		switch state.ClientStatus {
		case structs.AllocClientStatusComplete, structs.AllocClientStatusFailed:
			return true, nil
		default:
			return false, fmt.Errorf("got status %v but want terminal", state.ClientStatus)
		}

	}, func(err error) {
		require.NoError(t, err)
	})

	state := ar.AllocState()
	require.Equal(t, structs.AllocClientStatusFailed, state.ClientStatus)
	require.Equal(t, structs.TaskStateDead, state.TaskStates["web"].State)
	require.True(t, state.TaskStates["web"].Successful())
	require.Equal(t, structs.TaskStateDead, state.TaskStates["bad"].State)
	require.True(t, state.TaskStates["bad"].Failed)
}

// Test that alloc runner kills tasks in task group when another task fails
func TestAllocRunner_TaskFailed_KillTG(t *testing.T) {
	alloc := mock.Alloc()
	tr := alloc.AllocatedResources.Tasks[alloc.Job.TaskGroups[0].Tasks[0].Name]
	alloc.Job.TaskGroups[0].RestartPolicy.Attempts = 0

	// Create two tasks in the task group
	task := alloc.Job.TaskGroups[0].Tasks[0]
	task.Name = "task1"
	task.Driver = "mock_driver"
	task.KillTimeout = 10 * time.Millisecond
	task.Config = map[string]interface{}{
		"run_for": "10s",
	}
	// Set a service with check
	task.Services = []*structs.Service{
		{
			Name:      "fakservice",
			PortLabel: "http",
			Checks: []*structs.ServiceCheck{
				{
					Name:     "fakecheck",
					Type:     structs.ServiceCheckScript,
					Command:  "true",
					Interval: 30 * time.Second,
					Timeout:  5 * time.Second,
				},
			},
		},
	}

	task2 := alloc.Job.TaskGroups[0].Tasks[0].Copy()
	task2.Name = "task 2"
	task2.Driver = "mock_driver"
	task2.Config = map[string]interface{}{
		"start_error": "fail task please",
	}
	alloc.Job.TaskGroups[0].Tasks = append(alloc.Job.TaskGroups[0].Tasks, task2)
	alloc.AllocatedResources.Tasks[task.Name] = tr
	alloc.AllocatedResources.Tasks[task2.Name] = tr

	// Make the alloc be part of a deployment
	alloc.DeploymentID = uuid.Generate()
	alloc.Job.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
	alloc.Job.TaskGroups[0].Update.HealthCheck = structs.UpdateStrategyHealthCheck_Checks
	alloc.Job.TaskGroups[0].Update.MaxParallel = 1
	alloc.Job.TaskGroups[0].Update.MinHealthyTime = 10 * time.Millisecond
	alloc.Job.TaskGroups[0].Update.HealthyDeadline = 2 * time.Second

	checkHealthy := &api.AgentCheck{
		CheckID: uuid.Generate(),
		Status:  api.HealthPassing,
	}

	conf, cleanup := testAllocRunnerConfig(t, alloc)
	defer cleanup()

	consulClient := conf.Consul.(*cconsul.MockConsulServiceClient)
	consulClient.AllocRegistrationsFn = func(allocID string) (*consul.AllocRegistration, error) {
		return &consul.AllocRegistration{
			Tasks: map[string]*consul.ServiceRegistrations{
				task.Name: {
					Services: map[string]*consul.ServiceRegistration{
						"123": {
							Service: &api.AgentService{Service: "fakeservice"},
							Checks:  []*api.AgentCheck{checkHealthy},
						},
					},
				},
			},
		}, nil
	}

	ar, err := NewAllocRunner(conf)
	require.NoError(t, err)
	defer destroy(ar)
	go ar.Run()
	upd := conf.StateUpdater.(*MockStateUpdater)

	testutil.WaitForResult(func() (bool, error) {
		last := upd.Last()
		if last == nil {
			return false, fmt.Errorf("No updates")
		}
		if last.ClientStatus != structs.AllocClientStatusFailed {
			return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusFailed)
		}

		// Task One should be killed
		state1 := last.TaskStates[task.Name]
		if state1.State != structs.TaskStateDead {
			return false, fmt.Errorf("got state %v; want %v", state1.State, structs.TaskStateDead)
		}
		if len(state1.Events) < 2 {
			// At least have a received and destroyed
			return false, fmt.Errorf("Unexpected number of events")
		}

		found := false
		for _, e := range state1.Events {
			if e.Type == structs.TaskSiblingFailed {
				found = true
			}
		}

		if !found {
			return false, fmt.Errorf("Did not find event %v", structs.TaskSiblingFailed)
		}

		// Task Two should be failed
		state2 := last.TaskStates[task2.Name]
		if state2.State != structs.TaskStateDead {
			return false, fmt.Errorf("got state %v; want %v", state2.State, structs.TaskStateDead)
		}
		if !state2.Failed {
			return false, fmt.Errorf("task2 should have failed")
		}

		if !last.DeploymentStatus.HasHealth() {
			return false, fmt.Errorf("Expected deployment health to be non nil")
		}

		return true, nil
	}, func(err error) {
		require.Fail(t, "err: %v", err)
	})
}

// Test that alloc becoming terminal should destroy the alloc runner
func TestAllocRunner_TerminalUpdate_Destroy(t *testing.T) {
	t.Parallel()
	alloc := mock.BatchAlloc()
	tr := alloc.AllocatedResources.Tasks[alloc.Job.TaskGroups[0].Tasks[0].Name]
	alloc.Job.TaskGroups[0].RestartPolicy.Attempts = 0
	// Ensure task takes some time
	task := alloc.Job.TaskGroups[0].Tasks[0]
	task.Driver = "mock_driver"
	task.Config["run_for"] = "10s"
	alloc.AllocatedResources.Tasks[task.Name] = tr

	conf, cleanup := testAllocRunnerConfig(t, alloc)
	defer cleanup()
	ar, err := NewAllocRunner(conf)
	require.NoError(t, err)
	defer destroy(ar)
	go ar.Run()
	upd := conf.StateUpdater.(*MockStateUpdater)

	testutil.WaitForResult(func() (bool, error) {
		last := upd.Last()
		if last == nil {
			return false, fmt.Errorf("No updates")
		}
		if last.ClientStatus != structs.AllocClientStatusRunning {
			return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusRunning)
		}
		return true, nil
	}, func(err error) {
		require.Fail(t, "err: %v", err)
	})

	// Update the alloc to be terminal which should cause the alloc runner to
	// stop the tasks and wait for a destroy.
	update := ar.alloc.Copy()
	update.DesiredStatus = structs.AllocDesiredStatusStop
	ar.Update(update)

	testutil.WaitForResult(func() (bool, error) {
		last := upd.Last()
		if last == nil {
			return false, fmt.Errorf("No updates")
		}

		// Check the status has changed.
		if last.ClientStatus != structs.AllocClientStatusComplete {
			return false, fmt.Errorf("got client status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete)
		}

		// Check the alloc directory still exists
		if _, err := os.Stat(ar.allocDir.AllocDir); err != nil {
			return false, fmt.Errorf("alloc dir destroyed: %v", ar.allocDir.AllocDir)
		}

		return true, nil
	}, func(err error) {
		require.Fail(t, "err: %v", err)
	})

	// Send the destroy signal and ensure the AllocRunner cleans up.
	ar.Destroy()

	testutil.WaitForResult(func() (bool, error) {
		last := upd.Last()
		if last == nil {
			return false, fmt.Errorf("No updates")
		}

		// Check the status has changed.
		if last.ClientStatus != structs.AllocClientStatusComplete {
			return false, fmt.Errorf("got client status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete)
		}

		// Check the alloc directory was cleaned
		if _, err := os.Stat(ar.allocDir.AllocDir); err == nil {
			return false, fmt.Errorf("alloc dir still exists: %v", ar.allocDir.AllocDir)
		} else if !os.IsNotExist(err) {
			return false, fmt.Errorf("stat err: %v", err)
		}

		return true, nil
	}, func(err error) {
		require.Fail(t, "err: %v", err)
	})
}

// TestAllocRunner_PersistState_Destroyed asserts that destroyed allocs don't persist anymore
func TestAllocRunner_PersistState_Destroyed(t *testing.T) {
	t.Parallel()

	alloc := mock.BatchAlloc()
	taskName := alloc.Job.LookupTaskGroup(alloc.TaskGroup).Tasks[0].Name

	conf, cleanup := testAllocRunnerConfig(t, alloc)
	conf.StateDB = state.NewMemDB(conf.Logger)

	defer cleanup()
	ar, err := NewAllocRunner(conf)
	require.NoError(t, err)
	defer destroy(ar)

	go ar.Run()

	select {
	case <-ar.WaitCh():
	case <-time.After(10 * time.Second):
		require.Fail(t, "timed out waiting for alloc to complete")
	}

	// test final persisted state upon completion
	require.NoError(t, ar.PersistState())
	allocs, _, err := conf.StateDB.GetAllAllocations()
	require.NoError(t, err)
	require.Len(t, allocs, 1)
	require.Equal(t, alloc.ID, allocs[0].ID)
	_, ts, err := conf.StateDB.GetTaskRunnerState(alloc.ID, taskName)
	require.NoError(t, err)
	require.Equal(t, structs.TaskStateDead, ts.State)

	// check that DB alloc is empty after destroying AR
	ar.Destroy()
	select {
	case <-ar.DestroyCh():
	case <-time.After(10 * time.Second):
		require.Fail(t, "timed out waiting for destruction")
	}

	allocs, _, err = conf.StateDB.GetAllAllocations()
	require.NoError(t, err)
	require.Empty(t, allocs)
	_, ts, err = conf.StateDB.GetTaskRunnerState(alloc.ID, taskName)
	require.NoError(t, err)
	require.Nil(t, ts)

	// check that DB alloc is empty after persisting state of destroyed AR
	ar.PersistState()
	allocs, _, err = conf.StateDB.GetAllAllocations()
	require.NoError(t, err)
	require.Empty(t, allocs)
	_, ts, err = conf.StateDB.GetTaskRunnerState(alloc.ID, taskName)
	require.NoError(t, err)
	require.Nil(t, ts)
}