github.com/iqoqo/nomad@v0.11.3-0.20200911112621-d7021c74d101/client/allocrunner/taskrunner/task_runner_test.go

package taskrunner

import (
	"context"
	"errors"
	"fmt"
	"io/ioutil"
	"net/http"
	"net/http/httptest"
	"os"
	"path/filepath"
	"strings"
	"testing"
	"time"

	"github.com/golang/snappy"
	"github.com/hashicorp/nomad/client/allocdir"
	"github.com/hashicorp/nomad/client/allocrunner/interfaces"
	"github.com/hashicorp/nomad/client/config"
	"github.com/hashicorp/nomad/client/consul"
	consulapi "github.com/hashicorp/nomad/client/consul"
	"github.com/hashicorp/nomad/client/devicemanager"
	"github.com/hashicorp/nomad/client/pluginmanager/drivermanager"
	cstate "github.com/hashicorp/nomad/client/state"
	ctestutil "github.com/hashicorp/nomad/client/testutil"
	"github.com/hashicorp/nomad/client/vaultclient"
	agentconsul "github.com/hashicorp/nomad/command/agent/consul"
	mockdriver "github.com/hashicorp/nomad/drivers/mock"
	"github.com/hashicorp/nomad/drivers/rawexec"
	"github.com/hashicorp/nomad/helper/testlog"
	"github.com/hashicorp/nomad/helper/uuid"
	"github.com/hashicorp/nomad/nomad/mock"
	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/hashicorp/nomad/plugins/device"
	"github.com/hashicorp/nomad/plugins/drivers"
	"github.com/hashicorp/nomad/testutil"
	"github.com/kr/pretty"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
)

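// MockTaskStateUpdater mocks the TaskStateUpdated callback a TaskRunner's
// Config expects. Its channel is buffered (size 1) so state updates never
// block the runner while tests can still observe that an update fired.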
type MockTaskStateUpdater struct {
	ch chan struct{}
}

func NewMockTaskStateUpdater() *MockTaskStateUpdater {
	return &MockTaskStateUpdater{
		ch: make(chan struct{}, 1),
	}
}

func (m *MockTaskStateUpdater) TaskStateUpdated() {
	select {
	case m.ch <- struct{}{}:
	default:
	}
}
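
// A typical use in a test (a sketch; the concrete type assertion assumes the
// Config was built by testTaskRunnerConfig below) is to wait on the buffered
// channel after an operation that should emit a state change:
//
//	upd := conf.StateUpdater.(*MockTaskStateUpdater)
//	select {
//	case <-upd.ch:
//	case <-time.After(time.Second):
//		t.Fatal("expected a task state update")
//	}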

// testTaskRunnerConfig returns a taskrunner.Config for the given alloc+task
// plus a cleanup func.
func testTaskRunnerConfig(t *testing.T, alloc *structs.Allocation, taskName string) (*Config, func()) {
	logger := testlog.HCLogger(t)
	clientConf, cleanup := config.TestClientConfig(t)

	// Find the task
	var thisTask *structs.Task
	for _, tg := range alloc.Job.TaskGroups {
		for _, task := range tg.Tasks {
			if task.Name == taskName {
				if thisTask != nil {
					cleanup()
					t.Fatalf("multiple tasks named %q; cannot use this helper", taskName)
				}
				thisTask = task
			}
		}
	}
	if thisTask == nil {
		cleanup()
		t.Fatalf("could not find task %q", taskName)
	}

	// Create the alloc dir + task dir
	allocPath := filepath.Join(clientConf.AllocDir, alloc.ID)
	allocDir := allocdir.NewAllocDir(logger, allocPath)
	if err := allocDir.Build(); err != nil {
		cleanup()
		t.Fatalf("error building alloc dir: %v", err)
	}
	taskDir := allocDir.NewTaskDir(taskName)

	trCleanup := func() {
		if err := allocDir.Destroy(); err != nil {
			t.Logf("error destroying alloc dir: %v", err)
		}
		cleanup()
	}

	// Create a closed channel to mock TaskHookCoordinator.startConditionForTask.
	// Closed channel indicates this task is not blocked on prestart hooks.
	closedCh := make(chan struct{})
	close(closedCh)

	conf := &Config{
		Alloc:                alloc,
		ClientConfig:         clientConf,
		Task:                 thisTask,
		TaskDir:              taskDir,
		Logger:               clientConf.Logger,
		Consul:               consulapi.NewMockConsulServiceClient(t, logger),
		ConsulSI:             consulapi.NewMockServiceIdentitiesClient(),
		Vault:                vaultclient.NewMockVaultClient(),
		StateDB:              cstate.NoopDB{},
		StateUpdater:         NewMockTaskStateUpdater(),
		DeviceManager:        devicemanager.NoopMockManager(),
		DriverManager:        drivermanager.TestDriverManager(t),
		ServersContactedCh:   make(chan struct{}),
		StartConditionMetCtx: closedCh,
	}
	return conf, trCleanup
}
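
// Tests that need the opposite behavior, a task blocked on its start
// condition, can swap in an unclosed channel before building the runner.
// A sketch (only the StartConditionMetCtx field above is assumed):
//
//	conf, cleanup := testTaskRunnerConfig(t, alloc, task.Name)
//	defer cleanup()
//	startCh := make(chan struct{})
//	conf.StartConditionMetCtx = startCh // task stays pending...
//	// ...until the test closes startCh, mimicking the hook coordinator.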

// runTestTaskRunner runs a TaskRunner and returns its configuration as well as
// a cleanup function that ensures the runner is stopped and cleaned up. Tests
// which need to change the Config *must* use testTaskRunnerConfig instead.
func runTestTaskRunner(t *testing.T, alloc *structs.Allocation, taskName string) (*TaskRunner, *Config, func()) {
	config, cleanup := testTaskRunnerConfig(t, alloc, taskName)

	tr, err := NewTaskRunner(config)
	require.NoError(t, err)
	go tr.Run()

	return tr, config, func() {
		tr.Kill(context.Background(), structs.NewTaskEvent("cleanup"))
		cleanup()
	}
}
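
// Minimal usage sketch, mirroring the tests below (mock.BatchAlloc and the
// task lookup are the only assumptions):
//
//	alloc := mock.BatchAlloc()
//	task := alloc.Job.TaskGroups[0].Tasks[0]
//	tr, _, cleanup := runTestTaskRunner(t, alloc, task.Name)
//	defer cleanup()
//	<-tr.WaitCh() // block until the task finishes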

// TestTaskRunner_Restore_Running asserts restoring a running task does not
// rerun the task.
func TestTaskRunner_Restore_Running(t *testing.T) {
	t.Parallel()
	require := require.New(t)

	alloc := mock.BatchAlloc()
	alloc.Job.TaskGroups[0].Count = 1
	task := alloc.Job.TaskGroups[0].Tasks[0]
	task.Driver = "mock_driver"
	task.Config = map[string]interface{}{
		"run_for": "2s",
	}
	conf, cleanup := testTaskRunnerConfig(t, alloc, task.Name)
	conf.StateDB = cstate.NewMemDB(conf.Logger) // "persist" state between task runners
	defer cleanup()

	// Run the first TaskRunner
	origTR, err := NewTaskRunner(conf)
	require.NoError(err)
	go origTR.Run()
	defer origTR.Kill(context.Background(), structs.NewTaskEvent("cleanup"))

	// Wait for it to be running
	testWaitForTaskToStart(t, origTR)

	// Cause TR to exit without shutting down task
	origTR.Shutdown()

	// Start a new TaskRunner and make sure it does not rerun the task
	newTR, err := NewTaskRunner(conf)
	require.NoError(err)

	// Do the Restore
	require.NoError(newTR.Restore())

	go newTR.Run()
	defer newTR.Kill(context.Background(), structs.NewTaskEvent("cleanup"))

	// Wait for new task runner to exit when the process does
	<-newTR.WaitCh()

	// Assert that the process was only started once
	started := 0
	state := newTR.TaskState()
	require.Equal(structs.TaskStateDead, state.State)
	for _, ev := range state.Events {
		if ev.Type == structs.TaskStarted {
			started++
		}
	}
	assert.Equal(t, 1, started)
}

// setupRestoreFailureTest starts a task, shuts down the task runner, and
// kills the task before restarting a new TaskRunner. The new TaskRunner is
// returned once it is running and waiting in pending along with a cleanup
// func.
func setupRestoreFailureTest(t *testing.T, alloc *structs.Allocation) (*TaskRunner, *Config, func()) {
	t.Parallel()

	task := alloc.Job.TaskGroups[0].Tasks[0]
	task.Driver = "raw_exec"
	task.Config = map[string]interface{}{
		"command": "sleep",
		"args":    []string{"30"},
	}
	conf, cleanup1 := testTaskRunnerConfig(t, alloc, task.Name)
	conf.StateDB = cstate.NewMemDB(conf.Logger) // "persist" state between runs

	// Run the first TaskRunner
	origTR, err := NewTaskRunner(conf)
	require.NoError(t, err)
	go origTR.Run()
	cleanup2 := func() {
		origTR.Kill(context.Background(), structs.NewTaskEvent("cleanup"))
		cleanup1()
	}

	// Wait for it to be running
	testWaitForTaskToStart(t, origTR)

	handle := origTR.getDriverHandle()
	require.NotNil(t, handle)
	taskID := handle.taskID

	// Cause TR to exit without shutting down task
	origTR.Shutdown()

	// Get the driver
	driverPlugin, err := conf.DriverManager.Dispense(rawexec.PluginID.Name)
	require.NoError(t, err)
	rawexecDriver := driverPlugin.(*rawexec.Driver)

	// Assert the task is still running despite TR having exited
	taskStatus, err := rawexecDriver.InspectTask(taskID)
	require.NoError(t, err)
	require.Equal(t, drivers.TaskStateRunning, taskStatus.State)

	// Kill the task so it fails to recover when restore is called
	require.NoError(t, rawexecDriver.DestroyTask(taskID, true))
	_, err = rawexecDriver.InspectTask(taskID)
	require.EqualError(t, err, drivers.ErrTaskNotFound.Error())

	// Create a new TaskRunner and Restore the task
	conf.ServersContactedCh = make(chan struct{})
	newTR, err := NewTaskRunner(conf)
	require.NoError(t, err)

	// Assert the TR will wait on servers because reattachment failed
	require.NoError(t, newTR.Restore())
	require.True(t, newTR.waitOnServers)

	// Start new TR
	go newTR.Run()
	cleanup3 := func() {
		newTR.Kill(context.Background(), structs.NewTaskEvent("cleanup"))
		cleanup2() // cleanup2 already chains to cleanup1
	}

	// Assert task has not been restarted
	_, err = rawexecDriver.InspectTask(taskID)
	require.EqualError(t, err, drivers.ErrTaskNotFound.Error())
	ts := newTR.TaskState()
	require.Equal(t, structs.TaskStatePending, ts.State)

	return newTR, conf, cleanup3
}

// TestTaskRunner_Restore_Restart asserts a task that fails to reattach on
// restore blocks in pending until the servers are contacted
// (ServersContactedCh is closed), then restarts. #1795
func TestTaskRunner_Restore_Restart(t *testing.T) {
	newTR, conf, cleanup := setupRestoreFailureTest(t, mock.Alloc())
	defer cleanup()

	// Fake contacting the server by closing the chan
	close(conf.ServersContactedCh)

	testutil.WaitForResult(func() (bool, error) {
		ts := newTR.TaskState().State
		return ts == structs.TaskStateRunning, fmt.Errorf("expected task to be running but found %q", ts)
	}, func(err error) {
		require.NoError(t, err)
	})
}

// TestTaskRunner_Restore_Kill asserts restoring a dead task blocks until
// the task is killed. #1795
func TestTaskRunner_Restore_Kill(t *testing.T) {
	newTR, _, cleanup := setupRestoreFailureTest(t, mock.Alloc())
	defer cleanup()

	// Sending the task a terminal update shouldn't kill it or unblock it
	alloc := newTR.Alloc().Copy()
	alloc.DesiredStatus = structs.AllocDesiredStatusStop
	newTR.Update(alloc)

	require.Equal(t, structs.TaskStatePending, newTR.TaskState().State)

	// AllocRunner will immediately kill tasks after sending a terminal
	// update.
	newTR.Kill(context.Background(), structs.NewTaskEvent(structs.TaskKilling))

	select {
	case <-newTR.WaitCh():
		// It died as expected!
	case <-time.After(10 * time.Second):
		require.Fail(t, "timeout waiting for task to die")
	}
}

// TestTaskRunner_Restore_Update asserts restoring a dead task blocks until
// the servers are contacted; an Update alone must not unblock it. #1795
func TestTaskRunner_Restore_Update(t *testing.T) {
	newTR, conf, cleanup := setupRestoreFailureTest(t, mock.Alloc())
	defer cleanup()

	// Fake Client.runAllocs behavior by calling Update then closing chan
	alloc := newTR.Alloc().Copy()
	newTR.Update(alloc)

	// Update alone should not unblock the test
	require.Equal(t, structs.TaskStatePending, newTR.TaskState().State)

	// Fake Client.runAllocs behavior of closing chan after Update
	close(conf.ServersContactedCh)

	testutil.WaitForResult(func() (bool, error) {
		ts := newTR.TaskState().State
		return ts == structs.TaskStateRunning, fmt.Errorf("expected task to be running but found %q", ts)
	}, func(err error) {
		require.NoError(t, err)
	})
}

// TestTaskRunner_Restore_System asserts restoring a dead system task does not
// block.
func TestTaskRunner_Restore_System(t *testing.T) {
	t.Parallel()

	alloc := mock.Alloc()
	alloc.Job.Type = structs.JobTypeSystem
	task := alloc.Job.TaskGroups[0].Tasks[0]
	task.Driver = "raw_exec"
	task.Config = map[string]interface{}{
		"command": "sleep",
		"args":    []string{"30"},
	}
	conf, cleanup := testTaskRunnerConfig(t, alloc, task.Name)
	defer cleanup()
	conf.StateDB = cstate.NewMemDB(conf.Logger) // "persist" state between runs

	// Run the first TaskRunner
	origTR, err := NewTaskRunner(conf)
	require.NoError(t, err)
	go origTR.Run()
	defer origTR.Kill(context.Background(), structs.NewTaskEvent("cleanup"))

	// Wait for it to be running
	testWaitForTaskToStart(t, origTR)

	handle := origTR.getDriverHandle()
	require.NotNil(t, handle)
	taskID := handle.taskID

	// Cause TR to exit without shutting down task
	origTR.Shutdown()

	// Get the driver
	driverPlugin, err := conf.DriverManager.Dispense(rawexec.PluginID.Name)
	require.NoError(t, err)
	rawexecDriver := driverPlugin.(*rawexec.Driver)

	// Assert the task is still running despite TR having exited
	taskStatus, err := rawexecDriver.InspectTask(taskID)
	require.NoError(t, err)
	require.Equal(t, drivers.TaskStateRunning, taskStatus.State)

	// Kill the task so it fails to recover when restore is called
	require.NoError(t, rawexecDriver.DestroyTask(taskID, true))
	_, err = rawexecDriver.InspectTask(taskID)
	require.EqualError(t, err, drivers.ErrTaskNotFound.Error())

	// Create a new TaskRunner and Restore the task
	conf.ServersContactedCh = make(chan struct{})
	newTR, err := NewTaskRunner(conf)
	require.NoError(t, err)

	// Assert the TR will not wait on servers even though reattachment
	// failed because it is a system task.
	require.NoError(t, newTR.Restore())
	require.False(t, newTR.waitOnServers)

	// Nothing should have closed the chan
	select {
	case <-conf.ServersContactedCh:
		require.Fail(t, "serversContactedCh was closed but should not have been")
	default:
	}

	testutil.WaitForResult(func() (bool, error) {
		ts := newTR.TaskState().State
		return ts == structs.TaskStateRunning, fmt.Errorf("expected task to be running but found %q", ts)
	}, func(err error) {
		require.NoError(t, err)
	})
}

// TestTaskRunner_TaskEnv_Interpolated asserts driver configurations are
// interpolated.
func TestTaskRunner_TaskEnv_Interpolated(t *testing.T) {
	t.Parallel()
	require := require.New(t)

	alloc := mock.BatchAlloc()
	alloc.Job.TaskGroups[0].Meta = map[string]string{
		"common_user": "somebody",
	}
	task := alloc.Job.TaskGroups[0].Tasks[0]
	task.Meta = map[string]string{
		"foo": "bar",
	}

	// Use interpolation from both node attributes and meta vars
	task.Config = map[string]interface{}{
		"run_for":       "1ms",
		"stdout_string": `${node.region} ${NOMAD_META_foo} ${NOMAD_META_common_user}`,
	}

	tr, conf, cleanup := runTestTaskRunner(t, alloc, task.Name)
	defer cleanup()

	// Wait for task to complete
	select {
	case <-tr.WaitCh():
	case <-time.After(3 * time.Second):
		require.Fail("timeout waiting for task to exit")
	}

	// Get the mock driver plugin
	driverPlugin, err := conf.DriverManager.Dispense(mockdriver.PluginID.Name)
	require.NoError(err)
	mockDriver := driverPlugin.(*mockdriver.Driver)

	// Assert its config has been properly interpolated
	driverCfg, mockCfg := mockDriver.GetTaskConfig()
	require.NotNil(driverCfg)
	require.NotNil(mockCfg)
	assert.Equal(t, "global bar somebody", mockCfg.StdoutString)
}

// TestTaskRunner_TaskEnv_Chroot asserts chroot drivers use chroot paths and
// not host paths.
func TestTaskRunner_TaskEnv_Chroot(t *testing.T) {
	ctestutil.ExecCompatible(t)
	t.Parallel()
	require := require.New(t)

	alloc := mock.BatchAlloc()
	task := alloc.Job.TaskGroups[0].Tasks[0]
	task.Driver = "exec"
	task.Config = map[string]interface{}{
		"command": "bash",
		"args": []string{"-c", "echo $NOMAD_ALLOC_DIR; " +
			"echo $NOMAD_TASK_DIR; " +
			"echo $NOMAD_SECRETS_DIR; " +
			"echo $PATH; ",
		},
	}

	// Expect chroot paths and host $PATH
	exp := fmt.Sprintf(`/alloc
/local
/secrets
%s
`, os.Getenv("PATH"))

	conf, cleanup := testTaskRunnerConfig(t, alloc, task.Name)
	defer cleanup()

	// Remove /sbin and /usr from chroot
	conf.ClientConfig.ChrootEnv = map[string]string{
		"/bin":            "/bin",
		"/etc":            "/etc",
		"/lib":            "/lib",
		"/lib32":          "/lib32",
		"/lib64":          "/lib64",
		"/run/resolvconf": "/run/resolvconf",
	}

	tr, err := NewTaskRunner(conf)
	require.NoError(err)
	go tr.Run()
	defer tr.Kill(context.Background(), structs.NewTaskEvent("cleanup"))

	// Wait for task to exit
	select {
	case <-tr.WaitCh():
	case <-time.After(15 * time.Second):
		require.Fail("timeout waiting for task to exit")
	}

	// Read stdout
	p := filepath.Join(conf.TaskDir.LogDir, task.Name+".stdout.0")
	stdout, err := ioutil.ReadFile(p)
	require.NoError(err)
	require.Equalf(exp, string(stdout), "expected: %s\n\nactual: %s\n", exp, stdout)
}

// TestTaskRunner_TaskEnv_Image asserts image drivers use chroot paths and
// not host paths. Host env vars should also be excluded.
func TestTaskRunner_TaskEnv_Image(t *testing.T) {
	ctestutil.DockerCompatible(t)
	t.Parallel()
	require := require.New(t)

	alloc := mock.BatchAlloc()
	task := alloc.Job.TaskGroups[0].Tasks[0]
	task.Driver = "docker"
	task.Config = map[string]interface{}{
		"image":        "redis:3.2-alpine",
		"network_mode": "none",
		"command":      "sh",
		"args": []string{"-c", "echo $NOMAD_ALLOC_DIR; " +
			"echo $NOMAD_TASK_DIR; " +
			"echo $NOMAD_SECRETS_DIR; " +
			"echo $PATH",
		},
	}

	// Expect chroot paths and image specific PATH
	exp := `/alloc
/local
/secrets
/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
`

	tr, conf, cleanup := runTestTaskRunner(t, alloc, task.Name)
	defer cleanup()

	// Wait for task to exit
	select {
	case <-tr.WaitCh():
	case <-time.After(15 * time.Second):
		require.Fail("timeout waiting for task to exit")
	}

	// Read stdout
	p := filepath.Join(conf.TaskDir.LogDir, task.Name+".stdout.0")
	stdout, err := ioutil.ReadFile(p)
	require.NoError(err)
	require.Equalf(exp, string(stdout), "expected: %s\n\nactual: %s\n", exp, stdout)
}

// TestTaskRunner_TaskEnv_None asserts raw_exec uses host paths and env vars.
func TestTaskRunner_TaskEnv_None(t *testing.T) {
	t.Parallel()
	require := require.New(t)

	alloc := mock.BatchAlloc()
	task := alloc.Job.TaskGroups[0].Tasks[0]
	task.Driver = "raw_exec"
	task.Config = map[string]interface{}{
		"command": "sh",
		"args": []string{"-c", "echo $NOMAD_ALLOC_DIR; " +
			"echo $NOMAD_TASK_DIR; " +
			"echo $NOMAD_SECRETS_DIR; " +
			"echo $PATH",
		},
	}

	tr, conf, cleanup := runTestTaskRunner(t, alloc, task.Name)
	defer cleanup()

	// Expect host paths
	root := filepath.Join(conf.ClientConfig.AllocDir, alloc.ID)
	taskDir := filepath.Join(root, task.Name)
	exp := fmt.Sprintf(`%s/alloc
%s/local
%s/secrets
%s
`, root, taskDir, taskDir, os.Getenv("PATH"))

	// Wait for task to exit
	select {
	case <-tr.WaitCh():
	case <-time.After(15 * time.Second):
		require.Fail("timeout waiting for task to exit")
	}

	// Read stdout
	p := filepath.Join(conf.TaskDir.LogDir, task.Name+".stdout.0")
	stdout, err := ioutil.ReadFile(p)
	require.NoError(err)
	require.Equalf(exp, string(stdout), "expected: %s\n\nactual: %s\n", exp, stdout)
}

// TestTaskRunner_DevicePropagation asserts that allocated devices are
// propagated to the driver.
func TestTaskRunner_DevicePropagation(t *testing.T) {
	t.Parallel()
	require := require.New(t)

	// Create a mock alloc that has a gpu
	alloc := mock.BatchAlloc()
	alloc.Job.TaskGroups[0].Count = 1
	task := alloc.Job.TaskGroups[0].Tasks[0]
	task.Driver = "mock_driver"
	task.Config = map[string]interface{}{
		"run_for": "100ms",
	}
	tRes := alloc.AllocatedResources.Tasks[task.Name]
	tRes.Devices = append(tRes.Devices, &structs.AllocatedDeviceResource{Type: "mock"})

	conf, cleanup := testTaskRunnerConfig(t, alloc, task.Name)
	conf.StateDB = cstate.NewMemDB(conf.Logger) // "persist" state between task runners
	defer cleanup()

	// Setup the devicemanager
	dm, ok := conf.DeviceManager.(*devicemanager.MockManager)
	require.True(ok)

	dm.ReserveF = func(d *structs.AllocatedDeviceResource) (*device.ContainerReservation, error) {
		res := &device.ContainerReservation{
			Envs: map[string]string{
				"ABC": "123",
			},
			Mounts: []*device.Mount{
				{
					ReadOnly: true,
					TaskPath: "foo",
					HostPath: "bar",
				},
			},
			Devices: []*device.DeviceSpec{
				{
					TaskPath:    "foo",
					HostPath:    "bar",
					CgroupPerms: "123",
				},
			},
		}
		return res, nil
	}

	// Run the TaskRunner
	tr, err := NewTaskRunner(conf)
	require.NoError(err)
	go tr.Run()
	defer tr.Kill(context.Background(), structs.NewTaskEvent("cleanup"))

	// Wait for task to complete
	select {
	case <-tr.WaitCh():
	case <-time.After(3 * time.Second):
		require.Fail("timeout waiting for task to exit")
	}

	// Get the mock driver plugin
	driverPlugin, err := conf.DriverManager.Dispense(mockdriver.PluginID.Name)
	require.NoError(err)
	mockDriver := driverPlugin.(*mockdriver.Driver)

	// Assert the device reservation was propagated into the driver config
	driverCfg, _ := mockDriver.GetTaskConfig()
	require.NotNil(driverCfg)
	require.Len(driverCfg.Devices, 1)
	require.Equal("123", driverCfg.Devices[0].Permissions)
	require.Len(driverCfg.Mounts, 1)
	require.Equal("foo", driverCfg.Mounts[0].TaskPath)
	require.Contains(driverCfg.Env, "ABC")
}

// mockEnvHook is a test hook that sets an env var and done=true. It counts
// its invocations so tests can assert that done hooks are not re-run.
type mockEnvHook struct {
	called int
}

func (*mockEnvHook) Name() string {
	return "mock_env_hook"
}

func (h *mockEnvHook) Prestart(ctx context.Context, req *interfaces.TaskPrestartRequest, resp *interfaces.TaskPrestartResponse) error {
	h.called++

	resp.Done = true
	resp.Env = map[string]string{
		"mock_hook": "1",
	}

	return nil
}

// TestTaskRunner_Restore_HookEnv asserts that re-running prestart hooks with
// hook environments set restores the environment without re-running done
// hooks.
func TestTaskRunner_Restore_HookEnv(t *testing.T) {
	t.Parallel()
	require := require.New(t)

	alloc := mock.BatchAlloc()
	task := alloc.Job.TaskGroups[0].Tasks[0]
	conf, cleanup := testTaskRunnerConfig(t, alloc, task.Name)
	conf.StateDB = cstate.NewMemDB(conf.Logger) // "persist" state between prestart calls
	defer cleanup()

	tr, err := NewTaskRunner(conf)
	require.NoError(err)

	// Override the default hooks to only run the mock hook
	mockHook := &mockEnvHook{}
	tr.runnerHooks = []interfaces.TaskHook{mockHook}

	// Manually run prestart hooks
	require.NoError(tr.prestart())

	// Assert the hook was run once
	require.Equal(1, mockHook.called)

	// Re-running prestart hooks should *not* call the done mock hook
	require.NoError(tr.prestart())

	// Assert the hook was not run again
	require.Equal(1, mockHook.called)

	// Assert the env is still set
	env := tr.envBuilder.Build().All()
	require.Contains(env, "mock_hook")
	require.Equal("1", env["mock_hook"])
}

// This test asserts that we can recover from an "external" plugin exiting by
// retrieving a new instance of the driver and recovering the task.
func TestTaskRunner_RecoverFromDriverExiting(t *testing.T) {
	t.Parallel()
	require := require.New(t)

	// Create an allocation using the mock driver that exits simulating the
	// driver crashing. We can then test that the task runner recovers from this
	alloc := mock.BatchAlloc()
	task := alloc.Job.TaskGroups[0].Tasks[0]
	task.Driver = "mock_driver"
	task.Config = map[string]interface{}{
		"plugin_exit_after": "1s",
		"run_for":           "5s",
	}

	conf, cleanup := testTaskRunnerConfig(t, alloc, task.Name)
	conf.StateDB = cstate.NewMemDB(conf.Logger) // "persist" state between prestart calls
	defer cleanup()

	tr, err := NewTaskRunner(conf)
	require.NoError(err)

	start := time.Now()
	go tr.Run()
	defer tr.Kill(context.Background(), structs.NewTaskEvent("cleanup"))

	// Wait for the task to be running
	testWaitForTaskToStart(t, tr)

	// Get the task ID
	tr.stateLock.RLock()
	l := tr.localState.TaskHandle
	require.NotNil(l)
	require.NotNil(l.Config)
	require.NotEmpty(l.Config.ID)
	id := l.Config.ID
	tr.stateLock.RUnlock()

	// Get the mock driver plugin
	driverPlugin, err := conf.DriverManager.Dispense(mockdriver.PluginID.Name)
	require.NoError(err)
	mockDriver := driverPlugin.(*mockdriver.Driver)

	// Wait for the task to start
	testutil.WaitForResult(func() (bool, error) {
		// Get the handle and check that it was recovered
		handle := mockDriver.GetHandle(id)
		if handle == nil {
			return false, fmt.Errorf("nil handle")
		}
		if !handle.Recovered {
			return false, fmt.Errorf("handle not recovered")
		}
		return true, nil
	}, func(err error) {
		t.Fatal(err.Error())
	})

	// Wait for task to complete
	select {
	case <-tr.WaitCh():
	case <-time.After(10 * time.Second):
	}

	// Ensure that we actually let the task complete
	require.True(time.Since(start) > 5*time.Second)

	// Check it finished successfully
	state := tr.TaskState()
	require.True(state.Successful())
}

// TestTaskRunner_ShutdownDelay asserts services are removed from Consul
// ${shutdown_delay} seconds before killing the process.
func TestTaskRunner_ShutdownDelay(t *testing.T) {
	t.Parallel()

	alloc := mock.Alloc()
	task := alloc.Job.TaskGroups[0].Tasks[0]
	task.Services[0].Tags = []string{"tag1"}
	task.Services = task.Services[:1] // only need 1 for this test
	task.Driver = "mock_driver"
	task.Config = map[string]interface{}{
		"run_for": "1000s",
	}

	// No shutdown escape hatch for this delay, so don't set it too high
	task.ShutdownDelay = 1000 * time.Duration(testutil.TestMultiplier()) * time.Millisecond

	tr, conf, cleanup := runTestTaskRunner(t, alloc, task.Name)
	defer cleanup()

	mockConsul := conf.Consul.(*consul.MockConsulServiceClient)

	// Wait for the task to start
	testWaitForTaskToStart(t, tr)

	testutil.WaitForResult(func() (bool, error) {
		ops := mockConsul.GetOps()
		if n := len(ops); n != 1 {
			return false, fmt.Errorf("expected 1 consul operation. Found %d", n)
		}
		return ops[0].Op == "add", fmt.Errorf("consul operation was not a registration: %#v", ops[0])
	}, func(err error) {
		t.Fatalf("err: %v", err)
	})

	// Asynchronously kill task
	killSent := time.Now()
	killed := make(chan struct{})
	go func() {
		defer close(killed)
		assert.NoError(t, tr.Kill(context.Background(), structs.NewTaskEvent("test")))
	}()

	// Wait for *2* deregistration calls (due to needing to remove both
	// canary tag variants)
WAIT:
	for {
		ops := mockConsul.GetOps()
		switch n := len(ops); n {
		case 1, 2:
			// Waiting for both deregistration calls
		case 3:
			require.Equalf(t, "remove", ops[1].Op, "expected deregistration but found: %#v", ops[1])
			require.Equalf(t, "remove", ops[2].Op, "expected deregistration but found: %#v", ops[2])
			break WAIT
		default:
			// ?!
			t.Fatalf("unexpected number of consul operations: %d\n%s", n, pretty.Sprint(ops))
		}

		select {
		case <-killed:
			t.Fatal("killed while service still registered")
		case <-time.After(10 * time.Millisecond):
		}
	}

	// Wait for actual exit
	select {
	case <-tr.WaitCh():
	case <-time.After(time.Duration(testutil.TestMultiplier()*15) * time.Second):
		t.Fatalf("timeout")
	}

	<-killed
	killDur := time.Since(killSent)
	if killDur < task.ShutdownDelay {
		t.Fatalf("task killed before shutdown_delay (killed_after: %s; shutdown_delay: %s)",
			killDur, task.ShutdownDelay,
		)
	}
}

// TestTaskRunner_Dispatch_Payload asserts that a dispatch job runs and the
// payload was written to disk.
func TestTaskRunner_Dispatch_Payload(t *testing.T) {
	t.Parallel()

	alloc := mock.BatchAlloc()
	task := alloc.Job.TaskGroups[0].Tasks[0]
	task.Driver = "mock_driver"
	task.Config = map[string]interface{}{
		"run_for": "1s",
	}

	fileName := "test"
	task.DispatchPayload = &structs.DispatchPayloadConfig{
		File: fileName,
	}
	alloc.Job.ParameterizedJob = &structs.ParameterizedJobConfig{}

	// Add a payload (they're snappy encoded bytes)
	expected := []byte("hello world")
	compressed := snappy.Encode(nil, expected)
	alloc.Job.Payload = compressed

	tr, _, cleanup := runTestTaskRunner(t, alloc, task.Name)
	defer cleanup()

	// Wait for it to finish
	testutil.WaitForResult(func() (bool, error) {
		ts := tr.TaskState()
		return ts.State == structs.TaskStateDead, fmt.Errorf("expected state %q but found %q", structs.TaskStateDead, ts.State)
	}, func(err error) {
		require.NoError(t, err)
	})

	// Should have exited successfully
	ts := tr.TaskState()
	require.False(t, ts.Failed)
	require.Zero(t, ts.Restarts)

	// Check that the file was written to disk properly
	payloadPath := filepath.Join(tr.taskDir.LocalDir, fileName)
	data, err := ioutil.ReadFile(payloadPath)
	require.NoError(t, err)
	require.Equal(t, expected, data)
}

// TestTaskRunner_SignalFailure asserts that signal errors are properly
// propagated from the driver to TaskRunner.
func TestTaskRunner_SignalFailure(t *testing.T) {
	t.Parallel()

	alloc := mock.Alloc()
	task := alloc.Job.TaskGroups[0].Tasks[0]
	task.Driver = "mock_driver"
	errMsg := "test forcing failure"
	task.Config = map[string]interface{}{
		"run_for":      "10m",
		"signal_error": errMsg,
	}

	tr, _, cleanup := runTestTaskRunner(t, alloc, task.Name)
	defer cleanup()

	testWaitForTaskToStart(t, tr)

	require.EqualError(t, tr.Signal(&structs.TaskEvent{}, "SIGINT"), errMsg)
}

// TestTaskRunner_RestartTask asserts that restarting a task works and emits a
// Restarting event.
func TestTaskRunner_RestartTask(t *testing.T) {
	t.Parallel()

	alloc := mock.Alloc()
	task := alloc.Job.TaskGroups[0].Tasks[0]
	task.Driver = "mock_driver"
	task.Config = map[string]interface{}{
		"run_for": "10m",
	}

	tr, _, cleanup := runTestTaskRunner(t, alloc, task.Name)
	defer cleanup()

	testWaitForTaskToStart(t, tr)

	// Restart task. Send a RestartSignal event like check watcher. Restart
	// handler emits the Restarting event.
	event := structs.NewTaskEvent(structs.TaskRestartSignal).SetRestartReason("test")
	const fail = false
	tr.Restart(context.Background(), event.Copy(), fail)

	// Wait for it to restart and be running again
	testutil.WaitForResult(func() (bool, error) {
		ts := tr.TaskState()
		if ts.Restarts != 1 {
			return false, fmt.Errorf("expected 1 restart but found %d\nevents: %s",
				ts.Restarts, pretty.Sprint(ts.Events))
		}
		if ts.State != structs.TaskStateRunning {
			return false, fmt.Errorf("expected running but received %s", ts.State)
		}
		return true, nil
	}, func(err error) {
		require.NoError(t, err)
	})

	// Assert the expected Restarting event was emitted
	found := false
	events := tr.TaskState().Events
	for _, e := range events {
		if e.Type == structs.TaskRestartSignal {
			found = true
			require.Equal(t, event.Time, e.Time)
			require.Equal(t, event.RestartReason, e.RestartReason)
			require.Contains(t, e.DisplayMessage, event.RestartReason)
		}
	}
	require.True(t, found, "restarting task event not found: %s", pretty.Sprint(events))
}

// TestTaskRunner_CheckWatcher_Restart asserts that when enabled an unhealthy
// Consul check will cause a task to restart following restart policy rules.
func TestTaskRunner_CheckWatcher_Restart(t *testing.T) {
	t.Parallel()

	alloc := mock.Alloc()

	// Make the restart policy fail within this test
	tg := alloc.Job.TaskGroups[0]
	tg.RestartPolicy.Attempts = 2
	tg.RestartPolicy.Interval = 1 * time.Minute
	tg.RestartPolicy.Delay = 10 * time.Millisecond
	tg.RestartPolicy.Mode = structs.RestartPolicyModeFail

	task := tg.Tasks[0]
	task.Driver = "mock_driver"
	task.Config = map[string]interface{}{
		"run_for": "10m",
	}

	// Make the task register a check that fails
	task.Services[0].Checks[0] = &structs.ServiceCheck{
		Name:     "test-restarts",
		Type:     structs.ServiceCheckTCP,
		Interval: 50 * time.Millisecond,
		CheckRestart: &structs.CheckRestart{
			Limit: 2,
			Grace: 100 * time.Millisecond,
		},
	}

	conf, cleanup := testTaskRunnerConfig(t, alloc, task.Name)
	defer cleanup()

	// Replace the mock Consul ServiceClient with the real ServiceClient
	// backed by a mock consul whose checks are always unhealthy.
	consulAgent := agentconsul.NewMockAgent()
	consulAgent.SetStatus("critical")
	consulClient := agentconsul.NewServiceClient(consulAgent, conf.Logger, true)
	go consulClient.Run()
	defer consulClient.Shutdown()

	conf.Consul = consulClient

	tr, err := NewTaskRunner(conf)
	require.NoError(t, err)

	expectedEvents := []string{
		"Received",
		"Task Setup",
		"Started",
		"Restart Signaled",
		"Terminated",
		"Restarting",
		"Started",
		"Restart Signaled",
		"Terminated",
		"Restarting",
		"Started",
		"Restart Signaled",
		"Terminated",
		"Not Restarting",
	}

	// Bump maxEvents so task events aren't dropped
	tr.maxEvents = 100

	go tr.Run()
	defer tr.Kill(context.Background(), structs.NewTaskEvent("cleanup"))

	// Wait until the task exits. Don't simply wait for it to run as it may
	// get restarted and terminated before the test is able to observe it
	// running.
	select {
	case <-tr.WaitCh():
	case <-time.After(time.Duration(testutil.TestMultiplier()*15) * time.Second):
		require.Fail(t, "timeout")
	}

	state := tr.TaskState()
	actualEvents := make([]string, len(state.Events))
	for i, e := range state.Events {
		actualEvents[i] = string(e.Type)
	}
	require.Equal(t, expectedEvents, actualEvents)
	require.Equal(t, structs.TaskStateDead, state.State)
	require.True(t, state.Failed, pretty.Sprint(state))
}

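// mockEnvoyBootstrapHook is a no-op stand-in for envoyBootstrapHook, swapped
// in by useMockEnvoyBootstrapHook below so Connect tests that do not use
// envoy never have to talk to Consul.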
type mockEnvoyBootstrapHook struct {
	// nothing
}

func (*mockEnvoyBootstrapHook) Name() string {
	return "mock_envoy_bootstrap"
}

func (*mockEnvoyBootstrapHook) Prestart(_ context.Context, _ *interfaces.TaskPrestartRequest, resp *interfaces.TaskPrestartResponse) error {
	resp.Done = true
	return nil
}

// The envoy bootstrap hook tries to connect to consul and run the envoy
// bootstrap command, so turn it off when testing connect jobs that are not
// using envoy.
func useMockEnvoyBootstrapHook(tr *TaskRunner) {
	mock := new(mockEnvoyBootstrapHook)
	for i, hook := range tr.runnerHooks {
		if _, ok := hook.(*envoyBootstrapHook); ok {
			tr.runnerHooks[i] = mock
		}
	}
}

// TestTaskRunner_BlockForSIDSToken asserts tasks do not start until a Consul
// Service Identity token is derived.
func TestTaskRunner_BlockForSIDSToken(t *testing.T) {
	t.Parallel()
	r := require.New(t)

	alloc := mock.BatchConnectAlloc()
	task := alloc.Job.TaskGroups[0].Tasks[0]
	task.Config = map[string]interface{}{
		"run_for": "0s",
	}

	trConfig, cleanup := testTaskRunnerConfig(t, alloc, task.Name)
	defer cleanup()

	// set a consul token on the Nomad client's consul config, because that is
	// what gates the action of requesting SI token(s)
	trConfig.ClientConfig.ConsulConfig.Token = uuid.Generate()

	// control when we get a Consul SI token
	token := uuid.Generate()
	waitCh := make(chan struct{})
	deriveFn := func(*structs.Allocation, []string) (map[string]string, error) {
		<-waitCh
		return map[string]string{task.Name: token}, nil
	}
	siClient := trConfig.ConsulSI.(*consulapi.MockServiceIdentitiesClient)
	siClient.DeriveTokenFn = deriveFn

	// start the task runner
	tr, err := NewTaskRunner(trConfig)
	r.NoError(err)
	defer tr.Kill(context.Background(), structs.NewTaskEvent("cleanup"))
	useMockEnvoyBootstrapHook(tr) // mock the envoy bootstrap hook

	go tr.Run()

	// assert task runner blocks on SI token
	select {
	case <-tr.WaitCh():
		r.Fail("task_runner exited before si unblocked")
	case <-time.After(100 * time.Millisecond):
	}

	// assert task state is still pending
	r.Equal(structs.TaskStatePending, tr.TaskState().State)

	// unblock service identity token
	close(waitCh)

	// task runner should exit now that it has been unblocked and it is a batch
	// job with a zero sleep time
	select {
	case <-tr.WaitCh():
	case <-time.After(15 * time.Second * time.Duration(testutil.TestMultiplier())):
		r.Fail("timed out waiting for batch task to exit")
	}

	// assert task exited successfully
	finalState := tr.TaskState()
	r.Equal(structs.TaskStateDead, finalState.State)
	r.False(finalState.Failed)

	// assert the token is on disk
	tokenPath := filepath.Join(trConfig.TaskDir.SecretsDir, sidsTokenFile)
	data, err := ioutil.ReadFile(tokenPath)
	r.NoError(err)
	r.Equal(token, string(data))
}

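// TestTaskRunner_DeriveSIToken_Retry asserts that a recoverable error on the
// first SI token derivation keeps the task blocked until a retry succeeds.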
func TestTaskRunner_DeriveSIToken_Retry(t *testing.T) {
	t.Parallel()
	r := require.New(t)

	alloc := mock.BatchConnectAlloc()
	task := alloc.Job.TaskGroups[0].Tasks[0]
	task.Config = map[string]interface{}{
		"run_for": "0s",
	}

	trConfig, cleanup := testTaskRunnerConfig(t, alloc, task.Name)
	defer cleanup()

	// set a consul token on the Nomad client's consul config, because that is
	// what gates the action of requesting SI token(s)
	trConfig.ClientConfig.ConsulConfig.Token = uuid.Generate()

	// control when we get a Consul SI token (recoverable failure on first call)
	token := uuid.Generate()
	deriveCount := 0
	deriveFn := func(*structs.Allocation, []string) (map[string]string, error) {
		if deriveCount > 0 {
			return map[string]string{task.Name: token}, nil
		}
		deriveCount++
		return nil, structs.NewRecoverableError(errors.New("try again later"), true)
	}
	siClient := trConfig.ConsulSI.(*consulapi.MockServiceIdentitiesClient)
	siClient.DeriveTokenFn = deriveFn

	// start the task runner
	tr, err := NewTaskRunner(trConfig)
	r.NoError(err)
	defer tr.Kill(context.Background(), structs.NewTaskEvent("cleanup"))
	useMockEnvoyBootstrapHook(tr) // mock the envoy bootstrap
	go tr.Run()

	// wait for the task runner to exit
	select {
	case <-tr.WaitCh():
	case <-time.After(time.Duration(testutil.TestMultiplier()*15) * time.Second):
		r.Fail("timed out waiting for task runner")
	}

	// assert task exited successfully
	finalState := tr.TaskState()
	r.Equal(structs.TaskStateDead, finalState.State)
	r.False(finalState.Failed)

	// assert the token is on disk
	tokenPath := filepath.Join(trConfig.TaskDir.SecretsDir, sidsTokenFile)
	data, err := ioutil.ReadFile(tokenPath)
	r.NoError(err)
	r.Equal(token, string(data))
}

// TestTaskRunner_DeriveSIToken_Unrecoverable asserts that an unrecoverable error
// from deriving a service identity token will fail a task.
func TestTaskRunner_DeriveSIToken_Unrecoverable(t *testing.T) {
	t.Parallel()
	r := require.New(t)

	alloc := mock.BatchConnectAlloc()
	tg := alloc.Job.TaskGroups[0]
	tg.RestartPolicy.Attempts = 0
	tg.RestartPolicy.Interval = 0
	tg.RestartPolicy.Delay = 0
	tg.RestartPolicy.Mode = structs.RestartPolicyModeFail
	task := tg.Tasks[0]
	task.Config = map[string]interface{}{
		"run_for": "0s",
	}

	trConfig, cleanup := testTaskRunnerConfig(t, alloc, task.Name)
	defer cleanup()

	// set a consul token on the Nomad client's consul config, because that is
	// what gates the action of requesting SI token(s)
	trConfig.ClientConfig.ConsulConfig.Token = uuid.Generate()

	// SI token derivation suffers a non-retryable error
	siClient := trConfig.ConsulSI.(*consulapi.MockServiceIdentitiesClient)
	siClient.SetDeriveTokenError(alloc.ID, []string{task.Name}, errors.New("non-recoverable"))

	tr, err := NewTaskRunner(trConfig)
	r.NoError(err)

	defer tr.Kill(context.Background(), structs.NewTaskEvent("cleanup"))
	useMockEnvoyBootstrapHook(tr) // mock the envoy bootstrap hook
	go tr.Run()

	// Wait for the task to die
	select {
	case <-tr.WaitCh():
	case <-time.After(time.Duration(testutil.TestMultiplier()*15) * time.Second):
		require.Fail(t, "timed out waiting for task runner to fail")
	}

	// assert we have died and failed
	finalState := tr.TaskState()
	r.Equal(structs.TaskStateDead, finalState.State)
	r.True(finalState.Failed)
	r.Equal(5, len(finalState.Events))
	/*
	 + event: Task received by client
	 + event: Building Task Directory
	 + event: consul: failed to derive SI token: non-recoverable
	 + event: consul_sids: context canceled
	 + event: Policy allows no restarts
	*/
	r.Equal("true", finalState.Events[2].Details["fails_task"])
}

// TestTaskRunner_BlockForVaultToken asserts tasks do not start until a vault token
// is derived.
func TestTaskRunner_BlockForVaultToken(t *testing.T) {
	t.Parallel()

	alloc := mock.BatchAlloc()
	task := alloc.Job.TaskGroups[0].Tasks[0]
	task.Config = map[string]interface{}{
		"run_for": "0s",
	}
	task.Vault = &structs.Vault{Policies: []string{"default"}}

	conf, cleanup := testTaskRunnerConfig(t, alloc, task.Name)
	defer cleanup()

	// Control when we get a Vault token
	token := "1234"
	waitCh := make(chan struct{})
	handler := func(*structs.Allocation, []string) (map[string]string, error) {
		<-waitCh
		return map[string]string{task.Name: token}, nil
	}
	vaultClient := conf.Vault.(*vaultclient.MockVaultClient)
	vaultClient.DeriveTokenFn = handler

	tr, err := NewTaskRunner(conf)
	require.NoError(t, err)
	defer tr.Kill(context.Background(), structs.NewTaskEvent("cleanup"))
	go tr.Run()

	// Assert TR blocks on vault token (does *not* exit)
	select {
	case <-tr.WaitCh():
		require.Fail(t, "tr exited before vault unblocked")
	case <-time.After(1 * time.Second):
	}

	// Assert task state is still Pending
	require.Equal(t, structs.TaskStatePending, tr.TaskState().State)

	// Unblock vault token
	close(waitCh)

	// TR should exit now that it's unblocked by vault, as it's a batch job
	// with a run_for of 0s.
	select {
	case <-tr.WaitCh():
	case <-time.After(15 * time.Second * time.Duration(testutil.TestMultiplier())):
		require.Fail(t, "timed out waiting for batch task to exit")
	}

	// Assert task exited successfully
	finalState := tr.TaskState()
	require.Equal(t, structs.TaskStateDead, finalState.State)
	require.False(t, finalState.Failed)

	// Check that the token is on disk
	tokenPath := filepath.Join(conf.TaskDir.SecretsDir, vaultTokenFile)
	data, err := ioutil.ReadFile(tokenPath)
	require.NoError(t, err)
	require.Equal(t, token, string(data))

	// Check the token was revoked
	testutil.WaitForResult(func() (bool, error) {
		if len(vaultClient.StoppedTokens()) != 1 {
			return false, fmt.Errorf("Expected a stopped token %q but found: %v", token, vaultClient.StoppedTokens())
		}

		if a := vaultClient.StoppedTokens()[0]; a != token {
			return false, fmt.Errorf("got stopped token %q; want %q", a, token)
		}
		return true, nil
	}, func(err error) {
		require.Fail(t, err.Error())
	})
}

// TestTaskRunner_DeriveToken_Retry asserts that if a recoverable error is
// returned when deriving a vault token a task will continue to block while
// it's retried.
func TestTaskRunner_DeriveToken_Retry(t *testing.T) {
	t.Parallel()
	alloc := mock.BatchAlloc()
	task := alloc.Job.TaskGroups[0].Tasks[0]
	task.Vault = &structs.Vault{Policies: []string{"default"}}

	conf, cleanup := testTaskRunnerConfig(t, alloc, task.Name)
	defer cleanup()

	// Fail on the first attempt to derive a vault token
	token := "1234"
	count := 0
	handler := func(*structs.Allocation, []string) (map[string]string, error) {
		if count > 0 {
			return map[string]string{task.Name: token}, nil
		}

		count++
		return nil, structs.NewRecoverableError(fmt.Errorf("Want a retry"), true)
	}
	vaultClient := conf.Vault.(*vaultclient.MockVaultClient)
	vaultClient.DeriveTokenFn = handler

	tr, err := NewTaskRunner(conf)
	require.NoError(t, err)
	defer tr.Kill(context.Background(), structs.NewTaskEvent("cleanup"))
	go tr.Run()

	// Wait for TR to exit and check its state
	select {
	case <-tr.WaitCh():
	case <-time.After(time.Duration(testutil.TestMultiplier()*15) * time.Second):
		require.Fail(t, "timed out waiting for task runner to exit")
	}

	state := tr.TaskState()
	require.Equal(t, structs.TaskStateDead, state.State)
	require.False(t, state.Failed)

	require.Equal(t, 1, count)

	// Check that the token is on disk
	tokenPath := filepath.Join(conf.TaskDir.SecretsDir, vaultTokenFile)
	data, err := ioutil.ReadFile(tokenPath)
	require.NoError(t, err)
	require.Equal(t, token, string(data))

	// Check the token was revoked
	testutil.WaitForResult(func() (bool, error) {
		if len(vaultClient.StoppedTokens()) != 1 {
			return false, fmt.Errorf("Expected a stopped token: %v", vaultClient.StoppedTokens())
		}

		if a := vaultClient.StoppedTokens()[0]; a != token {
			return false, fmt.Errorf("got stopped token %q; want %q", a, token)
		}
		return true, nil
	}, func(err error) {
		require.Fail(t, err.Error())
	})
}

// TestTaskRunner_DeriveToken_Unrecoverable asserts that an unrecoverable error
// from deriving a vault token will fail a task.
func TestTaskRunner_DeriveToken_Unrecoverable(t *testing.T) {
	t.Parallel()

	// Use a batch job with no restarts
	alloc := mock.BatchAlloc()
	tg := alloc.Job.TaskGroups[0]
	tg.RestartPolicy.Attempts = 0
	tg.RestartPolicy.Interval = 0
	tg.RestartPolicy.Delay = 0
	tg.RestartPolicy.Mode = structs.RestartPolicyModeFail
	task := tg.Tasks[0]
	task.Config = map[string]interface{}{
		"run_for": "0s",
	}
	task.Vault = &structs.Vault{Policies: []string{"default"}}

	conf, cleanup := testTaskRunnerConfig(t, alloc, task.Name)
	defer cleanup()

	// Error the token derivation
	vaultClient := conf.Vault.(*vaultclient.MockVaultClient)
	vaultClient.SetDeriveTokenError(alloc.ID, []string{task.Name}, fmt.Errorf("Non recoverable"))

	tr, err := NewTaskRunner(conf)
	require.NoError(t, err)
	defer tr.Kill(context.Background(), structs.NewTaskEvent("cleanup"))
	go tr.Run()

	// Wait for the task to die
	select {
	case <-tr.WaitCh():
	case <-time.After(time.Duration(testutil.TestMultiplier()*15) * time.Second):
		require.Fail(t, "timed out waiting for task runner to fail")
	}

	// Task should be dead and last event should have failed task
	state := tr.TaskState()
	require.Equal(t, structs.TaskStateDead, state.State)
	require.True(t, state.Failed)
	require.Len(t, state.Events, 3)
	require.True(t, state.Events[2].FailsTask)
}

// TestTaskRunner_Download_ChrootExec asserts that downloaded artifacts may be
// executed in a chroot.
func TestTaskRunner_Download_ChrootExec(t *testing.T) {
	t.Parallel()
	ctestutil.ExecCompatible(t)

	ts := httptest.NewServer(http.FileServer(http.Dir(filepath.Dir("."))))
	defer ts.Close()

	// Create a task that downloads a script and executes it.
	alloc := mock.BatchAlloc()
	alloc.Job.TaskGroups[0].RestartPolicy = &structs.RestartPolicy{}
	task := alloc.Job.TaskGroups[0].Tasks[0]
	task.RestartPolicy = &structs.RestartPolicy{}
	task.Driver = "exec"
	task.Config = map[string]interface{}{
		"command": "noop.sh",
	}
	task.Artifacts = []*structs.TaskArtifact{
		{
			GetterSource: fmt.Sprintf("%s/testdata/noop.sh", ts.URL),
			GetterMode:   "file",
			RelativeDest: "noop.sh",
		},
	}

	tr, _, cleanup := runTestTaskRunner(t, alloc, task.Name)
	defer cleanup()

	// Wait for task to run and exit
	select {
	case <-tr.WaitCh():
	case <-time.After(time.Duration(testutil.TestMultiplier()*15) * time.Second):
		require.Fail(t, "timed out waiting for task runner to exit")
	}

	state := tr.TaskState()
	require.Equal(t, structs.TaskStateDead, state.State)
	require.False(t, state.Failed)
}
  1532  
  1533  // TestTaskRunner_Download_RawExec asserts that downloaded artifacts may be
  1534  // executed in a driver without filesystem isolation.
  1535  func TestTaskRunner_Download_RawExec(t *testing.T) {
  1536  	t.Parallel()
  1537  
  1538  	ts := httptest.NewServer(http.FileServer(http.Dir(".")))
  1539  	defer ts.Close()
  1540  
  1541  	// Create a task that downloads a script and executes it.
  1542  	alloc := mock.BatchAlloc()
  1543  	alloc.Job.TaskGroups[0].RestartPolicy = &structs.RestartPolicy{}
  1544  	task := alloc.Job.TaskGroups[0].Tasks[0]
  1545  	task.RestartPolicy = &structs.RestartPolicy{}
  1546  	task.Driver = "raw_exec"
  1547  	task.Config = map[string]interface{}{
  1548  		"command": "noop.sh",
  1549  	}
  1550  	task.Artifacts = []*structs.TaskArtifact{
  1551  		{
  1552  			GetterSource: fmt.Sprintf("%s/testdata/noop.sh", ts.URL),
  1553  			GetterMode:   "file",
  1554  			RelativeDest: "noop.sh",
  1555  		},
  1556  	}
  1557  
  1558  	tr, _, cleanup := runTestTaskRunner(t, alloc, task.Name)
  1559  	defer cleanup()
  1560  
  1561  	// Wait for task to run and exit
  1562  	select {
  1563  	case <-tr.WaitCh():
  1564  	case <-time.After(time.Duration(testutil.TestMultiplier()*15) * time.Second):
  1565  		require.Fail(t, "timed out waiting for task runner to exit")
  1566  	}
  1567  
  1568  	state := tr.TaskState()
  1569  	require.Equal(t, structs.TaskStateDead, state.State)
  1570  	require.False(t, state.Failed)
  1571  }
  1572  
  1573  // TestTaskRunner_Download_List asserts that multiple artifacts are downloaded
  1574  // before a task is run.
  1575  func TestTaskRunner_Download_List(t *testing.T) {
  1576  	t.Parallel()
  1577  	ts := httptest.NewServer(http.FileServer(http.Dir(".")))
  1578  	defer ts.Close()
  1579  
  1580  	// Create an allocation that has a task with a list of artifacts.
  1581  	alloc := mock.BatchAlloc()
  1582  	task := alloc.Job.TaskGroups[0].Tasks[0]
  1583  	f1 := "task_runner_test.go"
  1584  	f2 := "task_runner.go"
  1585  	artifact1 := structs.TaskArtifact{
  1586  		GetterSource: fmt.Sprintf("%s/%s", ts.URL, f1),
  1587  	}
  1588  	artifact2 := structs.TaskArtifact{
  1589  		GetterSource: fmt.Sprintf("%s/%s", ts.URL, f2),
  1590  	}
  1591  	task.Artifacts = []*structs.TaskArtifact{&artifact1, &artifact2}
  1592  
  1593  	tr, conf, cleanup := runTestTaskRunner(t, alloc, task.Name)
  1594  	defer cleanup()
  1595  
  1596  	// Wait for task to run and exit
  1597  	select {
  1598  	case <-tr.WaitCh():
  1599  	case <-time.After(time.Duration(testutil.TestMultiplier()*15) * time.Second):
  1600  		require.Fail(t, "timed out waiting for task runner to exit")
  1601  	}
  1602  
  1603  	state := tr.TaskState()
  1604  	require.Equal(t, structs.TaskStateDead, state.State)
  1605  	require.False(t, state.Failed)
  1606  
  1607  	require.Len(t, state.Events, 5)
  1608  	assert.Equal(t, structs.TaskReceived, state.Events[0].Type)
  1609  	assert.Equal(t, structs.TaskSetup, state.Events[1].Type)
  1610  	assert.Equal(t, structs.TaskDownloadingArtifacts, state.Events[2].Type)
  1611  	assert.Equal(t, structs.TaskStarted, state.Events[3].Type)
  1612  	assert.Equal(t, structs.TaskTerminated, state.Events[4].Type)
  1613  
  1614  	// Check that both files exist.
  1615  	_, err := os.Stat(filepath.Join(conf.TaskDir.Dir, f1))
  1616  	require.NoErrorf(t, err, "%v not downloaded", f1)
  1617  
  1618  	_, err = os.Stat(filepath.Join(conf.TaskDir.Dir, f2))
  1619  	require.NoErrorf(t, err, "%v not downloaded", f2)
  1620  }
  1621  
  1622  // TestTaskRunner_Download_Retries asserts that failed artifact downloads are
  1623  // retried according to the task's restart policy.
  1624  func TestTaskRunner_Download_Retries(t *testing.T) {
  1625  	t.Parallel()
  1626  
  1627  	// Create an allocation that has a task with bad artifacts.
  1628  	alloc := mock.BatchAlloc()
  1629  	task := alloc.Job.TaskGroups[0].Tasks[0]
  1630  	artifact := structs.TaskArtifact{
  1631  		GetterSource: "http://127.0.0.1:0/foo/bar/baz",
  1632  	}
  1633  	task.Artifacts = []*structs.TaskArtifact{&artifact}
  1634  
  1635  	// Make the restart policy retry once
  1636  	rp := &structs.RestartPolicy{
  1637  		Attempts: 1,
  1638  		Interval: 10 * time.Minute,
  1639  		Delay:    1 * time.Second,
  1640  		Mode:     structs.RestartPolicyModeFail,
  1641  	}
  1642  	alloc.Job.TaskGroups[0].RestartPolicy = rp
  1643  	alloc.Job.TaskGroups[0].Tasks[0].RestartPolicy = rp
  1644  
  1645  	tr, _, cleanup := runTestTaskRunner(t, alloc, task.Name)
  1646  	defer cleanup()
  1647  
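      	// Wait for the task to exhaust its single retry and die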
  1648  	select {
  1649  	case <-tr.WaitCh():
  1650  	case <-time.After(time.Duration(testutil.TestMultiplier()*15) * time.Second):
  1651  		require.Fail(t, "timed out waiting for task to exit")
  1652  	}
  1653  
  1654  	state := tr.TaskState()
  1655  	require.Equal(t, structs.TaskStateDead, state.State)
  1656  	require.True(t, state.Failed)
  1657  	require.Len(t, state.Events, 8, pretty.Sprint(state.Events))
  1658  	require.Equal(t, structs.TaskReceived, state.Events[0].Type)
  1659  	require.Equal(t, structs.TaskSetup, state.Events[1].Type)
  1660  	require.Equal(t, structs.TaskDownloadingArtifacts, state.Events[2].Type)
  1661  	require.Equal(t, structs.TaskArtifactDownloadFailed, state.Events[3].Type)
  1662  	require.Equal(t, structs.TaskRestarting, state.Events[4].Type)
  1663  	require.Equal(t, structs.TaskDownloadingArtifacts, state.Events[5].Type)
  1664  	require.Equal(t, structs.TaskArtifactDownloadFailed, state.Events[6].Type)
  1665  	require.Equal(t, structs.TaskNotRestarting, state.Events[7].Type)
  1666  }
  1667  
  1668  // TestTaskRunner_DriverNetwork asserts that a driver's network is properly
  1669  // used in services and checks.
  1670  func TestTaskRunner_DriverNetwork(t *testing.T) {
  1671  	t.Parallel()
  1672  
  1673  	alloc := mock.Alloc()
  1674  	task := alloc.Job.TaskGroups[0].Tasks[0]
  1675  	task.Driver = "mock_driver"
  1676  	task.Config = map[string]interface{}{
  1677  		"run_for":         "100s",
  1678  		"driver_ip":       "10.1.2.3",
  1679  		"driver_port_map": "http:80",
  1680  	}
  1681  
  1682  	// Create services and checks with custom address modes to exercise
  1683  	// address detection logic
  1684  	task.Services = []*structs.Service{
  1685  		{
  1686  			Name:        "host-service",
  1687  			PortLabel:   "http",
  1688  			AddressMode: "host",
  1689  			Checks: []*structs.ServiceCheck{
  1690  				{
  1691  					Name:        "driver-check",
  1692  					Type:        "tcp",
  1693  					PortLabel:   "1234",
  1694  					AddressMode: "driver",
  1695  				},
  1696  			},
  1697  		},
  1698  		{
  1699  			Name:        "driver-service",
  1700  			PortLabel:   "5678",
  1701  			AddressMode: "driver",
  1702  			Checks: []*structs.ServiceCheck{
  1703  				{
  1704  					Name:      "host-check",
  1705  					Type:      "tcp",
  1706  					PortLabel: "http",
  1707  				},
  1708  				{
  1709  					Name:        "driver-label-check",
  1710  					Type:        "tcp",
  1711  					PortLabel:   "http",
  1712  					AddressMode: "driver",
  1713  				},
  1714  			},
  1715  		},
  1716  	}
  1717  
  1718  	conf, cleanup := testTaskRunnerConfig(t, alloc, task.Name)
  1719  	defer cleanup()
  1720  
  1721  	// Use a mock agent to test for services
  1722  	consulAgent := agentconsul.NewMockAgent()
  1723  	consulClient := agentconsul.NewServiceClient(consulAgent, conf.Logger, true)
  1724  	defer consulClient.Shutdown()
  1725  	go consulClient.Run()
  1726  
  1727  	conf.Consul = consulClient
  1728  
  1729  	tr, err := NewTaskRunner(conf)
  1730  	require.NoError(t, err)
  1731  	defer tr.Kill(context.Background(), structs.NewTaskEvent("cleanup"))
  1732  	go tr.Run()
  1733  
  1734  	// Wait for the task to start
  1735  	testWaitForTaskToStart(t, tr)
  1736  
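      	// Assert the services and checks were registered with the expected
      	// addresses: host mode uses the mock alloc's IP (192.168.0.100) while
      	// driver mode uses the driver-reported IP (10.1.2.3)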
  1737  	testutil.WaitForResult(func() (bool, error) {
  1738  		services, _ := consulAgent.Services()
  1739  		if n := len(services); n != 2 {
  1740  			return false, fmt.Errorf("expected 2 services, but found %d", n)
  1741  		}
  1742  		for _, s := range services {
  1743  			switch s.Service {
  1744  			case "host-service":
  1745  				if expected := "192.168.0.100"; s.Address != expected {
  1746  					return false, fmt.Errorf("expected host-service to have IP=%s but found %s",
  1747  						expected, s.Address)
  1748  				}
  1749  			case "driver-service":
  1750  				if expected := "10.1.2.3"; s.Address != expected {
  1751  					return false, fmt.Errorf("expected driver-service to have IP=%s but found %s",
  1752  						expected, s.Address)
  1753  				}
  1754  				if expected := 5678; s.Port != expected {
  1755  					return false, fmt.Errorf("expected driver-service to have port=%d but found %d",
  1756  						expected, s.Port)
  1757  				}
  1758  			default:
  1759  				return false, fmt.Errorf("unexpected service: %q", s.Service)
  1760  			}
  1762  		}
  1763  
  1764  		checks := consulAgent.CheckRegs()
  1765  		if n := len(checks); n != 3 {
  1766  			return false, fmt.Errorf("expected 3 checks, but found %d", n)
  1767  		}
  1768  		for _, check := range checks {
  1769  			switch check.Name {
  1770  			case "driver-check":
  1771  				if expected := "10.1.2.3:1234"; check.TCP != expected {
  1772  					return false, fmt.Errorf("expected driver-check to have address %q but found %q", expected, check.TCP)
  1773  				}
  1774  			case "driver-label-check":
  1775  				if expected := "10.1.2.3:80"; check.TCP != expected {
  1776  					return false, fmt.Errorf("expected driver-label-check to have address %q but found %q", expected, check.TCP)
  1777  				}
  1778  			case "host-check":
  1779  				if expected := "192.168.0.100:"; !strings.HasPrefix(check.TCP, expected) {
  1780  					return false, fmt.Errorf("expected host-check to have address start with %q but found %q", expected, check.TCP)
  1781  				}
  1782  			default:
  1783  				return false, fmt.Errorf("unexpected check: %q", check.Name)
  1784  			}
  1785  		}
  1786  
  1787  		return true, nil
  1788  	}, func(err error) {
  1789  		services, _ := consulAgent.Services()
  1790  		for _, s := range services {
  1791  			t.Log(pretty.Sprint("Service: ", s))
  1792  		}
  1793  		for _, c := range consulAgent.CheckRegs() {
  1794  			t.Log(pretty.Sprint("Check:   ", c))
  1795  		}
  1796  		require.NoError(t, err)
  1797  	})
  1798  }
  1799  
  1800  // TestTaskRunner_RestartSignalTask_NotRunning asserts resilience to failures
  1801  // when a restart or signal is triggered and the task is not running.
  1802  func TestTaskRunner_RestartSignalTask_NotRunning(t *testing.T) {
  1803  	t.Parallel()
  1804  
  1805  	alloc := mock.BatchAlloc()
  1806  	task := alloc.Job.TaskGroups[0].Tasks[0]
  1807  	task.Driver = "mock_driver"
  1808  	task.Config = map[string]interface{}{
  1809  		"run_for": "0s",
  1810  	}
  1811  
  1812  	// Use vault to block the start
  1813  	task.Vault = &structs.Vault{Policies: []string{"default"}}
  1814  
  1815  	conf, cleanup := testTaskRunnerConfig(t, alloc, task.Name)
  1816  	defer cleanup()
  1817  
  1818  	// Control when we get a Vault token
  1819  	waitCh := make(chan struct{}, 1)
  1820  	defer close(waitCh)
  1821  	handler := func(*structs.Allocation, []string) (map[string]string, error) {
  1822  		<-waitCh
  1823  		return map[string]string{task.Name: "1234"}, nil
  1824  	}
  1825  	vaultClient := conf.Vault.(*vaultclient.MockVaultClient)
  1826  	vaultClient.DeriveTokenFn = handler
  1827  
  1828  	tr, err := NewTaskRunner(conf)
  1829  	require.NoError(t, err)
  1830  	defer tr.Kill(context.Background(), structs.NewTaskEvent("cleanup"))
  1831  	go tr.Run()
  1832  
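      	// The task is still blocked deriving its Vault token, so it must not exit yet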
  1833  	select {
  1834  	case <-tr.WaitCh():
  1835  		require.Fail(t, "unexpected exit")
  1836  	case <-time.After(1 * time.Second):
  1837  	}
  1838  
  1839  	// Send a signal
  1840  	err = tr.Signal(structs.NewTaskEvent("don't panic"), "QUIT")
  1841  	require.EqualError(t, err, ErrTaskNotRunning.Error())
  1842  
  1843  	// Send a restart
  1844  	err = tr.Restart(context.Background(), structs.NewTaskEvent("don't panic"), false)
  1845  	require.EqualError(t, err, ErrTaskNotRunning.Error())
  1846  
  1847  	// Unblock and let it finish
  1848  	waitCh <- struct{}{}
  1849  
  1850  	select {
  1851  	case <-tr.WaitCh():
  1852  	case <-time.After(10 * time.Second):
  1853  		require.Fail(t, "timed out waiting for task to complete")
  1854  	}
  1855  
  1856  	// Assert the task ran and never restarted
  1857  	state := tr.TaskState()
  1858  	require.Equal(t, structs.TaskStateDead, state.State)
  1859  	require.False(t, state.Failed)
  1860  	require.Len(t, state.Events, 4, pretty.Sprint(state.Events))
  1861  	require.Equal(t, structs.TaskReceived, state.Events[0].Type)
  1862  	require.Equal(t, structs.TaskSetup, state.Events[1].Type)
  1863  	require.Equal(t, structs.TaskStarted, state.Events[2].Type)
  1864  	require.Equal(t, structs.TaskTerminated, state.Events[3].Type)
  1865  }
  1866  
  1867  // TestTaskRunner_Run_RecoverableStartError asserts tasks are restarted if they
  1868  // return a recoverable error from StartTask.
  1869  func TestTaskRunner_Run_RecoverableStartError(t *testing.T) {
  1870  	t.Parallel()
  1871  
  1872  	alloc := mock.BatchAlloc()
  1873  	task := alloc.Job.TaskGroups[0].Tasks[0]
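      	// Configure the mock driver to fail StartTask with a recoverable error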
  1874  	task.Config = map[string]interface{}{
  1875  		"start_error":             "driver failure",
  1876  		"start_error_recoverable": true,
  1877  	}
  1878  
  1879  	// Make the restart policy retry once
  1880  	rp := &structs.RestartPolicy{
  1881  		Attempts: 1,
  1882  		Interval: 10 * time.Minute,
  1883  		Delay:    0,
  1884  		Mode:     structs.RestartPolicyModeFail,
  1885  	}
  1886  	alloc.Job.TaskGroups[0].RestartPolicy = rp
  1887  	alloc.Job.TaskGroups[0].Tasks[0].RestartPolicy = rp
  1888  
  1889  	tr, _, cleanup := runTestTaskRunner(t, alloc, task.Name)
  1890  	defer cleanup()
  1891  
  1892  	select {
  1893  	case <-tr.WaitCh():
  1894  	case <-time.After(time.Duration(testutil.TestMultiplier()*15) * time.Second):
  1895  		require.Fail(t, "timed out waiting for task to exit")
  1896  	}
  1897  
  1898  	state := tr.TaskState()
  1899  	require.Equal(t, structs.TaskStateDead, state.State)
  1900  	require.True(t, state.Failed)
  1901  	require.Len(t, state.Events, 6, pretty.Sprint(state.Events))
  1902  	require.Equal(t, structs.TaskReceived, state.Events[0].Type)
  1903  	require.Equal(t, structs.TaskSetup, state.Events[1].Type)
  1904  	require.Equal(t, structs.TaskDriverFailure, state.Events[2].Type)
  1905  	require.Equal(t, structs.TaskRestarting, state.Events[3].Type)
  1906  	require.Equal(t, structs.TaskDriverFailure, state.Events[4].Type)
  1907  	require.Equal(t, structs.TaskNotRestarting, state.Events[5].Type)
  1908  }
  1909  
  1910  // TestTaskRunner_Template_Artifact asserts that tasks can use artifacts as templates.
  1911  func TestTaskRunner_Template_Artifact(t *testing.T) {
  1912  	t.Parallel()
  1913  
  1914  	ts := httptest.NewServer(http.FileServer(http.Dir(".")))
  1915  	defer ts.Close()
  1916  
  1917  	alloc := mock.BatchAlloc()
  1918  	task := alloc.Job.TaskGroups[0].Tasks[0]
  1919  	f1 := "task_runner.go"
  1920  	f2 := "test"
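      	// f1 is downloaded as an artifact and used as the template source; f2 is
      	// the rendered output name taken from the template's DestPath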
  1921  	task.Artifacts = []*structs.TaskArtifact{
  1922  		{GetterSource: fmt.Sprintf("%s/%s", ts.URL, f1)},
  1923  	}
  1924  	task.Templates = []*structs.Template{
  1925  		{
  1926  			SourcePath: f1,
  1927  			DestPath:   "local/test",
  1928  			ChangeMode: structs.TemplateChangeModeNoop,
  1929  		},
  1930  	}
  1931  
  1932  	conf, cleanup := testTaskRunnerConfig(t, alloc, task.Name)
  1933  	defer cleanup()
  1934  
  1935  	tr, err := NewTaskRunner(conf)
  1936  	require.NoError(t, err)
  1937  	defer tr.Kill(context.Background(), structs.NewTaskEvent("cleanup"))
  1938  	go tr.Run()
  1939  
  1940  	// Wait for task to run and exit
  1941  	select {
  1942  	case <-tr.WaitCh():
  1943  	case <-time.After(15 * time.Second * time.Duration(testutil.TestMultiplier())):
  1944  		require.Fail(t, "timed out waiting for task runner to exit")
  1945  	}
  1946  
  1947  	state := tr.TaskState()
  1948  	require.Equal(t, structs.TaskStateDead, state.State)
  1949  	require.True(t, state.Successful())
  1950  	require.False(t, state.Failed)
  1951  
  1952  	artifactsDownloaded := false
  1953  	for _, e := range state.Events {
  1954  		if e.Type == structs.TaskDownloadingArtifacts {
  1955  			artifactsDownloaded = true
  1956  		}
  1957  	}
  1958  	assert.True(t, artifactsDownloaded, "expected artifacts downloaded events")
  1959  
  1960  	// Check that both files exist.
  1961  	_, err = os.Stat(filepath.Join(conf.TaskDir.Dir, f1))
  1962  	require.NoErrorf(t, err, "%v not downloaded", f1)
  1963  
  1964  	_, err = os.Stat(filepath.Join(conf.TaskDir.LocalDir, f2))
  1965  	require.NoErrorf(t, err, "%v not rendered", f2)
  1966  }
  1967  
  1968  // TestTaskRunner_Template_BlockingPreStart asserts that a template
  1969  // that fails to render in PreStart can gracefully be shut down by
  1970  // either killCtx or shutdownCtx
  1971  func TestTaskRunner_Template_BlockingPreStart(t *testing.T) {
  1972  	t.Parallel()
  1973  
  1974  	alloc := mock.BatchAlloc()
  1975  	task := alloc.Job.TaskGroups[0].Tasks[0]
  1976  	task.Templates = []*structs.Template{
  1977  		{
  1978  			EmbeddedTmpl: `{{ with secret "foo/secret" }}{{ .Data.certificate }}{{ end }}`,
  1979  			DestPath:     "local/test",
  1980  			ChangeMode:   structs.TemplateChangeModeNoop,
  1981  		},
  1982  	}
  1983  
  1984  	task.Vault = &structs.Vault{Policies: []string{"default"}}
  1985  
  1986  	conf, cleanup := testTaskRunnerConfig(t, alloc, task.Name)
  1987  	defer cleanup()
  1988  
  1989  	tr, err := NewTaskRunner(conf)
  1990  	require.NoError(t, err)
  1991  	go tr.Run()
  1992  	defer tr.Shutdown()
  1993  
  1994  	testutil.WaitForResult(func() (bool, error) {
  1995  		ts := tr.TaskState()
  1996  
  1997  		if len(ts.Events) == 0 {
  1998  			return false, fmt.Errorf("no events yet")
  1999  		}
  2000  
  2001  		for _, e := range ts.Events {
  2002  			if e.Type == "Template" && strings.Contains(e.DisplayMessage, "vault.read(foo/secret)") {
  2003  				return true, nil
  2004  			}
  2005  		}
  2006  
  2007  		return false, fmt.Errorf("no 'missing vault secret' template event yet: %#v", ts.Events)
  2009  	}, func(err error) {
  2010  		require.NoError(t, err)
  2011  	})
  2012  
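      	// Run Shutdown in a goroutine so the select below can bound how long it takes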
  2013  	shutdown := func() <-chan bool {
  2014  		finished := make(chan bool)
  2015  		go func() {
  2016  			tr.Shutdown()
  2017  			finished <- true
  2018  		}()
  2019  
  2020  		return finished
  2021  	}
  2022  
  2023  	select {
  2024  	case <-shutdown():
  2025  		// it shut down like it should have
  2026  	case <-time.After(10 * time.Second):
  2027  		require.Fail(t, "timeout shutting down task")
  2028  	}
  2029  }
  2030  
  2031  // TestTaskRunner_Template_NewVaultToken asserts that a new vault token is
  2032  // created when rendering a template and that it is revoked on alloc completion
  2033  func TestTaskRunner_Template_NewVaultToken(t *testing.T) {
  2034  	t.Parallel()
  2035  
  2036  	alloc := mock.BatchAlloc()
  2037  	task := alloc.Job.TaskGroups[0].Tasks[0]
  2038  	task.Templates = []*structs.Template{
  2039  		{
  2040  			EmbeddedTmpl: `{{key "foo"}}`,
  2041  			DestPath:     "local/test",
  2042  			ChangeMode:   structs.TemplateChangeModeNoop,
  2043  		},
  2044  	}
  2045  	task.Vault = &structs.Vault{Policies: []string{"default"}}
  2046  
  2047  	conf, cleanup := testTaskRunnerConfig(t, alloc, task.Name)
  2048  	defer cleanup()
  2049  
  2050  	tr, err := NewTaskRunner(conf)
  2051  	require.NoError(t, err)
  2052  	defer tr.Kill(context.Background(), structs.NewTaskEvent("cleanup"))
  2053  	go tr.Run()
  2054  
  2055  	// Wait for a Vault token
  2056  	var token string
  2057  	testutil.WaitForResult(func() (bool, error) {
  2058  		token = tr.getVaultToken()
  2059  
  2060  		if token == "" {
  2061  			return false, fmt.Errorf("No Vault token")
  2062  		}
  2063  
  2064  		return true, nil
  2065  	}, func(err error) {
  2066  		require.NoError(t, err)
  2067  	})
  2068  
  2069  	vault := conf.Vault.(*vaultclient.MockVaultClient)
  2070  	renewalCh, ok := vault.RenewTokens()[token]
  2071  	require.True(t, ok, "no renewal channel for token")
  2072  
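      	// Fail the renewal so the task runner must derive a replacement token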
  2073  	renewalCh <- fmt.Errorf("Test killing")
  2074  	close(renewalCh)
  2075  
  2076  	var token2 string
  2077  	testutil.WaitForResult(func() (bool, error) {
  2078  		token2 = tr.getVaultToken()
  2079  
  2080  		if token2 == "" {
  2081  			return false, fmt.Errorf("No Vault token")
  2082  		}
  2083  
  2084  		if token2 == token {
  2085  			return false, fmt.Errorf("token wasn't recreated")
  2086  		}
  2087  
  2088  		return true, nil
  2089  	}, func(err error) {
  2090  		require.NoError(t, err)
  2091  	})
  2092  
  2093  	// Check the token was revoked
  2094  	testutil.WaitForResult(func() (bool, error) {
  2095  		if len(vault.StoppedTokens()) != 1 {
  2096  			return false, fmt.Errorf("Expected a stopped token: %v", vault.StoppedTokens())
  2097  		}
  2098  
  2099  		if a := vault.StoppedTokens()[0]; a != token {
  2100  			return false, fmt.Errorf("got stopped token %q; want %q", a, token)
  2101  		}
  2102  
  2103  		return true, nil
  2104  	}, func(err error) {
  2105  		require.NoError(t, err)
  2106  	})
  2108  }
  2109  
  2110  // TestTaskRunner_VaultManager_Restart asserts that the task is restarted when its
  2111  // derived vault token expires and the task is configured with the restart change mode
  2112  func TestTaskRunner_VaultManager_Restart(t *testing.T) {
  2113  	t.Parallel()
  2114  
  2115  	alloc := mock.BatchAlloc()
  2116  	task := alloc.Job.TaskGroups[0].Tasks[0]
  2117  	task.Config = map[string]interface{}{
  2118  		"run_for": "10s",
  2119  	}
  2120  	task.Vault = &structs.Vault{
  2121  		Policies:   []string{"default"},
  2122  		ChangeMode: structs.VaultChangeModeRestart,
  2123  	}
  2124  
  2125  	conf, cleanup := testTaskRunnerConfig(t, alloc, task.Name)
  2126  	defer cleanup()
  2127  
  2128  	tr, err := NewTaskRunner(conf)
  2129  	require.NoError(t, err)
  2130  	defer tr.Kill(context.Background(), structs.NewTaskEvent("cleanup"))
  2131  	go tr.Run()
  2132  
  2133  	testWaitForTaskToStart(t, tr)
  2134  
  2135  	tr.vaultTokenLock.Lock()
  2136  	token := tr.vaultToken
  2137  	tr.vaultTokenLock.Unlock()
  2138  
  2139  	require.NotEmpty(t, token)
  2140  
  2141  	vault := conf.Vault.(*vaultclient.MockVaultClient)
  2142  	renewalCh, ok := vault.RenewTokens()[token]
  2143  	require.True(t, ok, "no renewal channel for token")
  2144  
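      	// Fail renewal to simulate token expiration; restart change mode should restart the task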
  2145  	renewalCh <- fmt.Errorf("Test killing")
  2146  	close(renewalCh)
  2147  
  2148  	testutil.WaitForResult(func() (bool, error) {
  2149  		state := tr.TaskState()
  2150  
  2151  		if len(state.Events) == 0 {
  2152  			return false, fmt.Errorf("no events yet")
  2153  		}
  2154  
  2155  		foundRestartSignal, foundRestarting := false, false
  2156  		for _, e := range state.Events {
  2157  			switch e.Type {
  2158  			case structs.TaskRestartSignal:
  2159  				foundRestartSignal = true
  2160  			case structs.TaskRestarting:
  2161  				foundRestarting = true
  2162  			}
  2163  		}
  2164  
  2165  		if !foundRestartSignal {
  2166  			return false, fmt.Errorf("no restart signal event yet: %#v", state.Events)
  2167  		}
  2168  
  2169  		if !foundRestarting {
  2170  			return false, fmt.Errorf("no restarting event yet: %#v", state.Events)
  2171  		}
  2172  
  2173  		lastEvent := state.Events[len(state.Events)-1]
  2174  		if lastEvent.Type != structs.TaskStarted {
  2175  			return false, fmt.Errorf("expected last event to be task started but was %#v", lastEvent)
  2176  		}
  2177  		return true, nil
  2178  	}, func(err error) {
  2179  		require.NoError(t, err)
  2180  	})
  2181  }
  2182  
  2183  // TestTaskRunner_VaultManager_Signal asserts that the task is signalled when its
  2184  // derived vault token expires and the task is configured with the signal change mode
  2185  func TestTaskRunner_VaultManager_Signal(t *testing.T) {
  2186  	t.Parallel()
  2187  
  2188  	alloc := mock.BatchAlloc()
  2189  	task := alloc.Job.TaskGroups[0].Tasks[0]
  2190  	task.Config = map[string]interface{}{
  2191  		"run_for": "10s",
  2192  	}
  2193  	task.Vault = &structs.Vault{
  2194  		Policies:     []string{"default"},
  2195  		ChangeMode:   structs.VaultChangeModeSignal,
  2196  		ChangeSignal: "SIGUSR1",
  2197  	}
  2198  
  2199  	conf, cleanup := testTaskRunnerConfig(t, alloc, task.Name)
  2200  	defer cleanup()
  2201  
  2202  	tr, err := NewTaskRunner(conf)
  2203  	require.NoError(t, err)
  2204  	defer tr.Kill(context.Background(), structs.NewTaskEvent("cleanup"))
  2205  	go tr.Run()
  2206  
  2207  	testWaitForTaskToStart(t, tr)
  2208  
  2209  	tr.vaultTokenLock.Lock()
  2210  	token := tr.vaultToken
  2211  	tr.vaultTokenLock.Unlock()
  2212  
  2213  	require.NotEmpty(t, token)
  2214  
  2215  	vault := conf.Vault.(*vaultclient.MockVaultClient)
  2216  	renewalCh, ok := vault.RenewTokens()[token]
  2217  	require.True(t, ok, "no renewal channel for token")
  2218  
  2219  	renewalCh <- fmt.Errorf("Test killing")
  2220  	close(renewalCh)
  2221  
  2222  	testutil.WaitForResult(func() (bool, error) {
  2223  		state := tr.TaskState()
  2224  
  2225  		if len(state.Events) == 0 {
  2226  			return false, fmt.Errorf("no events yet")
  2227  		}
  2228  
  2229  		foundSignaling := false
  2230  		for _, e := range state.Events {
  2231  			if e.Type == structs.TaskSignaling {
  2232  				foundSignaling = true
  2233  			}
  2234  		}
  2235  
  2236  		if !foundSignaling {
  2237  			return false, fmt.Errorf("no signaling event yet: %#v", state.Events)
  2238  		}
  2239  
  2240  		return true, nil
  2241  	}, func(err error) {
  2242  		require.NoError(t, err)
  2243  	})
  2245  }
  2246  
  2247  // TestTaskRunner_UnregisterConsul_Retries asserts a task is unregistered from
  2248  // Consul when waiting to be retried.
  2249  func TestTaskRunner_UnregisterConsul_Retries(t *testing.T) {
  2250  	t.Parallel()
  2251  
  2252  	alloc := mock.Alloc()
  2253  	// Make the restart policy retry once
  2254  	rp := &structs.RestartPolicy{
  2255  		Attempts: 1,
  2256  		Interval: 10 * time.Minute,
  2257  		Delay:    time.Nanosecond,
  2258  		Mode:     structs.RestartPolicyModeFail,
  2259  	}
  2260  	alloc.Job.TaskGroups[0].RestartPolicy = rp
  2261  	task := alloc.Job.TaskGroups[0].Tasks[0]
  2262  	task.RestartPolicy = rp
  2263  	task.Driver = "mock_driver"
  2264  	task.Config = map[string]interface{}{
  2265  		"exit_code": "1",
  2266  		"run_for":   "1ns",
  2267  	}
  2268  
  2269  	conf, cleanup := testTaskRunnerConfig(t, alloc, task.Name)
  2270  	defer cleanup()
  2271  
  2272  	tr, err := NewTaskRunner(conf)
  2273  	require.NoError(t, err)
  2274  	defer tr.Kill(context.Background(), structs.NewTaskEvent("cleanup"))
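      	// Run synchronously; with restarts exhausted the task dies and Run returns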
  2275  	tr.Run()
  2276  
  2277  	state := tr.TaskState()
  2278  	require.Equal(t, structs.TaskStateDead, state.State)
  2279  
  2280  	consul := conf.Consul.(*consulapi.MockConsulServiceClient)
  2281  	consulOps := consul.GetOps()
  2282  	require.Len(t, consulOps, 8)
  2283  
  2284  	// Initial add
  2285  	require.Equal(t, "add", consulOps[0].Op)
  2286  
  2287  	// Removing canary and non-canary entries on first exit
  2288  	require.Equal(t, "remove", consulOps[1].Op)
  2289  	require.Equal(t, "remove", consulOps[2].Op)
  2290  
  2291  	// Second add on retry
  2292  	require.Equal(t, "add", consulOps[3].Op)
  2293  
  2294  	// Removing canary and non-canary entries on retry
  2295  	require.Equal(t, "remove", consulOps[4].Op)
  2296  	require.Equal(t, "remove", consulOps[5].Op)
  2297  
  2298  	// Removing canary and non-canary entries on stop
  2299  	require.Equal(t, "remove", consulOps[6].Op)
  2300  	require.Equal(t, "remove", consulOps[7].Op)
  2301  }
  2302  
  2303  // testWaitForTaskToStart waits for the task to be running or fails the test
  2304  func testWaitForTaskToStart(t *testing.T, tr *TaskRunner) {
  2305  	testutil.WaitForResult(func() (bool, error) {
  2306  		ts := tr.TaskState()
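      		// The error is only reported if the task never reaches the running state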
  2307  		return ts.State == structs.TaskStateRunning, fmt.Errorf("%v", ts.State)
  2308  	}, func(err error) {
  2309  		require.NoError(t, err)
  2310  	})
  2311  }
  2312  
  2313  // TestTaskRunner_BaseLabels tests that the base labels for the task metrics
  2314  // are set appropriately.
  2315  func TestTaskRunner_BaseLabels(t *testing.T) {
  2316  	t.Parallel()
  2317  	require := require.New(t)
  2318  
  2319  	alloc := mock.BatchAlloc()
  2320  	alloc.Namespace = "not-default"
  2321  	task := alloc.Job.TaskGroups[0].Tasks[0]
  2322  	task.Driver = "raw_exec"
  2323  	task.Config = map[string]interface{}{
  2324  		"command": "whoami",
  2325  	}
  2326  
  2327  	config, cleanup := testTaskRunnerConfig(t, alloc, task.Name)
  2328  	defer cleanup()
  2329  
  2330  	tr, err := NewTaskRunner(config)
  2331  	require.NoError(err)
  2332  
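      	// Flatten the base labels into a map for easier assertions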
  2333  	labels := map[string]string{}
  2334  	for _, e := range tr.baseLabels {
  2335  		labels[e.Name] = e.Value
  2336  	}
  2337  	require.Equal(alloc.Job.Name, labels["job"])
  2338  	require.Equal(alloc.TaskGroup, labels["task_group"])
  2339  	require.Equal(task.Name, labels["task"])
  2340  	require.Equal(alloc.ID, labels["alloc_id"])
  2341  	require.Equal(alloc.Namespace, labels["namespace"])
  2342  }