github.com/ncodes/nomad@v0.5.7-0.20170403112158-97adf4a74fb3/client/alloc_runner_test.go (about)

     1  package client
     2  
     3  import (
     4  	"fmt"
     5  	"io/ioutil"
     6  	"os"
     7  	"path/filepath"
     8  	"testing"
     9  	"text/template"
    10  	"time"
    11  
    12  	"github.com/hashicorp/go-multierror"
    13  	"github.com/ncodes/nomad/nomad/mock"
    14  	"github.com/ncodes/nomad/nomad/structs"
    15  	"github.com/ncodes/nomad/testutil"
    16  
    17  	"github.com/ncodes/nomad/client/config"
    18  	ctestutil "github.com/ncodes/nomad/client/testutil"
    19  	"github.com/ncodes/nomad/client/vaultclient"
    20  )
    21  
// MockAllocStateUpdater is a test double that records every allocation handed
// to its Update method. The fields are plain values with no locking.
type MockAllocStateUpdater struct {
	Count  int                   // number of times Update has been called
	Allocs []*structs.Allocation // allocations passed to Update, in call order
}
    26  
    27  func (m *MockAllocStateUpdater) Update(alloc *structs.Allocation) {
    28  	m.Count += 1
    29  	m.Allocs = append(m.Allocs, alloc)
    30  }
    31  
    32  func testAllocRunnerFromAlloc(alloc *structs.Allocation, restarts bool) (*MockAllocStateUpdater, *AllocRunner) {
    33  	logger := testLogger()
    34  	conf := config.DefaultConfig()
    35  	conf.StateDir = os.TempDir()
    36  	conf.AllocDir = os.TempDir()
    37  	upd := &MockAllocStateUpdater{}
    38  	if !restarts {
    39  		*alloc.Job.LookupTaskGroup(alloc.TaskGroup).RestartPolicy = structs.RestartPolicy{Attempts: 0}
    40  		alloc.Job.Type = structs.JobTypeBatch
    41  	}
    42  	vclient := vaultclient.NewMockVaultClient()
    43  	ar := NewAllocRunner(logger, conf, upd.Update, alloc, vclient)
    44  	return upd, ar
    45  }
    46  
    47  func testAllocRunner(restarts bool) (*MockAllocStateUpdater, *AllocRunner) {
    48  	return testAllocRunnerFromAlloc(mock.Alloc(), restarts)
    49  }
    50  
    51  func TestAllocRunner_SimpleRun(t *testing.T) {
    52  	ctestutil.ExecCompatible(t)
    53  	upd, ar := testAllocRunner(false)
    54  	go ar.Run()
    55  	defer ar.Destroy()
    56  
    57  	testutil.WaitForResult(func() (bool, error) {
    58  		if upd.Count == 0 {
    59  			return false, fmt.Errorf("No updates")
    60  		}
    61  		last := upd.Allocs[upd.Count-1]
    62  		if last.ClientStatus != structs.AllocClientStatusComplete {
    63  			return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete)
    64  		}
    65  		return true, nil
    66  	}, func(err error) {
    67  		t.Fatalf("err: %v", err)
    68  	})
    69  }
    70  
    71  // TestAllocRuner_RetryArtifact ensures that if one task in a task group is
    72  // retrying fetching an artifact, other tasks in the group should be able
    73  // to proceed.
    74  func TestAllocRunner_RetryArtifact(t *testing.T) {
    75  	ctestutil.ExecCompatible(t)
    76  
    77  	alloc := mock.Alloc()
    78  	alloc.Job.Type = structs.JobTypeBatch
    79  	alloc.Job.TaskGroups[0].RestartPolicy.Mode = structs.RestartPolicyModeFail
    80  	alloc.Job.TaskGroups[0].RestartPolicy.Attempts = 1
    81  	alloc.Job.TaskGroups[0].RestartPolicy.Delay = time.Duration(4*testutil.TestMultiplier()) * time.Second
    82  
    83  	task := alloc.Job.TaskGroups[0].Tasks[0]
    84  	task.Driver = "mock_driver"
    85  	task.Config = map[string]interface{}{
    86  		"exit_code": "0",
    87  		"run_for":   "1s",
    88  	}
    89  
    90  	// Create a new task with a bad artifact
    91  	badtask := alloc.Job.TaskGroups[0].Tasks[0].Copy()
    92  	badtask.Name = "bad"
    93  	badtask.Artifacts = []*structs.TaskArtifact{
    94  		{GetterSource: "http://127.1.1.111:12315/foo/bar/baz"},
    95  	}
    96  
    97  	alloc.Job.TaskGroups[0].Tasks = append(alloc.Job.TaskGroups[0].Tasks, badtask)
    98  	upd, ar := testAllocRunnerFromAlloc(alloc, true)
    99  	go ar.Run()
   100  	defer ar.Destroy()
   101  
   102  	testutil.WaitForResult(func() (bool, error) {
   103  		if upd.Count < 6 {
   104  			return false, fmt.Errorf("Not enough updates")
   105  		}
   106  		last := upd.Allocs[upd.Count-1]
   107  
   108  		// web task should have completed successfully while bad task
   109  		// retries artififact fetching
   110  		webstate := last.TaskStates["web"]
   111  		if webstate.State != structs.TaskStateDead {
   112  			return false, fmt.Errorf("expected web to be dead but found %q", last.TaskStates["web"].State)
   113  		}
   114  		if !webstate.Successful() {
   115  			return false, fmt.Errorf("expected web to have exited successfully")
   116  		}
   117  
   118  		// bad task should have failed
   119  		badstate := last.TaskStates["bad"]
   120  		if badstate.State != structs.TaskStateDead {
   121  			return false, fmt.Errorf("expected bad to be dead but found %q", badstate.State)
   122  		}
   123  		if !badstate.Failed {
   124  			return false, fmt.Errorf("expected bad to have failed: %#v", badstate.Events)
   125  		}
   126  		return true, nil
   127  	}, func(err error) {
   128  		t.Fatalf("err: %v", err)
   129  	})
   130  }
   131  
   132  func TestAllocRunner_TerminalUpdate_Destroy(t *testing.T) {
   133  	ctestutil.ExecCompatible(t)
   134  	upd, ar := testAllocRunner(false)
   135  
   136  	// Ensure task takes some time
   137  	task := ar.alloc.Job.TaskGroups[0].Tasks[0]
   138  	task.Config["command"] = "/bin/sleep"
   139  	task.Config["args"] = []string{"10"}
   140  	go ar.Run()
   141  
   142  	testutil.WaitForResult(func() (bool, error) {
   143  		if upd.Count == 0 {
   144  			return false, fmt.Errorf("No updates")
   145  		}
   146  		last := upd.Allocs[upd.Count-1]
   147  		if last.ClientStatus != structs.AllocClientStatusRunning {
   148  			return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusRunning)
   149  		}
   150  		return true, nil
   151  	}, func(err error) {
   152  		t.Fatalf("err: %v", err)
   153  	})
   154  
   155  	// Update the alloc to be terminal which should cause the alloc runner to
   156  	// stop the tasks and wait for a destroy.
   157  	update := ar.alloc.Copy()
   158  	update.DesiredStatus = structs.AllocDesiredStatusStop
   159  	ar.Update(update)
   160  
   161  	testutil.WaitForResult(func() (bool, error) {
   162  		if upd.Count == 0 {
   163  			return false, nil
   164  		}
   165  
   166  		// Check the status has changed.
   167  		last := upd.Allocs[upd.Count-1]
   168  		if last.ClientStatus != structs.AllocClientStatusComplete {
   169  			return false, fmt.Errorf("got client status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete)
   170  		}
   171  
   172  		// Check the state still exists
   173  		if _, err := os.Stat(ar.stateFilePath()); err != nil {
   174  			return false, fmt.Errorf("state file destroyed: %v", err)
   175  		}
   176  
   177  		// Check the alloc directory still exists
   178  		if _, err := os.Stat(ar.allocDir.AllocDir); err != nil {
   179  			return false, fmt.Errorf("alloc dir destroyed: %v", ar.allocDir.AllocDir)
   180  		}
   181  
   182  		return true, nil
   183  	}, func(err error) {
   184  		t.Fatalf("err: %v", err)
   185  	})
   186  
   187  	// Send the destroy signal and ensure the AllocRunner cleans up.
   188  	ar.Destroy()
   189  
   190  	testutil.WaitForResult(func() (bool, error) {
   191  		if upd.Count == 0 {
   192  			return false, nil
   193  		}
   194  
   195  		// Check the status has changed.
   196  		last := upd.Allocs[upd.Count-1]
   197  		if last.ClientStatus != structs.AllocClientStatusComplete {
   198  			return false, fmt.Errorf("got client status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete)
   199  		}
   200  
   201  		// Check the state was cleaned
   202  		if _, err := os.Stat(ar.stateFilePath()); err == nil {
   203  			return false, fmt.Errorf("state file still exists: %v", ar.stateFilePath())
   204  		} else if !os.IsNotExist(err) {
   205  			return false, fmt.Errorf("stat err: %v", err)
   206  		}
   207  
   208  		// Check the alloc directory was cleaned
   209  		if _, err := os.Stat(ar.allocDir.AllocDir); err == nil {
   210  			return false, fmt.Errorf("alloc dir still exists: %v", ar.allocDir.AllocDir)
   211  		} else if !os.IsNotExist(err) {
   212  			return false, fmt.Errorf("stat err: %v", err)
   213  		}
   214  
   215  		return true, nil
   216  	}, func(err error) {
   217  		t.Fatalf("err: %v", err)
   218  	})
   219  }
   220  
   221  func TestAllocRunner_Destroy(t *testing.T) {
   222  	ctestutil.ExecCompatible(t)
   223  	upd, ar := testAllocRunner(false)
   224  
   225  	// Ensure task takes some time
   226  	task := ar.alloc.Job.TaskGroups[0].Tasks[0]
   227  	task.Config["command"] = "/bin/sleep"
   228  	task.Config["args"] = []string{"10"}
   229  	go ar.Run()
   230  	start := time.Now()
   231  
   232  	// Begin the tear down
   233  	go func() {
   234  		time.Sleep(1 * time.Second)
   235  		ar.Destroy()
   236  	}()
   237  
   238  	testutil.WaitForResult(func() (bool, error) {
   239  		if upd.Count == 0 {
   240  			return false, nil
   241  		}
   242  
   243  		// Check the status has changed.
   244  		last := upd.Allocs[upd.Count-1]
   245  		if last.ClientStatus != structs.AllocClientStatusComplete {
   246  			return false, fmt.Errorf("got client status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete)
   247  		}
   248  
   249  		// Check the state was cleaned
   250  		if _, err := os.Stat(ar.stateFilePath()); err == nil {
   251  			return false, fmt.Errorf("state file still exists: %v", ar.stateFilePath())
   252  		} else if !os.IsNotExist(err) {
   253  			return false, fmt.Errorf("stat err: %v", err)
   254  		}
   255  
   256  		// Check the alloc directory was cleaned
   257  		if _, err := os.Stat(ar.allocDir.AllocDir); err == nil {
   258  			return false, fmt.Errorf("alloc dir still exists: %v", ar.allocDir.AllocDir)
   259  		} else if !os.IsNotExist(err) {
   260  			return false, fmt.Errorf("stat err: %v", err)
   261  		}
   262  
   263  		return true, nil
   264  	}, func(err error) {
   265  		t.Fatalf("err: %v", err)
   266  	})
   267  
   268  	if elapsed := time.Since(start); elapsed > 20*time.Second {
   269  		t.Fatalf("took too long to terminate: %s", elapsed)
   270  	}
   271  }
   272  
   273  func TestAllocRunner_Update(t *testing.T) {
   274  	ctestutil.ExecCompatible(t)
   275  	_, ar := testAllocRunner(false)
   276  
   277  	// Ensure task takes some time
   278  	task := ar.alloc.Job.TaskGroups[0].Tasks[0]
   279  	task.Config["command"] = "/bin/sleep"
   280  	task.Config["args"] = []string{"10"}
   281  	go ar.Run()
   282  	defer ar.Destroy()
   283  
   284  	// Update the alloc definition
   285  	newAlloc := new(structs.Allocation)
   286  	*newAlloc = *ar.alloc
   287  	newAlloc.Name = "FOO"
   288  	newAlloc.AllocModifyIndex++
   289  	ar.Update(newAlloc)
   290  
   291  	// Check the alloc runner stores the update allocation.
   292  	testutil.WaitForResult(func() (bool, error) {
   293  		return ar.Alloc().Name == "FOO", nil
   294  	}, func(err error) {
   295  		t.Fatalf("err: %v %#v", err, ar.Alloc())
   296  	})
   297  }
   298  
   299  func TestAllocRunner_SaveRestoreState(t *testing.T) {
   300  	alloc := mock.Alloc()
   301  	task := alloc.Job.TaskGroups[0].Tasks[0]
   302  	task.Driver = "mock_driver"
   303  	task.Config = map[string]interface{}{
   304  		"exit_code": "0",
   305  		"run_for":   "10s",
   306  	}
   307  
   308  	upd, ar := testAllocRunnerFromAlloc(alloc, false)
   309  	go ar.Run()
   310  
   311  	// Snapshot state
   312  	testutil.WaitForResult(func() (bool, error) {
   313  		return len(ar.tasks) == 1, nil
   314  	}, func(err error) {
   315  		t.Fatalf("task never started: %v", err)
   316  	})
   317  
   318  	err := ar.SaveState()
   319  	if err != nil {
   320  		t.Fatalf("err: %v", err)
   321  	}
   322  
   323  	// Create a new alloc runner
   324  	l2 := prefixedTestLogger("----- ar2:  ")
   325  	ar2 := NewAllocRunner(l2, ar.config, upd.Update,
   326  		&structs.Allocation{ID: ar.alloc.ID}, ar.vaultClient)
   327  	err = ar2.RestoreState()
   328  	if err != nil {
   329  		t.Fatalf("err: %v", err)
   330  	}
   331  	go ar2.Run()
   332  
   333  	testutil.WaitForResult(func() (bool, error) {
   334  		if len(ar2.tasks) != 1 {
   335  			return false, fmt.Errorf("Incorrect number of tasks")
   336  		}
   337  
   338  		if upd.Count == 0 {
   339  			return false, nil
   340  		}
   341  
   342  		last := upd.Allocs[upd.Count-1]
   343  		return last.ClientStatus == structs.AllocClientStatusRunning, nil
   344  	}, func(err error) {
   345  		t.Fatalf("err: %v %#v %#v", err, upd.Allocs[0], ar2.alloc.TaskStates["web"])
   346  	})
   347  
   348  	// Destroy and wait
   349  	ar2.Destroy()
   350  	start := time.Now()
   351  
   352  	testutil.WaitForResult(func() (bool, error) {
   353  		alloc := ar2.Alloc()
   354  		if alloc.ClientStatus != structs.AllocClientStatusComplete {
   355  			return false, fmt.Errorf("Bad client status; got %v; want %v", alloc.ClientStatus, structs.AllocClientStatusComplete)
   356  		}
   357  		return true, nil
   358  	}, func(err error) {
   359  		t.Fatalf("err: %v %#v %#v", err, upd.Allocs[0], ar.alloc.TaskStates)
   360  	})
   361  
   362  	if time.Since(start) > time.Duration(testutil.TestMultiplier()*5)*time.Second {
   363  		t.Fatalf("took too long to terminate")
   364  	}
   365  }
   366  
// TestAllocRunner_SaveRestoreState_TerminalAlloc saves the state of a runner
// whose alloc has been updated to a stop desired status, restores that state
// into a second runner, and checks that the state file and alloc directory
// survive until the second runner is destroyed, after which both are removed
// and the last update reports a complete client status.
func TestAllocRunner_SaveRestoreState_TerminalAlloc(t *testing.T) {
	ctestutil.ExecCompatible(t)
	upd, ar := testAllocRunner(false)
	ar.logger = prefixedTestLogger("ar1: ")

	// Ensure task takes some time

	ar.alloc.Job.TaskGroups[0].Tasks[0].Driver = "mock_driver"
	task := ar.alloc.Job.TaskGroups[0].Tasks[0]
	task.Config["run_for"] = "10s"
	go ar.Run()

	// Wait until the most recent update reports the alloc as running.
	testutil.WaitForResult(func() (bool, error) {
		if upd.Count == 0 {
			return false, fmt.Errorf("No updates")
		}
		last := upd.Allocs[upd.Count-1]
		if last.ClientStatus != structs.AllocClientStatusRunning {
			return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusRunning)
		}
		return true, nil
	}, func(err error) {
		t.Fatalf("err: %v", err)
	})

	// Update the alloc to be terminal which should cause the alloc runner to
	// stop the tasks and wait for a destroy.
	update := ar.alloc.Copy()
	update.DesiredStatus = structs.AllocDesiredStatusStop
	ar.Update(update)

	// Wait for the runner's own alloc to carry the stop status before the
	// state is saved, so the saved state is that of a terminal alloc.
	testutil.WaitForResult(func() (bool, error) {
		return ar.alloc.DesiredStatus == structs.AllocDesiredStatusStop, nil
	}, func(err error) {
		t.Fatalf("err: %v", err)
	})

	err := ar.SaveState()
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	// Ensure ar1 doesn't recreate the state file
	// The lock is held until the test function returns.
	ar.persistLock.Lock()
	defer ar.persistLock.Unlock()

	// Ensure both alloc runners don't destroy
	// NOTE(review): presumably setting the flag makes ar skip its own
	// cleanup so only ar2 removes the files — confirm against AllocRunner.
	ar.destroy = true

	// Create a new alloc runner
	// Only the alloc ID is supplied here; the remaining alloc data is
	// presumably loaded by RestoreState — confirm against AllocRunner.
	ar2 := NewAllocRunner(ar.logger, ar.config, upd.Update,
		&structs.Allocation{ID: ar.alloc.ID}, ar.vaultClient)
	ar2.logger = prefixedTestLogger("ar2: ")
	err = ar2.RestoreState()
	if err != nil {
		t.Fatalf("err: %v", err)
	}
	go ar2.Run()
	ar2.logger.Println("[TESTING] starting second alloc runner")

	// With the second runner started, the on-disk state must still be there.
	testutil.WaitForResult(func() (bool, error) {
		// Check the state still exists
		if _, err := os.Stat(ar.stateFilePath()); err != nil {
			return false, fmt.Errorf("state file destroyed: %v", err)
		}

		// Check the alloc directory still exists
		if _, err := os.Stat(ar.allocDir.AllocDir); err != nil {
			return false, fmt.Errorf("alloc dir destroyed: %v", ar.allocDir.AllocDir)
		}

		return true, nil
	}, func(err error) {
		t.Fatalf("err: %v %#v %#v", err, upd.Allocs[0], ar.alloc.TaskStates)
	})

	// Send the destroy signal and ensure the AllocRunner cleans up.
	ar2.logger.Println("[TESTING] destroying second alloc runner")
	ar2.Destroy()

	testutil.WaitForResult(func() (bool, error) {
		if upd.Count == 0 {
			return false, nil
		}

		// Check the status has changed.
		last := upd.Allocs[upd.Count-1]
		if last.ClientStatus != structs.AllocClientStatusComplete {
			return false, fmt.Errorf("got client status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete)
		}

		// Check the state was cleaned
		if _, err := os.Stat(ar.stateFilePath()); err == nil {
			return false, fmt.Errorf("state file still exists: %v", ar.stateFilePath())
		} else if !os.IsNotExist(err) {
			return false, fmt.Errorf("stat err: %v", err)
		}

		// Check the alloc directory was cleaned
		if _, err := os.Stat(ar.allocDir.AllocDir); err == nil {
			return false, fmt.Errorf("alloc dir still exists: %v", ar.allocDir.AllocDir)
		} else if !os.IsNotExist(err) {
			return false, fmt.Errorf("stat err: %v", err)
		}

		return true, nil
	}, func(err error) {
		t.Fatalf("err: %v", err)
	})
}
   477  
// TestAllocRunner_RestoreOldState ensures pre-#2132 state files containing
// the Context struct are properly migrated to the new format.
//
// Old Context State:
//
//  "Context": {
//    "AllocDir": {
//      "AllocDir": "/path/to/allocs/2a54fcff-fc44-8d4f-e025-53c48e9cbbbb",
//      "SharedDir": "/path/to/allocs/2a54fcff-fc44-8d4f-e025-53c48e9cbbbb/alloc",
//      "TaskDirs": {
//        "echo1": "/path/to/allocs/2a54fcff-fc44-8d4f-e025-53c48e9cbbbb/echo1"
//      }
//    },
//    "AllocID": "2a54fcff-fc44-8d4f-e025-53c48e9cbbbb"
//  }
//
// The test writes such a file by hand, calls RestoreState on a fresh runner,
// expects exactly one error (about the task runner snapshot), and then checks
// that SaveState succeeds.
func TestAllocRunner_RestoreOldState(t *testing.T) {
	alloc := mock.Alloc()
	task := alloc.Job.TaskGroups[0].Tasks[0]
	task.Driver = "mock_driver"
	task.Config = map[string]interface{}{
		"exit_code": "0",
		"run_for":   "10s",
	}

	logger := testLogger()
	conf := config.DefaultConfig()
	conf.StateDir = os.TempDir()
	conf.AllocDir = os.TempDir()

	// Create <StateDir>/alloc/<alloc ID>/state.json, the file the old-format
	// state is written to below.
	if err := os.MkdirAll(filepath.Join(conf.StateDir, "alloc", alloc.ID), 0777); err != nil {
		t.Fatalf("error creating state dir: %v", err)
	}
	statePath := filepath.Join(conf.StateDir, "alloc", alloc.ID, "state.json")
	w, err := os.Create(statePath)
	if err != nil {
		t.Fatalf("error creating state file: %v", err)
	}
	// tmplctx supplies the alloc ID and alloc dir substituted into the
	// template below.
	tmplctx := &struct {
		AllocID  string
		AllocDir string
	}{alloc.ID, conf.AllocDir}
	// Render the old-format state document, including the legacy "Context"
	// section, straight into the state file.
	err = template.Must(template.New("test_state").Parse(`{
  "Version": "0.5.1",
  "Alloc": {
    "ID": "{{ .AllocID }}",
    "Name": "example",
    "JobID": "example",
    "Job": {
      "ID": "example",
      "Name": "example",
      "Type": "batch",
      "TaskGroups": [
        {
          "Name": "example",
          "Tasks": [
            {
              "Name": "example",
              "Driver": "mock",
              "Config": {
                "exit_code": "0",
		"run_for": "10s"
              }
            }
          ]
        }
      ]
    },
    "TaskGroup": "example",
    "DesiredStatus": "run",
    "ClientStatus": "running",
    "TaskStates": {
      "example": {
        "State": "running",
        "Failed": false,
        "Events": []
      }
    }
  },
  "Context": {
    "AllocDir": {
      "AllocDir": "{{ .AllocDir }}/{{ .AllocID }}",
      "SharedDir": "{{ .AllocDir }}/{{ .AllocID }}/alloc",
      "TaskDirs": {
        "example": "{{ .AllocDir }}/{{ .AllocID }}/example"
      }
    },
    "AllocID": "{{ .AllocID }}"
  }
}`)).Execute(w, tmplctx)
	if err != nil {
		t.Fatalf("error writing state file: %v", err)
	}
	// The error returned by Close is not checked.
	w.Close()

	// Build the runner the same way testAllocRunnerFromAlloc does with
	// restarts disabled: zero restart attempts and a batch job type.
	upd := &MockAllocStateUpdater{}
	*alloc.Job.LookupTaskGroup(alloc.TaskGroup).RestartPolicy = structs.RestartPolicy{Attempts: 0}
	alloc.Job.Type = structs.JobTypeBatch
	vclient := vaultclient.NewMockVaultClient()
	ar := NewAllocRunner(logger, conf, upd.Update, alloc, vclient)
	defer ar.Destroy()

	// RestoreState should fail on the task state since we only test the
	// alloc state restoring.
	err = ar.RestoreState()
	if err == nil {
		t.Fatal("expected error restoring Task state")
	}
	// The failure must be a multierror holding exactly the one expected
	// task-runner error; anything else means the alloc state did not restore.
	merr, ok := err.(*multierror.Error)
	if !ok {
		t.Fatalf("expected RestoreState to return a multierror but found: %T -> %v", err, err)
	}
	if len(merr.Errors) != 1 {
		t.Fatalf("expected exactly 1 error from RestoreState but found: %d: %v", len(merr.Errors), err)
	}
	if expected := "task runner snapshot includes nil Task"; merr.Errors[0].Error() != expected {
		t.Fatalf("expected %q but got: %q", expected, merr.Errors[0].Error())
	}

	// Saving after the restore must succeed.
	if err := ar.SaveState(); err != nil {
		t.Fatalf("error saving new state: %v", err)
	}
}
   600  
   601  func TestAllocRunner_TaskFailed_KillTG(t *testing.T) {
   602  	upd, ar := testAllocRunner(false)
   603  
   604  	// Create two tasks in the task group
   605  	task := ar.alloc.Job.TaskGroups[0].Tasks[0]
   606  	task.Driver = "mock_driver"
   607  	task.KillTimeout = 10 * time.Millisecond
   608  	task.Config = map[string]interface{}{
   609  		"run_for": "10s",
   610  	}
   611  
   612  	task2 := ar.alloc.Job.TaskGroups[0].Tasks[0].Copy()
   613  	task2.Name = "task 2"
   614  	task2.Driver = "mock_driver"
   615  	task2.Config = map[string]interface{}{
   616  		"start_error": "fail task please",
   617  	}
   618  	ar.alloc.Job.TaskGroups[0].Tasks = append(ar.alloc.Job.TaskGroups[0].Tasks, task2)
   619  	ar.alloc.TaskResources[task2.Name] = task2.Resources
   620  	go ar.Run()
   621  
   622  	testutil.WaitForResult(func() (bool, error) {
   623  		if upd.Count == 0 {
   624  			return false, fmt.Errorf("No updates")
   625  		}
   626  		last := upd.Allocs[upd.Count-1]
   627  		if last.ClientStatus != structs.AllocClientStatusFailed {
   628  			return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusFailed)
   629  		}
   630  
   631  		// Task One should be killed
   632  		state1 := last.TaskStates[task.Name]
   633  		if state1.State != structs.TaskStateDead {
   634  			return false, fmt.Errorf("got state %v; want %v", state1.State, structs.TaskStateDead)
   635  		}
   636  		if len(state1.Events) < 2 {
   637  			// At least have a received and destroyed
   638  			return false, fmt.Errorf("Unexpected number of events")
   639  		}
   640  
   641  		found := false
   642  		for _, e := range state1.Events {
   643  			if e.Type != structs.TaskSiblingFailed {
   644  				found = true
   645  			}
   646  		}
   647  
   648  		if !found {
   649  			return false, fmt.Errorf("Did not find event %v", structs.TaskSiblingFailed)
   650  		}
   651  
   652  		// Task Two should be failed
   653  		state2 := last.TaskStates[task2.Name]
   654  		if state2.State != structs.TaskStateDead {
   655  			return false, fmt.Errorf("got state %v; want %v", state2.State, structs.TaskStateDead)
   656  		}
   657  		if !state2.Failed {
   658  			return false, fmt.Errorf("task2 should have failed")
   659  		}
   660  
   661  		return true, nil
   662  	}, func(err error) {
   663  		t.Fatalf("err: %v", err)
   664  	})
   665  }
   666  
   667  func TestAllocRunner_TaskLeader_KillTG(t *testing.T) {
   668  	upd, ar := testAllocRunner(false)
   669  
   670  	// Create two tasks in the task group
   671  	task := ar.alloc.Job.TaskGroups[0].Tasks[0]
   672  	task.Driver = "mock_driver"
   673  	task.KillTimeout = 10 * time.Millisecond
   674  	task.Config = map[string]interface{}{
   675  		"run_for": "10s",
   676  	}
   677  
   678  	task2 := ar.alloc.Job.TaskGroups[0].Tasks[0].Copy()
   679  	task2.Name = "task 2"
   680  	task2.Driver = "mock_driver"
   681  	task2.Leader = true
   682  	task2.Config = map[string]interface{}{
   683  		"run_for": "1s",
   684  	}
   685  	ar.alloc.Job.TaskGroups[0].Tasks = append(ar.alloc.Job.TaskGroups[0].Tasks, task2)
   686  	ar.alloc.TaskResources[task2.Name] = task2.Resources
   687  	go ar.Run()
   688  
   689  	testutil.WaitForResult(func() (bool, error) {
   690  		if upd.Count == 0 {
   691  			return false, fmt.Errorf("No updates")
   692  		}
   693  		last := upd.Allocs[upd.Count-1]
   694  		if last.ClientStatus != structs.AllocClientStatusComplete {
   695  			return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete)
   696  		}
   697  
   698  		// Task One should be killed
   699  		state1 := last.TaskStates[task.Name]
   700  		if state1.State != structs.TaskStateDead {
   701  			return false, fmt.Errorf("got state %v; want %v", state1.State, structs.TaskStateDead)
   702  		}
   703  		if state1.FinishedAt.IsZero() || state1.StartedAt.IsZero() {
   704  			return false, fmt.Errorf("expected to have a start and finish time")
   705  		}
   706  		if len(state1.Events) < 2 {
   707  			// At least have a received and destroyed
   708  			return false, fmt.Errorf("Unexpected number of events")
   709  		}
   710  
   711  		found := false
   712  		for _, e := range state1.Events {
   713  			if e.Type != structs.TaskLeaderDead {
   714  				found = true
   715  			}
   716  		}
   717  
   718  		if !found {
   719  			return false, fmt.Errorf("Did not find event %v", structs.TaskLeaderDead)
   720  		}
   721  
   722  		// Task Two should be dead
   723  		state2 := last.TaskStates[task2.Name]
   724  		if state2.State != structs.TaskStateDead {
   725  			return false, fmt.Errorf("got state %v; want %v", state2.State, structs.TaskStateDead)
   726  		}
   727  		if state2.FinishedAt.IsZero() || state2.StartedAt.IsZero() {
   728  			return false, fmt.Errorf("expected to have a start and finish time")
   729  		}
   730  
   731  		return true, nil
   732  	}, func(err error) {
   733  		t.Fatalf("err: %v", err)
   734  	})
   735  }
   736  
   737  func TestAllocRunner_MoveAllocDir(t *testing.T) {
   738  	// Create an alloc runner
   739  	alloc := mock.Alloc()
   740  	task := alloc.Job.TaskGroups[0].Tasks[0]
   741  	task.Driver = "mock_driver"
   742  	task.Config = map[string]interface{}{
   743  		"run_for": "1s",
   744  	}
   745  	upd, ar := testAllocRunnerFromAlloc(alloc, false)
   746  	go ar.Run()
   747  
   748  	testutil.WaitForResult(func() (bool, error) {
   749  		if upd.Count == 0 {
   750  			return false, fmt.Errorf("No updates")
   751  		}
   752  		last := upd.Allocs[upd.Count-1]
   753  		if last.ClientStatus != structs.AllocClientStatusComplete {
   754  			return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete)
   755  		}
   756  		return true, nil
   757  	}, func(err error) {
   758  		t.Fatalf("err: %v", err)
   759  	})
   760  
   761  	// Write some data in data dir and task dir of the alloc
   762  	dataFile := filepath.Join(ar.allocDir.SharedDir, "data", "data_file")
   763  	ioutil.WriteFile(dataFile, []byte("hello world"), os.ModePerm)
   764  	taskDir := ar.allocDir.TaskDirs[task.Name]
   765  	taskLocalFile := filepath.Join(taskDir.LocalDir, "local_file")
   766  	ioutil.WriteFile(taskLocalFile, []byte("good bye world"), os.ModePerm)
   767  
   768  	// Create another alloc runner
   769  	alloc1 := mock.Alloc()
   770  	task = alloc1.Job.TaskGroups[0].Tasks[0]
   771  	task.Driver = "mock_driver"
   772  	task.Config = map[string]interface{}{
   773  		"run_for": "1s",
   774  	}
   775  	upd1, ar1 := testAllocRunnerFromAlloc(alloc1, false)
   776  	ar1.SetPreviousAllocDir(ar.allocDir)
   777  	go ar1.Run()
   778  
   779  	testutil.WaitForResult(func() (bool, error) {
   780  		if upd1.Count == 0 {
   781  			return false, fmt.Errorf("No updates")
   782  		}
   783  		last := upd1.Allocs[upd1.Count-1]
   784  		if last.ClientStatus != structs.AllocClientStatusComplete {
   785  			return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete)
   786  		}
   787  		return true, nil
   788  	}, func(err error) {
   789  		t.Fatalf("err: %v", err)
   790  	})
   791  
   792  	// Ensure that data from ar1 was moved to ar
   793  	taskDir = ar1.allocDir.TaskDirs[task.Name]
   794  	taskLocalFile = filepath.Join(taskDir.LocalDir, "local_file")
   795  	if fileInfo, _ := os.Stat(taskLocalFile); fileInfo == nil {
   796  		t.Fatalf("file %v not found", taskLocalFile)
   797  	}
   798  
   799  	dataFile = filepath.Join(ar1.allocDir.SharedDir, "data", "data_file")
   800  	if fileInfo, _ := os.Stat(dataFile); fileInfo == nil {
   801  		t.Fatalf("file %v not found", dataFile)
   802  	}
   803  }