github.com/hhrutter/nomad@v0.6.0-rc2.0.20170723054333-80c4b03f0705/client/alloc_runner_test.go

package client

import (
	"fmt"
	"io/ioutil"
	"os"
	"path/filepath"
	"strings"
	"sync"
	"testing"
	"text/template"
	"time"

	"github.com/boltdb/bolt"
	"github.com/hashicorp/consul/api"
	"github.com/hashicorp/go-multierror"
	"github.com/hashicorp/nomad/nomad/mock"
	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/hashicorp/nomad/testutil"
	"github.com/kr/pretty"

	"github.com/hashicorp/nomad/client/config"
	"github.com/hashicorp/nomad/client/vaultclient"
)

type MockAllocStateUpdater struct {
	Allocs []*structs.Allocation
	mu     sync.Mutex
}

// Update fulfills the TaskStateUpdater interface
func (m *MockAllocStateUpdater) Update(alloc *structs.Allocation) {
	m.mu.Lock()
	m.Allocs = append(m.Allocs, alloc)
	m.mu.Unlock()
}

// Last returns the total number of updates and the last alloc (or nil)
func (m *MockAllocStateUpdater) Last() (int, *structs.Allocation) {
	m.mu.Lock()
	defer m.mu.Unlock()
	n := len(m.Allocs)
	if n == 0 {
		return 0, nil
	}
	return n, m.Allocs[n-1].Copy()
}

func testAllocRunnerFromAlloc(alloc *structs.Allocation, restarts bool) (*MockAllocStateUpdater, *AllocRunner) {
	logger := testLogger()
	conf := config.DefaultConfig()
	conf.Node = mock.Node()
	conf.StateDir = os.TempDir()
	conf.AllocDir = os.TempDir()
	tmp, _ := ioutil.TempFile("", "state-db")
	db, _ := bolt.Open(tmp.Name(), 0600, nil)
	upd := &MockAllocStateUpdater{}
	if !restarts {
		*alloc.Job.LookupTaskGroup(alloc.TaskGroup).RestartPolicy = structs.RestartPolicy{Attempts: 0}
		alloc.Job.Type = structs.JobTypeBatch
	}
	vclient := vaultclient.NewMockVaultClient()
	ar := NewAllocRunner(logger, conf, db, upd.Update, alloc, vclient, newMockConsulServiceClient())
	return upd, ar
}

func testAllocRunner(restarts bool) (*MockAllocStateUpdater, *AllocRunner) {
	// Use mock driver
	alloc := mock.Alloc()
	task := alloc.Job.TaskGroups[0].Tasks[0]
	task.Driver = "mock_driver"
	task.Config["run_for"] = "500ms"
	return testAllocRunnerFromAlloc(alloc, restarts)
}

func TestAllocRunner_SimpleRun(t *testing.T) {
	t.Parallel()
	upd, ar := testAllocRunner(false)
	go ar.Run()
	defer ar.Destroy()

	testutil.WaitForResult(func() (bool, error) {
		_, last := upd.Last()
		if last == nil {
			return false, fmt.Errorf("No updates")
		}
		if last.ClientStatus != structs.AllocClientStatusComplete {
			return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete)
		}
		return true, nil
	}, func(err error) {
		t.Fatalf("err: %v", err)
	})
}

// Test that the watcher will mark the allocation as unhealthy.
func TestAllocRunner_DeploymentHealth_Unhealthy_BadStart(t *testing.T) {
	t.Parallel()

	// Disable restarts
	upd, ar := testAllocRunner(false)

	// Make the task fail
	task := ar.alloc.Job.TaskGroups[0].Tasks[0]
	task.Driver = "mock_driver"
	task.Config["start_error"] = "test error"

	// Make the alloc be part of a deployment
	ar.alloc.DeploymentID = structs.GenerateUUID()
	ar.alloc.Job.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
	ar.alloc.Job.TaskGroups[0].Update.HealthCheck = structs.UpdateStrategyHealthCheck_TaskStates
	ar.alloc.Job.TaskGroups[0].Update.MaxParallel = 1

	go ar.Run()
	defer ar.Destroy()

	testutil.WaitForResult(func() (bool, error) {
		_, last := upd.Last()
		if last == nil {
			return false, fmt.Errorf("No updates")
		}
		if last.DeploymentStatus == nil || last.DeploymentStatus.Healthy == nil {
			return false, fmt.Errorf("want deployment status unhealthy; got unset")
		} else if *last.DeploymentStatus.Healthy {
			return false, fmt.Errorf("want deployment status unhealthy; got healthy")
		}
		return true, nil
	}, func(err error) {
		t.Fatalf("err: %v", err)
	})
}

// Test that the watcher will mark the allocation as unhealthy if it hits its
// deadline.
func TestAllocRunner_DeploymentHealth_Unhealthy_Deadline(t *testing.T) {
	t.Parallel()

	// Disable restarts
	upd, ar := testAllocRunner(false)

	// Make the task block
	task := ar.alloc.Job.TaskGroups[0].Tasks[0]
	task.Driver = "mock_driver"
	task.Config["start_block_for"] = "2s"
	task.Config["run_for"] = "10s"

	// Make the alloc be part of a deployment
	ar.alloc.DeploymentID = structs.GenerateUUID()
	ar.alloc.Job.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
	ar.alloc.Job.TaskGroups[0].Update.HealthCheck = structs.UpdateStrategyHealthCheck_TaskStates
	ar.alloc.Job.TaskGroups[0].Update.MaxParallel = 1
	ar.alloc.Job.TaskGroups[0].Update.HealthyDeadline = 100 * time.Millisecond

	go ar.Run()
	defer ar.Destroy()

	testutil.WaitForResult(func() (bool, error) {
		_, last := upd.Last()
		if last == nil {
			return false, fmt.Errorf("No updates")
		}
		if last.DeploymentStatus == nil || last.DeploymentStatus.Healthy == nil {
			return false, fmt.Errorf("want deployment status unhealthy; got unset")
		} else if *last.DeploymentStatus.Healthy {
			return false, fmt.Errorf("want deployment status unhealthy; got healthy")
		}
		return true, nil
	}, func(err error) {
		t.Fatalf("err: %v", err)
	})
}

// Test that the watcher will mark the allocation as healthy.
func TestAllocRunner_DeploymentHealth_Healthy_NoChecks(t *testing.T) {
	t.Parallel()

	// Disable restarts
	upd, ar := testAllocRunner(false)

	// Make the task run healthy
	task := ar.alloc.Job.TaskGroups[0].Tasks[0]
	task.Driver = "mock_driver"
	task.Config["run_for"] = "10s"

	// Create a task that takes longer to become healthy
	ar.alloc.Job.TaskGroups[0].Tasks = append(ar.alloc.Job.TaskGroups[0].Tasks, task.Copy())
	task2 := ar.alloc.Job.TaskGroups[0].Tasks[1]
	task2.Name = "task 2"
	task2.Config["start_block_for"] = "500ms"

	// Make the alloc be part of a deployment
	ar.alloc.DeploymentID = structs.GenerateUUID()
	ar.alloc.Job.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
	ar.alloc.Job.TaskGroups[0].Update.HealthCheck = structs.UpdateStrategyHealthCheck_TaskStates
	ar.alloc.Job.TaskGroups[0].Update.MaxParallel = 1
	ar.alloc.Job.TaskGroups[0].Update.MinHealthyTime = 100 * time.Millisecond

	start := time.Now()
	go ar.Run()
	defer ar.Destroy()

	testutil.WaitForResult(func() (bool, error) {
		_, last := upd.Last()
		if last == nil {
			return false, fmt.Errorf("No updates")
		}
		if last.DeploymentStatus == nil || last.DeploymentStatus.Healthy == nil {
			return false, fmt.Errorf("want deployment status unhealthy; got unset")
		} else if !*last.DeploymentStatus.Healthy {
			return false, fmt.Errorf("want deployment status healthy; got unhealthy")
		}
		return true, nil
	}, func(err error) {
		t.Fatalf("err: %v", err)
	})
	if d := time.Now().Sub(start); d < 500*time.Millisecond {
		t.Fatalf("didn't wait for second task group. Only took %v", d)
	}
}

// Test that the watcher will mark the allocation as healthy with checks
func TestAllocRunner_DeploymentHealth_Healthy_Checks(t *testing.T) {
	t.Parallel()

	// Disable restarts
	upd, ar := testAllocRunner(false)

	// Make the task run healthy
	task := ar.alloc.Job.TaskGroups[0].Tasks[0]
	task.Driver = "mock_driver"
	task.Config["run_for"] = "10s"

	// Create a task that has no checks
	ar.alloc.Job.TaskGroups[0].Tasks = append(ar.alloc.Job.TaskGroups[0].Tasks, task.Copy())
	task2 := ar.alloc.Job.TaskGroups[0].Tasks[1]
	task2.Name = "task 2"
	task2.Services = nil

	// Make the alloc be part of a deployment
	ar.alloc.DeploymentID = structs.GenerateUUID()
	ar.alloc.Job.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
	ar.alloc.Job.TaskGroups[0].Update.HealthCheck = structs.UpdateStrategyHealthCheck_Checks
	ar.alloc.Job.TaskGroups[0].Update.MaxParallel = 1
	ar.alloc.Job.TaskGroups[0].Update.MinHealthyTime = 100 * time.Millisecond

	checkHealthy := &api.AgentCheck{
		CheckID: structs.GenerateUUID(),
		Status:  api.HealthPassing,
	}
	checkUnhealthy := &api.AgentCheck{
		CheckID: checkHealthy.CheckID,
		Status:  api.HealthWarning,
	}

	// Only return the check as healthy after a duration
	trigger := time.After(500 * time.Millisecond)
	ar.consulClient.(*mockConsulServiceClient).checksFn = func(a *structs.Allocation) ([]*api.AgentCheck, error) {
		select {
		case <-trigger:
			return []*api.AgentCheck{checkHealthy}, nil
		default:
			return []*api.AgentCheck{checkUnhealthy}, nil
		}
	}

	start := time.Now()
	go ar.Run()
	defer ar.Destroy()

	testutil.WaitForResult(func() (bool, error) {
		_, last := upd.Last()
		if last == nil {
			return false, fmt.Errorf("No updates")
		}
		if last.DeploymentStatus == nil || last.DeploymentStatus.Healthy == nil {
			return false, fmt.Errorf("want deployment status unhealthy; got unset")
		} else if !*last.DeploymentStatus.Healthy {
			return false, fmt.Errorf("want deployment status healthy; got unhealthy")
		}
		return true, nil
	}, func(err error) {
		t.Fatalf("err: %v", err)
	})

	if d := time.Now().Sub(start); d < 500*time.Millisecond {
		t.Fatalf("didn't wait for second task group. Only took %v", d)
	}
}

// Test that the watcher will mark the allocation as healthy.
func TestAllocRunner_DeploymentHealth_Healthy_UpdatedDeployment(t *testing.T) {
	t.Parallel()

	// Disable restarts
	upd, ar := testAllocRunner(false)

	// Make the task run healthy
	task := ar.alloc.Job.TaskGroups[0].Tasks[0]
	task.Driver = "mock_driver"
	task.Config["run_for"] = "30s"

	// Make the alloc be part of a deployment
	ar.alloc.DeploymentID = structs.GenerateUUID()
	ar.alloc.Job.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
	ar.alloc.Job.TaskGroups[0].Update.HealthCheck = structs.UpdateStrategyHealthCheck_TaskStates
	ar.alloc.Job.TaskGroups[0].Update.MaxParallel = 1
	ar.alloc.Job.TaskGroups[0].Update.MinHealthyTime = 100 * time.Millisecond

	go ar.Run()
	defer ar.Destroy()

	testutil.WaitForResult(func() (bool, error) {
		_, last := upd.Last()
		if last == nil {
			return false, fmt.Errorf("No updates")
		}
		if last.DeploymentStatus == nil || last.DeploymentStatus.Healthy == nil {
			return false, fmt.Errorf("want deployment status unhealthy; got unset")
		} else if !*last.DeploymentStatus.Healthy {
			return false, fmt.Errorf("want deployment status healthy; got unhealthy")
		}
		return true, nil
	}, func(err error) {
		t.Fatalf("err: %v", err)
	})

	// Mimic an update to a new deployment id
	oldCount, last := upd.Last()
	last.DeploymentStatus = nil
	last.DeploymentID = structs.GenerateUUID()
	ar.Update(last)

	testutil.WaitForResult(func() (bool, error) {
		newCount, last := upd.Last()
		if newCount <= oldCount {
			return false, fmt.Errorf("No new updates")
		}
		if last.DeploymentStatus == nil || last.DeploymentStatus.Healthy == nil {
			return false, fmt.Errorf("want deployment status unhealthy; got unset")
		} else if !*last.DeploymentStatus.Healthy {
			return false, fmt.Errorf("want deployment status healthy; got unhealthy")
		}
		return true, nil
	}, func(err error) {
		t.Fatalf("err: %v", err)
	})
}

// TestAllocRunner_RetryArtifact ensures that if one task in a task group is
// retrying fetching an artifact, other tasks in the group should be able
// to proceed.
func TestAllocRunner_RetryArtifact(t *testing.T) {
	t.Parallel()

	alloc := mock.Alloc()
	alloc.Job.Type = structs.JobTypeBatch
	alloc.Job.TaskGroups[0].RestartPolicy.Mode = structs.RestartPolicyModeFail
	alloc.Job.TaskGroups[0].RestartPolicy.Attempts = 1
	alloc.Job.TaskGroups[0].RestartPolicy.Delay = time.Duration(4*testutil.TestMultiplier()) * time.Second

	task := alloc.Job.TaskGroups[0].Tasks[0]
	task.Driver = "mock_driver"
	task.Config = map[string]interface{}{
		"exit_code": "0",
		"run_for":   "1s",
	}

	// Create a new task with a bad artifact
	badtask := alloc.Job.TaskGroups[0].Tasks[0].Copy()
	badtask.Name = "bad"
	badtask.Artifacts = []*structs.TaskArtifact{
		{GetterSource: "http://127.0.0.1:0/foo/bar/baz"},
	}

	alloc.Job.TaskGroups[0].Tasks = append(alloc.Job.TaskGroups[0].Tasks, badtask)
	upd, ar := testAllocRunnerFromAlloc(alloc, true)
	go ar.Run()
	defer ar.Destroy()

	testutil.WaitForResult(func() (bool, error) {
		count, last := upd.Last()
		if min := 6; count < min {
			return false, fmt.Errorf("Not enough updates (%d < %d)", count, min)
		}

		// web task should have completed successfully while bad task
		// retries artifact fetching
		webstate := last.TaskStates["web"]
		if webstate.State != structs.TaskStateDead {
			return false, fmt.Errorf("expected web to be dead but found %q", last.TaskStates["web"].State)
		}
		if !webstate.Successful() {
			return false, fmt.Errorf("expected web to have exited successfully")
		}

		// bad task should have failed
		badstate := last.TaskStates["bad"]
		if badstate.State != structs.TaskStateDead {
			return false, fmt.Errorf("expected bad to be dead but found %q", badstate.State)
		}
		if !badstate.Failed {
			return false, fmt.Errorf("expected bad to have failed: %#v", badstate.Events)
		}
		return true, nil
	}, func(err error) {
		t.Fatalf("err: %v", err)
	})
}

func TestAllocRunner_TerminalUpdate_Destroy(t *testing.T) {
	t.Parallel()
	upd, ar := testAllocRunner(false)

	// Ensure task takes some time
	task := ar.alloc.Job.TaskGroups[0].Tasks[0]
	task.Driver = "mock_driver"
	task.Config["run_for"] = "10s"
	go ar.Run()

	testutil.WaitForResult(func() (bool, error) {
		_, last := upd.Last()
		if last == nil {
			return false, fmt.Errorf("No updates")
		}
		if last.ClientStatus != structs.AllocClientStatusRunning {
			return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusRunning)
		}
		return true, nil
	}, func(err error) {
		t.Fatalf("err: %v", err)
	})

	// Update the alloc to be terminal which should cause the alloc runner to
	// stop the tasks and wait for a destroy.
	update := ar.alloc.Copy()
	update.DesiredStatus = structs.AllocDesiredStatusStop
	ar.Update(update)

	testutil.WaitForResult(func() (bool, error) {
		_, last := upd.Last()
		if last == nil {
			return false, fmt.Errorf("No updates")
		}

		// Check the status has changed.
		if last.ClientStatus != structs.AllocClientStatusComplete {
			return false, fmt.Errorf("got client status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete)
		}

		// Check the allocation state still exists
		if err := ar.stateDB.View(func(tx *bolt.Tx) error {
			if !allocationBucketExists(tx, ar.Alloc().ID) {
				return fmt.Errorf("no bucket for alloc")
			}

			return nil
		}); err != nil {
			return false, fmt.Errorf("state destroyed")
		}

		// Check the alloc directory still exists
		if _, err := os.Stat(ar.allocDir.AllocDir); err != nil {
			return false, fmt.Errorf("alloc dir destroyed: %v", ar.allocDir.AllocDir)
		}

		return true, nil
	}, func(err error) {
		t.Fatalf("err: %v", err)
	})

	// Send the destroy signal and ensure the AllocRunner cleans up.
	ar.Destroy()

	testutil.WaitForResult(func() (bool, error) {
		_, last := upd.Last()
		if last == nil {
			return false, fmt.Errorf("No updates")
		}

		// Check the status has changed.
		if last.ClientStatus != structs.AllocClientStatusComplete {
			return false, fmt.Errorf("got client status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete)
		}

		// Check the state was cleaned
		if err := ar.stateDB.View(func(tx *bolt.Tx) error {
			if allocationBucketExists(tx, ar.Alloc().ID) {
				return fmt.Errorf("bucket for alloc exists")
			}

			return nil
		}); err != nil {
			return false, fmt.Errorf("state not destroyed")
		}

		// Check the alloc directory was cleaned
		if _, err := os.Stat(ar.allocDir.AllocDir); err == nil {
			return false, fmt.Errorf("alloc dir still exists: %v", ar.allocDir.AllocDir)
		} else if !os.IsNotExist(err) {
			return false, fmt.Errorf("stat err: %v", err)
		}

		return true, nil
	}, func(err error) {
		t.Fatalf("err: %v", err)
	})
}

func TestAllocRunner_Destroy(t *testing.T) {
	t.Parallel()
	upd, ar := testAllocRunner(false)

	// Ensure task takes some time
	task := ar.alloc.Job.TaskGroups[0].Tasks[0]
	task.Driver = "mock_driver"
	task.Config["run_for"] = "10s"
	go ar.Run()
	start := time.Now()

	// Begin the tear down
	go func() {
		time.Sleep(1 * time.Second)
		ar.Destroy()
	}()

	testutil.WaitForResult(func() (bool, error) {
		_, last := upd.Last()
		if last == nil {
			return false, fmt.Errorf("No updates")
		}

		// Check the status has changed.
		if last.ClientStatus != structs.AllocClientStatusComplete {
			return false, fmt.Errorf("got client status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete)
		}

		// Check the state was cleaned
		if err := ar.stateDB.View(func(tx *bolt.Tx) error {
			if allocationBucketExists(tx, ar.Alloc().ID) {
				return fmt.Errorf("bucket for alloc exists")
			}

			return nil
		}); err != nil {
			return false, fmt.Errorf("state not destroyed: %v", err)
		}

		// Check the alloc directory was cleaned
		if _, err := os.Stat(ar.allocDir.AllocDir); err == nil {
			return false, fmt.Errorf("alloc dir still exists: %v", ar.allocDir.AllocDir)
		} else if !os.IsNotExist(err) {
			return false, fmt.Errorf("stat err: %v", err)
		}

		return true, nil
	}, func(err error) {
		t.Fatalf("err: %v", err)
	})

	if elapsed := time.Since(start); elapsed > 20*time.Second {
		t.Fatalf("took too long to terminate: %s", elapsed)
	}
}

func TestAllocRunner_Update(t *testing.T) {
	t.Parallel()
	_, ar := testAllocRunner(false)

	// Deep copy the alloc to avoid races when updating
	newAlloc := ar.Alloc().Copy()

	// Ensure task takes some time
	task := ar.alloc.Job.TaskGroups[0].Tasks[0]
	task.Driver = "mock_driver"
	task.Config["run_for"] = "10s"
	go ar.Run()
	defer ar.Destroy()

	// Update the alloc definition
	newAlloc.Name = "FOO"
	newAlloc.AllocModifyIndex++
	ar.Update(newAlloc)

	// Check the alloc runner stores the update allocation.
	testutil.WaitForResult(func() (bool, error) {
		return ar.Alloc().Name == "FOO", nil
	}, func(err error) {
		t.Fatalf("err: %v %#v", err, ar.Alloc())
	})
}

func TestAllocRunner_SaveRestoreState(t *testing.T) {
	t.Parallel()
	alloc := mock.Alloc()
	task := alloc.Job.TaskGroups[0].Tasks[0]
	task.Driver = "mock_driver"
	task.Config = map[string]interface{}{
		"exit_code": "0",
		"run_for":   "10s",
	}

	upd, ar := testAllocRunnerFromAlloc(alloc, false)
	go ar.Run()
	defer ar.Destroy()

	// Snapshot state
	testutil.WaitForResult(func() (bool, error) {
		ar.taskLock.RLock()
		defer ar.taskLock.RUnlock()
		return len(ar.tasks) == 1, nil
	}, func(err error) {
		t.Fatalf("task never started: %v", err)
	})

	err := ar.SaveState()
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	// Create a new alloc runner
	l2 := prefixedTestLogger("----- ar2: ")
	ar2 := NewAllocRunner(l2, ar.config, ar.stateDB, upd.Update,
		&structs.Allocation{ID: ar.alloc.ID}, ar.vaultClient,
		ar.consulClient)
	err = ar2.RestoreState()
	if err != nil {
		t.Fatalf("err: %v", err)
	}
	go ar2.Run()

	testutil.WaitForResult(func() (bool, error) {
		if len(ar2.tasks) != 1 {
			return false, fmt.Errorf("Incorrect number of tasks")
		}

		_, last := upd.Last()
		if last == nil {
			return false, nil
		}

		return last.ClientStatus == structs.AllocClientStatusRunning, nil
	}, func(err error) {
		_, last := upd.Last()
		t.Fatalf("err: %v %#v %#v", err, last, last.TaskStates["web"])
	})

	// Destroy and wait
	ar2.Destroy()
	start := time.Now()

	testutil.WaitForResult(func() (bool, error) {
		alloc := ar2.Alloc()
		if alloc.ClientStatus != structs.AllocClientStatusComplete {
			return false, fmt.Errorf("Bad client status; got %v; want %v", alloc.ClientStatus, structs.AllocClientStatusComplete)
		}
		return true, nil
	}, func(err error) {
		_, last := upd.Last()
		t.Fatalf("err: %v %#v %#v", err, last, last.TaskStates)
	})

	if time.Since(start) > time.Duration(testutil.TestMultiplier()*5)*time.Second {
		t.Fatalf("took too long to terminate")
	}
}

func TestAllocRunner_SaveRestoreState_TerminalAlloc(t *testing.T) {
	t.Parallel()
	upd, ar := testAllocRunner(false)
	ar.logger = prefixedTestLogger("ar1: ")

	// Ensure task takes some time
	ar.alloc.Job.TaskGroups[0].Tasks[0].Driver = "mock_driver"
	task := ar.alloc.Job.TaskGroups[0].Tasks[0]
	task.Config["run_for"] = "10s"
	go ar.Run()
	defer ar.Destroy()

	testutil.WaitForResult(func() (bool, error) {
		_, last := upd.Last()
		if last == nil {
			return false, fmt.Errorf("No updates")
		}

		if last.ClientStatus != structs.AllocClientStatusRunning {
			return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusRunning)
		}
		return true, nil
	}, func(err error) {
		t.Fatalf("err: %v", err)
	})

	// Update the alloc to be terminal which should cause the alloc runner to
	// stop the tasks and wait for a destroy.
	update := ar.alloc.Copy()
	update.DesiredStatus = structs.AllocDesiredStatusStop
	ar.Update(update)

	testutil.WaitForResult(func() (bool, error) {
		return ar.Alloc().DesiredStatus == structs.AllocDesiredStatusStop, nil
	}, func(err error) {
		t.Fatalf("err: %v", err)
	})

	err := ar.SaveState()
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	// Ensure ar1 doesn't recreate the state file
	ar.allocLock.Lock()
	defer ar.allocLock.Unlock()

	// Create a new alloc runner
	ar2 := NewAllocRunner(ar.logger, ar.config, ar.stateDB, upd.Update,
		&structs.Allocation{ID: ar.alloc.ID}, ar.vaultClient, ar.consulClient)
	ar2.logger = prefixedTestLogger("ar2: ")
	err = ar2.RestoreState()
	if err != nil {
		t.Fatalf("err: %v", err)
	}
	ar2.logger.Println("[TESTING] running second alloc runner")
	go ar2.Run()
	defer ar2.Destroy() // Just-in-case of failure before Destroy below

	testutil.WaitForResult(func() (bool, error) {
		// Check the state still exists
		if err := ar.stateDB.View(func(tx *bolt.Tx) error {
			if !allocationBucketExists(tx, ar2.Alloc().ID) {
				return fmt.Errorf("no bucket for alloc")
			}

			return nil
		}); err != nil {
			return false, fmt.Errorf("state destroyed")
		}

		// Check the alloc directory still exists
		if _, err := os.Stat(ar.allocDir.AllocDir); err != nil {
			return false, fmt.Errorf("alloc dir destroyed: %v", ar.allocDir.AllocDir)
		}

		return true, nil
	}, func(err error) {
		_, last := upd.Last()
		t.Fatalf("err: %v %#v %#v", err, last, last.TaskStates)
	})

	// Send the destroy signal and ensure the AllocRunner cleans up.
	ar2.logger.Println("[TESTING] destroying second alloc runner")
	ar2.Destroy()

	testutil.WaitForResult(func() (bool, error) {
		_, last := upd.Last()
		if last == nil {
			return false, fmt.Errorf("No updates")
		}

		// Check the status has changed.
		if last.ClientStatus != structs.AllocClientStatusComplete {
			return false, fmt.Errorf("got client status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete)
		}

		// Check the state was cleaned
		if err := ar.stateDB.View(func(tx *bolt.Tx) error {
			if allocationBucketExists(tx, ar2.Alloc().ID) {
				return fmt.Errorf("bucket for alloc exists")
			}

			return nil
		}); err != nil {
			return false, fmt.Errorf("state not destroyed")
		}

		// Check the alloc directory was cleaned
		if _, err := os.Stat(ar.allocDir.AllocDir); err == nil {
			return false, fmt.Errorf("alloc dir still exists: %v", ar.allocDir.AllocDir)
		} else if !os.IsNotExist(err) {
			return false, fmt.Errorf("stat err: %v", err)
		}

		return true, nil
	}, func(err error) {
		t.Fatalf("err: %v", err)
	})
}

// TestAllocRunner_SaveRestoreState_Upgrade asserts that pre-0.6 exec tasks are
// restarted on upgrade.
func TestAllocRunner_SaveRestoreState_Upgrade(t *testing.T) {
	t.Parallel()
	alloc := mock.Alloc()
	task := alloc.Job.TaskGroups[0].Tasks[0]
	task.Driver = "mock_driver"
	task.Config = map[string]interface{}{
		"exit_code": "0",
		"run_for":   "10s",
	}

	upd, ar := testAllocRunnerFromAlloc(alloc, false)
	// Hack in old version to cause an upgrade on RestoreState
	origConfig := ar.config.Copy()
	ar.config.Version = "0.5.6"
	go ar.Run()
	defer ar.Destroy()

	// Snapshot state
	testutil.WaitForResult(func() (bool, error) {
		_, last := upd.Last()
		if last == nil {
			return false, fmt.Errorf("No updates")
		}

		if last.ClientStatus != structs.AllocClientStatusRunning {
			return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusRunning)
		}
		return true, nil
	}, func(err error) {
		t.Fatalf("task never started: %v", err)
	})

	err := ar.SaveState()
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	// Create a new alloc runner
	l2 := prefixedTestLogger("----- ar2: ")
	ar2 := NewAllocRunner(l2, origConfig, ar.stateDB, upd.Update, &structs.Allocation{ID: ar.alloc.ID}, ar.vaultClient, ar.consulClient)
	err = ar2.RestoreState()
	if err != nil {
		t.Fatalf("err: %v", err)
	}
	go ar2.Run()
	defer ar2.Destroy() // Just-in-case of failure before Destroy below

	testutil.WaitForResult(func() (bool, error) {
		count, last := upd.Last()
		if min := 3; count < min {
			return false, fmt.Errorf("expected at least %d updates but found %d", min, count)
		}
		for _, ev := range last.TaskStates["web"].Events {
			if strings.HasSuffix(ev.RestartReason, pre06ScriptCheckReason) {
				return true, nil
			}
		}
		return false, fmt.Errorf("no restart with proper reason found")
	}, func(err error) {
		count, last := upd.Last()
		t.Fatalf("err: %v\nAllocs: %d\nweb state: % #v", err, count, pretty.Formatter(last.TaskStates["web"]))
	})

	// Destroy and wait
	ar2.Destroy()
	start := time.Now()

	testutil.WaitForResult(func() (bool, error) {
		alloc := ar2.Alloc()
		if alloc.ClientStatus != structs.AllocClientStatusComplete {
			return false, fmt.Errorf("Bad client status; got %v; want %v", alloc.ClientStatus, structs.AllocClientStatusComplete)
		}
		return true, nil
	}, func(err error) {
		_, last := upd.Last()
		t.Fatalf("err: %v %#v %#v", err, last, last.TaskStates)
	})

	if time.Since(start) > time.Duration(testutil.TestMultiplier()*5)*time.Second {
		t.Fatalf("took too long to terminate")
	}
}

// Ensure pre-#2132 state files containing the Context struct are properly
// migrated to the new format.
//
// Old Context State:
//
//  "Context": {
//    "AllocDir": {
//      "AllocDir": "/path/to/allocs/2a54fcff-fc44-8d4f-e025-53c48e9cbbbb",
//      "SharedDir": "/path/to/allocs/2a54fcff-fc44-8d4f-e025-53c48e9cbbbb/alloc",
//      "TaskDirs": {
//        "echo1": "/path/to/allocs/2a54fcff-fc44-8d4f-e025-53c48e9cbbbb/echo1"
//      }
//    },
//    "AllocID": "2a54fcff-fc44-8d4f-e025-53c48e9cbbbb"
//  }
func TestAllocRunner_RestoreOldState(t *testing.T) {
	t.Parallel()
	alloc := mock.Alloc()
	task := alloc.Job.TaskGroups[0].Tasks[0]
	task.Driver = "mock_driver"
	task.Config = map[string]interface{}{
		"exit_code": "0",
		"run_for":   "10s",
	}

	logger := testLogger()
	conf := config.DefaultConfig()
	conf.Node = mock.Node()
	conf.StateDir = os.TempDir()
	conf.AllocDir = os.TempDir()
	tmp, err := ioutil.TempFile("", "state-db")
	if err != nil {
		t.Fatalf("error creating state db file: %v", err)
	}
	db, err := bolt.Open(tmp.Name(), 0600, nil)
	if err != nil {
		t.Fatalf("error creating state db: %v", err)
	}

	if err := os.MkdirAll(filepath.Join(conf.StateDir, "alloc", alloc.ID), 0777); err != nil {
		t.Fatalf("error creating state dir: %v", err)
	}
	statePath := filepath.Join(conf.StateDir, "alloc", alloc.ID, "state.json")
	w, err := os.Create(statePath)
	if err != nil {
		t.Fatalf("error creating state file: %v", err)
	}
	tmplctx := &struct {
		AllocID  string
		AllocDir string
	}{alloc.ID, conf.AllocDir}
	err = template.Must(template.New("test_state").Parse(`{
  "Version": "0.5.1",
  "Alloc": {
    "ID": "{{ .AllocID }}",
    "Name": "example",
    "JobID": "example",
    "Job": {
      "ID": "example",
      "Name": "example",
      "Type": "batch",
      "TaskGroups": [
        {
          "Name": "example",
          "Tasks": [
            {
              "Name": "example",
              "Driver": "mock",
              "Config": {
                "exit_code": "0",
                "run_for": "10s"
              }
            }
          ]
        }
      ]
    },
    "TaskGroup": "example",
    "DesiredStatus": "run",
    "ClientStatus": "running",
    "TaskStates": {
      "example": {
        "State": "running",
        "Failed": false,
        "Events": []
      }
    }
  },
  "Context": {
    "AllocDir": {
      "AllocDir": "{{ .AllocDir }}/{{ .AllocID }}",
      "SharedDir": "{{ .AllocDir }}/{{ .AllocID }}/alloc",
      "TaskDirs": {
        "example": "{{ .AllocDir }}/{{ .AllocID }}/example"
      }
    },
    "AllocID": "{{ .AllocID }}"
  }
}`)).Execute(w, tmplctx)
	if err != nil {
		t.Fatalf("error writing state file: %v", err)
	}
	w.Close()

	upd := &MockAllocStateUpdater{}
	*alloc.Job.LookupTaskGroup(alloc.TaskGroup).RestartPolicy = structs.RestartPolicy{Attempts: 0}
	alloc.Job.Type = structs.JobTypeBatch
	vclient := vaultclient.NewMockVaultClient()
	cclient := newMockConsulServiceClient()
	ar := NewAllocRunner(logger, conf, db, upd.Update, alloc, vclient, cclient)
	defer ar.Destroy()

	// RestoreState should fail on the task state since we only test the
	// alloc state restoring.
	err = ar.RestoreState()
	if err == nil {
		t.Fatal("expected error restoring Task state")
	}
	merr, ok := err.(*multierror.Error)
	if !ok {
		t.Fatalf("expected RestoreState to return a multierror but found: %T -> %v", err, err)
	}
	if len(merr.Errors) != 1 {
		t.Fatalf("expected exactly 1 error from RestoreState but found: %d: %v", len(merr.Errors), err)
	}
	if expected := "failed to get task bucket"; !strings.Contains(merr.Errors[0].Error(), expected) {
		t.Fatalf("expected %q but got: %q", expected, merr.Errors[0].Error())
	}

	if err := ar.SaveState(); err != nil {
		t.Fatalf("error saving new state: %v", err)
	}
}

func TestAllocRunner_TaskFailed_KillTG(t *testing.T) {
	t.Parallel()
	upd, ar := testAllocRunner(false)

	// Create two tasks in the task group
	task := ar.alloc.Job.TaskGroups[0].Tasks[0]
	task.Driver = "mock_driver"
	task.KillTimeout = 10 * time.Millisecond
	task.Config = map[string]interface{}{
		"run_for": "10s",
	}

	task2 := ar.alloc.Job.TaskGroups[0].Tasks[0].Copy()
	task2.Name = "task 2"
	task2.Driver = "mock_driver"
	task2.Config = map[string]interface{}{
		"start_error": "fail task please",
	}
	ar.alloc.Job.TaskGroups[0].Tasks = append(ar.alloc.Job.TaskGroups[0].Tasks, task2)
	ar.alloc.TaskResources[task2.Name] = task2.Resources
	go ar.Run()
	defer ar.Destroy()

	testutil.WaitForResult(func() (bool, error) {
		_, last := upd.Last()
		if last == nil {
			return false, fmt.Errorf("No updates")
		}
		if last.ClientStatus != structs.AllocClientStatusFailed {
			return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusFailed)
		}

		// Task One should be killed
		state1 := last.TaskStates[task.Name]
		if state1.State != structs.TaskStateDead {
			return false, fmt.Errorf("got state %v; want %v", state1.State, structs.TaskStateDead)
		}
		if len(state1.Events) < 2 {
			// At least have a received and destroyed
			return false, fmt.Errorf("Unexpected number of events")
		}

		// Look for the sibling-failed event on task one
		found := false
		for _, e := range state1.Events {
			if e.Type == structs.TaskSiblingFailed {
				found = true
			}
		}

		if !found {
			return false, fmt.Errorf("Did not find event %v", structs.TaskSiblingFailed)
		}

		// Task Two should be failed
		state2 := last.TaskStates[task2.Name]
		if state2.State != structs.TaskStateDead {
			return false, fmt.Errorf("got state %v; want %v", state2.State, structs.TaskStateDead)
		}
		if !state2.Failed {
			return false, fmt.Errorf("task2 should have failed")
		}

		return true, nil
	}, func(err error) {
		t.Fatalf("err: %v", err)
	})
}

func TestAllocRunner_TaskLeader_KillTG(t *testing.T) {
	t.Parallel()
	upd, ar := testAllocRunner(false)

	// Create two tasks in the task group
	task := ar.alloc.Job.TaskGroups[0].Tasks[0]
	task.Driver = "mock_driver"
	task.KillTimeout = 10 * time.Millisecond
	task.Config = map[string]interface{}{
		"run_for": "10s",
	}

	task2 := ar.alloc.Job.TaskGroups[0].Tasks[0].Copy()
	task2.Name = "task 2"
	task2.Driver = "mock_driver"
	task2.Leader = true
	task2.Config = map[string]interface{}{
		"run_for": "1s",
	}
	ar.alloc.Job.TaskGroups[0].Tasks = append(ar.alloc.Job.TaskGroups[0].Tasks, task2)
	ar.alloc.TaskResources[task2.Name] = task2.Resources
	go ar.Run()
	defer ar.Destroy()

	testutil.WaitForResult(func() (bool, error) {
		_, last := upd.Last()
		if last == nil {
			return false, fmt.Errorf("No updates")
		}
		if last.ClientStatus != structs.AllocClientStatusComplete {
			return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete)
		}

		// Task One should be killed
		state1 := last.TaskStates[task.Name]
		if state1.State != structs.TaskStateDead {
			return false, fmt.Errorf("got state %v; want %v", state1.State, structs.TaskStateDead)
		}
		if state1.FinishedAt.IsZero() || state1.StartedAt.IsZero() {
			return false, fmt.Errorf("expected to have a start and finish time")
		}
		if len(state1.Events) < 2 {
			// At least have a received and destroyed
			return false, fmt.Errorf("Unexpected number of events")
		}

		// Look for the leader-dead event on task one
		found := false
		for _, e := range state1.Events {
			if e.Type == structs.TaskLeaderDead {
				found = true
			}
		}

		if !found {
			return false, fmt.Errorf("Did not find event %v", structs.TaskLeaderDead)
		}

		// Task Two should be dead
		state2 := last.TaskStates[task2.Name]
		if state2.State != structs.TaskStateDead {
			return false, fmt.Errorf("got state %v; want %v", state2.State, structs.TaskStateDead)
		}
		if state2.FinishedAt.IsZero() || state2.StartedAt.IsZero() {
			return false, fmt.Errorf("expected to have a start and finish time")
		}

		return true, nil
	}, func(err error) {
		t.Fatalf("err: %v", err)
	})
}

// TestAllocRunner_TaskLeader_StopTG asserts that when stopping a task group
// with a leader the leader is stopped before other tasks.
func TestAllocRunner_TaskLeader_StopTG(t *testing.T) {
	t.Parallel()
	upd, ar := testAllocRunner(false)

	// Create 3 tasks in the task group
	task := ar.alloc.Job.TaskGroups[0].Tasks[0]
	task.Name = "follower1"
	task.Driver = "mock_driver"
	task.KillTimeout = 10 * time.Millisecond
	task.Config = map[string]interface{}{
		"run_for": "10s",
	}

	task2 := ar.alloc.Job.TaskGroups[0].Tasks[0].Copy()
	task2.Name = "leader"
	task2.Driver = "mock_driver"
	task2.Leader = true
	task2.KillTimeout = 10 * time.Millisecond
	task2.Config = map[string]interface{}{
		"run_for": "10s",
	}

	task3 := ar.alloc.Job.TaskGroups[0].Tasks[0].Copy()
	task3.Name = "follower2"
	task3.Driver = "mock_driver"
	task3.KillTimeout = 10 * time.Millisecond
	task3.Config = map[string]interface{}{
		"run_for": "10s",
	}
	ar.alloc.Job.TaskGroups[0].Tasks = append(ar.alloc.Job.TaskGroups[0].Tasks, task2, task3)
	ar.alloc.TaskResources[task2.Name] = task2.Resources
	defer ar.Destroy()

	go ar.Run()

	// Wait for tasks to start
	oldCount, last := upd.Last()
	testutil.WaitForResult(func() (bool, error) {
		oldCount, last = upd.Last()
		if last == nil {
			return false, fmt.Errorf("No updates")
		}
		if n := len(last.TaskStates); n != 3 {
			return false, fmt.Errorf("Not enough task states (want: 3; found %d)", n)
		}
		for name, state := range last.TaskStates {
			if state.State != structs.TaskStateRunning {
				return false, fmt.Errorf("Task %q is not running yet (it's %q)", name, state.State)
			}
		}
		return true, nil
	}, func(err error) {
		t.Fatalf("err: %v", err)
	})

	// Stop alloc
	update := ar.Alloc()
	update.DesiredStatus = structs.AllocDesiredStatusStop
	ar.Update(update)

	// Wait for tasks to stop
	testutil.WaitForResult(func() (bool, error) {
		newCount, last := upd.Last()
		if newCount == oldCount {
			return false, fmt.Errorf("no new updates (count: %d)", newCount)
		}
		if last.TaskStates["leader"].FinishedAt.UnixNano() >= last.TaskStates["follower1"].FinishedAt.UnixNano() {
			return false, fmt.Errorf("expected leader to finish before follower1: %s >= %s",
				last.TaskStates["leader"].FinishedAt, last.TaskStates["follower1"].FinishedAt)
		}
		if last.TaskStates["leader"].FinishedAt.UnixNano() >= last.TaskStates["follower2"].FinishedAt.UnixNano() {
			return false, fmt.Errorf("expected leader to finish before follower2: %s >= %s",
				last.TaskStates["leader"].FinishedAt, last.TaskStates["follower2"].FinishedAt)
		}
		return true, nil
	}, func(err error) {
		count, last := upd.Last()
		t.Logf("Updates: %d", count)
		for name, state := range last.TaskStates {
			t.Logf("%s: %s", name, state.State)
		}
		t.Fatalf("err: %v", err)
	})
}

func TestAllocRunner_MoveAllocDir(t *testing.T) {
	t.Parallel()
	// Create an alloc runner
	alloc := mock.Alloc()
	task := alloc.Job.TaskGroups[0].Tasks[0]
	task.Driver = "mock_driver"
	task.Config = map[string]interface{}{
		"run_for": "1s",
	}
	upd, ar := testAllocRunnerFromAlloc(alloc, false)
	go ar.Run()
	defer ar.Destroy()

	testutil.WaitForResult(func() (bool, error) {
		_, last := upd.Last()
		if last == nil {
			return false, fmt.Errorf("No updates")
		}
		if last.ClientStatus != structs.AllocClientStatusComplete {
			return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete)
		}
		return true, nil
	}, func(err error) {
		t.Fatalf("err: %v", err)
	})

	// Write some data in data dir and task dir of the alloc
	dataFile := filepath.Join(ar.allocDir.SharedDir, "data", "data_file")
	ioutil.WriteFile(dataFile, []byte("hello world"), os.ModePerm)
	taskDir := ar.allocDir.TaskDirs[task.Name]
	taskLocalFile := filepath.Join(taskDir.LocalDir, "local_file")
	ioutil.WriteFile(taskLocalFile, []byte("good bye world"), os.ModePerm)

	// Create another alloc runner
	alloc1 := mock.Alloc()
	task = alloc1.Job.TaskGroups[0].Tasks[0]
	task.Driver = "mock_driver"
	task.Config = map[string]interface{}{
		"run_for": "1s",
	}
	upd1, ar1 := testAllocRunnerFromAlloc(alloc1, false)
	ar1.SetPreviousAllocDir(ar.allocDir)
	go ar1.Run()
	defer ar1.Destroy()

	testutil.WaitForResult(func() (bool, error) {
		_, last := upd1.Last()
		if last == nil {
			return false, fmt.Errorf("No updates")
		}
		if last.ClientStatus != structs.AllocClientStatusComplete {
			return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete)
		}
		return true, nil
	}, func(err error) {
		t.Fatalf("err: %v", err)
	})

	// Ensure that data from ar was moved to ar1
	taskDir = ar1.allocDir.TaskDirs[task.Name]
	taskLocalFile = filepath.Join(taskDir.LocalDir, "local_file")
	if fileInfo, _ := os.Stat(taskLocalFile); fileInfo == nil {
		t.Fatalf("file %v not found", taskLocalFile)
	}

	dataFile = filepath.Join(ar1.allocDir.SharedDir, "data", "data_file")
	if fileInfo, _ := os.Stat(dataFile); fileInfo == nil {
		t.Fatalf("file %v not found", dataFile)
	}
}