github.com/smithx10/nomad@v0.9.1-rc1/client/allocrunner/alloc_runner_unix_test.go (about) 1 // +build !windows 2 3 package allocrunner 4 5 import ( 6 "encoding/json" 7 "fmt" 8 "os" 9 "syscall" 10 "testing" 11 "time" 12 13 "github.com/hashicorp/nomad/client/consul" 14 "github.com/hashicorp/nomad/client/state" 15 "github.com/hashicorp/nomad/nomad/mock" 16 "github.com/hashicorp/nomad/nomad/structs" 17 "github.com/hashicorp/nomad/testutil" 18 "github.com/stretchr/testify/require" 19 ) 20 21 // TestAllocRunner_Restore_RunningTerminal asserts that restoring a terminal 22 // alloc with a running task properly kills the running the task. This is meant 23 // to simulate a Nomad agent crash after receiving an updated alloc with 24 // DesiredStatus=Stop, persisting the update, but crashing before terminating 25 // the task. 26 func TestAllocRunner_Restore_RunningTerminal(t *testing.T) { 27 t.Parallel() 28 29 // 1. Run task 30 // 2. Shutdown alloc runner 31 // 3. Set alloc.desiredstatus=false 32 // 4. Start new alloc runner 33 // 5. Assert task and logmon are cleaned up 34 35 alloc := mock.Alloc() 36 task := alloc.Job.TaskGroups[0].Tasks[0] 37 task.Driver = "mock_driver" 38 task.Config = map[string]interface{}{ 39 "run_for": "1h", 40 } 41 42 conf, cleanup := testAllocRunnerConfig(t, alloc.Copy()) 43 defer cleanup() 44 45 // Maintain state for subsequent run 46 conf.StateDB = state.NewMemDB(conf.Logger) 47 48 // Start and wait for task to be running 49 ar, err := NewAllocRunner(conf) 50 require.NoError(t, err) 51 go ar.Run() 52 defer destroy(ar) 53 54 testutil.WaitForResult(func() (bool, error) { 55 s := ar.AllocState() 56 return s.ClientStatus == structs.AllocClientStatusRunning, fmt.Errorf("expected running, got %s", s.ClientStatus) 57 }, func(err error) { 58 require.NoError(t, err) 59 }) 60 61 // Shutdown the AR and manually change the state to mimic a crash where 62 // a stopped alloc update is received, but Nomad crashes before 63 // stopping the alloc. 64 ar.Shutdown() 65 select { 66 case <-ar.ShutdownCh(): 67 case <-time.After(30 * time.Second): 68 require.Fail(t, "AR took too long to exit") 69 } 70 71 // Assert logmon is still running. This is a super ugly hack that pulls 72 // logmon's PID out of its reattach config, but it does properly ensure 73 // logmon gets cleaned up. 74 ls, _, err := conf.StateDB.GetTaskRunnerState(alloc.ID, task.Name) 75 require.NoError(t, err) 76 require.NotNil(t, ls) 77 78 logmonReattach := struct { 79 Pid int 80 }{} 81 err = json.Unmarshal([]byte(ls.Hooks["logmon"].Data["reattach_config"]), &logmonReattach) 82 require.NoError(t, err) 83 84 logmonProc, _ := os.FindProcess(logmonReattach.Pid) 85 require.NoError(t, logmonProc.Signal(syscall.Signal(0))) 86 87 // Fake alloc terminal during Restore() 88 alloc.DesiredStatus = structs.AllocDesiredStatusStop 89 alloc.ModifyIndex++ 90 alloc.AllocModifyIndex++ 91 92 // Start a new alloc runner and assert it gets stopped 93 conf2, cleanup2 := testAllocRunnerConfig(t, alloc) 94 defer cleanup2() 95 96 // Use original statedb to maintain hook state 97 conf2.StateDB = conf.StateDB 98 99 // Restore, start, and wait for task to be killed 100 ar2, err := NewAllocRunner(conf2) 101 require.NoError(t, err) 102 103 require.NoError(t, ar2.Restore()) 104 105 go ar2.Run() 106 defer destroy(ar2) 107 108 select { 109 case <-ar2.WaitCh(): 110 case <-time.After(30 * time.Second): 111 } 112 113 // Assert logmon was cleaned up 114 require.Error(t, logmonProc.Signal(syscall.Signal(0))) 115 116 // Assert consul was cleaned up: 117 // 2 removals (canary+noncanary) during prekill 118 // 2 removals (canary+noncanary) during exited 119 consulOps := conf2.Consul.(*consul.MockConsulServiceClient).GetOps() 120 require.Len(t, consulOps, 4) 121 for _, op := range consulOps { 122 require.Equal(t, "remove", op.Op) 123 } 124 125 // Assert terminated task event was emitted 126 events := ar2.AllocState().TaskStates[task.Name].Events 127 require.Len(t, events, 4) 128 require.Equal(t, events[0].Type, structs.TaskReceived) 129 require.Equal(t, events[1].Type, structs.TaskSetup) 130 require.Equal(t, events[2].Type, structs.TaskStarted) 131 require.Equal(t, events[3].Type, structs.TaskTerminated) 132 }