github.com/iqoqo/nomad@v0.11.3-0.20200911112621-d7021c74d101/client/allocrunner/taskrunner/logmon_hook_unix_test.go (about) 1 // +build !windows 2 3 package taskrunner 4 5 import ( 6 "context" 7 "encoding/json" 8 "fmt" 9 "io/ioutil" 10 "os" 11 "syscall" 12 "testing" 13 "time" 14 15 "github.com/hashicorp/nomad/client/allocrunner/interfaces" 16 "github.com/hashicorp/nomad/helper/testlog" 17 "github.com/hashicorp/nomad/nomad/mock" 18 "github.com/hashicorp/nomad/testutil" 19 "github.com/shirou/gopsutil/process" 20 "github.com/stretchr/testify/require" 21 ) 22 23 // TestTaskRunner_LogmonHook_StartCrashStop simulates logmon crashing while the 24 // Nomad client is restarting and asserts failing to reattach to logmon causes 25 // nomad to spawn a new logmon. 26 func TestTaskRunner_LogmonHook_StartCrashStop(t *testing.T) { 27 t.Parallel() 28 29 alloc := mock.BatchAlloc() 30 task := alloc.Job.TaskGroups[0].Tasks[0] 31 32 dir, err := ioutil.TempDir("", "nomadtest") 33 require.NoError(t, err) 34 defer func() { 35 require.NoError(t, os.RemoveAll(dir)) 36 }() 37 38 hookConf := newLogMonHookConfig(task.Name, dir) 39 runner := &TaskRunner{logmonHookConfig: hookConf} 40 hook := newLogMonHook(runner, testlog.HCLogger(t)) 41 42 req := interfaces.TaskPrestartRequest{ 43 Task: task, 44 } 45 resp := interfaces.TaskPrestartResponse{} 46 47 // First start 48 require.NoError(t, hook.Prestart(context.Background(), &req, &resp)) 49 defer hook.Stop(context.Background(), nil, nil) 50 51 origState := resp.State 52 origHookData := resp.State[logmonReattachKey] 53 require.NotEmpty(t, origHookData) 54 55 // Pluck PID out of reattach synthesize a crash 56 reattach := struct { 57 Pid int 58 }{} 59 require.NoError(t, json.Unmarshal([]byte(origHookData), &reattach)) 60 pid := reattach.Pid 61 require.NotZero(t, pid) 62 63 proc, _ := os.FindProcess(pid) 64 65 // Assert logmon is running 66 require.NoError(t, proc.Signal(syscall.Signal(0))) 67 68 // Kill it 69 require.NoError(t, proc.Signal(os.Kill)) 70 71 // Since signals are asynchronous wait for the process to die 72 testutil.WaitForResult(func() (bool, error) { 73 err := proc.Signal(syscall.Signal(0)) 74 return err != nil, fmt.Errorf("pid %d still running", pid) 75 }, func(err error) { 76 require.NoError(t, err) 77 }) 78 79 // Running prestart again should return a recoverable error with no 80 // reattach config to cause the task to be restarted with a new logmon. 81 req.PreviousState = map[string]string{ 82 logmonReattachKey: origHookData, 83 } 84 resp = interfaces.TaskPrestartResponse{} 85 err = hook.Prestart(context.Background(), &req, &resp) 86 require.NoError(t, err) 87 require.NotEqual(t, origState, resp.State) 88 89 // Running stop should shutdown logmon 90 require.NoError(t, hook.Stop(context.Background(), nil, nil)) 91 } 92 93 // TestTaskRunner_LogmonHook_ShutdownMidStart simulates logmon crashing while the 94 // Nomad client is calling Start() and asserts that we recover and spawn a new logmon. 95 func TestTaskRunner_LogmonHook_ShutdownMidStart(t *testing.T) { 96 t.Parallel() 97 98 alloc := mock.BatchAlloc() 99 task := alloc.Job.TaskGroups[0].Tasks[0] 100 101 dir, err := ioutil.TempDir("", "nomadtest") 102 require.NoError(t, err) 103 defer func() { 104 require.NoError(t, os.RemoveAll(dir)) 105 }() 106 107 hookConf := newLogMonHookConfig(task.Name, dir) 108 runner := &TaskRunner{logmonHookConfig: hookConf} 109 hook := newLogMonHook(runner, testlog.HCLogger(t)) 110 111 req := interfaces.TaskPrestartRequest{ 112 Task: task, 113 } 114 resp := interfaces.TaskPrestartResponse{} 115 116 // First start 117 require.NoError(t, hook.Prestart(context.Background(), &req, &resp)) 118 defer hook.Stop(context.Background(), nil, nil) 119 120 origState := resp.State 121 origHookData := resp.State[logmonReattachKey] 122 require.NotEmpty(t, origHookData) 123 124 // Pluck PID out of reattach synthesize a crash 125 reattach := struct { 126 Pid int 127 }{} 128 require.NoError(t, json.Unmarshal([]byte(origHookData), &reattach)) 129 pid := reattach.Pid 130 require.NotZero(t, pid) 131 132 proc, err := process.NewProcess(int32(pid)) 133 require.NoError(t, err) 134 135 // Assert logmon is running 136 require.NoError(t, proc.SendSignal(syscall.Signal(0))) 137 138 // SIGSTOP would freeze process without it being considered 139 // exited; so this causes process to be non-exited at beginning of call 140 // then we kill process while Start call is running 141 require.NoError(t, proc.SendSignal(syscall.SIGSTOP)) 142 testutil.WaitForResult(func() (bool, error) { 143 status, err := proc.Status() 144 if err != nil { 145 return false, err 146 } 147 148 if status != "T" && status != "T+" { 149 return false, fmt.Errorf("process is not asleep yet: %v", status) 150 } 151 152 return true, nil 153 }, func(err error) { 154 require.NoError(t, err) 155 }) 156 157 go func() { 158 time.Sleep(2 * time.Second) 159 160 proc.SendSignal(syscall.SIGCONT) 161 proc.Kill() 162 }() 163 164 req.PreviousState = map[string]string{ 165 logmonReattachKey: origHookData, 166 } 167 168 initLogmon, initClient := hook.logmon, hook.logmonPluginClient 169 170 resp = interfaces.TaskPrestartResponse{} 171 err = hook.Prestart(context.Background(), &req, &resp) 172 require.NoError(t, err) 173 require.NotEqual(t, origState, resp.State) 174 175 // assert that we got a new client and logmon 176 require.True(t, initLogmon != hook.logmon) 177 require.True(t, initClient != hook.logmonPluginClient) 178 }