github.com/bigcommerce/nomad@v0.9.3-bc/client/allocrunner/taskrunner/logmon_hook_unix_test.go (about) 1 // +build !windows 2 3 package taskrunner 4 5 import ( 6 "context" 7 "encoding/json" 8 "fmt" 9 "io/ioutil" 10 "os" 11 "syscall" 12 "testing" 13 "time" 14 15 "github.com/hashicorp/nomad/client/allocrunner/interfaces" 16 "github.com/hashicorp/nomad/helper/testlog" 17 "github.com/hashicorp/nomad/nomad/mock" 18 "github.com/hashicorp/nomad/testutil" 19 "github.com/shirou/gopsutil/process" 20 "github.com/stretchr/testify/require" 21 ) 22 23 // TestTaskRunner_LogmonHook_StartCrashStop simulates logmon crashing while the 24 // Nomad client is restarting and asserts failing to reattach to logmon causes 25 // nomad to spawn a new logmon. 26 func TestTaskRunner_LogmonHook_StartCrashStop(t *testing.T) { 27 t.Parallel() 28 29 alloc := mock.BatchAlloc() 30 task := alloc.Job.TaskGroups[0].Tasks[0] 31 32 dir, err := ioutil.TempDir("", "nomadtest") 33 require.NoError(t, err) 34 defer func() { 35 require.NoError(t, os.RemoveAll(dir)) 36 }() 37 38 hookConf := newLogMonHookConfig(task.Name, dir) 39 hook := newLogMonHook(hookConf, testlog.HCLogger(t)) 40 41 req := interfaces.TaskPrestartRequest{ 42 Task: task, 43 } 44 resp := interfaces.TaskPrestartResponse{} 45 46 // First start 47 require.NoError(t, hook.Prestart(context.Background(), &req, &resp)) 48 defer hook.Stop(context.Background(), nil, nil) 49 50 origState := resp.State 51 origHookData := resp.State[logmonReattachKey] 52 require.NotEmpty(t, origHookData) 53 54 // Pluck PID out of reattach synthesize a crash 55 reattach := struct { 56 Pid int 57 }{} 58 require.NoError(t, json.Unmarshal([]byte(origHookData), &reattach)) 59 pid := reattach.Pid 60 require.NotZero(t, pid) 61 62 proc, _ := os.FindProcess(pid) 63 64 // Assert logmon is running 65 require.NoError(t, proc.Signal(syscall.Signal(0))) 66 67 // Kill it 68 require.NoError(t, proc.Signal(os.Kill)) 69 70 // Since signals are asynchronous wait for the process to die 71 testutil.WaitForResult(func() (bool, error) { 72 err := proc.Signal(syscall.Signal(0)) 73 return err != nil, fmt.Errorf("pid %d still running", pid) 74 }, func(err error) { 75 require.NoError(t, err) 76 }) 77 78 // Running prestart again should return a recoverable error with no 79 // reattach config to cause the task to be restarted with a new logmon. 80 req.PreviousState = map[string]string{ 81 logmonReattachKey: origHookData, 82 } 83 resp = interfaces.TaskPrestartResponse{} 84 err = hook.Prestart(context.Background(), &req, &resp) 85 require.NoError(t, err) 86 require.NotEqual(t, origState, resp.State) 87 88 // Running stop should shutdown logmon 89 require.NoError(t, hook.Stop(context.Background(), nil, nil)) 90 } 91 92 // TestTaskRunner_LogmonHook_ShutdownMidStart simulates logmon crashing while the 93 // Nomad client is calling Start() and asserts that we recover and spawn a new logmon. 94 func TestTaskRunner_LogmonHook_ShutdownMidStart(t *testing.T) { 95 t.Parallel() 96 97 alloc := mock.BatchAlloc() 98 task := alloc.Job.TaskGroups[0].Tasks[0] 99 100 dir, err := ioutil.TempDir("", "nomadtest") 101 require.NoError(t, err) 102 defer func() { 103 require.NoError(t, os.RemoveAll(dir)) 104 }() 105 106 hookConf := newLogMonHookConfig(task.Name, dir) 107 hook := newLogMonHook(hookConf, testlog.HCLogger(t)) 108 109 req := interfaces.TaskPrestartRequest{ 110 Task: task, 111 } 112 resp := interfaces.TaskPrestartResponse{} 113 114 // First start 115 require.NoError(t, hook.Prestart(context.Background(), &req, &resp)) 116 defer hook.Stop(context.Background(), nil, nil) 117 118 origState := resp.State 119 origHookData := resp.State[logmonReattachKey] 120 require.NotEmpty(t, origHookData) 121 122 // Pluck PID out of reattach synthesize a crash 123 reattach := struct { 124 Pid int 125 }{} 126 require.NoError(t, json.Unmarshal([]byte(origHookData), &reattach)) 127 pid := reattach.Pid 128 require.NotZero(t, pid) 129 130 proc, err := process.NewProcess(int32(pid)) 131 require.NoError(t, err) 132 133 // Assert logmon is running 134 require.NoError(t, proc.SendSignal(syscall.Signal(0))) 135 136 // SIGSTOP would freeze process without it being considered 137 // exited; so this causes process to be non-exited at beginning of call 138 // then we kill process while Start call is running 139 require.NoError(t, proc.SendSignal(syscall.SIGSTOP)) 140 testutil.WaitForResult(func() (bool, error) { 141 status, err := proc.Status() 142 if err != nil { 143 return false, err 144 } 145 146 if status != "T" && status != "T+" { 147 return false, fmt.Errorf("process is not asleep yet: %v", status) 148 } 149 150 return true, nil 151 }, func(err error) { 152 require.NoError(t, err) 153 }) 154 155 go func() { 156 time.Sleep(2 * time.Second) 157 158 proc.SendSignal(syscall.SIGCONT) 159 proc.Kill() 160 }() 161 162 req.PreviousState = map[string]string{ 163 logmonReattachKey: origHookData, 164 } 165 166 initLogmon, initClient := hook.logmon, hook.logmonPluginClient 167 168 resp = interfaces.TaskPrestartResponse{} 169 err = hook.Prestart(context.Background(), &req, &resp) 170 require.NoError(t, err) 171 require.NotEqual(t, origState, resp.State) 172 173 // assert that we got a new client and logmon 174 require.True(t, initLogmon != hook.logmon) 175 require.True(t, initClient != hook.logmonPluginClient) 176 }