github.com/anth0d/nomad@v0.0.0-20221214183521-ae3a0a2cad06/client/allocrunner/taskrunner/logmon_hook_unix_test.go (about) 1 //go:build !windows 2 // +build !windows 3 4 package taskrunner 5 6 import ( 7 "context" 8 "encoding/json" 9 "fmt" 10 "os" 11 "syscall" 12 "testing" 13 "time" 14 15 "github.com/hashicorp/nomad/ci" 16 "github.com/hashicorp/nomad/client/allocrunner/interfaces" 17 "github.com/hashicorp/nomad/helper/testlog" 18 "github.com/hashicorp/nomad/nomad/mock" 19 "github.com/hashicorp/nomad/testutil" 20 "github.com/shirou/gopsutil/v3/process" 21 "github.com/stretchr/testify/require" 22 ) 23 24 // TestTaskRunner_LogmonHook_StartCrashStop simulates logmon crashing while the 25 // Nomad client is restarting and asserts failing to reattach to logmon causes 26 // nomad to spawn a new logmon. 27 func TestTaskRunner_LogmonHook_StartCrashStop(t *testing.T) { 28 ci.Parallel(t) 29 30 alloc := mock.BatchAlloc() 31 task := alloc.Job.TaskGroups[0].Tasks[0] 32 33 dir := t.TempDir() 34 35 hookConf := newLogMonHookConfig(task.Name, dir) 36 runner := &TaskRunner{logmonHookConfig: hookConf} 37 hook := newLogMonHook(runner, testlog.HCLogger(t)) 38 39 req := interfaces.TaskPrestartRequest{ 40 Task: task, 41 } 42 resp := interfaces.TaskPrestartResponse{} 43 44 // First start 45 require.NoError(t, hook.Prestart(context.Background(), &req, &resp)) 46 defer hook.Stop(context.Background(), nil, nil) 47 48 origState := resp.State 49 origHookData := resp.State[logmonReattachKey] 50 require.NotEmpty(t, origHookData) 51 52 // Pluck PID out of reattach synthesize a crash 53 reattach := struct { 54 Pid int 55 }{} 56 require.NoError(t, json.Unmarshal([]byte(origHookData), &reattach)) 57 pid := reattach.Pid 58 require.NotZero(t, pid) 59 60 proc, _ := os.FindProcess(pid) 61 62 // Assert logmon is running 63 require.NoError(t, proc.Signal(syscall.Signal(0))) 64 65 // Kill it 66 require.NoError(t, proc.Signal(os.Kill)) 67 68 // Since signals are asynchronous wait for the process to die 69 testutil.WaitForResult(func() (bool, error) { 70 err := proc.Signal(syscall.Signal(0)) 71 return err != nil, fmt.Errorf("pid %d still running", pid) 72 }, func(err error) { 73 require.NoError(t, err) 74 }) 75 76 // Running prestart again should return a recoverable error with no 77 // reattach config to cause the task to be restarted with a new logmon. 78 req.PreviousState = map[string]string{ 79 logmonReattachKey: origHookData, 80 } 81 resp = interfaces.TaskPrestartResponse{} 82 err := hook.Prestart(context.Background(), &req, &resp) 83 require.NoError(t, err) 84 require.NotEqual(t, origState, resp.State) 85 86 // Running stop should shutdown logmon 87 require.NoError(t, hook.Stop(context.Background(), nil, nil)) 88 } 89 90 // TestTaskRunner_LogmonHook_ShutdownMidStart simulates logmon crashing while the 91 // Nomad client is calling Start() and asserts that we recover and spawn a new logmon. 92 func TestTaskRunner_LogmonHook_ShutdownMidStart(t *testing.T) { 93 ci.Parallel(t) 94 95 alloc := mock.BatchAlloc() 96 task := alloc.Job.TaskGroups[0].Tasks[0] 97 98 dir := t.TempDir() 99 100 hookConf := newLogMonHookConfig(task.Name, dir) 101 runner := &TaskRunner{logmonHookConfig: hookConf} 102 hook := newLogMonHook(runner, testlog.HCLogger(t)) 103 104 req := interfaces.TaskPrestartRequest{ 105 Task: task, 106 } 107 resp := interfaces.TaskPrestartResponse{} 108 109 // First start 110 require.NoError(t, hook.Prestart(context.Background(), &req, &resp)) 111 defer hook.Stop(context.Background(), nil, nil) 112 113 origState := resp.State 114 origHookData := resp.State[logmonReattachKey] 115 require.NotEmpty(t, origHookData) 116 117 // Pluck PID out of reattach synthesize a crash 118 reattach := struct { 119 Pid int 120 }{} 121 require.NoError(t, json.Unmarshal([]byte(origHookData), &reattach)) 122 pid := reattach.Pid 123 require.NotZero(t, pid) 124 125 proc, err := process.NewProcess(int32(pid)) 126 require.NoError(t, err) 127 128 // Assert logmon is running 129 require.NoError(t, proc.SendSignal(syscall.Signal(0))) 130 131 // SIGSTOP would freeze process without it being considered 132 // exited; so this causes process to be non-exited at beginning of call 133 // then we kill process while Start call is running 134 require.NoError(t, proc.SendSignal(syscall.SIGSTOP)) 135 testutil.WaitForResult(func() (bool, error) { 136 status, err := proc.Status() 137 if err != nil { 138 return false, err 139 } 140 if len(status) == 0 { 141 return false, fmt.Errorf("process status did not return value") 142 } 143 if status[0] != "stop" { 144 return false, fmt.Errorf("process is not stopped yet: %v", status) 145 } 146 147 return true, nil 148 }, func(err error) { 149 require.NoError(t, err) 150 }) 151 152 go func() { 153 time.Sleep(2 * time.Second) 154 155 proc.SendSignal(syscall.SIGCONT) 156 proc.Kill() 157 }() 158 159 req.PreviousState = map[string]string{ 160 logmonReattachKey: origHookData, 161 } 162 163 initLogmon, initClient := hook.logmon, hook.logmonPluginClient 164 165 resp = interfaces.TaskPrestartResponse{} 166 err = hook.Prestart(context.Background(), &req, &resp) 167 require.NoError(t, err) 168 require.NotEqual(t, origState, resp.State) 169 170 // assert that we got a new client and logmon 171 require.True(t, initLogmon != hook.logmon) 172 require.True(t, initClient != hook.logmonPluginClient) 173 }