github.com/iqoqo/nomad@v0.11.3-0.20200911112621-d7021c74d101/client/allocrunner/taskrunner/logmon_hook_unix_test.go (about)

     1  // +build !windows
     2  
     3  package taskrunner
     4  
     5  import (
     6  	"context"
     7  	"encoding/json"
     8  	"fmt"
     9  	"io/ioutil"
    10  	"os"
    11  	"syscall"
    12  	"testing"
    13  	"time"
    14  
    15  	"github.com/hashicorp/nomad/client/allocrunner/interfaces"
    16  	"github.com/hashicorp/nomad/helper/testlog"
    17  	"github.com/hashicorp/nomad/nomad/mock"
    18  	"github.com/hashicorp/nomad/testutil"
    19  	"github.com/shirou/gopsutil/process"
    20  	"github.com/stretchr/testify/require"
    21  )
    22  
    23  // TestTaskRunner_LogmonHook_StartCrashStop simulates logmon crashing while the
    24  // Nomad client is restarting and asserts failing to reattach to logmon causes
    25  // nomad to spawn a new logmon.
    26  func TestTaskRunner_LogmonHook_StartCrashStop(t *testing.T) {
    27  	t.Parallel()
    28  
    29  	alloc := mock.BatchAlloc()
    30  	task := alloc.Job.TaskGroups[0].Tasks[0]
    31  
    32  	dir, err := ioutil.TempDir("", "nomadtest")
    33  	require.NoError(t, err)
    34  	defer func() {
    35  		require.NoError(t, os.RemoveAll(dir))
    36  	}()
    37  
    38  	hookConf := newLogMonHookConfig(task.Name, dir)
    39  	runner := &TaskRunner{logmonHookConfig: hookConf}
    40  	hook := newLogMonHook(runner, testlog.HCLogger(t))
    41  
    42  	req := interfaces.TaskPrestartRequest{
    43  		Task: task,
    44  	}
    45  	resp := interfaces.TaskPrestartResponse{}
    46  
    47  	// First start
    48  	require.NoError(t, hook.Prestart(context.Background(), &req, &resp))
    49  	defer hook.Stop(context.Background(), nil, nil)
    50  
    51  	origState := resp.State
    52  	origHookData := resp.State[logmonReattachKey]
    53  	require.NotEmpty(t, origHookData)
    54  
    55  	// Pluck PID out of reattach synthesize a crash
    56  	reattach := struct {
    57  		Pid int
    58  	}{}
    59  	require.NoError(t, json.Unmarshal([]byte(origHookData), &reattach))
    60  	pid := reattach.Pid
    61  	require.NotZero(t, pid)
    62  
    63  	proc, _ := os.FindProcess(pid)
    64  
    65  	// Assert logmon is running
    66  	require.NoError(t, proc.Signal(syscall.Signal(0)))
    67  
    68  	// Kill it
    69  	require.NoError(t, proc.Signal(os.Kill))
    70  
    71  	// Since signals are asynchronous wait for the process to die
    72  	testutil.WaitForResult(func() (bool, error) {
    73  		err := proc.Signal(syscall.Signal(0))
    74  		return err != nil, fmt.Errorf("pid %d still running", pid)
    75  	}, func(err error) {
    76  		require.NoError(t, err)
    77  	})
    78  
    79  	// Running prestart again should return a recoverable error with no
    80  	// reattach config to cause the task to be restarted with a new logmon.
    81  	req.PreviousState = map[string]string{
    82  		logmonReattachKey: origHookData,
    83  	}
    84  	resp = interfaces.TaskPrestartResponse{}
    85  	err = hook.Prestart(context.Background(), &req, &resp)
    86  	require.NoError(t, err)
    87  	require.NotEqual(t, origState, resp.State)
    88  
    89  	// Running stop should shutdown logmon
    90  	require.NoError(t, hook.Stop(context.Background(), nil, nil))
    91  }
    92  
    93  // TestTaskRunner_LogmonHook_ShutdownMidStart simulates logmon crashing while the
    94  // Nomad client is calling Start() and asserts that we recover and spawn a new logmon.
    95  func TestTaskRunner_LogmonHook_ShutdownMidStart(t *testing.T) {
    96  	t.Parallel()
    97  
    98  	alloc := mock.BatchAlloc()
    99  	task := alloc.Job.TaskGroups[0].Tasks[0]
   100  
   101  	dir, err := ioutil.TempDir("", "nomadtest")
   102  	require.NoError(t, err)
   103  	defer func() {
   104  		require.NoError(t, os.RemoveAll(dir))
   105  	}()
   106  
   107  	hookConf := newLogMonHookConfig(task.Name, dir)
   108  	runner := &TaskRunner{logmonHookConfig: hookConf}
   109  	hook := newLogMonHook(runner, testlog.HCLogger(t))
   110  
   111  	req := interfaces.TaskPrestartRequest{
   112  		Task: task,
   113  	}
   114  	resp := interfaces.TaskPrestartResponse{}
   115  
   116  	// First start
   117  	require.NoError(t, hook.Prestart(context.Background(), &req, &resp))
   118  	defer hook.Stop(context.Background(), nil, nil)
   119  
   120  	origState := resp.State
   121  	origHookData := resp.State[logmonReattachKey]
   122  	require.NotEmpty(t, origHookData)
   123  
   124  	// Pluck PID out of reattach synthesize a crash
   125  	reattach := struct {
   126  		Pid int
   127  	}{}
   128  	require.NoError(t, json.Unmarshal([]byte(origHookData), &reattach))
   129  	pid := reattach.Pid
   130  	require.NotZero(t, pid)
   131  
   132  	proc, err := process.NewProcess(int32(pid))
   133  	require.NoError(t, err)
   134  
   135  	// Assert logmon is running
   136  	require.NoError(t, proc.SendSignal(syscall.Signal(0)))
   137  
   138  	// SIGSTOP would freeze process without it being considered
   139  	// exited; so this causes process to be non-exited at beginning of call
   140  	// then we kill process while Start call is running
   141  	require.NoError(t, proc.SendSignal(syscall.SIGSTOP))
   142  	testutil.WaitForResult(func() (bool, error) {
   143  		status, err := proc.Status()
   144  		if err != nil {
   145  			return false, err
   146  		}
   147  
   148  		if status != "T" && status != "T+" {
   149  			return false, fmt.Errorf("process is not asleep yet: %v", status)
   150  		}
   151  
   152  		return true, nil
   153  	}, func(err error) {
   154  		require.NoError(t, err)
   155  	})
   156  
   157  	go func() {
   158  		time.Sleep(2 * time.Second)
   159  
   160  		proc.SendSignal(syscall.SIGCONT)
   161  		proc.Kill()
   162  	}()
   163  
   164  	req.PreviousState = map[string]string{
   165  		logmonReattachKey: origHookData,
   166  	}
   167  
   168  	initLogmon, initClient := hook.logmon, hook.logmonPluginClient
   169  
   170  	resp = interfaces.TaskPrestartResponse{}
   171  	err = hook.Prestart(context.Background(), &req, &resp)
   172  	require.NoError(t, err)
   173  	require.NotEqual(t, origState, resp.State)
   174  
   175  	// assert that we got a new client and logmon
   176  	require.True(t, initLogmon != hook.logmon)
   177  	require.True(t, initClient != hook.logmonPluginClient)
   178  }