github.com/bigcommerce/nomad@v0.9.3-bc/client/allocrunner/taskrunner/logmon_hook_unix_test.go (about)

     1  // +build !windows
     2  
     3  package taskrunner
     4  
     5  import (
     6  	"context"
     7  	"encoding/json"
     8  	"fmt"
     9  	"io/ioutil"
    10  	"os"
    11  	"syscall"
    12  	"testing"
    13  	"time"
    14  
    15  	"github.com/hashicorp/nomad/client/allocrunner/interfaces"
    16  	"github.com/hashicorp/nomad/helper/testlog"
    17  	"github.com/hashicorp/nomad/nomad/mock"
    18  	"github.com/hashicorp/nomad/testutil"
    19  	"github.com/shirou/gopsutil/process"
    20  	"github.com/stretchr/testify/require"
    21  )
    22  
    23  // TestTaskRunner_LogmonHook_StartCrashStop simulates logmon crashing while the
    24  // Nomad client is restarting and asserts failing to reattach to logmon causes
    25  // nomad to spawn a new logmon.
    26  func TestTaskRunner_LogmonHook_StartCrashStop(t *testing.T) {
    27  	t.Parallel()
    28  
    29  	alloc := mock.BatchAlloc()
    30  	task := alloc.Job.TaskGroups[0].Tasks[0]
    31  
    32  	dir, err := ioutil.TempDir("", "nomadtest")
    33  	require.NoError(t, err)
    34  	defer func() {
    35  		require.NoError(t, os.RemoveAll(dir))
    36  	}()
    37  
    38  	hookConf := newLogMonHookConfig(task.Name, dir)
    39  	hook := newLogMonHook(hookConf, testlog.HCLogger(t))
    40  
    41  	req := interfaces.TaskPrestartRequest{
    42  		Task: task,
    43  	}
    44  	resp := interfaces.TaskPrestartResponse{}
    45  
    46  	// First start
    47  	require.NoError(t, hook.Prestart(context.Background(), &req, &resp))
    48  	defer hook.Stop(context.Background(), nil, nil)
    49  
    50  	origState := resp.State
    51  	origHookData := resp.State[logmonReattachKey]
    52  	require.NotEmpty(t, origHookData)
    53  
    54  	// Pluck PID out of reattach synthesize a crash
    55  	reattach := struct {
    56  		Pid int
    57  	}{}
    58  	require.NoError(t, json.Unmarshal([]byte(origHookData), &reattach))
    59  	pid := reattach.Pid
    60  	require.NotZero(t, pid)
    61  
    62  	proc, _ := os.FindProcess(pid)
    63  
    64  	// Assert logmon is running
    65  	require.NoError(t, proc.Signal(syscall.Signal(0)))
    66  
    67  	// Kill it
    68  	require.NoError(t, proc.Signal(os.Kill))
    69  
    70  	// Since signals are asynchronous wait for the process to die
    71  	testutil.WaitForResult(func() (bool, error) {
    72  		err := proc.Signal(syscall.Signal(0))
    73  		return err != nil, fmt.Errorf("pid %d still running", pid)
    74  	}, func(err error) {
    75  		require.NoError(t, err)
    76  	})
    77  
    78  	// Running prestart again should return a recoverable error with no
    79  	// reattach config to cause the task to be restarted with a new logmon.
    80  	req.PreviousState = map[string]string{
    81  		logmonReattachKey: origHookData,
    82  	}
    83  	resp = interfaces.TaskPrestartResponse{}
    84  	err = hook.Prestart(context.Background(), &req, &resp)
    85  	require.NoError(t, err)
    86  	require.NotEqual(t, origState, resp.State)
    87  
    88  	// Running stop should shutdown logmon
    89  	require.NoError(t, hook.Stop(context.Background(), nil, nil))
    90  }
    91  
    92  // TestTaskRunner_LogmonHook_ShutdownMidStart simulates logmon crashing while the
    93  // Nomad client is calling Start() and asserts that we recover and spawn a new logmon.
    94  func TestTaskRunner_LogmonHook_ShutdownMidStart(t *testing.T) {
    95  	t.Parallel()
    96  
    97  	alloc := mock.BatchAlloc()
    98  	task := alloc.Job.TaskGroups[0].Tasks[0]
    99  
   100  	dir, err := ioutil.TempDir("", "nomadtest")
   101  	require.NoError(t, err)
   102  	defer func() {
   103  		require.NoError(t, os.RemoveAll(dir))
   104  	}()
   105  
   106  	hookConf := newLogMonHookConfig(task.Name, dir)
   107  	hook := newLogMonHook(hookConf, testlog.HCLogger(t))
   108  
   109  	req := interfaces.TaskPrestartRequest{
   110  		Task: task,
   111  	}
   112  	resp := interfaces.TaskPrestartResponse{}
   113  
   114  	// First start
   115  	require.NoError(t, hook.Prestart(context.Background(), &req, &resp))
   116  	defer hook.Stop(context.Background(), nil, nil)
   117  
   118  	origState := resp.State
   119  	origHookData := resp.State[logmonReattachKey]
   120  	require.NotEmpty(t, origHookData)
   121  
   122  	// Pluck PID out of reattach synthesize a crash
   123  	reattach := struct {
   124  		Pid int
   125  	}{}
   126  	require.NoError(t, json.Unmarshal([]byte(origHookData), &reattach))
   127  	pid := reattach.Pid
   128  	require.NotZero(t, pid)
   129  
   130  	proc, err := process.NewProcess(int32(pid))
   131  	require.NoError(t, err)
   132  
   133  	// Assert logmon is running
   134  	require.NoError(t, proc.SendSignal(syscall.Signal(0)))
   135  
   136  	// SIGSTOP would freeze process without it being considered
   137  	// exited; so this causes process to be non-exited at beginning of call
   138  	// then we kill process while Start call is running
   139  	require.NoError(t, proc.SendSignal(syscall.SIGSTOP))
   140  	testutil.WaitForResult(func() (bool, error) {
   141  		status, err := proc.Status()
   142  		if err != nil {
   143  			return false, err
   144  		}
   145  
   146  		if status != "T" && status != "T+" {
   147  			return false, fmt.Errorf("process is not asleep yet: %v", status)
   148  		}
   149  
   150  		return true, nil
   151  	}, func(err error) {
   152  		require.NoError(t, err)
   153  	})
   154  
   155  	go func() {
   156  		time.Sleep(2 * time.Second)
   157  
   158  		proc.SendSignal(syscall.SIGCONT)
   159  		proc.Kill()
   160  	}()
   161  
   162  	req.PreviousState = map[string]string{
   163  		logmonReattachKey: origHookData,
   164  	}
   165  
   166  	initLogmon, initClient := hook.logmon, hook.logmonPluginClient
   167  
   168  	resp = interfaces.TaskPrestartResponse{}
   169  	err = hook.Prestart(context.Background(), &req, &resp)
   170  	require.NoError(t, err)
   171  	require.NotEqual(t, origState, resp.State)
   172  
   173  	// assert that we got a new client and logmon
   174  	require.True(t, initLogmon != hook.logmon)
   175  	require.True(t, initClient != hook.logmonPluginClient)
   176  }