github.com/anth0d/nomad@v0.0.0-20221214183521-ae3a0a2cad06/client/allocrunner/taskrunner/logmon_hook_unix_test.go (about)

     1  //go:build !windows
     2  // +build !windows
     3  
     4  package taskrunner
     5  
     6  import (
     7  	"context"
     8  	"encoding/json"
     9  	"fmt"
    10  	"os"
    11  	"syscall"
    12  	"testing"
    13  	"time"
    14  
    15  	"github.com/hashicorp/nomad/ci"
    16  	"github.com/hashicorp/nomad/client/allocrunner/interfaces"
    17  	"github.com/hashicorp/nomad/helper/testlog"
    18  	"github.com/hashicorp/nomad/nomad/mock"
    19  	"github.com/hashicorp/nomad/testutil"
    20  	"github.com/shirou/gopsutil/v3/process"
    21  	"github.com/stretchr/testify/require"
    22  )
    23  
    24  // TestTaskRunner_LogmonHook_StartCrashStop simulates logmon crashing while the
    25  // Nomad client is restarting and asserts failing to reattach to logmon causes
    26  // nomad to spawn a new logmon.
    27  func TestTaskRunner_LogmonHook_StartCrashStop(t *testing.T) {
    28  	ci.Parallel(t)
    29  
    30  	alloc := mock.BatchAlloc()
    31  	task := alloc.Job.TaskGroups[0].Tasks[0]
    32  
    33  	dir := t.TempDir()
    34  
    35  	hookConf := newLogMonHookConfig(task.Name, dir)
    36  	runner := &TaskRunner{logmonHookConfig: hookConf}
    37  	hook := newLogMonHook(runner, testlog.HCLogger(t))
    38  
    39  	req := interfaces.TaskPrestartRequest{
    40  		Task: task,
    41  	}
    42  	resp := interfaces.TaskPrestartResponse{}
    43  
    44  	// First start
    45  	require.NoError(t, hook.Prestart(context.Background(), &req, &resp))
    46  	defer hook.Stop(context.Background(), nil, nil)
    47  
    48  	origState := resp.State
    49  	origHookData := resp.State[logmonReattachKey]
    50  	require.NotEmpty(t, origHookData)
    51  
    52  	// Pluck PID out of reattach synthesize a crash
    53  	reattach := struct {
    54  		Pid int
    55  	}{}
    56  	require.NoError(t, json.Unmarshal([]byte(origHookData), &reattach))
    57  	pid := reattach.Pid
    58  	require.NotZero(t, pid)
    59  
    60  	proc, _ := os.FindProcess(pid)
    61  
    62  	// Assert logmon is running
    63  	require.NoError(t, proc.Signal(syscall.Signal(0)))
    64  
    65  	// Kill it
    66  	require.NoError(t, proc.Signal(os.Kill))
    67  
    68  	// Since signals are asynchronous wait for the process to die
    69  	testutil.WaitForResult(func() (bool, error) {
    70  		err := proc.Signal(syscall.Signal(0))
    71  		return err != nil, fmt.Errorf("pid %d still running", pid)
    72  	}, func(err error) {
    73  		require.NoError(t, err)
    74  	})
    75  
    76  	// Running prestart again should return a recoverable error with no
    77  	// reattach config to cause the task to be restarted with a new logmon.
    78  	req.PreviousState = map[string]string{
    79  		logmonReattachKey: origHookData,
    80  	}
    81  	resp = interfaces.TaskPrestartResponse{}
    82  	err := hook.Prestart(context.Background(), &req, &resp)
    83  	require.NoError(t, err)
    84  	require.NotEqual(t, origState, resp.State)
    85  
    86  	// Running stop should shutdown logmon
    87  	require.NoError(t, hook.Stop(context.Background(), nil, nil))
    88  }
    89  
    90  // TestTaskRunner_LogmonHook_ShutdownMidStart simulates logmon crashing while the
    91  // Nomad client is calling Start() and asserts that we recover and spawn a new logmon.
    92  func TestTaskRunner_LogmonHook_ShutdownMidStart(t *testing.T) {
    93  	ci.Parallel(t)
    94  
    95  	alloc := mock.BatchAlloc()
    96  	task := alloc.Job.TaskGroups[0].Tasks[0]
    97  
    98  	dir := t.TempDir()
    99  
   100  	hookConf := newLogMonHookConfig(task.Name, dir)
   101  	runner := &TaskRunner{logmonHookConfig: hookConf}
   102  	hook := newLogMonHook(runner, testlog.HCLogger(t))
   103  
   104  	req := interfaces.TaskPrestartRequest{
   105  		Task: task,
   106  	}
   107  	resp := interfaces.TaskPrestartResponse{}
   108  
   109  	// First start
   110  	require.NoError(t, hook.Prestart(context.Background(), &req, &resp))
   111  	defer hook.Stop(context.Background(), nil, nil)
   112  
   113  	origState := resp.State
   114  	origHookData := resp.State[logmonReattachKey]
   115  	require.NotEmpty(t, origHookData)
   116  
   117  	// Pluck PID out of reattach synthesize a crash
   118  	reattach := struct {
   119  		Pid int
   120  	}{}
   121  	require.NoError(t, json.Unmarshal([]byte(origHookData), &reattach))
   122  	pid := reattach.Pid
   123  	require.NotZero(t, pid)
   124  
   125  	proc, err := process.NewProcess(int32(pid))
   126  	require.NoError(t, err)
   127  
   128  	// Assert logmon is running
   129  	require.NoError(t, proc.SendSignal(syscall.Signal(0)))
   130  
   131  	// SIGSTOP would freeze process without it being considered
   132  	// exited; so this causes process to be non-exited at beginning of call
   133  	// then we kill process while Start call is running
   134  	require.NoError(t, proc.SendSignal(syscall.SIGSTOP))
   135  	testutil.WaitForResult(func() (bool, error) {
   136  		status, err := proc.Status()
   137  		if err != nil {
   138  			return false, err
   139  		}
   140  		if len(status) == 0 {
   141  			return false, fmt.Errorf("process status did not return value")
   142  		}
   143  		if status[0] != "stop" {
   144  			return false, fmt.Errorf("process is not stopped yet: %v", status)
   145  		}
   146  
   147  		return true, nil
   148  	}, func(err error) {
   149  		require.NoError(t, err)
   150  	})
   151  
   152  	go func() {
   153  		time.Sleep(2 * time.Second)
   154  
   155  		proc.SendSignal(syscall.SIGCONT)
   156  		proc.Kill()
   157  	}()
   158  
   159  	req.PreviousState = map[string]string{
   160  		logmonReattachKey: origHookData,
   161  	}
   162  
   163  	initLogmon, initClient := hook.logmon, hook.logmonPluginClient
   164  
   165  	resp = interfaces.TaskPrestartResponse{}
   166  	err = hook.Prestart(context.Background(), &req, &resp)
   167  	require.NoError(t, err)
   168  	require.NotEqual(t, origState, resp.State)
   169  
   170  	// assert that we got a new client and logmon
   171  	require.True(t, initLogmon != hook.logmon)
   172  	require.True(t, initClient != hook.logmonPluginClient)
   173  }