github.com/smintz/nomad@v0.8.3/command/agent/consul/script_test.go

github.com/smintz/nomad@v0.8.3/command/agent/consul/script_test.go (about)

     1  package consul
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"os"
     7  	"os/exec"
     8  	"testing"
     9  	"time"
    10  
    11  	"github.com/hashicorp/consul/api"
    12  	"github.com/hashicorp/nomad/helper/testlog"
    13  	"github.com/hashicorp/nomad/helper/testtask"
    14  	"github.com/hashicorp/nomad/nomad/structs"
    15  )
    16  
    17  func TestMain(m *testing.M) {
    18  	if !testtask.Run() {
    19  		os.Exit(m.Run())
    20  	}
    21  }
    22  
    23  // blockingScriptExec implements ScriptExec by running a subcommand that never
    24  // exits.
    25  type blockingScriptExec struct {
    26  	// running is ticked before blocking to allow synchronizing operations
    27  	running chan struct{}
    28  
    29  	// set to true if Exec is called and has exited
    30  	exited bool
    31  }
    32  
    33  func newBlockingScriptExec() *blockingScriptExec {
    34  	return &blockingScriptExec{running: make(chan struct{})}
    35  }
    36  
    37  func (b *blockingScriptExec) Exec(ctx context.Context, _ string, _ []string) ([]byte, int, error) {
    38  	b.running <- struct{}{}
    39  	cmd := exec.CommandContext(ctx, testtask.Path(), "sleep", "9000h")
    40  	testtask.SetCmdEnv(cmd)
    41  	err := cmd.Run()
    42  	code := 0
    43  	if exitErr, ok := err.(*exec.ExitError); ok {
    44  		if !exitErr.Success() {
    45  			code = 1
    46  		}
    47  	}
    48  	b.exited = true
    49  	return []byte{}, code, err
    50  }
    51  
    52  // TestConsulScript_Exec_Cancel asserts cancelling a script check shortcircuits
    53  // any running scripts.
    54  func TestConsulScript_Exec_Cancel(t *testing.T) {
    55  	serviceCheck := structs.ServiceCheck{
    56  		Name:     "sleeper",
    57  		Interval: time.Hour,
    58  		Timeout:  time.Hour,
    59  	}
    60  	exec := newBlockingScriptExec()
    61  
    62  	// pass nil for heartbeater as it shouldn't be called
    63  	check := newScriptCheck("allocid", "testtask", "checkid", &serviceCheck, exec, nil, testlog.Logger(t), nil)
    64  	handle := check.run()
    65  
    66  	// wait until Exec is called
    67  	<-exec.running
    68  
    69  	// cancel now that we're blocked in exec
    70  	handle.cancel()
    71  
    72  	select {
    73  	case <-handle.wait():
    74  	case <-time.After(3 * time.Second):
    75  		t.Fatalf("timed out waiting for script check to exit")
    76  	}
    77  	if !exec.exited {
    78  		t.Errorf("expected script executor to run and exit but it has not")
    79  	}
    80  }
    81  
    82  type execStatus struct {
    83  	checkID string
    84  	output  string
    85  	status  string
    86  }
    87  
    88  // fakeHeartbeater implements the heartbeater interface to allow mocking out
    89  // Consul in script executor tests.
    90  type fakeHeartbeater struct {
    91  	updates chan execStatus
    92  }
    93  
    94  func (f *fakeHeartbeater) UpdateTTL(checkID, output, status string) error {
    95  	f.updates <- execStatus{checkID: checkID, output: output, status: status}
    96  	return nil
    97  }
    98  
    99  func newFakeHeartbeater() *fakeHeartbeater {
   100  	return &fakeHeartbeater{updates: make(chan execStatus)}
   101  }
   102  
   103  // TestConsulScript_Exec_Timeout asserts a script will be killed when the
   104  // timeout is reached.
   105  func TestConsulScript_Exec_Timeout(t *testing.T) {
   106  	t.Parallel() // run the slow tests in parallel
   107  	serviceCheck := structs.ServiceCheck{
   108  		Name:     "sleeper",
   109  		Interval: time.Hour,
   110  		Timeout:  time.Second,
   111  	}
   112  	exec := newBlockingScriptExec()
   113  
   114  	hb := newFakeHeartbeater()
   115  	check := newScriptCheck("allocid", "testtask", "checkid", &serviceCheck, exec, hb, testlog.Logger(t), nil)
   116  	handle := check.run()
   117  	defer handle.cancel() // just-in-case cleanup
   118  	<-exec.running
   119  
   120  	// Check for UpdateTTL call
   121  	select {
   122  	case update := <-hb.updates:
   123  		if update.status != api.HealthCritical {
   124  			t.Errorf("expected %q due to timeout but received %q", api.HealthCritical, update)
   125  		}
   126  	case <-time.After(3 * time.Second):
   127  		t.Fatalf("timed out waiting for script check to exit")
   128  	}
   129  	if !exec.exited {
   130  		t.Errorf("expected script executor to run and exit but it has not")
   131  	}
   132  
   133  	// Cancel and watch for exit
   134  	handle.cancel()
   135  	select {
   136  	case <-handle.wait():
   137  		// ok!
   138  	case update := <-hb.updates:
   139  		t.Errorf("unexpected UpdateTTL call on exit with status=%q", update)
   140  	case <-time.After(3 * time.Second):
   141  		t.Fatalf("timed out waiting for script check to exit")
   142  	}
   143  }
   144  
   145  // sleeperExec sleeps for 100ms but returns successfully to allow testing timeout conditions
   146  type sleeperExec struct{}
   147  
   148  func (sleeperExec) Exec(context.Context, string, []string) ([]byte, int, error) {
   149  	time.Sleep(100 * time.Millisecond)
   150  	return []byte{}, 0, nil
   151  }
   152  
   153  // TestConsulScript_Exec_TimeoutCritical asserts a script will be killed when
   154  // the timeout is reached and always set a critical status regardless of what
   155  // Exec returns.
   156  func TestConsulScript_Exec_TimeoutCritical(t *testing.T) {
   157  	t.Parallel() // run the slow tests in parallel
   158  	serviceCheck := structs.ServiceCheck{
   159  		Name:     "sleeper",
   160  		Interval: time.Hour,
   161  		Timeout:  time.Nanosecond,
   162  	}
   163  	hb := newFakeHeartbeater()
   164  	check := newScriptCheck("allocid", "testtask", "checkid", &serviceCheck, sleeperExec{}, hb, testlog.Logger(t), nil)
   165  	handle := check.run()
   166  	defer handle.cancel() // just-in-case cleanup
   167  
   168  	// Check for UpdateTTL call
   169  	select {
   170  	case update := <-hb.updates:
   171  		if update.status != api.HealthCritical {
   172  			t.Errorf("expected %q due to timeout but received %q", api.HealthCritical, update)
   173  		}
   174  		if update.output != context.DeadlineExceeded.Error() {
   175  			t.Errorf("expected output=%q but found: %q", context.DeadlineExceeded.Error(), update.output)
   176  		}
   177  	case <-time.After(3 * time.Second):
   178  		t.Fatalf("timed out waiting for script check to timeout")
   179  	}
   180  }
   181  
   182  // simpleExec is a fake ScriptExecutor that returns whatever is specified.
   183  type simpleExec struct {
   184  	code int
   185  	err  error
   186  }
   187  
   188  func (s simpleExec) Exec(context.Context, string, []string) ([]byte, int, error) {
   189  	return []byte(fmt.Sprintf("code=%d err=%v", s.code, s.err)), s.code, s.err
   190  }
   191  
   192  // newSimpleExec creates a new ScriptExecutor that returns the given code and err.
   193  func newSimpleExec(code int, err error) simpleExec {
   194  	return simpleExec{code: code, err: err}
   195  }
   196  
   197  // TestConsulScript_Exec_Shutdown asserts a script will be executed once more
   198  // when told to shutdown.
   199  func TestConsulScript_Exec_Shutdown(t *testing.T) {
   200  	serviceCheck := structs.ServiceCheck{
   201  		Name:     "sleeper",
   202  		Interval: time.Hour,
   203  		Timeout:  3 * time.Second,
   204  	}
   205  
   206  	hb := newFakeHeartbeater()
   207  	shutdown := make(chan struct{})
   208  	exec := newSimpleExec(0, nil)
   209  	check := newScriptCheck("allocid", "testtask", "checkid", &serviceCheck, exec, hb, testlog.Logger(t), shutdown)
   210  	handle := check.run()
   211  	defer handle.cancel() // just-in-case cleanup
   212  
   213  	// Tell scriptCheck to exit
   214  	close(shutdown)
   215  
   216  	select {
   217  	case update := <-hb.updates:
   218  		if update.status != api.HealthPassing {
   219  			t.Errorf("expected %q due to timeout but received %q", api.HealthCritical, update)
   220  		}
   221  	case <-time.After(3 * time.Second):
   222  		t.Fatalf("timed out waiting for script check to exit")
   223  	}
   224  
   225  	select {
   226  	case <-handle.wait():
   227  		// ok!
   228  	case <-time.After(3 * time.Second):
   229  		t.Fatalf("timed out waiting for script check to exit")
   230  	}
   231  }
   232  
   233  func TestConsulScript_Exec_Codes(t *testing.T) {
   234  	run := func(code int, err error, expected string) func(t *testing.T) {
   235  		return func(t *testing.T) {
   236  			t.Parallel()
   237  			serviceCheck := structs.ServiceCheck{
   238  				Name:     "test",
   239  				Interval: time.Hour,
   240  				Timeout:  3 * time.Second,
   241  			}
   242  
   243  			hb := newFakeHeartbeater()
   244  			shutdown := make(chan struct{})
   245  			exec := newSimpleExec(code, err)
   246  			check := newScriptCheck("allocid", "testtask", "checkid", &serviceCheck, exec, hb, testlog.Logger(t), shutdown)
   247  			handle := check.run()
   248  			defer handle.cancel()
   249  
   250  			select {
   251  			case update := <-hb.updates:
   252  				if update.status != expected {
   253  					t.Errorf("expected %q but received %q", expected, update)
   254  				}
   255  				// assert output is being reported
   256  				expectedOutput := fmt.Sprintf("code=%d err=%v", code, err)
   257  				if err != nil {
   258  					expectedOutput = err.Error()
   259  				}
   260  				if update.output != expectedOutput {
   261  					t.Errorf("expected output=%q but found: %q", expectedOutput, update.output)
   262  				}
   263  			case <-time.After(3 * time.Second):
   264  				t.Fatalf("timed out waiting for script check to exec")
   265  			}
   266  		}
   267  	}
   268  
   269  	// Test exit codes with errors
   270  	t.Run("Passing", run(0, nil, api.HealthPassing))
   271  	t.Run("Warning", run(1, nil, api.HealthWarning))
   272  	t.Run("Critical-2", run(2, nil, api.HealthCritical))
   273  	t.Run("Critical-9000", run(9000, nil, api.HealthCritical))
   274  
   275  	// Errors should always cause Critical status
   276  	err := fmt.Errorf("test error")
   277  	t.Run("Error-0", run(0, err, api.HealthCritical))
   278  	t.Run("Error-1", run(1, err, api.HealthCritical))
   279  	t.Run("Error-2", run(2, err, api.HealthCritical))
   280  	t.Run("Error-9000", run(9000, err, api.HealthCritical))
   281  }