github.com/smithx10/nomad@v0.9.1-rc1/command/agent/consul/script_test.go

github.com/smithx10/nomad@v0.9.1-rc1/command/agent/consul/script_test.go (about)

     1  package consul
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"os"
     7  	"os/exec"
     8  	"sync/atomic"
     9  	"testing"
    10  	"time"
    11  
    12  	"github.com/hashicorp/consul/api"
    13  	"github.com/hashicorp/nomad/helper/testlog"
    14  	"github.com/hashicorp/nomad/helper/testtask"
    15  	"github.com/hashicorp/nomad/nomad/structs"
    16  )
    17  
    18  func TestMain(m *testing.M) {
    19  	if !testtask.Run() {
    20  		os.Exit(m.Run())
    21  	}
    22  }
    23  
    24  // blockingScriptExec implements ScriptExec by running a subcommand that never
    25  // exits.
    26  type blockingScriptExec struct {
    27  	// pctx is canceled *only* for test cleanup. Just like real
    28  	// ScriptExecutors its Exec method cannot be canceled directly -- only
    29  	// with a timeout.
    30  	pctx context.Context
    31  
    32  	// running is ticked before blocking to allow synchronizing operations
    33  	running chan struct{}
    34  
    35  	// set to 1 with atomics if Exec is called and has exited
    36  	exited int32
    37  }
    38  
    39  // newBlockingScriptExec returns a ScriptExecutor that blocks Exec() until the
    40  // caller recvs on the b.running chan. It also returns a CancelFunc for test
    41  // cleanup only. The runtime cannot cancel ScriptExecutors before their timeout
    42  // expires.
    43  func newBlockingScriptExec() (*blockingScriptExec, context.CancelFunc) {
    44  	ctx, cancel := context.WithCancel(context.Background())
    45  	exec := &blockingScriptExec{
    46  		pctx:    ctx,
    47  		running: make(chan struct{}),
    48  	}
    49  	return exec, cancel
    50  }
    51  
    52  func (b *blockingScriptExec) Exec(dur time.Duration, _ string, _ []string) ([]byte, int, error) {
    53  	b.running <- struct{}{}
    54  	ctx, cancel := context.WithTimeout(b.pctx, dur)
    55  	defer cancel()
    56  	cmd := exec.CommandContext(ctx, testtask.Path(), "sleep", "9000h")
    57  	testtask.SetCmdEnv(cmd)
    58  	err := cmd.Run()
    59  	code := 0
    60  	if exitErr, ok := err.(*exec.ExitError); ok {
    61  		if !exitErr.Success() {
    62  			code = 1
    63  		}
    64  	}
    65  	atomic.StoreInt32(&b.exited, 1)
    66  	return []byte{}, code, err
    67  }
    68  
    69  // TestConsulScript_Exec_Cancel asserts cancelling a script check shortcircuits
    70  // any running scripts.
    71  func TestConsulScript_Exec_Cancel(t *testing.T) {
    72  	serviceCheck := structs.ServiceCheck{
    73  		Name:     "sleeper",
    74  		Interval: time.Hour,
    75  		Timeout:  time.Hour,
    76  	}
    77  	exec, cancel := newBlockingScriptExec()
    78  	defer cancel()
    79  
    80  	// pass nil for heartbeater as it shouldn't be called
    81  	check := newScriptCheck("allocid", "testtask", "checkid", &serviceCheck, exec, nil, testlog.HCLogger(t), nil)
    82  	handle := check.run()
    83  
    84  	// wait until Exec is called
    85  	<-exec.running
    86  
    87  	// cancel now that we're blocked in exec
    88  	handle.cancel()
    89  
    90  	select {
    91  	case <-handle.wait():
    92  	case <-time.After(3 * time.Second):
    93  		t.Fatalf("timed out waiting for script check to exit")
    94  	}
    95  
    96  	// The underlying ScriptExecutor (newBlockScriptExec) *cannot* be
    97  	// canceled. Only a wrapper around it obeys the context cancelation.
    98  	if atomic.LoadInt32(&exec.exited) == 1 {
    99  		t.Errorf("expected script executor to still be running after timeout")
   100  	}
   101  }
   102  
   103  type execStatus struct {
   104  	checkID string
   105  	output  string
   106  	status  string
   107  }
   108  
   109  // fakeHeartbeater implements the heartbeater interface to allow mocking out
   110  // Consul in script executor tests.
   111  type fakeHeartbeater struct {
   112  	updates chan execStatus
   113  }
   114  
   115  func (f *fakeHeartbeater) UpdateTTL(checkID, output, status string) error {
   116  	f.updates <- execStatus{checkID: checkID, output: output, status: status}
   117  	return nil
   118  }
   119  
   120  func newFakeHeartbeater() *fakeHeartbeater {
   121  	return &fakeHeartbeater{updates: make(chan execStatus)}
   122  }
   123  
   124  // TestConsulScript_Exec_TimeoutBasic asserts a script will be killed when the
   125  // timeout is reached.
   126  func TestConsulScript_Exec_TimeoutBasic(t *testing.T) {
   127  	t.Parallel()
   128  
   129  	serviceCheck := structs.ServiceCheck{
   130  		Name:     "sleeper",
   131  		Interval: time.Hour,
   132  		Timeout:  time.Second,
   133  	}
   134  
   135  	exec, cancel := newBlockingScriptExec()
   136  	defer cancel()
   137  
   138  	hb := newFakeHeartbeater()
   139  	check := newScriptCheck("allocid", "testtask", "checkid", &serviceCheck, exec, hb, testlog.HCLogger(t), nil)
   140  	handle := check.run()
   141  	defer handle.cancel() // just-in-case cleanup
   142  	<-exec.running
   143  
   144  	// Check for UpdateTTL call
   145  	select {
   146  	case update := <-hb.updates:
   147  		if update.status != api.HealthCritical {
   148  			t.Errorf("expected %q due to timeout but received %q", api.HealthCritical, update)
   149  		}
   150  	case <-time.After(3 * time.Second):
   151  		t.Fatalf("timed out waiting for script check to exit")
   152  	}
   153  
   154  	// The underlying ScriptExecutor (newBlockScriptExec) *cannot* be
   155  	// canceled. Only a wrapper around it obeys the context cancelation.
   156  	if atomic.LoadInt32(&exec.exited) == 1 {
   157  		t.Errorf("expected script executor to still be running after timeout")
   158  	}
   159  
   160  	// Cancel and watch for exit
   161  	handle.cancel()
   162  	select {
   163  	case <-handle.wait():
   164  		// ok!
   165  	case update := <-hb.updates:
   166  		t.Errorf("unexpected UpdateTTL call on exit with status=%q", update)
   167  	case <-time.After(3 * time.Second):
   168  		t.Fatalf("timed out waiting for script check to exit")
   169  	}
   170  }
   171  
   172  // sleeperExec sleeps for 100ms but returns successfully to allow testing timeout conditions
   173  type sleeperExec struct{}
   174  
   175  func (sleeperExec) Exec(time.Duration, string, []string) ([]byte, int, error) {
   176  	time.Sleep(100 * time.Millisecond)
   177  	return []byte{}, 0, nil
   178  }
   179  
   180  // TestConsulScript_Exec_TimeoutCritical asserts a script will be killed when
   181  // the timeout is reached and always set a critical status regardless of what
   182  // Exec returns.
   183  func TestConsulScript_Exec_TimeoutCritical(t *testing.T) {
   184  	t.Parallel()
   185  
   186  	serviceCheck := structs.ServiceCheck{
   187  		Name:     "sleeper",
   188  		Interval: time.Hour,
   189  		Timeout:  time.Nanosecond,
   190  	}
   191  	hb := newFakeHeartbeater()
   192  	check := newScriptCheck("allocid", "testtask", "checkid", &serviceCheck, sleeperExec{}, hb, testlog.HCLogger(t), nil)
   193  	handle := check.run()
   194  	defer handle.cancel() // just-in-case cleanup
   195  
   196  	// Check for UpdateTTL call
   197  	select {
   198  	case update := <-hb.updates:
   199  		if update.status != api.HealthCritical {
   200  			t.Errorf("expected %q due to timeout but received %q", api.HealthCritical, update)
   201  		}
   202  		if update.output != context.DeadlineExceeded.Error() {
   203  			t.Errorf("expected output=%q but found: %q", context.DeadlineExceeded.Error(), update.output)
   204  		}
   205  	case <-time.After(3 * time.Second):
   206  		t.Fatalf("timed out waiting for script check to timeout")
   207  	}
   208  }
   209  
   210  // simpleExec is a fake ScriptExecutor that returns whatever is specified.
   211  type simpleExec struct {
   212  	code int
   213  	err  error
   214  }
   215  
   216  func (s simpleExec) Exec(time.Duration, string, []string) ([]byte, int, error) {
   217  	return []byte(fmt.Sprintf("code=%d err=%v", s.code, s.err)), s.code, s.err
   218  }
   219  
   220  // newSimpleExec creates a new ScriptExecutor that returns the given code and err.
   221  func newSimpleExec(code int, err error) simpleExec {
   222  	return simpleExec{code: code, err: err}
   223  }
   224  
   225  // TestConsulScript_Exec_Shutdown asserts a script will be executed once more
   226  // when told to shutdown.
   227  func TestConsulScript_Exec_Shutdown(t *testing.T) {
   228  	serviceCheck := structs.ServiceCheck{
   229  		Name:     "sleeper",
   230  		Interval: time.Hour,
   231  		Timeout:  3 * time.Second,
   232  	}
   233  
   234  	hb := newFakeHeartbeater()
   235  	shutdown := make(chan struct{})
   236  	exec := newSimpleExec(0, nil)
   237  	check := newScriptCheck("allocid", "testtask", "checkid", &serviceCheck, exec, hb, testlog.HCLogger(t), shutdown)
   238  	handle := check.run()
   239  	defer handle.cancel() // just-in-case cleanup
   240  
   241  	// Tell scriptCheck to exit
   242  	close(shutdown)
   243  
   244  	select {
   245  	case update := <-hb.updates:
   246  		if update.status != api.HealthPassing {
   247  			t.Errorf("expected %q due to timeout but received %q", api.HealthCritical, update)
   248  		}
   249  	case <-time.After(3 * time.Second):
   250  		t.Fatalf("timed out waiting for script check to exit")
   251  	}
   252  
   253  	select {
   254  	case <-handle.wait():
   255  		// ok!
   256  	case <-time.After(3 * time.Second):
   257  		t.Fatalf("timed out waiting for script check to exit")
   258  	}
   259  }
   260  
   261  func TestConsulScript_Exec_Codes(t *testing.T) {
   262  	run := func(code int, err error, expected string) func(t *testing.T) {
   263  		return func(t *testing.T) {
   264  			t.Parallel()
   265  			serviceCheck := structs.ServiceCheck{
   266  				Name:     "test",
   267  				Interval: time.Hour,
   268  				Timeout:  3 * time.Second,
   269  			}
   270  
   271  			hb := newFakeHeartbeater()
   272  			shutdown := make(chan struct{})
   273  			exec := newSimpleExec(code, err)
   274  			check := newScriptCheck("allocid", "testtask", "checkid", &serviceCheck, exec, hb, testlog.HCLogger(t), shutdown)
   275  			handle := check.run()
   276  			defer handle.cancel()
   277  
   278  			select {
   279  			case update := <-hb.updates:
   280  				if update.status != expected {
   281  					t.Errorf("expected %q but received %q", expected, update)
   282  				}
   283  				// assert output is being reported
   284  				expectedOutput := fmt.Sprintf("code=%d err=%v", code, err)
   285  				if err != nil {
   286  					expectedOutput = err.Error()
   287  				}
   288  				if update.output != expectedOutput {
   289  					t.Errorf("expected output=%q but found: %q", expectedOutput, update.output)
   290  				}
   291  			case <-time.After(3 * time.Second):
   292  				t.Fatalf("timed out waiting for script check to exec")
   293  			}
   294  		}
   295  	}
   296  
   297  	// Test exit codes with errors
   298  	t.Run("Passing", run(0, nil, api.HealthPassing))
   299  	t.Run("Warning", run(1, nil, api.HealthWarning))
   300  	t.Run("Critical-2", run(2, nil, api.HealthCritical))
   301  	t.Run("Critical-9000", run(9000, nil, api.HealthCritical))
   302  
   303  	// Errors should always cause Critical status
   304  	err := fmt.Errorf("test error")
   305  	t.Run("Error-0", run(0, err, api.HealthCritical))
   306  	t.Run("Error-1", run(1, err, api.HealthCritical))
   307  	t.Run("Error-2", run(2, err, api.HealthCritical))
   308  	t.Run("Error-9000", run(9000, err, api.HealthCritical))
   309  }