github.com/djenriquez/nomad-1@v0.8.1/command/agent/consul/script_test.go (about)

     1  package consul
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"os"
     7  	"os/exec"
     8  	"testing"
     9  	"time"
    10  
    11  	"github.com/hashicorp/consul/api"
    12  	"github.com/hashicorp/nomad/helper/testtask"
    13  	"github.com/hashicorp/nomad/nomad/structs"
    14  )
    15  
    16  func TestMain(m *testing.M) {
    17  	if !testtask.Run() {
    18  		os.Exit(m.Run())
    19  	}
    20  }
    21  
    22  // blockingScriptExec implements ScriptExec by running a subcommand that never
    23  // exits.
    24  type blockingScriptExec struct {
    25  	// running is ticked before blocking to allow synchronizing operations
    26  	running chan struct{}
    27  
    28  	// set to true if Exec is called and has exited
    29  	exited bool
    30  }
    31  
    32  func newBlockingScriptExec() *blockingScriptExec {
    33  	return &blockingScriptExec{running: make(chan struct{})}
    34  }
    35  
    36  func (b *blockingScriptExec) Exec(ctx context.Context, _ string, _ []string) ([]byte, int, error) {
    37  	b.running <- struct{}{}
    38  	cmd := exec.CommandContext(ctx, testtask.Path(), "sleep", "9000h")
    39  	testtask.SetCmdEnv(cmd)
    40  	err := cmd.Run()
    41  	code := 0
    42  	if exitErr, ok := err.(*exec.ExitError); ok {
    43  		if !exitErr.Success() {
    44  			code = 1
    45  		}
    46  	}
    47  	b.exited = true
    48  	return []byte{}, code, err
    49  }
    50  
    51  // TestConsulScript_Exec_Cancel asserts cancelling a script check shortcircuits
    52  // any running scripts.
    53  func TestConsulScript_Exec_Cancel(t *testing.T) {
    54  	serviceCheck := structs.ServiceCheck{
    55  		Name:     "sleeper",
    56  		Interval: time.Hour,
    57  		Timeout:  time.Hour,
    58  	}
    59  	exec := newBlockingScriptExec()
    60  
    61  	// pass nil for heartbeater as it shouldn't be called
    62  	check := newScriptCheck("allocid", "testtask", "checkid", &serviceCheck, exec, nil, testLogger(), nil)
    63  	handle := check.run()
    64  
    65  	// wait until Exec is called
    66  	<-exec.running
    67  
    68  	// cancel now that we're blocked in exec
    69  	handle.cancel()
    70  
    71  	select {
    72  	case <-handle.wait():
    73  	case <-time.After(3 * time.Second):
    74  		t.Fatalf("timed out waiting for script check to exit")
    75  	}
    76  	if !exec.exited {
    77  		t.Errorf("expected script executor to run and exit but it has not")
    78  	}
    79  }
    80  
    81  type execStatus struct {
    82  	checkID string
    83  	output  string
    84  	status  string
    85  }
    86  
    87  // fakeHeartbeater implements the heartbeater interface to allow mocking out
    88  // Consul in script executor tests.
    89  type fakeHeartbeater struct {
    90  	updates chan execStatus
    91  }
    92  
    93  func (f *fakeHeartbeater) UpdateTTL(checkID, output, status string) error {
    94  	f.updates <- execStatus{checkID: checkID, output: output, status: status}
    95  	return nil
    96  }
    97  
    98  func newFakeHeartbeater() *fakeHeartbeater {
    99  	return &fakeHeartbeater{updates: make(chan execStatus)}
   100  }
   101  
   102  // TestConsulScript_Exec_Timeout asserts a script will be killed when the
   103  // timeout is reached.
   104  func TestConsulScript_Exec_Timeout(t *testing.T) {
   105  	t.Parallel() // run the slow tests in parallel
   106  	serviceCheck := structs.ServiceCheck{
   107  		Name:     "sleeper",
   108  		Interval: time.Hour,
   109  		Timeout:  time.Second,
   110  	}
   111  	exec := newBlockingScriptExec()
   112  
   113  	hb := newFakeHeartbeater()
   114  	check := newScriptCheck("allocid", "testtask", "checkid", &serviceCheck, exec, hb, testLogger(), nil)
   115  	handle := check.run()
   116  	defer handle.cancel() // just-in-case cleanup
   117  	<-exec.running
   118  
   119  	// Check for UpdateTTL call
   120  	select {
   121  	case update := <-hb.updates:
   122  		if update.status != api.HealthCritical {
   123  			t.Errorf("expected %q due to timeout but received %q", api.HealthCritical, update)
   124  		}
   125  	case <-time.After(3 * time.Second):
   126  		t.Fatalf("timed out waiting for script check to exit")
   127  	}
   128  	if !exec.exited {
   129  		t.Errorf("expected script executor to run and exit but it has not")
   130  	}
   131  
   132  	// Cancel and watch for exit
   133  	handle.cancel()
   134  	select {
   135  	case <-handle.wait():
   136  		// ok!
   137  	case update := <-hb.updates:
   138  		t.Errorf("unexpected UpdateTTL call on exit with status=%q", update)
   139  	case <-time.After(3 * time.Second):
   140  		t.Fatalf("timed out waiting for script check to exit")
   141  	}
   142  }
   143  
   144  // sleeperExec sleeps for 100ms but returns successfully to allow testing timeout conditions
   145  type sleeperExec struct{}
   146  
   147  func (sleeperExec) Exec(context.Context, string, []string) ([]byte, int, error) {
   148  	time.Sleep(100 * time.Millisecond)
   149  	return []byte{}, 0, nil
   150  }
   151  
   152  // TestConsulScript_Exec_TimeoutCritical asserts a script will be killed when
   153  // the timeout is reached and always set a critical status regardless of what
   154  // Exec returns.
   155  func TestConsulScript_Exec_TimeoutCritical(t *testing.T) {
   156  	t.Parallel() // run the slow tests in parallel
   157  	serviceCheck := structs.ServiceCheck{
   158  		Name:     "sleeper",
   159  		Interval: time.Hour,
   160  		Timeout:  time.Nanosecond,
   161  	}
   162  	hb := newFakeHeartbeater()
   163  	check := newScriptCheck("allocid", "testtask", "checkid", &serviceCheck, sleeperExec{}, hb, testLogger(), nil)
   164  	handle := check.run()
   165  	defer handle.cancel() // just-in-case cleanup
   166  
   167  	// Check for UpdateTTL call
   168  	select {
   169  	case update := <-hb.updates:
   170  		if update.status != api.HealthCritical {
   171  			t.Errorf("expected %q due to timeout but received %q", api.HealthCritical, update)
   172  		}
   173  		if update.output != context.DeadlineExceeded.Error() {
   174  			t.Errorf("expected output=%q but found: %q", context.DeadlineExceeded.Error(), update.output)
   175  		}
   176  	case <-time.After(3 * time.Second):
   177  		t.Fatalf("timed out waiting for script check to timeout")
   178  	}
   179  }
   180  
   181  // simpleExec is a fake ScriptExecutor that returns whatever is specified.
   182  type simpleExec struct {
   183  	code int
   184  	err  error
   185  }
   186  
   187  func (s simpleExec) Exec(context.Context, string, []string) ([]byte, int, error) {
   188  	return []byte(fmt.Sprintf("code=%d err=%v", s.code, s.err)), s.code, s.err
   189  }
   190  
   191  // newSimpleExec creates a new ScriptExecutor that returns the given code and err.
   192  func newSimpleExec(code int, err error) simpleExec {
   193  	return simpleExec{code: code, err: err}
   194  }
   195  
   196  // TestConsulScript_Exec_Shutdown asserts a script will be executed once more
   197  // when told to shutdown.
   198  func TestConsulScript_Exec_Shutdown(t *testing.T) {
   199  	serviceCheck := structs.ServiceCheck{
   200  		Name:     "sleeper",
   201  		Interval: time.Hour,
   202  		Timeout:  3 * time.Second,
   203  	}
   204  
   205  	hb := newFakeHeartbeater()
   206  	shutdown := make(chan struct{})
   207  	exec := newSimpleExec(0, nil)
   208  	check := newScriptCheck("allocid", "testtask", "checkid", &serviceCheck, exec, hb, testLogger(), shutdown)
   209  	handle := check.run()
   210  	defer handle.cancel() // just-in-case cleanup
   211  
   212  	// Tell scriptCheck to exit
   213  	close(shutdown)
   214  
   215  	select {
   216  	case update := <-hb.updates:
   217  		if update.status != api.HealthPassing {
   218  			t.Errorf("expected %q due to timeout but received %q", api.HealthCritical, update)
   219  		}
   220  	case <-time.After(3 * time.Second):
   221  		t.Fatalf("timed out waiting for script check to exit")
   222  	}
   223  
   224  	select {
   225  	case <-handle.wait():
   226  		// ok!
   227  	case <-time.After(3 * time.Second):
   228  		t.Fatalf("timed out waiting for script check to exit")
   229  	}
   230  }
   231  
   232  func TestConsulScript_Exec_Codes(t *testing.T) {
   233  	run := func(code int, err error, expected string) func(t *testing.T) {
   234  		return func(t *testing.T) {
   235  			t.Parallel()
   236  			serviceCheck := structs.ServiceCheck{
   237  				Name:     "test",
   238  				Interval: time.Hour,
   239  				Timeout:  3 * time.Second,
   240  			}
   241  
   242  			hb := newFakeHeartbeater()
   243  			shutdown := make(chan struct{})
   244  			exec := newSimpleExec(code, err)
   245  			check := newScriptCheck("allocid", "testtask", "checkid", &serviceCheck, exec, hb, testLogger(), shutdown)
   246  			handle := check.run()
   247  			defer handle.cancel()
   248  
   249  			select {
   250  			case update := <-hb.updates:
   251  				if update.status != expected {
   252  					t.Errorf("expected %q but received %q", expected, update)
   253  				}
   254  				// assert output is being reported
   255  				expectedOutput := fmt.Sprintf("code=%d err=%v", code, err)
   256  				if err != nil {
   257  					expectedOutput = err.Error()
   258  				}
   259  				if update.output != expectedOutput {
   260  					t.Errorf("expected output=%q but found: %q", expectedOutput, update.output)
   261  				}
   262  			case <-time.After(3 * time.Second):
   263  				t.Fatalf("timed out waiting for script check to exec")
   264  			}
   265  		}
   266  	}
   267  
   268  	// Test exit codes with errors
   269  	t.Run("Passing", run(0, nil, api.HealthPassing))
   270  	t.Run("Warning", run(1, nil, api.HealthWarning))
   271  	t.Run("Critical-2", run(2, nil, api.HealthCritical))
   272  	t.Run("Critical-9000", run(9000, nil, api.HealthCritical))
   273  
   274  	// Errors should always cause Critical status
   275  	err := fmt.Errorf("test error")
   276  	t.Run("Error-0", run(0, err, api.HealthCritical))
   277  	t.Run("Error-1", run(1, err, api.HealthCritical))
   278  	t.Run("Error-2", run(2, err, api.HealthCritical))
   279  	t.Run("Error-9000", run(9000, err, api.HealthCritical))
   280  }