github.com/bigcommerce/nomad@v0.9.3-bc/client/allocrunner/taskrunner/restarts/restarts_test.go (about)

     1  package restarts
     2  
     3  import (
     4  	"fmt"
     5  	"testing"
     6  	"time"
     7  
     8  	"github.com/hashicorp/nomad/nomad/structs"
     9  	"github.com/hashicorp/nomad/plugins/drivers"
    10  )
    11  
    12  func testPolicy(success bool, mode string) *structs.RestartPolicy {
    13  	return &structs.RestartPolicy{
    14  		Interval: 2 * time.Minute,
    15  		Delay:    1 * time.Second,
    16  		Attempts: 3,
    17  		Mode:     mode,
    18  	}
    19  }
    20  
    21  // withinJitter is a helper that returns whether the returned delay is within
    22  // the jitter.
    23  func withinJitter(expected, actual time.Duration) bool {
    24  	return float64((actual.Nanoseconds()-expected.Nanoseconds())/
    25  		expected.Nanoseconds()) <= jitter
    26  }
    27  
    28  func testExitResult(exit int) *drivers.ExitResult {
    29  	return &drivers.ExitResult{
    30  		ExitCode: exit,
    31  	}
    32  }
    33  
    34  func TestClient_RestartTracker_ModeDelay(t *testing.T) {
    35  	t.Parallel()
    36  	p := testPolicy(true, structs.RestartPolicyModeDelay)
    37  	rt := NewRestartTracker(p, structs.JobTypeService)
    38  	for i := 0; i < p.Attempts; i++ {
    39  		state, when := rt.SetExitResult(testExitResult(127)).GetState()
    40  		if state != structs.TaskRestarting {
    41  			t.Fatalf("NextRestart() returned %v, want %v", state, structs.TaskRestarting)
    42  		}
    43  		if !withinJitter(p.Delay, when) {
    44  			t.Fatalf("NextRestart() returned %v; want %v+jitter", when, p.Delay)
    45  		}
    46  	}
    47  
    48  	// Follow up restarts should cause delay.
    49  	for i := 0; i < 3; i++ {
    50  		state, when := rt.SetExitResult(testExitResult(127)).GetState()
    51  		if state != structs.TaskRestarting {
    52  			t.Fail()
    53  		}
    54  		if !(when > p.Delay && when <= p.Interval) {
    55  			t.Fatalf("NextRestart() returned %v; want > %v and <= %v", when, p.Delay, p.Interval)
    56  		}
    57  	}
    58  }
    59  
    60  func TestClient_RestartTracker_ModeFail(t *testing.T) {
    61  	t.Parallel()
    62  	p := testPolicy(true, structs.RestartPolicyModeFail)
    63  	rt := NewRestartTracker(p, structs.JobTypeSystem)
    64  	for i := 0; i < p.Attempts; i++ {
    65  		state, when := rt.SetExitResult(testExitResult(127)).GetState()
    66  		if state != structs.TaskRestarting {
    67  			t.Fatalf("NextRestart() returned %v, want %v", state, structs.TaskRestarting)
    68  		}
    69  		if !withinJitter(p.Delay, when) {
    70  			t.Fatalf("NextRestart() returned %v; want %v+jitter", when, p.Delay)
    71  		}
    72  	}
    73  
    74  	// Next restart should cause fail
    75  	if state, _ := rt.SetExitResult(testExitResult(127)).GetState(); state != structs.TaskNotRestarting {
    76  		t.Fatalf("NextRestart() returned %v; want %v", state, structs.TaskNotRestarting)
    77  	}
    78  }
    79  
    80  func TestClient_RestartTracker_NoRestartOnSuccess(t *testing.T) {
    81  	t.Parallel()
    82  	p := testPolicy(false, structs.RestartPolicyModeDelay)
    83  	rt := NewRestartTracker(p, structs.JobTypeBatch)
    84  	if state, _ := rt.SetExitResult(testExitResult(0)).GetState(); state != structs.TaskTerminated {
    85  		t.Fatalf("NextRestart() returned %v, expected: %v", state, structs.TaskTerminated)
    86  	}
    87  }
    88  
    89  func TestClient_RestartTracker_ZeroAttempts(t *testing.T) {
    90  	t.Parallel()
    91  	p := testPolicy(true, structs.RestartPolicyModeFail)
    92  	p.Attempts = 0
    93  
    94  	// Test with a non-zero exit code
    95  	rt := NewRestartTracker(p, structs.JobTypeService)
    96  	if state, when := rt.SetExitResult(testExitResult(1)).GetState(); state != structs.TaskNotRestarting {
    97  		t.Fatalf("expect no restart, got restart/delay: %v/%v", state, when)
    98  	}
    99  
   100  	// Even with a zero (successful) exit code non-batch jobs should exit
   101  	// with TaskNotRestarting
   102  	rt = NewRestartTracker(p, structs.JobTypeService)
   103  	if state, when := rt.SetExitResult(testExitResult(0)).GetState(); state != structs.TaskNotRestarting {
   104  		t.Fatalf("expect no restart, got restart/delay: %v/%v", state, when)
   105  	}
   106  
   107  	// Batch jobs with a zero exit code and 0 attempts *do* exit cleanly
   108  	// with Terminated
   109  	rt = NewRestartTracker(p, structs.JobTypeBatch)
   110  	if state, when := rt.SetExitResult(testExitResult(0)).GetState(); state != structs.TaskTerminated {
   111  		t.Fatalf("expect terminated, got restart/delay: %v/%v", state, when)
   112  	}
   113  
   114  	// Batch jobs with a non-zero exit code and 0 attempts exit with
   115  	// TaskNotRestarting
   116  	rt = NewRestartTracker(p, structs.JobTypeBatch)
   117  	if state, when := rt.SetExitResult(testExitResult(1)).GetState(); state != structs.TaskNotRestarting {
   118  		t.Fatalf("expect no restart, got restart/delay: %v/%v", state, when)
   119  	}
   120  }
   121  
   122  func TestClient_RestartTracker_TaskKilled(t *testing.T) {
   123  	t.Parallel()
   124  	p := testPolicy(true, structs.RestartPolicyModeFail)
   125  	p.Attempts = 0
   126  	rt := NewRestartTracker(p, structs.JobTypeService)
   127  	if state, when := rt.SetKilled().GetState(); state != structs.TaskKilled && when != 0 {
   128  		t.Fatalf("expect no restart; got %v %v", state, when)
   129  	}
   130  }
   131  
   132  func TestClient_RestartTracker_RestartTriggered(t *testing.T) {
   133  	t.Parallel()
   134  	p := testPolicy(true, structs.RestartPolicyModeFail)
   135  	p.Attempts = 0
   136  	rt := NewRestartTracker(p, structs.JobTypeService)
   137  	if state, when := rt.SetRestartTriggered(false).GetState(); state != structs.TaskRestarting && when != 0 {
   138  		t.Fatalf("expect restart immediately, got %v %v", state, when)
   139  	}
   140  }
   141  
   142  func TestClient_RestartTracker_RestartTriggered_Failure(t *testing.T) {
   143  	t.Parallel()
   144  	p := testPolicy(true, structs.RestartPolicyModeFail)
   145  	p.Attempts = 1
   146  	rt := NewRestartTracker(p, structs.JobTypeService)
   147  	if state, when := rt.SetRestartTriggered(true).GetState(); state != structs.TaskRestarting || when == 0 {
   148  		t.Fatalf("expect restart got %v %v", state, when)
   149  	}
   150  	if state, when := rt.SetRestartTriggered(true).GetState(); state != structs.TaskNotRestarting || when != 0 {
   151  		t.Fatalf("expect failed got %v %v", state, when)
   152  	}
   153  }
   154  
   155  func TestClient_RestartTracker_StartError_Recoverable_Fail(t *testing.T) {
   156  	t.Parallel()
   157  	p := testPolicy(true, structs.RestartPolicyModeFail)
   158  	rt := NewRestartTracker(p, structs.JobTypeSystem)
   159  	recErr := structs.NewRecoverableError(fmt.Errorf("foo"), true)
   160  	for i := 0; i < p.Attempts; i++ {
   161  		state, when := rt.SetStartError(recErr).GetState()
   162  		if state != structs.TaskRestarting {
   163  			t.Fatalf("NextRestart() returned %v, want %v", state, structs.TaskRestarting)
   164  		}
   165  		if !withinJitter(p.Delay, when) {
   166  			t.Fatalf("NextRestart() returned %v; want %v+jitter", when, p.Delay)
   167  		}
   168  	}
   169  
   170  	// Next restart should cause fail
   171  	if state, _ := rt.SetStartError(recErr).GetState(); state != structs.TaskNotRestarting {
   172  		t.Fatalf("NextRestart() returned %v; want %v", state, structs.TaskNotRestarting)
   173  	}
   174  }
   175  
   176  func TestClient_RestartTracker_StartError_Recoverable_Delay(t *testing.T) {
   177  	t.Parallel()
   178  	p := testPolicy(true, structs.RestartPolicyModeDelay)
   179  	rt := NewRestartTracker(p, structs.JobTypeSystem)
   180  	recErr := structs.NewRecoverableError(fmt.Errorf("foo"), true)
   181  	for i := 0; i < p.Attempts; i++ {
   182  		state, when := rt.SetStartError(recErr).GetState()
   183  		if state != structs.TaskRestarting {
   184  			t.Fatalf("NextRestart() returned %v, want %v", state, structs.TaskRestarting)
   185  		}
   186  		if !withinJitter(p.Delay, when) {
   187  			t.Fatalf("NextRestart() returned %v; want %v+jitter", when, p.Delay)
   188  		}
   189  	}
   190  
   191  	// Next restart should cause delay
   192  	state, when := rt.SetStartError(recErr).GetState()
   193  	if state != structs.TaskRestarting {
   194  		t.Fatalf("NextRestart() returned %v; want %v", state, structs.TaskRestarting)
   195  	}
   196  	if !(when > p.Delay && when <= p.Interval) {
   197  		t.Fatalf("NextRestart() returned %v; want > %v and <= %v", when, p.Delay, p.Interval)
   198  	}
   199  }