github.com/iqoqo/nomad@v0.11.3-0.20200911112621-d7021c74d101/client/allocrunner/taskrunner/restarts/restarts_test.go (about)

     1  package restarts
     2  
     3  import (
     4  	"fmt"
     5  	"testing"
     6  	"time"
     7  
     8  	"github.com/stretchr/testify/require"
     9  
    10  	"github.com/hashicorp/nomad/nomad/structs"
    11  	"github.com/hashicorp/nomad/plugins/drivers"
    12  )
    13  
    14  func testPolicy(success bool, mode string) *structs.RestartPolicy {
    15  	return &structs.RestartPolicy{
    16  		Interval: 2 * time.Minute,
    17  		Delay:    1 * time.Second,
    18  		Attempts: 3,
    19  		Mode:     mode,
    20  	}
    21  }
    22  
    23  // withinJitter is a helper that returns whether the returned delay is within
    24  // the jitter.
    25  func withinJitter(expected, actual time.Duration) bool {
    26  	return float64((actual.Nanoseconds()-expected.Nanoseconds())/
    27  		expected.Nanoseconds()) <= jitter
    28  }
    29  
    30  func testExitResult(exit int) *drivers.ExitResult {
    31  	return &drivers.ExitResult{
    32  		ExitCode: exit,
    33  	}
    34  }
    35  
    36  func TestClient_RestartTracker_ModeDelay(t *testing.T) {
    37  	t.Parallel()
    38  	p := testPolicy(true, structs.RestartPolicyModeDelay)
    39  	rt := NewRestartTracker(p, structs.JobTypeService, nil)
    40  	for i := 0; i < p.Attempts; i++ {
    41  		state, when := rt.SetExitResult(testExitResult(127)).GetState()
    42  		if state != structs.TaskRestarting {
    43  			t.Fatalf("NextRestart() returned %v, want %v", state, structs.TaskRestarting)
    44  		}
    45  		if !withinJitter(p.Delay, when) {
    46  			t.Fatalf("NextRestart() returned %v; want %v+jitter", when, p.Delay)
    47  		}
    48  	}
    49  
    50  	// Follow up restarts should cause delay.
    51  	for i := 0; i < 3; i++ {
    52  		state, when := rt.SetExitResult(testExitResult(127)).GetState()
    53  		if state != structs.TaskRestarting {
    54  			t.Fail()
    55  		}
    56  		if !(when > p.Delay && when <= p.Interval) {
    57  			t.Fatalf("NextRestart() returned %v; want > %v and <= %v", when, p.Delay, p.Interval)
    58  		}
    59  	}
    60  }
    61  
    62  func TestClient_RestartTracker_ModeFail(t *testing.T) {
    63  	t.Parallel()
    64  	p := testPolicy(true, structs.RestartPolicyModeFail)
    65  	rt := NewRestartTracker(p, structs.JobTypeSystem, nil)
    66  	for i := 0; i < p.Attempts; i++ {
    67  		state, when := rt.SetExitResult(testExitResult(127)).GetState()
    68  		if state != structs.TaskRestarting {
    69  			t.Fatalf("NextRestart() returned %v, want %v", state, structs.TaskRestarting)
    70  		}
    71  		if !withinJitter(p.Delay, when) {
    72  			t.Fatalf("NextRestart() returned %v; want %v+jitter", when, p.Delay)
    73  		}
    74  	}
    75  
    76  	// Next restart should cause fail
    77  	if state, _ := rt.SetExitResult(testExitResult(127)).GetState(); state != structs.TaskNotRestarting {
    78  		t.Fatalf("NextRestart() returned %v; want %v", state, structs.TaskNotRestarting)
    79  	}
    80  }
    81  
    82  func TestClient_RestartTracker_NoRestartOnSuccess(t *testing.T) {
    83  	t.Parallel()
    84  	p := testPolicy(false, structs.RestartPolicyModeDelay)
    85  	rt := NewRestartTracker(p, structs.JobTypeBatch, nil)
    86  	if state, _ := rt.SetExitResult(testExitResult(0)).GetState(); state != structs.TaskTerminated {
    87  		t.Fatalf("NextRestart() returned %v, expected: %v", state, structs.TaskTerminated)
    88  	}
    89  }
    90  
    91  func TestClient_RestartTracker_ZeroAttempts(t *testing.T) {
    92  	t.Parallel()
    93  	p := testPolicy(true, structs.RestartPolicyModeFail)
    94  	p.Attempts = 0
    95  
    96  	// Test with a non-zero exit code
    97  	rt := NewRestartTracker(p, structs.JobTypeService, nil)
    98  	if state, when := rt.SetExitResult(testExitResult(1)).GetState(); state != structs.TaskNotRestarting {
    99  		t.Fatalf("expect no restart, got restart/delay: %v/%v", state, when)
   100  	}
   101  
   102  	// Even with a zero (successful) exit code non-batch jobs should exit
   103  	// with TaskNotRestarting
   104  	rt = NewRestartTracker(p, structs.JobTypeService, nil)
   105  	if state, when := rt.SetExitResult(testExitResult(0)).GetState(); state != structs.TaskNotRestarting {
   106  		t.Fatalf("expect no restart, got restart/delay: %v/%v", state, when)
   107  	}
   108  
   109  	// Batch jobs with a zero exit code and 0 attempts *do* exit cleanly
   110  	// with Terminated
   111  	rt = NewRestartTracker(p, structs.JobTypeBatch, nil)
   112  	if state, when := rt.SetExitResult(testExitResult(0)).GetState(); state != structs.TaskTerminated {
   113  		t.Fatalf("expect terminated, got restart/delay: %v/%v", state, when)
   114  	}
   115  
   116  	// Batch jobs with a non-zero exit code and 0 attempts exit with
   117  	// TaskNotRestarting
   118  	rt = NewRestartTracker(p, structs.JobTypeBatch, nil)
   119  	if state, when := rt.SetExitResult(testExitResult(1)).GetState(); state != structs.TaskNotRestarting {
   120  		t.Fatalf("expect no restart, got restart/delay: %v/%v", state, when)
   121  	}
   122  }
   123  
   124  func TestClient_RestartTracker_TaskKilled(t *testing.T) {
   125  	t.Parallel()
   126  	p := testPolicy(true, structs.RestartPolicyModeFail)
   127  	p.Attempts = 0
   128  	rt := NewRestartTracker(p, structs.JobTypeService, nil)
   129  	if state, when := rt.SetKilled().GetState(); state != structs.TaskKilled && when != 0 {
   130  		t.Fatalf("expect no restart; got %v %v", state, when)
   131  	}
   132  }
   133  
   134  func TestClient_RestartTracker_RestartTriggered(t *testing.T) {
   135  	t.Parallel()
   136  	p := testPolicy(true, structs.RestartPolicyModeFail)
   137  	p.Attempts = 0
   138  	rt := NewRestartTracker(p, structs.JobTypeService, nil)
   139  	if state, when := rt.SetRestartTriggered(false).GetState(); state != structs.TaskRestarting && when != 0 {
   140  		t.Fatalf("expect restart immediately, got %v %v", state, when)
   141  	}
   142  }
   143  
   144  func TestClient_RestartTracker_RestartTriggered_Failure(t *testing.T) {
   145  	t.Parallel()
   146  	p := testPolicy(true, structs.RestartPolicyModeFail)
   147  	p.Attempts = 1
   148  	rt := NewRestartTracker(p, structs.JobTypeService, nil)
   149  	if state, when := rt.SetRestartTriggered(true).GetState(); state != structs.TaskRestarting || when == 0 {
   150  		t.Fatalf("expect restart got %v %v", state, when)
   151  	}
   152  	if state, when := rt.SetRestartTriggered(true).GetState(); state != structs.TaskNotRestarting || when != 0 {
   153  		t.Fatalf("expect failed got %v %v", state, when)
   154  	}
   155  }
   156  
   157  func TestClient_RestartTracker_StartError_Recoverable_Fail(t *testing.T) {
   158  	t.Parallel()
   159  	p := testPolicy(true, structs.RestartPolicyModeFail)
   160  	rt := NewRestartTracker(p, structs.JobTypeSystem, nil)
   161  	recErr := structs.NewRecoverableError(fmt.Errorf("foo"), true)
   162  	for i := 0; i < p.Attempts; i++ {
   163  		state, when := rt.SetStartError(recErr).GetState()
   164  		if state != structs.TaskRestarting {
   165  			t.Fatalf("NextRestart() returned %v, want %v", state, structs.TaskRestarting)
   166  		}
   167  		if !withinJitter(p.Delay, when) {
   168  			t.Fatalf("NextRestart() returned %v; want %v+jitter", when, p.Delay)
   169  		}
   170  	}
   171  
   172  	// Next restart should cause fail
   173  	if state, _ := rt.SetStartError(recErr).GetState(); state != structs.TaskNotRestarting {
   174  		t.Fatalf("NextRestart() returned %v; want %v", state, structs.TaskNotRestarting)
   175  	}
   176  }
   177  
   178  func TestClient_RestartTracker_StartError_Recoverable_Delay(t *testing.T) {
   179  	t.Parallel()
   180  	p := testPolicy(true, structs.RestartPolicyModeDelay)
   181  	rt := NewRestartTracker(p, structs.JobTypeSystem, nil)
   182  	recErr := structs.NewRecoverableError(fmt.Errorf("foo"), true)
   183  	for i := 0; i < p.Attempts; i++ {
   184  		state, when := rt.SetStartError(recErr).GetState()
   185  		if state != structs.TaskRestarting {
   186  			t.Fatalf("NextRestart() returned %v, want %v", state, structs.TaskRestarting)
   187  		}
   188  		if !withinJitter(p.Delay, when) {
   189  			t.Fatalf("NextRestart() returned %v; want %v+jitter", when, p.Delay)
   190  		}
   191  	}
   192  
   193  	// Next restart should cause delay
   194  	state, when := rt.SetStartError(recErr).GetState()
   195  	if state != structs.TaskRestarting {
   196  		t.Fatalf("NextRestart() returned %v; want %v", state, structs.TaskRestarting)
   197  	}
   198  	if !(when > p.Delay && when <= p.Interval) {
   199  		t.Fatalf("NextRestart() returned %v; want > %v and <= %v", when, p.Delay, p.Interval)
   200  	}
   201  }
   202  
   203  func TestClient_RestartTracker_Lifecycle(t *testing.T) {
   204  	t.Parallel()
   205  
   206  	testCase := []struct {
   207  		name                   string
   208  		taskLifecycleConfig    *structs.TaskLifecycleConfig
   209  		jobType                string
   210  		shouldRestartOnSuccess bool
   211  		shouldRestartOnFailure bool
   212  	}{
   213  		{
   214  			name:                   "system job no lifecycle",
   215  			taskLifecycleConfig:    nil,
   216  			jobType:                structs.JobTypeSystem,
   217  			shouldRestartOnSuccess: true,
   218  			shouldRestartOnFailure: true,
   219  		},
   220  		{
   221  			name:                   "service job no lifecycle",
   222  			taskLifecycleConfig:    nil,
   223  			jobType:                structs.JobTypeService,
   224  			shouldRestartOnSuccess: true,
   225  			shouldRestartOnFailure: true,
   226  		},
   227  		{
   228  			name:                   "batch job no lifecycle",
   229  			taskLifecycleConfig:    nil,
   230  			jobType:                structs.JobTypeBatch,
   231  			shouldRestartOnSuccess: false,
   232  			shouldRestartOnFailure: true,
   233  		},
   234  		{
   235  			name: "system job w/ ephemeral prestart hook",
   236  			taskLifecycleConfig: &structs.TaskLifecycleConfig{
   237  				Hook:    structs.TaskLifecycleHookPrestart,
   238  				Sidecar: false,
   239  			},
   240  			jobType:                structs.JobTypeSystem,
   241  			shouldRestartOnSuccess: false,
   242  			shouldRestartOnFailure: true,
   243  		},
   244  		{
   245  			name: "system job w/ sidecar prestart hook",
   246  			taskLifecycleConfig: &structs.TaskLifecycleConfig{
   247  				Hook:    structs.TaskLifecycleHookPrestart,
   248  				Sidecar: true,
   249  			},
   250  			jobType:                structs.JobTypeSystem,
   251  			shouldRestartOnSuccess: true,
   252  			shouldRestartOnFailure: true,
   253  		},
   254  		{
   255  			name: "service job w/ ephemeral prestart hook",
   256  			taskLifecycleConfig: &structs.TaskLifecycleConfig{
   257  				Hook:    structs.TaskLifecycleHookPrestart,
   258  				Sidecar: false,
   259  			},
   260  			jobType:                structs.JobTypeService,
   261  			shouldRestartOnSuccess: false,
   262  			shouldRestartOnFailure: true,
   263  		},
   264  		{
   265  			name: "service job w/ sidecar prestart hook",
   266  			taskLifecycleConfig: &structs.TaskLifecycleConfig{
   267  				Hook:    structs.TaskLifecycleHookPrestart,
   268  				Sidecar: true,
   269  			},
   270  			jobType:                structs.JobTypeService,
   271  			shouldRestartOnSuccess: true,
   272  			shouldRestartOnFailure: true,
   273  		},
   274  		{
   275  			name: "batch job w/ ephemeral prestart hook",
   276  			taskLifecycleConfig: &structs.TaskLifecycleConfig{
   277  				Hook:    structs.TaskLifecycleHookPrestart,
   278  				Sidecar: false,
   279  			},
   280  			jobType:                structs.JobTypeService,
   281  			shouldRestartOnSuccess: false,
   282  			shouldRestartOnFailure: true,
   283  		},
   284  		{
   285  			name: "batch job w/ sidecar prestart hook",
   286  			taskLifecycleConfig: &structs.TaskLifecycleConfig{
   287  				Hook:    structs.TaskLifecycleHookPrestart,
   288  				Sidecar: true,
   289  			},
   290  			jobType:                structs.JobTypeBatch,
   291  			shouldRestartOnSuccess: true,
   292  			shouldRestartOnFailure: true,
   293  		},
   294  	}
   295  
   296  	for _, testCase := range testCase {
   297  		t.Run(testCase.name, func(t *testing.T) {
   298  			restartPolicy := testPolicy(true, testCase.jobType)
   299  			restartTracker := NewRestartTracker(restartPolicy, testCase.jobType, testCase.taskLifecycleConfig)
   300  
   301  			state, _ := restartTracker.SetExitResult(testExitResult(0)).GetState()
   302  			if !testCase.shouldRestartOnSuccess {
   303  				require.Equal(t, structs.TaskTerminated, state)
   304  			} else {
   305  				require.Equal(t, structs.TaskRestarting, state)
   306  			}
   307  
   308  			state, _ = restartTracker.SetExitResult(testExitResult(127)).GetState()
   309  			if !testCase.shouldRestartOnFailure {
   310  				require.Equal(t, structs.TaskTerminated, state)
   311  			} else {
   312  				require.Equal(t, structs.TaskRestarting, state)
   313  			}
   314  		})
   315  	}
   316  }