github.com/bigcommerce/nomad@v0.9.3-bc/client/allocrunner/taskrunner/restarts/restarts_test.go

package restarts

import (
	"fmt"
	"testing"
	"time"

	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/hashicorp/nomad/plugins/drivers"
)

func testPolicy(success bool, mode string) *structs.RestartPolicy {
	return &structs.RestartPolicy{
		Interval: 2 * time.Minute,
		Delay:    1 * time.Second,
		Attempts: 3,
		Mode:     mode,
	}
}

// withinJitter is a helper that returns whether the returned delay is within
// the jitter.
func withinJitter(expected, actual time.Duration) bool {
	return float64((actual.Nanoseconds()-expected.Nanoseconds())/
		expected.Nanoseconds()) <= jitter
}

func testExitResult(exit int) *drivers.ExitResult {
	return &drivers.ExitResult{
		ExitCode: exit,
	}
}

func TestClient_RestartTracker_ModeDelay(t *testing.T) {
	t.Parallel()
	p := testPolicy(true, structs.RestartPolicyModeDelay)
	rt := NewRestartTracker(p, structs.JobTypeService)
	for i := 0; i < p.Attempts; i++ {
		state, when := rt.SetExitResult(testExitResult(127)).GetState()
		if state != structs.TaskRestarting {
			t.Fatalf("NextRestart() returned %v, want %v", state, structs.TaskRestarting)
		}
		if !withinJitter(p.Delay, when) {
			t.Fatalf("NextRestart() returned %v; want %v+jitter", when, p.Delay)
		}
	}

	// Follow up restarts should cause delay.
	for i := 0; i < 3; i++ {
		state, when := rt.SetExitResult(testExitResult(127)).GetState()
		if state != structs.TaskRestarting {
			t.Fail()
		}
		if !(when > p.Delay && when <= p.Interval) {
			t.Fatalf("NextRestart() returned %v; want > %v and <= %v", when, p.Delay, p.Interval)
		}
	}
}

func TestClient_RestartTracker_ModeFail(t *testing.T) {
	t.Parallel()
	p := testPolicy(true, structs.RestartPolicyModeFail)
	rt := NewRestartTracker(p, structs.JobTypeSystem)
	for i := 0; i < p.Attempts; i++ {
		state, when := rt.SetExitResult(testExitResult(127)).GetState()
		if state != structs.TaskRestarting {
			t.Fatalf("NextRestart() returned %v, want %v", state, structs.TaskRestarting)
		}
		if !withinJitter(p.Delay, when) {
			t.Fatalf("NextRestart() returned %v; want %v+jitter", when, p.Delay)
		}
	}

	// Next restart should cause fail
	if state, _ := rt.SetExitResult(testExitResult(127)).GetState(); state != structs.TaskNotRestarting {
		t.Fatalf("NextRestart() returned %v; want %v", state, structs.TaskNotRestarting)
	}
}

func TestClient_RestartTracker_NoRestartOnSuccess(t *testing.T) {
	t.Parallel()
	p := testPolicy(false, structs.RestartPolicyModeDelay)
	rt := NewRestartTracker(p, structs.JobTypeBatch)
	if state, _ := rt.SetExitResult(testExitResult(0)).GetState(); state != structs.TaskTerminated {
		t.Fatalf("NextRestart() returned %v, expected: %v", state, structs.TaskTerminated)
	}
}

func TestClient_RestartTracker_ZeroAttempts(t *testing.T) {
	t.Parallel()
	p := testPolicy(true, structs.RestartPolicyModeFail)
	p.Attempts = 0

	// Test with a non-zero exit code
	rt := NewRestartTracker(p, structs.JobTypeService)
	if state, when := rt.SetExitResult(testExitResult(1)).GetState(); state != structs.TaskNotRestarting {
		t.Fatalf("expect no restart, got restart/delay: %v/%v", state, when)
	}

	// Even with a zero (successful) exit code non-batch jobs should exit
	// with TaskNotRestarting
	rt = NewRestartTracker(p, structs.JobTypeService)
	if state, when := rt.SetExitResult(testExitResult(0)).GetState(); state != structs.TaskNotRestarting {
		t.Fatalf("expect no restart, got restart/delay: %v/%v", state, when)
	}

	// Batch jobs with a zero exit code and 0 attempts *do* exit cleanly
	// with Terminated
	rt = NewRestartTracker(p, structs.JobTypeBatch)
	if state, when := rt.SetExitResult(testExitResult(0)).GetState(); state != structs.TaskTerminated {
		t.Fatalf("expect terminated, got restart/delay: %v/%v", state, when)
	}

	// Batch jobs with a non-zero exit code and 0 attempts exit with
	// TaskNotRestarting
	rt = NewRestartTracker(p, structs.JobTypeBatch)
	if state, when := rt.SetExitResult(testExitResult(1)).GetState(); state != structs.TaskNotRestarting {
		t.Fatalf("expect no restart, got restart/delay: %v/%v", state, when)
	}
}

func TestClient_RestartTracker_TaskKilled(t *testing.T) {
	t.Parallel()
	p := testPolicy(true, structs.RestartPolicyModeFail)
	p.Attempts = 0
	rt := NewRestartTracker(p, structs.JobTypeService)
	if state, when := rt.SetKilled().GetState(); state != structs.TaskKilled && when != 0 {
		t.Fatalf("expect no restart; got %v %v", state, when)
	}
}

func TestClient_RestartTracker_RestartTriggered(t *testing.T) {
	t.Parallel()
	p := testPolicy(true, structs.RestartPolicyModeFail)
	p.Attempts = 0
	rt := NewRestartTracker(p, structs.JobTypeService)
	if state, when := rt.SetRestartTriggered(false).GetState(); state != structs.TaskRestarting && when != 0 {
		t.Fatalf("expect restart immediately, got %v %v", state, when)
	}
}

func TestClient_RestartTracker_RestartTriggered_Failure(t *testing.T) {
	t.Parallel()
	p := testPolicy(true, structs.RestartPolicyModeFail)
	p.Attempts = 1
	rt := NewRestartTracker(p, structs.JobTypeService)
	if state, when := rt.SetRestartTriggered(true).GetState(); state != structs.TaskRestarting || when == 0 {
		t.Fatalf("expect restart got %v %v", state, when)
	}
	if state, when := rt.SetRestartTriggered(true).GetState(); state != structs.TaskNotRestarting || when != 0 {
		t.Fatalf("expect failed got %v %v", state, when)
	}
}

func TestClient_RestartTracker_StartError_Recoverable_Fail(t *testing.T) {
	t.Parallel()
	p := testPolicy(true, structs.RestartPolicyModeFail)
	rt := NewRestartTracker(p, structs.JobTypeSystem)
	recErr := structs.NewRecoverableError(fmt.Errorf("foo"), true)
	for i := 0; i < p.Attempts; i++ {
		state, when := rt.SetStartError(recErr).GetState()
		if state != structs.TaskRestarting {
			t.Fatalf("NextRestart() returned %v, want %v", state, structs.TaskRestarting)
		}
		if !withinJitter(p.Delay, when) {
			t.Fatalf("NextRestart() returned %v; want %v+jitter", when, p.Delay)
		}
	}

	// Next restart should cause fail
	if state, _ := rt.SetStartError(recErr).GetState(); state != structs.TaskNotRestarting {
		t.Fatalf("NextRestart() returned %v; want %v", state, structs.TaskNotRestarting)
	}
}

func TestClient_RestartTracker_StartError_Recoverable_Delay(t *testing.T) {
	t.Parallel()
	p := testPolicy(true, structs.RestartPolicyModeDelay)
	rt := NewRestartTracker(p, structs.JobTypeSystem)
	recErr := structs.NewRecoverableError(fmt.Errorf("foo"), true)
	for i := 0; i < p.Attempts; i++ {
		state, when := rt.SetStartError(recErr).GetState()
		if state != structs.TaskRestarting {
			t.Fatalf("NextRestart() returned %v, want %v", state, structs.TaskRestarting)
		}
		if !withinJitter(p.Delay, when) {
			t.Fatalf("NextRestart() returned %v; want %v+jitter", when, p.Delay)
		}
	}

	// Next restart should cause delay
	state, when := rt.SetStartError(recErr).GetState()
	if state != structs.TaskRestarting {
		t.Fatalf("NextRestart() returned %v; want %v", state, structs.TaskRestarting)
	}
	if !(when > p.Delay && when <= p.Interval) {
		t.Fatalf("NextRestart() returned %v; want > %v and <= %v", when, p.Delay, p.Interval)
	}
}