github.com/iqoqo/nomad@v0.11.3-0.20200911112621-d7021c74d101/client/allocrunner/taskrunner/restarts/restarts_test.go

package restarts

import (
	"fmt"
	"testing"
	"time"

	"github.com/stretchr/testify/require"

	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/hashicorp/nomad/plugins/drivers"
)

func testPolicy(success bool, mode string) *structs.RestartPolicy {
	return &structs.RestartPolicy{
		Interval: 2 * time.Minute,
		Delay:    1 * time.Second,
		Attempts: 3,
		Mode:     mode,
	}
}

// withinJitter is a helper that returns whether the returned delay is within
// the jitter fraction of the expected delay (jitter is the package-level
// value used by the tracker).
func withinJitter(expected, actual time.Duration) bool {
	// Use floating-point division; integer division would truncate the ratio
	// to zero for any delay below twice the expected value.
	return float64(actual.Nanoseconds()-expected.Nanoseconds())/
		float64(expected.Nanoseconds()) <= jitter
}

func testExitResult(exit int) *drivers.ExitResult {
	return &drivers.ExitResult{
		ExitCode: exit,
	}
}

func TestClient_RestartTracker_ModeDelay(t *testing.T) {
	t.Parallel()
	p := testPolicy(true, structs.RestartPolicyModeDelay)
	rt := NewRestartTracker(p, structs.JobTypeService, nil)
	for i := 0; i < p.Attempts; i++ {
		state, when := rt.SetExitResult(testExitResult(127)).GetState()
		if state != structs.TaskRestarting {
			t.Fatalf("NextRestart() returned %v, want %v", state, structs.TaskRestarting)
		}
		if !withinJitter(p.Delay, when) {
			t.Fatalf("NextRestart() returned %v; want %v+jitter", when, p.Delay)
		}
	}

	// Follow-up restarts should be delayed until the interval elapses.
	for i := 0; i < 3; i++ {
		state, when := rt.SetExitResult(testExitResult(127)).GetState()
		if state != structs.TaskRestarting {
			t.Fatalf("NextRestart() returned %v, want %v", state, structs.TaskRestarting)
		}
		if !(when > p.Delay && when <= p.Interval) {
			t.Fatalf("NextRestart() returned %v; want > %v and <= %v", when, p.Delay, p.Interval)
		}
	}
}

func TestClient_RestartTracker_ModeFail(t *testing.T) {
	t.Parallel()
	p := testPolicy(true, structs.RestartPolicyModeFail)
	rt := NewRestartTracker(p, structs.JobTypeSystem, nil)
	for i := 0; i < p.Attempts; i++ {
		state, when := rt.SetExitResult(testExitResult(127)).GetState()
		if state != structs.TaskRestarting {
			t.Fatalf("NextRestart() returned %v, want %v", state, structs.TaskRestarting)
		}
		if !withinJitter(p.Delay, when) {
			t.Fatalf("NextRestart() returned %v; want %v+jitter", when, p.Delay)
		}
	}

	// The next restart should exhaust the attempts and fail the task.
	if state, _ := rt.SetExitResult(testExitResult(127)).GetState(); state != structs.TaskNotRestarting {
		t.Fatalf("NextRestart() returned %v; want %v", state, structs.TaskNotRestarting)
	}
}

func TestClient_RestartTracker_NoRestartOnSuccess(t *testing.T) {
	t.Parallel()
	p := testPolicy(false, structs.RestartPolicyModeDelay)
	rt := NewRestartTracker(p, structs.JobTypeBatch, nil)
	if state, _ := rt.SetExitResult(testExitResult(0)).GetState(); state != structs.TaskTerminated {
		t.Fatalf("NextRestart() returned %v, expected: %v", state, structs.TaskTerminated)
	}
}

func TestClient_RestartTracker_ZeroAttempts(t *testing.T) {
	t.Parallel()
	p := testPolicy(true, structs.RestartPolicyModeFail)
	p.Attempts = 0

	// Test with a non-zero exit code.
	rt := NewRestartTracker(p, structs.JobTypeService, nil)
	if state, when := rt.SetExitResult(testExitResult(1)).GetState(); state != structs.TaskNotRestarting {
		t.Fatalf("expect no restart, got restart/delay: %v/%v", state, when)
	}

	// Even with a zero (successful) exit code, non-batch jobs should exit
	// with TaskNotRestarting.
	rt = NewRestartTracker(p, structs.JobTypeService, nil)
	if state, when := rt.SetExitResult(testExitResult(0)).GetState(); state != structs.TaskNotRestarting {
		t.Fatalf("expect no restart, got restart/delay: %v/%v", state, when)
	}

	// Batch jobs with a zero exit code and 0 attempts *do* exit cleanly
	// with Terminated.
	rt = NewRestartTracker(p, structs.JobTypeBatch, nil)
	if state, when := rt.SetExitResult(testExitResult(0)).GetState(); state != structs.TaskTerminated {
		t.Fatalf("expect terminated, got restart/delay: %v/%v", state, when)
	}

	// Batch jobs with a non-zero exit code and 0 attempts exit with
	// TaskNotRestarting.
	rt = NewRestartTracker(p, structs.JobTypeBatch, nil)
	if state, when := rt.SetExitResult(testExitResult(1)).GetState(); state != structs.TaskNotRestarting {
		t.Fatalf("expect no restart, got restart/delay: %v/%v", state, when)
	}
}
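// TestClient_RestartTracker_WithinJitter is a minimal sketch, not part of the
// original file, that exercises the withinJitter helper above. Its only
// assumption is that the package-level jitter value is a fraction below 1.0.
func TestClient_RestartTracker_WithinJitter(t *testing.T) {
	t.Parallel()
	// A delay exactly equal to the expected delay is always within jitter.
	if !withinJitter(time.Second, time.Second) {
		t.Fatalf("expected an exact delay to be within jitter")
	}
	// A delay several times the expected delay is outside jitter as long as
	// the jitter fraction is below 1.0.
	if withinJitter(time.Second, 5*time.Second) {
		t.Fatalf("expected a 5x delay to be outside jitter")
	}
}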
func TestClient_RestartTracker_TaskKilled(t *testing.T) {
	t.Parallel()
	p := testPolicy(true, structs.RestartPolicyModeFail)
	p.Attempts = 0
	rt := NewRestartTracker(p, structs.JobTypeService, nil)
	if state, when := rt.SetKilled().GetState(); state != structs.TaskKilled || when != 0 {
		t.Fatalf("expect no restart; got %v %v", state, when)
	}
}

func TestClient_RestartTracker_RestartTriggered(t *testing.T) {
	t.Parallel()
	p := testPolicy(true, structs.RestartPolicyModeFail)
	p.Attempts = 0
	rt := NewRestartTracker(p, structs.JobTypeService, nil)
	if state, when := rt.SetRestartTriggered(false).GetState(); state != structs.TaskRestarting || when != 0 {
		t.Fatalf("expect restart immediately, got %v %v", state, when)
	}
}

func TestClient_RestartTracker_RestartTriggered_Failure(t *testing.T) {
	t.Parallel()
	p := testPolicy(true, structs.RestartPolicyModeFail)
	p.Attempts = 1
	rt := NewRestartTracker(p, structs.JobTypeService, nil)
	if state, when := rt.SetRestartTriggered(true).GetState(); state != structs.TaskRestarting || when == 0 {
		t.Fatalf("expect restart got %v %v", state, when)
	}
	if state, when := rt.SetRestartTriggered(true).GetState(); state != structs.TaskNotRestarting || when != 0 {
		t.Fatalf("expect failed got %v %v", state, when)
	}
}

func TestClient_RestartTracker_StartError_Recoverable_Fail(t *testing.T) {
	t.Parallel()
	p := testPolicy(true, structs.RestartPolicyModeFail)
	rt := NewRestartTracker(p, structs.JobTypeSystem, nil)
	recErr := structs.NewRecoverableError(fmt.Errorf("foo"), true)
	for i := 0; i < p.Attempts; i++ {
		state, when := rt.SetStartError(recErr).GetState()
		if state != structs.TaskRestarting {
			t.Fatalf("NextRestart() returned %v, want %v", state, structs.TaskRestarting)
		}
		if !withinJitter(p.Delay, when) {
			t.Fatalf("NextRestart() returned %v; want %v+jitter", when, p.Delay)
		}
	}

	// The next restart should exhaust the attempts and fail the task.
	if state, _ := rt.SetStartError(recErr).GetState(); state != structs.TaskNotRestarting {
		t.Fatalf("NextRestart() returned %v; want %v", state, structs.TaskNotRestarting)
	}
}

func TestClient_RestartTracker_StartError_Recoverable_Delay(t *testing.T) {
	t.Parallel()
	p := testPolicy(true, structs.RestartPolicyModeDelay)
	rt := NewRestartTracker(p, structs.JobTypeSystem, nil)
	recErr := structs.NewRecoverableError(fmt.Errorf("foo"), true)
	for i := 0; i < p.Attempts; i++ {
		state, when := rt.SetStartError(recErr).GetState()
		if state != structs.TaskRestarting {
			t.Fatalf("NextRestart() returned %v, want %v", state, structs.TaskRestarting)
		}
		if !withinJitter(p.Delay, when) {
			t.Fatalf("NextRestart() returned %v; want %v+jitter", when, p.Delay)
		}
	}

	// The next restart should be delayed until the interval elapses.
	state, when := rt.SetStartError(recErr).GetState()
	if state != structs.TaskRestarting {
		t.Fatalf("NextRestart() returned %v; want %v", state, structs.TaskRestarting)
	}
	if !(when > p.Delay && when <= p.Interval) {
		t.Fatalf("NextRestart() returned %v; want > %v and <= %v", when, p.Delay, p.Interval)
	}
}
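// TestClient_RestartTracker_StartError_Unrecoverable is a hedged sketch, not
// part of the original file. It assumes the tracker treats a non-recoverable
// start error as fatal and reports TaskNotRestarting regardless of the
// remaining attempts; if restarts.go handles unrecoverable errors differently
// in this version, adjust or drop this test.
func TestClient_RestartTracker_StartError_Unrecoverable(t *testing.T) {
	t.Parallel()
	p := testPolicy(true, structs.RestartPolicyModeFail)
	rt := NewRestartTracker(p, structs.JobTypeService, nil)
	unrecErr := structs.NewRecoverableError(fmt.Errorf("foo"), false)
	if state, _ := rt.SetStartError(unrecErr).GetState(); state != structs.TaskNotRestarting {
		t.Fatalf("NextRestart() returned %v; want %v", state, structs.TaskNotRestarting)
	}
}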
func TestClient_RestartTracker_Lifecycle(t *testing.T) {
	t.Parallel()

	testCases := []struct {
		name                   string
		taskLifecycleConfig    *structs.TaskLifecycleConfig
		jobType                string
		shouldRestartOnSuccess bool
		shouldRestartOnFailure bool
	}{
		{
			name:                   "system job no lifecycle",
			taskLifecycleConfig:    nil,
			jobType:                structs.JobTypeSystem,
			shouldRestartOnSuccess: true,
			shouldRestartOnFailure: true,
		},
		{
			name:                   "service job no lifecycle",
			taskLifecycleConfig:    nil,
			jobType:                structs.JobTypeService,
			shouldRestartOnSuccess: true,
			shouldRestartOnFailure: true,
		},
		{
			name:                   "batch job no lifecycle",
			taskLifecycleConfig:    nil,
			jobType:                structs.JobTypeBatch,
			shouldRestartOnSuccess: false,
			shouldRestartOnFailure: true,
		},
		{
			name: "system job w/ ephemeral prestart hook",
			taskLifecycleConfig: &structs.TaskLifecycleConfig{
				Hook:    structs.TaskLifecycleHookPrestart,
				Sidecar: false,
			},
			jobType:                structs.JobTypeSystem,
			shouldRestartOnSuccess: false,
			shouldRestartOnFailure: true,
		},
		{
			name: "system job w/ sidecar prestart hook",
			taskLifecycleConfig: &structs.TaskLifecycleConfig{
				Hook:    structs.TaskLifecycleHookPrestart,
				Sidecar: true,
			},
			jobType:                structs.JobTypeSystem,
			shouldRestartOnSuccess: true,
			shouldRestartOnFailure: true,
		},
		{
			name: "service job w/ ephemeral prestart hook",
			taskLifecycleConfig: &structs.TaskLifecycleConfig{
				Hook:    structs.TaskLifecycleHookPrestart,
				Sidecar: false,
			},
			jobType:                structs.JobTypeService,
			shouldRestartOnSuccess: false,
			shouldRestartOnFailure: true,
		},
		{
			name: "service job w/ sidecar prestart hook",
			taskLifecycleConfig: &structs.TaskLifecycleConfig{
				Hook:    structs.TaskLifecycleHookPrestart,
				Sidecar: true,
			},
			jobType:                structs.JobTypeService,
			shouldRestartOnSuccess: true,
			shouldRestartOnFailure: true,
		},
		{
			name: "batch job w/ ephemeral prestart hook",
			taskLifecycleConfig: &structs.TaskLifecycleConfig{
				Hook:    structs.TaskLifecycleHookPrestart,
				Sidecar: false,
			},
			jobType:                structs.JobTypeBatch,
			shouldRestartOnSuccess: false,
			shouldRestartOnFailure: true,
		},
		{
			name: "batch job w/ sidecar prestart hook",
			taskLifecycleConfig: &structs.TaskLifecycleConfig{
				Hook:    structs.TaskLifecycleHookPrestart,
				Sidecar: true,
			},
			jobType:                structs.JobTypeBatch,
			shouldRestartOnSuccess: true,
			shouldRestartOnFailure: true,
		},
	}

	for _, testCase := range testCases {
		t.Run(testCase.name, func(t *testing.T) {
			restartPolicy := testPolicy(true, structs.RestartPolicyModeFail)
			restartTracker := NewRestartTracker(restartPolicy, testCase.jobType, testCase.taskLifecycleConfig)

			state, _ := restartTracker.SetExitResult(testExitResult(0)).GetState()
			if !testCase.shouldRestartOnSuccess {
				require.Equal(t, structs.TaskTerminated, state)
			} else {
				require.Equal(t, structs.TaskRestarting, state)
			}

			state, _ = restartTracker.SetExitResult(testExitResult(127)).GetState()
			if !testCase.shouldRestartOnFailure {
				require.Equal(t, structs.TaskTerminated, state)
			} else {
				require.Equal(t, structs.TaskRestarting, state)
			}
		})
	}
}
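// ExampleNewRestartTracker is an illustrative sketch, not part of the original
// file, showing the call pattern the tests above exercise: build a tracker
// from a restart policy, record an exit result, and read back the next task
// state and restart delay. The delay includes random jitter, so no
// deterministic Output comment is provided and the example is only
// compile-checked by `go test`.
func ExampleNewRestartTracker() {
	policy := &structs.RestartPolicy{
		Interval: 2 * time.Minute,
		Delay:    1 * time.Second,
		Attempts: 3,
		Mode:     structs.RestartPolicyModeFail,
	}
	rt := NewRestartTracker(policy, structs.JobTypeService, nil)
	state, delay := rt.SetExitResult(testExitResult(1)).GetState()
	fmt.Println(state, delay > 0)
}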