github.com/emate/nomad@v0.8.2-wo-binpacking/e2e/rescheduling/server_side_restarts_test.go (about) 1 package rescheduling 2 3 import ( 4 "sort" 5 "time" 6 7 "github.com/hashicorp/nomad/api" 8 "github.com/hashicorp/nomad/jobspec" 9 . "github.com/onsi/ginkgo" 10 . "github.com/onsi/gomega" 11 12 "github.com/hashicorp/nomad/helper" 13 "github.com/hashicorp/nomad/helper/uuid" 14 "github.com/hashicorp/nomad/nomad/structs" 15 ) 16 17 var _ = Describe("Server Side Restart Tests", func() { 18 19 var ( 20 jobs *api.Jobs 21 system *api.System 22 job *api.Job 23 err error 24 specFile string 25 26 // allocStatuses is a helper function that pulls 27 // out client statuses from a slice of allocs 28 allocStatuses = func() []string { 29 allocs, _, err := jobs.Allocations(*job.ID, false, nil) 30 Expect(err).ShouldNot(HaveOccurred()) 31 var ret []string 32 for _, a := range allocs { 33 ret = append(ret, a.ClientStatus) 34 } 35 sort.Strings(ret) 36 return ret 37 } 38 39 // allocStatusesRescheduled is a helper function that pulls 40 // out client statuses only from rescheduled allocs 41 allocStatusesRescheduled = func() []string { 42 allocs, _, err := jobs.Allocations(*job.ID, false, nil) 43 Expect(err).ShouldNot(HaveOccurred()) 44 var ret []string 45 for _, a := range allocs { 46 if (a.RescheduleTracker != nil && len(a.RescheduleTracker.Events) > 0) || a.FollowupEvalID != "" { 47 ret = append(ret, a.ClientStatus) 48 } 49 } 50 return ret 51 } 52 53 // deploymentStatus is a helper function that returns deployment status of all deployments 54 // sorted by time 55 deploymentStatus = func() []string { 56 deploys, _, err := jobs.Deployments(*job.ID, nil) 57 Expect(err).ShouldNot(HaveOccurred()) 58 var ret []string 59 sort.Slice(deploys, func(i, j int) bool { 60 return deploys[i].CreateIndex < deploys[j].CreateIndex 61 }) 62 for _, d := range deploys { 63 ret = append(ret, d.Status) 64 } 65 return ret 66 } 67 ) 68 69 BeforeSuite(func() { 70 conf := api.DefaultConfig() 71 72 // Create client 73 client, err := api.NewClient(conf) 74 Expect(err).ShouldNot(HaveOccurred()) 75 jobs = client.Jobs() 76 system = client.System() 77 }) 78 79 JustBeforeEach(func() { 80 job, err = jobspec.ParseFile(specFile) 81 Expect(err).ShouldNot(HaveOccurred()) 82 job.ID = helper.StringToPtr(uuid.Generate()) 83 resp, _, err := jobs.Register(job, nil) 84 Expect(err).ShouldNot(HaveOccurred()) 85 Expect(resp.EvalID).ShouldNot(BeEmpty()) 86 87 }) 88 89 AfterEach(func() { 90 //Deregister job 91 jobs.Deregister(*job.ID, true, nil) 92 system.GarbageCollect() 93 }) 94 95 Describe("Reschedule Stanza Tests", func() { 96 97 Context("No reschedule attempts", func() { 98 BeforeEach(func() { 99 specFile = "input/norescheduling.hcl" 100 }) 101 102 It("Should have exactly three allocs and all failed", func() { 103 Eventually(allocStatuses, 5*time.Second, time.Second).Should(ConsistOf([]string{"failed", "failed", "failed"})) 104 }) 105 }) 106 107 Context("System jobs should never be rescheduled", func() { 108 BeforeEach(func() { 109 specFile = "input/rescheduling_system.hcl" 110 }) 111 112 It("Should have exactly one failed alloc", func() { 113 Eventually(allocStatuses, 10*time.Second, time.Second).Should(ConsistOf([]string{"failed"})) 114 }) 115 }) 116 117 Context("Default Rescheduling", func() { 118 BeforeEach(func() { 119 specFile = "input/rescheduling_default.hcl" 120 }) 121 It("Should have exactly three allocs and all failed after 5 secs", func() { 122 Eventually(allocStatuses, 5*time.Second, time.Second).Should(ConsistOf([]string{"failed", "failed", "failed"})) 123 }) 124 // wait until first exponential delay kicks in and rescheduling is attempted 125 It("Should have exactly six allocs and all failed after 35 secs", func() { 126 if !*slow { 127 Skip("Skipping slow test") 128 } 129 Eventually(allocStatuses, 35*time.Second, time.Second).Should(ConsistOf([]string{"failed", "failed", "failed", "failed", "failed", "failed"})) 130 }) 131 }) 132 133 Context("Reschedule attempts maxed out", func() { 134 BeforeEach(func() { 135 specFile = "input/rescheduling_fail.hcl" 136 }) 137 It("Should have all failed", func() { 138 Eventually(allocStatuses, 6*time.Second, time.Second).ShouldNot( 139 SatisfyAll(ContainElement("pending"), 140 ContainElement("running"))) 141 }) 142 Context("Updating job to change its version", func() { 143 It("Should have running allocs now", func() { 144 job.TaskGroups[0].Tasks[0].Config["args"] = []string{"-c", "sleep 15000"} 145 _, _, err := jobs.Register(job, nil) 146 Expect(err).ShouldNot(HaveOccurred()) 147 Eventually(allocStatuses, 5*time.Second, time.Second).Should(ContainElement("running")) 148 }) 149 }) 150 }) 151 152 Context("Reschedule attempts succeeded", func() { 153 BeforeEach(func() { 154 specFile = "input/reschedule_success.hcl" 155 }) 156 It("Should have some running allocs", func() { 157 Eventually(allocStatuses, 6*time.Second, time.Second).Should( 158 ContainElement("running")) 159 }) 160 }) 161 162 Context("Reschedule with update stanza", func() { 163 BeforeEach(func() { 164 specFile = "input/rescheduling_update.hcl" 165 }) 166 It("Should have all running allocs", func() { 167 Eventually(allocStatuses, 3*time.Second, time.Second).Should( 168 ConsistOf([]string{"running", "running", "running"})) 169 }) 170 Context("Updating job to make allocs fail", func() { 171 It("Should have no rescheduled allocs", func() { 172 job.TaskGroups[0].Tasks[0].Config["args"] = []string{"-c", "lol"} 173 _, _, err := jobs.Register(job, nil) 174 Expect(err).ShouldNot(HaveOccurred()) 175 Eventually(allocStatusesRescheduled, 2*time.Second, time.Second).Should(BeEmpty()) 176 }) 177 }) 178 179 }) 180 181 Context("Reschedule with canary", func() { 182 BeforeEach(func() { 183 specFile = "input/rescheduling_canary.hcl" 184 }) 185 It("Should have running allocs and successful deployment", func() { 186 Eventually(allocStatuses, 3*time.Second, time.Second).Should( 187 ConsistOf([]string{"running", "running", "running"})) 188 189 time.Sleep(2 * time.Second) //TODO(preetha) figure out why this wasn't working with ginkgo constructs 190 Eventually(deploymentStatus(), 2*time.Second, time.Second).Should( 191 ContainElement(structs.DeploymentStatusSuccessful)) 192 }) 193 194 Context("Updating job to make allocs fail", func() { 195 It("Should have no rescheduled allocs", func() { 196 job.TaskGroups[0].Tasks[0].Config["args"] = []string{"-c", "lol"} 197 _, _, err := jobs.Register(job, nil) 198 Expect(err).ShouldNot(HaveOccurred()) 199 Eventually(allocStatusesRescheduled, 2*time.Second, time.Second).Should(BeEmpty()) 200 201 // Verify new deployment and its status 202 time.Sleep(3 * time.Second) //TODO(preetha) figure out why this wasn't working with ginkgo constructs 203 Eventually(deploymentStatus(), 2*time.Second, time.Second).Should( 204 ContainElement(structs.DeploymentStatusFailed)) 205 }) 206 }) 207 208 }) 209 210 Context("Reschedule with canary and auto revert ", func() { 211 BeforeEach(func() { 212 specFile = "input/rescheduling_canary_autorevert.hcl" 213 }) 214 It("Should have running allocs and successful deployment", func() { 215 Eventually(allocStatuses, 3*time.Second, time.Second).Should( 216 ConsistOf([]string{"running", "running", "running"})) 217 218 time.Sleep(2 * time.Second) 219 Eventually(deploymentStatus(), 2*time.Second, time.Second).Should( 220 ContainElement(structs.DeploymentStatusSuccessful)) 221 222 // Make an update that causes the job to fail 223 job.TaskGroups[0].Tasks[0].Config["args"] = []string{"-c", "lol"} 224 _, _, err := jobs.Register(job, nil) 225 Expect(err).ShouldNot(HaveOccurred()) 226 Eventually(allocStatusesRescheduled, 2*time.Second, time.Second).Should(BeEmpty()) 227 228 // Wait for the revert 229 Eventually(allocStatuses, 3*time.Second, time.Second).Should( 230 ConsistOf([]string{"failed", "failed", "failed", "running", "running", "running"})) 231 232 // Verify new deployment and its status 233 // There should be one successful, one failed, and one more successful (after revert) 234 time.Sleep(5 * time.Second) //TODO(preetha) figure out why this wasn't working with ginkgo constructs 235 Eventually(deploymentStatus(), 2*time.Second, time.Second).Should( 236 ConsistOf(structs.DeploymentStatusSuccessful, structs.DeploymentStatusFailed, structs.DeploymentStatusSuccessful)) 237 }) 238 239 }) 240 241 Context("Reschedule with max parallel/auto_revert false", func() { 242 BeforeEach(func() { 243 specFile = "input/rescheduling_maxp.hcl" 244 }) 245 It("Should have running allocs and successful deployment", func() { 246 Eventually(allocStatuses, 3*time.Second, time.Second).Should( 247 ConsistOf([]string{"running", "running", "running"})) 248 249 time.Sleep(2 * time.Second) 250 Eventually(deploymentStatus(), 2*time.Second, time.Second).Should( 251 ContainElement(structs.DeploymentStatusSuccessful)) 252 }) 253 254 Context("Updating job to make allocs fail", func() { 255 It("Should have no rescheduled allocs", func() { 256 job.TaskGroups[0].Tasks[0].Config["args"] = []string{"-c", "lol"} 257 _, _, err := jobs.Register(job, nil) 258 Expect(err).ShouldNot(HaveOccurred()) 259 Eventually(allocStatusesRescheduled, 2*time.Second, time.Second).Should(BeEmpty()) 260 261 // Should have 1 failed from max_parallel 262 Eventually(allocStatuses, 3*time.Second, time.Second).Should( 263 ConsistOf([]string{"complete", "failed", "running", "running"})) 264 265 // Verify new deployment and its status 266 time.Sleep(2 * time.Second) 267 Eventually(deploymentStatus(), 2*time.Second, time.Second).Should( 268 ContainElement(structs.DeploymentStatusFailed)) 269 }) 270 }) 271 272 }) 273 274 Context("Reschedule with max parallel and auto revert true ", func() { 275 BeforeEach(func() { 276 specFile = "input/rescheduling_maxp_autorevert.hcl" 277 }) 278 It("Should have running allocs and successful deployment", func() { 279 Eventually(allocStatuses, 3*time.Second, time.Second).Should( 280 ConsistOf([]string{"running", "running", "running"})) 281 282 time.Sleep(4 * time.Second) 283 Eventually(deploymentStatus(), 2*time.Second, time.Second).Should( 284 ContainElement(structs.DeploymentStatusSuccessful)) 285 286 // Make an update that causes the job to fail 287 job.TaskGroups[0].Tasks[0].Config["args"] = []string{"-c", "lol"} 288 _, _, err := jobs.Register(job, nil) 289 Expect(err).ShouldNot(HaveOccurred()) 290 Eventually(allocStatusesRescheduled, 2*time.Second, time.Second).Should(BeEmpty()) 291 292 // Wait for the revert 293 Eventually(allocStatuses, 3*time.Second, time.Second).Should( 294 ConsistOf([]string{"complete", "failed", "running", "running", "running"})) 295 296 // Verify new deployment and its status 297 // There should be one successful, one failed, and one more successful (after revert) 298 time.Sleep(5 * time.Second) 299 Eventually(deploymentStatus(), 2*time.Second, time.Second).Should( 300 ConsistOf(structs.DeploymentStatusSuccessful, structs.DeploymentStatusFailed, structs.DeploymentStatusSuccessful)) 301 }) 302 303 }) 304 305 }) 306 307 })