github.com/emate/nomad@v0.8.2-wo-binpacking/e2e/rescheduling/server_side_restarts_test.go

package rescheduling

import (
	"sort"
	"time"

	"github.com/hashicorp/nomad/api"
	"github.com/hashicorp/nomad/jobspec"
	. "github.com/onsi/ginkgo"
	. "github.com/onsi/gomega"

	"github.com/hashicorp/nomad/helper"
	"github.com/hashicorp/nomad/helper/uuid"
	"github.com/hashicorp/nomad/nomad/structs"
)

var _ = Describe("Server Side Restart Tests", func() {

	var (
		jobs     *api.Jobs
		system   *api.System
		job      *api.Job
		err      error
		specFile string

		// allocStatuses is a helper function that pulls out client statuses
		// from the job's allocs. It is passed to Eventually as a function, so
		// it is re-invoked on every polling interval.
		allocStatuses = func() []string {
			allocs, _, err := jobs.Allocations(*job.ID, false, nil)
			Expect(err).ShouldNot(HaveOccurred())
			var ret []string
			for _, a := range allocs {
				ret = append(ret, a.ClientStatus)
			}
			sort.Strings(ret)
			return ret
		}

		// allocStatusesRescheduled is a helper function that pulls out client
		// statuses only from rescheduled allocs, i.e. allocs that have
		// reschedule events recorded or a follow-up evaluation pending.
		allocStatusesRescheduled = func() []string {
			allocs, _, err := jobs.Allocations(*job.ID, false, nil)
			Expect(err).ShouldNot(HaveOccurred())
			var ret []string
			for _, a := range allocs {
				if (a.RescheduleTracker != nil && len(a.RescheduleTracker.Events) > 0) || a.FollowupEvalID != "" {
					ret = append(ret, a.ClientStatus)
				}
			}
			return ret
		}

		// deploymentStatus is a helper function that returns the status of all
		// of the job's deployments, sorted by creation order (CreateIndex).
		deploymentStatus = func() []string {
			deploys, _, err := jobs.Deployments(*job.ID, nil)
			Expect(err).ShouldNot(HaveOccurred())
			var ret []string
			sort.Slice(deploys, func(i, j int) bool {
				return deploys[i].CreateIndex < deploys[j].CreateIndex
			})
			for _, d := range deploys {
				ret = append(ret, d.Status)
			}
			return ret
		}
	)

	BeforeSuite(func() {
		conf := api.DefaultConfig()

		// Create client
		client, err := api.NewClient(conf)
		Expect(err).ShouldNot(HaveOccurred())
		jobs = client.Jobs()
		system = client.System()
	})
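
	// Note: api.DefaultConfig targets the local agent (http://127.0.0.1:4646)
	// unless overridden by environment variables such as NOMAD_ADDR, so this
	// suite runs against whatever cluster the environment points at.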

	// JustBeforeEach parses the spec file chosen by the surrounding Context,
	// registers it under a fresh UUID, and verifies an evaluation was created.
	JustBeforeEach(func() {
		job, err = jobspec.ParseFile(specFile)
		Expect(err).ShouldNot(HaveOccurred())
		job.ID = helper.StringToPtr(uuid.Generate())
		resp, _, err := jobs.Register(job, nil)
		Expect(err).ShouldNot(HaveOccurred())
		Expect(resp.EvalID).ShouldNot(BeEmpty())
	})

	AfterEach(func() {
		// Deregister the job and garbage collect; errors are ignored since
		// this is best-effort cleanup.
		jobs.Deregister(*job.ID, true, nil)
		system.GarbageCollect()
	})

	Describe("Reschedule Stanza Tests", func() {

		Context("No reschedule attempts", func() {
			BeforeEach(func() {
				specFile = "input/norescheduling.hcl"
			})

			It("Should have exactly three allocs and all failed", func() {
				Eventually(allocStatuses, 5*time.Second, time.Second).Should(ConsistOf([]string{"failed", "failed", "failed"}))
			})
		})

		Context("System jobs should never be rescheduled", func() {
			BeforeEach(func() {
				specFile = "input/rescheduling_system.hcl"
			})

			It("Should have exactly one failed alloc", func() {
				Eventually(allocStatuses, 10*time.Second, time.Second).Should(ConsistOf([]string{"failed"}))
			})
		})

		Context("Default Rescheduling", func() {
			BeforeEach(func() {
				specFile = "input/rescheduling_default.hcl"
			})
			It("Should have exactly three allocs and all failed after 5 secs", func() {
				Eventually(allocStatuses, 5*time.Second, time.Second).Should(ConsistOf([]string{"failed", "failed", "failed"}))
			})
			// Wait until the first reschedule delay elapses and rescheduling is
			// attempted; with the default exponential delay the three
			// replacement allocs appear and also fail, giving six failed allocs.
			It("Should have exactly six allocs and all failed after 35 secs", func() {
				if !*slow {
					Skip("Skipping slow test")
				}
				Eventually(allocStatuses, 35*time.Second, time.Second).Should(ConsistOf([]string{"failed", "failed", "failed", "failed", "failed", "failed"}))
			})
		})

		Context("Reschedule attempts maxed out", func() {
			BeforeEach(func() {
				specFile = "input/rescheduling_fail.hcl"
			})
			It("Should have all failed", func() {
				Eventually(allocStatuses, 6*time.Second, time.Second).ShouldNot(
					SatisfyAll(ContainElement("pending"),
						ContainElement("running")))
			})
			Context("Updating job to change its version", func() {
				It("Should have running allocs now", func() {
					job.TaskGroups[0].Tasks[0].Config["args"] = []string{"-c", "sleep 15000"}
					_, _, err := jobs.Register(job, nil)
					Expect(err).ShouldNot(HaveOccurred())
					Eventually(allocStatuses, 5*time.Second, time.Second).Should(ContainElement("running"))
				})
			})
		})

		Context("Reschedule attempts succeeded", func() {
			BeforeEach(func() {
				specFile = "input/reschedule_success.hcl"
			})
			It("Should have some running allocs", func() {
				Eventually(allocStatuses, 6*time.Second, time.Second).Should(
					ContainElement("running"))
			})
		})

		Context("Reschedule with update stanza", func() {
			BeforeEach(func() {
				specFile = "input/rescheduling_update.hcl"
			})
			It("Should have all running allocs", func() {
				Eventually(allocStatuses, 3*time.Second, time.Second).Should(
					ConsistOf([]string{"running", "running", "running"}))
			})
			Context("Updating job to make allocs fail", func() {
				It("Should have no rescheduled allocs", func() {
					job.TaskGroups[0].Tasks[0].Config["args"] = []string{"-c", "lol"}
					_, _, err := jobs.Register(job, nil)
					Expect(err).ShouldNot(HaveOccurred())
					Eventually(allocStatusesRescheduled, 2*time.Second, time.Second).Should(BeEmpty())
				})
			})
		})

		Context("Reschedule with canary", func() {
			BeforeEach(func() {
				specFile = "input/rescheduling_canary.hcl"
			})
			It("Should have running allocs and successful deployment", func() {
				Eventually(allocStatuses, 3*time.Second, time.Second).Should(
					ConsistOf([]string{"running", "running", "running"}))

				time.Sleep(2 * time.Second) // TODO(preetha) figure out why this wasn't working with ginkgo constructs
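				// Note that deploymentStatus() is invoked here, so Eventually
				// repeatedly matches a single snapshot taken after the sleep
				// above rather than re-polling the deployments API; passing the
				// function itself would make Eventually poll.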
				Eventually(deploymentStatus(), 2*time.Second, time.Second).Should(
					ContainElement(structs.DeploymentStatusSuccessful))
			})

			Context("Updating job to make allocs fail", func() {
				It("Should have no rescheduled allocs", func() {
					job.TaskGroups[0].Tasks[0].Config["args"] = []string{"-c", "lol"}
					_, _, err := jobs.Register(job, nil)
					Expect(err).ShouldNot(HaveOccurred())
					Eventually(allocStatusesRescheduled, 2*time.Second, time.Second).Should(BeEmpty())

					// Verify new deployment and its status
					time.Sleep(3 * time.Second) // TODO(preetha) figure out why this wasn't working with ginkgo constructs
					Eventually(deploymentStatus(), 2*time.Second, time.Second).Should(
						ContainElement(structs.DeploymentStatusFailed))
				})
			})
		})

		Context("Reschedule with canary and auto revert", func() {
			BeforeEach(func() {
				specFile = "input/rescheduling_canary_autorevert.hcl"
			})
			It("Should have running allocs and successful deployment", func() {
				Eventually(allocStatuses, 3*time.Second, time.Second).Should(
					ConsistOf([]string{"running", "running", "running"}))

				time.Sleep(2 * time.Second)
				Eventually(deploymentStatus(), 2*time.Second, time.Second).Should(
					ContainElement(structs.DeploymentStatusSuccessful))

				// Make an update that causes the job to fail
				job.TaskGroups[0].Tasks[0].Config["args"] = []string{"-c", "lol"}
				_, _, err := jobs.Register(job, nil)
				Expect(err).ShouldNot(HaveOccurred())
				Eventually(allocStatusesRescheduled, 2*time.Second, time.Second).Should(BeEmpty())

				// Wait for the revert
				Eventually(allocStatuses, 3*time.Second, time.Second).Should(
					ConsistOf([]string{"failed", "failed", "failed", "running", "running", "running"}))

				// Verify the deployments and their statuses:
				// there should be one successful, one failed, and one more successful (after revert)
				time.Sleep(5 * time.Second) // TODO(preetha) figure out why this wasn't working with ginkgo constructs
				Eventually(deploymentStatus(), 2*time.Second, time.Second).Should(
					ConsistOf(structs.DeploymentStatusSuccessful, structs.DeploymentStatusFailed, structs.DeploymentStatusSuccessful))
			})
		})

		Context("Reschedule with max parallel/auto_revert false", func() {
			BeforeEach(func() {
				specFile = "input/rescheduling_maxp.hcl"
			})
			It("Should have running allocs and successful deployment", func() {
				Eventually(allocStatuses, 3*time.Second, time.Second).Should(
					ConsistOf([]string{"running", "running", "running"}))

				time.Sleep(2 * time.Second)
				Eventually(deploymentStatus(), 2*time.Second, time.Second).Should(
					ContainElement(structs.DeploymentStatusSuccessful))
			})

			Context("Updating job to make allocs fail", func() {
				It("Should have no rescheduled allocs", func() {
					job.TaskGroups[0].Tasks[0].Config["args"] = []string{"-c", "lol"}
					_, _, err := jobs.Register(job, nil)
					Expect(err).ShouldNot(HaveOccurred())
					Eventually(allocStatusesRescheduled, 2*time.Second, time.Second).Should(BeEmpty())

					// Should have 1 failed from max_parallel
					Eventually(allocStatuses, 3*time.Second, time.Second).Should(
						ConsistOf([]string{"complete", "failed", "running", "running"}))

					// Verify new deployment and its status
					time.Sleep(2 * time.Second)
					Eventually(deploymentStatus(), 2*time.Second, time.Second).Should(
						ContainElement(structs.DeploymentStatusFailed))
				})
			})
		})

		Context("Reschedule with max parallel and auto revert true", func() {
			BeforeEach(func() {
				specFile = "input/rescheduling_maxp_autorevert.hcl"
			})
			It("Should have running allocs and successful deployment", func() {
				Eventually(allocStatuses, 3*time.Second, time.Second).Should(
					ConsistOf([]string{"running", "running", "running"}))

				time.Sleep(4 * time.Second)
				Eventually(deploymentStatus(), 2*time.Second, time.Second).Should(
					ContainElement(structs.DeploymentStatusSuccessful))

				// Make an update that causes the job to fail
				job.TaskGroups[0].Tasks[0].Config["args"] = []string{"-c", "lol"}
				_, _, err := jobs.Register(job, nil)
				Expect(err).ShouldNot(HaveOccurred())
				Eventually(allocStatusesRescheduled, 2*time.Second, time.Second).Should(BeEmpty())

				// Wait for the revert
				Eventually(allocStatuses, 3*time.Second, time.Second).Should(
					ConsistOf([]string{"complete", "failed", "running", "running", "running"}))

				// Verify the deployments and their statuses:
				// there should be one successful, one failed, and one more successful (after revert)
				time.Sleep(5 * time.Second)
				Eventually(deploymentStatus(), 2*time.Second, time.Second).Should(
					ConsistOf(structs.DeploymentStatusSuccessful, structs.DeploymentStatusFailed, structs.DeploymentStatusSuccessful))
			})
		})

	})

})
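
// Running the suite (a sketch, assuming a reachable cluster): point NOMAD_ADDR at a
// running Nomad agent and invoke `go test` in this package. The long backoff test is
// gated behind the package-level `slow` flag referenced above, which is assumed to be
// registered elsewhere in this package (e.g. via flag.Bool("slow", ...)).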