github.com/pf-qiu/concourse/v6@v6.7.3-0.20201207032516-1f455d73275f/topgun/both/worker_stalling_test.go (about)

     1  package topgun_test
     2  
     3  import (
     4  	"io/ioutil"
     5  	"os"
     6  	"regexp"
     7  	"time"
     8  
     9  	. "github.com/pf-qiu/concourse/v6/topgun/common"
    10  	_ "github.com/lib/pq"
    11  	. "github.com/onsi/ginkgo"
    12  	. "github.com/onsi/gomega"
    13  	"github.com/onsi/gomega/gbytes"
    14  	"github.com/onsi/gomega/gexec"
    15  )
    16  
    17  var _ = Describe("Worker stalling", func() {
    18  	Context("with two workers available", func() {
    19  		BeforeEach(func() {
    20  			Deploy(
    21  				"deployments/concourse.yml",
    22  				"-o", "operations/worker-instances.yml",
    23  				"-v", "worker_instances=2",
    24  			)
    25  		})
    26  
    27  		It("initially runs tasks across all workers", func() {
    28  			usedWorkers := map[string]struct{}{}
    29  			Eventually(func() map[string]struct{} {
    30  				Fly.Run("execute", "-c", "tasks/tiny.yml")
    31  				workerNames := WorkersWithContainers()
    32  				for _, w := range workerNames {
    33  					usedWorkers[w] = struct{}{}
    34  				}
    35  				return usedWorkers
    36  			}, 10*time.Minute).Should(HaveLen(2))
    37  		})
    38  
    39  		Context("when one worker goes away", func() {
    40  			var stalledWorkerName string
    41  
    42  			BeforeEach(func() {
    43  				Bosh("ssh", "worker/0", "-c", "sudo /var/vcap/bosh/bin/monit stop worker")
    44  				stalledWorkerName = WaitForStalledWorker()
    45  			})
    46  
    47  			AfterEach(func() {
    48  				Bosh("ssh", "worker/0", "-c", "sudo /var/vcap/bosh/bin/monit start worker")
    49  				WaitForWorkersToBeRunning(2)
    50  			})
    51  
    52  			It("enters 'stalled' state and is no longer used for new containers", func() {
    53  				for i := 0; i < 10; i++ {
    54  					Fly.Run("execute", "-c", "tasks/tiny.yml")
    55  					usedWorkers := WorkersWithContainers()
    56  					Expect(usedWorkers).To(HaveLen(1))
    57  					Expect(usedWorkers).ToNot(ContainElement(stalledWorkerName))
    58  				}
    59  			})
    60  
    61  			It("can be pruned while in stalled state", func() {
    62  				Fly.Run("prune-worker", "-w", stalledWorkerName)
    63  				WaitForWorkersToBeRunning(1)
    64  			})
    65  		})
    66  	})
    67  
    68  	Context("with no other worker available", func() {
    69  		BeforeEach(func() {
    70  			Deploy("deployments/concourse.yml")
    71  		})
    72  
    73  		Context("when the worker stalls while a build is running", func() {
    74  			var buildSession *gexec.Session
    75  			var buildID string
    76  
    77  			BeforeEach(func() {
    78  				buildSession = Fly.Start("execute", "-c", "tasks/wait.yml")
    79  				Eventually(buildSession).Should(gbytes.Say("executing build"))
    80  
    81  				buildRegex := regexp.MustCompile(`executing build (\d+)`)
    82  				matches := buildRegex.FindSubmatch(buildSession.Out.Contents())
    83  				buildID = string(matches[1])
    84  
    85  				//For the initializing block
    86  				Eventually(buildSession).Should(gbytes.Say("echo 'waiting for /tmp/stop-waiting to exist'"))
    87  				//For the output from the running step
    88  				Eventually(buildSession).Should(gbytes.Say("waiting for /tmp/stop-waiting to exist"))
    89  
    90  				By("stopping the worker without draining")
    91  				Bosh("ssh", "worker/0", "-c", "sudo /var/vcap/bosh/bin/monit stop worker")
    92  
    93  				By("waiting for it to stall")
    94  				_ = WaitForStalledWorker()
    95  			})
    96  
    97  			AfterEach(func() {
    98  				Bosh("ssh", "worker/0", "-c", "sudo /var/vcap/bosh/bin/monit start worker")
    99  				WaitForWorkersToBeRunning(1)
   100  
   101  				buildSession.Signal(os.Interrupt)
   102  				<-buildSession.Exited
   103  			})
   104  
   105  			Context("when the worker does not come back", func() {
   106  				It("does not fail the build", func() {
   107  					Consistently(buildSession).ShouldNot(gexec.Exit())
   108  				})
   109  			})
   110  
   111  			Context("when the worker comes back", func() {
   112  				BeforeEach(func() {
   113  					Bosh("ssh", "worker/0", "-c", "sudo /var/vcap/bosh/bin/monit start worker")
   114  					WaitForWorkersToBeRunning(1)
   115  				})
   116  
   117  				It("resumes the build", func() {
   118  					By("reattaching to the build")
   119  					// consume all output so far
   120  					_, err := ioutil.ReadAll(buildSession.Out)
   121  					Expect(err).ToNot(HaveOccurred())
   122  
   123  					// wait for new output
   124  					Eventually(buildSession).Should(gbytes.Say("waiting for /tmp/stop-waiting to exist"))
   125  
   126  					By("hijacking the build to tell it to finish")
   127  					Eventually(func() int {
   128  						session := Fly.Start(
   129  							"hijack",
   130  							"-b", buildID,
   131  							"-s", "one-off",
   132  							"touch", "/tmp/stop-waiting",
   133  						)
   134  
   135  						<-session.Exited
   136  						return session.ExitCode()
   137  					}).Should(Equal(0))
   138  
   139  					By("waiting for the build to exit")
   140  					Eventually(buildSession).Should(gbytes.Say("done"))
   141  					<-buildSession.Exited
   142  					Expect(buildSession.ExitCode()).To(Equal(0))
   143  				})
   144  			})
   145  		})
   146  	})
   147  })