github.com/wtsi-ssg/wrstat/v4@v4.5.1/scheduler/scheduler_test.go (about)

     1  /*******************************************************************************
     2   * Copyright (c) 2021 Genome Research Ltd.
     3   *
     4   * Author: Sendu Bala <sb10@sanger.ac.uk>
     5   *
     6   * Permission is hereby granted, free of charge, to any person obtaining
     7   * a copy of this software and associated documentation files (the
     8   * "Software"), to deal in the Software without restriction, including
     9   * without limitation the rights to use, copy, modify, merge, publish,
    10   * distribute, sublicense, and/or sell copies of the Software, and to
    11   * permit persons to whom the Software is furnished to do so, subject to
    12   * the following conditions:
    13   *
    14   * The above copyright notice and this permission notice shall be included
    15   * in all copies or substantial portions of the Software.
    16   *
    17   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
    18   * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
    19   * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
    20   * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
    21   * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
    22   * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
    23   * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
    24   ******************************************************************************/
    25  
    26  package scheduler
    27  
    28  import (
    29  	"context"
    30  	"fmt"
    31  	"os"
    32  	"path/filepath"
    33  	"testing"
    34  	"time"
    35  
    36  	"github.com/VertebrateResequencing/wr/jobqueue"
    37  	jqs "github.com/VertebrateResequencing/wr/jobqueue/scheduler"
    38  	"github.com/inconshreveable/log15"
    39  	"github.com/phayes/freeport"
    40  	. "github.com/smartystreets/goconvey/convey"
    41  )
    42  
    43  const userOnlyPerm = 0700
    44  
    45  func TestStatFile(t *testing.T) {
    46  	deployment := "development"
    47  	timeout := 10 * time.Second
    48  	logger := log15.New()
    49  	ctx := context.Background()
    50  
    51  	Convey("You can get unique strings", t, func() {
    52  		str := UniqueString()
    53  		So(len(str), ShouldEqual, 20)
    54  
    55  		str2 := UniqueString()
    56  		So(len(str2), ShouldEqual, 20)
    57  		So(str2, ShouldNotEqual, str)
    58  	})
    59  
    60  	Convey("When the jobqueue server is up", t, func() {
    61  		config, d := prepareWrConfig(t)
    62  		defer d()
    63  		server := serve(t, config)
    64  		defer server.Stop(ctx, true)
    65  
    66  		Convey("You can make a Scheduler", func() {
    67  			s, err := New(deployment, "", "", timeout, logger, false)
    68  			So(err, ShouldBeNil)
    69  			So(s, ShouldNotBeNil)
    70  
    71  			wd, err := os.Getwd()
    72  			So(err, ShouldBeNil)
    73  			So(s.cwd, ShouldEqual, wd)
    74  
    75  			exe, err := os.Executable()
    76  			So(err, ShouldBeNil)
    77  			So(s.Executable(), ShouldEqual, exe)
    78  
    79  			So(s.jq, ShouldNotBeNil)
    80  
    81  			Convey("which lets you create jobs", func() {
    82  				job := s.NewJob("cmd", "rep", "req", "", "", nil)
    83  				So(job.Cmd, ShouldEqual, "cmd")
    84  				So(job.RepGroup, ShouldEqual, "rep")
    85  				So(job.ReqGroup, ShouldEqual, "req")
    86  				So(job.Cwd, ShouldEqual, wd)
    87  				So(job.CwdMatters, ShouldBeTrue)
    88  				So(job.Requirements, ShouldResemble, &jqs.Requirements{RAM: 100, Time: 10 * time.Second, Cores: 1, Disk: 1})
    89  				So(job.Retries, ShouldEqual, 30)
    90  				So(job.DepGroups, ShouldBeNil)
    91  				So(job.Dependencies, ShouldBeNil)
    92  				So(job.Override, ShouldEqual, 0)
    93  
    94  				job2 := s.NewJob("cmd2", "rep", "req", "a", "b", nil)
    95  				So(job2.Cmd, ShouldEqual, "cmd2")
    96  				So(job2.DepGroups, ShouldResemble, []string{"a"})
    97  				So(job2.Dependencies, ShouldResemble, jobqueue.Dependencies{{DepGroup: "b"}})
    98  
    99  				Convey("which you can add to the queue", func() {
   100  					err = s.SubmitJobs([]*jobqueue.Job{job, job2})
   101  					So(err, ShouldBeNil)
   102  
   103  					info := server.GetServerStats()
   104  					So(info.Ready, ShouldEqual, 2)
   105  
   106  					Convey("but you get an error if there are duplicates", func() {
   107  						err = s.SubmitJobs([]*jobqueue.Job{job, job2})
   108  						So(err, ShouldNotBeNil)
   109  						So(err, ShouldEqual, errDupJobs)
   110  
   111  						info := server.GetServerStats()
   112  						So(info.Ready, ShouldEqual, 2)
   113  					})
   114  				})
   115  
   116  				Convey("which you can't add to the queue if the server is down", func() {
   117  					server.Stop(ctx, true)
   118  					err = s.SubmitJobs([]*jobqueue.Job{job, job2})
   119  					So(err, ShouldNotBeNil)
   120  				})
   121  
   122  				Convey("which you can't add to the queue if you disconnected", func() {
   123  					err = s.Disconnect()
   124  					So(err, ShouldBeNil)
   125  					err = s.SubmitJobs([]*jobqueue.Job{job, job2})
   126  					So(err, ShouldNotBeNil)
   127  				})
   128  			})
   129  		})
   130  
   131  		Convey("You can make a Scheduler with a specified cwd and it creates jobs in there", func() {
   132  			cwd := t.TempDir()
   133  
   134  			s, err := New(deployment, cwd, "", timeout, logger, false)
   135  			So(err, ShouldBeNil)
   136  			So(s, ShouldNotBeNil)
   137  
   138  			job := s.NewJob("cmd", "rep", "req", "", "", nil)
   139  			So(job.Cwd, ShouldEqual, cwd)
   140  			So(job.CwdMatters, ShouldBeTrue)
   141  		})
   142  
   143  		Convey("You can't create a Scheduler in an invalid dir", func() {
   144  			d := cdNonExistantDir(t)
   145  			defer d()
   146  
   147  			s, err := New(deployment, "", "", timeout, logger, false)
   148  			So(err, ShouldNotBeNil)
   149  			So(s, ShouldBeNil)
   150  		})
   151  
   152  		Convey("You can't create a Scheduler if you pass an invalid dir", func() {
   153  			s, err := New(deployment, "/non_existent", "", timeout, logger, false)
   154  			So(err, ShouldNotBeNil)
   155  			So(s, ShouldBeNil)
   156  		})
   157  
   158  		Convey("You can make a Scheduler that creates sudo jobs", func() {
   159  			s, err := New(deployment, "", "", timeout, logger, true)
   160  			So(err, ShouldBeNil)
   161  			So(s, ShouldNotBeNil)
   162  
   163  			job := s.NewJob("cmd", "rep", "req", "", "", nil)
   164  			So(job.Cmd, ShouldEqual, "sudo cmd")
   165  		})
   166  
   167  		Convey("You can make a Scheduler with a Req override", func() {
   168  			s, err := New(deployment, "", "", timeout, logger, false)
   169  			So(err, ShouldBeNil)
   170  			So(s, ShouldNotBeNil)
   171  
   172  			req := DefaultRequirements()
   173  			req.RAM = 16000
   174  
   175  			job := s.NewJob("cmd", "rep", "req", "", "", req)
   176  			So(job.Requirements.RAM, ShouldEqual, 16000)
   177  			So(job.Override, ShouldEqual, 1)
   178  		})
   179  
   180  		Convey("You can make a Scheduler with a queue override", func() {
   181  			s, err := New(deployment, "", "foo", timeout, logger, false)
   182  			So(err, ShouldBeNil)
   183  			So(s, ShouldNotBeNil)
   184  
   185  			dreq := DefaultRequirements()
   186  
   187  			job := s.NewJob("cmd", "rep", "req", "", "", nil)
   188  			So(job.Requirements.RAM, ShouldEqual, dreq.RAM)
   189  			So(job.Override, ShouldEqual, 0)
   190  			So(job.Requirements.Other, ShouldResemble, map[string]string{"scheduler_queue": "foo"})
   191  		})
   192  	})
   193  
   194  	Convey("When the jobqueue server is not up, you can't make a Scheduler", t, func() {
   195  		_, d := prepareWrConfig(t)
   196  		defer d()
   197  
   198  		s, err := New(deployment, "", "", timeout, logger, false)
   199  		So(err, ShouldNotBeNil)
   200  		So(s, ShouldBeNil)
   201  	})
   202  }
   203  
   204  // cdNonExistantDir changes directory to a temp directory, then deletes that
   205  // directory. It returns a function you should defer to change back to your
   206  // original directory.
   207  func cdNonExistantDir(t *testing.T) func() {
   208  	t.Helper()
   209  
   210  	tmpDir, d := cdTmpDir(t)
   211  
   212  	os.RemoveAll(tmpDir)
   213  
   214  	return d
   215  }
   216  
   217  // cdTmpDir changes directory to a temp directory. It returns the path to the
   218  // temp dir and a function you should defer to change back to your original
   219  // directory. The tmp dir will be automatically deleted when tests end.
   220  func cdTmpDir(t *testing.T) (string, func()) {
   221  	t.Helper()
   222  
   223  	tmpDir := t.TempDir()
   224  
   225  	cwd, err := os.Getwd()
   226  	if err != nil {
   227  		t.Fatal(err)
   228  	}
   229  
   230  	err = os.Chdir(tmpDir)
   231  	if err != nil {
   232  		t.Fatal(err)
   233  	}
   234  
   235  	d := func() {
   236  		err = os.Chdir(cwd)
   237  		if err != nil {
   238  			t.Logf("Chdir failed: %s", err)
   239  		}
   240  	}
   241  
   242  	return tmpDir, d
   243  }
   244  
   245  // prepareWrConfig creates a temp directory, changes to that directory, creates
   246  // a wr config file with available ports set, then returns a ServerConfig with
   247  // that configuration. It also returns a function you should defer, which
   248  // changes directory back.
   249  func prepareWrConfig(t *testing.T) (jobqueue.ServerConfig, func()) {
   250  	t.Helper()
   251  
   252  	dir, d := cdTmpDir(t)
   253  
   254  	clientPort, err := freeport.GetFreePort()
   255  	if err != nil {
   256  		t.Fatalf("getting free port failed: %s", err)
   257  	}
   258  
   259  	webPort, err := freeport.GetFreePort()
   260  	if err != nil {
   261  		t.Fatalf("getting free port failed: %s", err)
   262  	}
   263  
   264  	managerDir := filepath.Join(dir, ".wr")
   265  	managerDirActual := managerDir + "_development"
   266  
   267  	err = os.MkdirAll(managerDirActual, userOnlyPerm)
   268  	if err != nil {
   269  		t.Fatal(err)
   270  	}
   271  
   272  	config := jobqueue.ServerConfig{
   273  		Port:            fmt.Sprintf("%d", clientPort),
   274  		WebPort:         fmt.Sprintf("%d", webPort),
   275  		SchedulerName:   "local",
   276  		SchedulerConfig: &jqs.ConfigLocal{Shell: "bash"},
   277  		DBFile:          filepath.Join(managerDirActual, "db"),
   278  		DBFileBackup:    filepath.Join(managerDirActual, "db_bk"),
   279  		TokenFile:       filepath.Join(managerDirActual, "client.token"),
   280  		CAFile:          filepath.Join(managerDirActual, "ca.pem"),
   281  		CertFile:        filepath.Join(managerDirActual, "cert.pem"),
   282  		CertDomain:      "localhost",
   283  		KeyFile:         filepath.Join(managerDirActual, "key.pem"),
   284  		Deployment:      "development",
   285  	}
   286  
   287  	f, err := os.Create(filepath.Join(dir, ".wr_config.yml"))
   288  	if err != nil {
   289  		t.Fatal(err)
   290  	}
   291  
   292  	configData := `managerport: "%s"
   293  managerweb: "%s"
   294  managerdir: "%s"`
   295  
   296  	_, err = f.WriteString(fmt.Sprintf(configData, config.Port, config.WebPort, managerDir))
   297  	if err != nil {
   298  		t.Fatal(err)
   299  	}
   300  
   301  	return config, d
   302  }
   303  
   304  // serve calls Serve() but with a retry for 5s on failure. This allows time for
   305  // a server that we recently stopped in a prior test to really not be listening
   306  // on the ports any more.
   307  func serve(t *testing.T, config jobqueue.ServerConfig) *jobqueue.Server {
   308  	t.Helper()
   309  
   310  	server, _, _, err := jobqueue.Serve(context.Background(), config)
   311  	if err != nil {
   312  		server, err = serveWithRetries(t, config)
   313  	}
   314  
   315  	if err != nil {
   316  		t.Fatal(err)
   317  	}
   318  
   319  	return server
   320  }
   321  
   322  // serveWithRetries does the retrying part of serve().
   323  func serveWithRetries(t *testing.T, config jobqueue.ServerConfig) (server *jobqueue.Server, err error) {
   324  	t.Helper()
   325  
   326  	limit := time.After(10 * time.Second)
   327  	ticker := time.NewTicker(500 * time.Millisecond)
   328  
   329  	for {
   330  		select {
   331  		case <-ticker.C:
   332  			server, _, _, err = jobqueue.Serve(context.Background(), config)
   333  			if err != nil {
   334  				continue
   335  			}
   336  
   337  			ticker.Stop()
   338  
   339  			return
   340  		case <-limit:
   341  			ticker.Stop()
   342  
   343  			return
   344  		}
   345  	}
   346  }