github.com/wtsi-ssg/wrstat@v1.1.4-0.20221008232152-3030622a8cf8/scheduler/scheduler.go (about)

     1  /*******************************************************************************
     2   * Copyright (c) 2021 Genome Research Ltd.
     3   *
     4   * Author: Sendu Bala <sb10@sanger.ac.uk>
     5   *
     6   * Permission is hereby granted, free of charge, to any person obtaining
     7   * a copy of this software and associated documentation files (the
     8   * "Software"), to deal in the Software without restriction, including
     9   * without limitation the rights to use, copy, modify, merge, publish,
    10   * distribute, sublicense, and/or sell copies of the Software, and to
    11   * permit persons to whom the Software is furnished to do so, subject to
    12   * the following conditions:
    13   *
    14   * The above copyright notice and this permission notice shall be included
    15   * in all copies or substantial portions of the Software.
    16   *
    17   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
    18   * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
    19   * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
    20   * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
    21   * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
    22   * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
    23   * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
    24   ******************************************************************************/
    25  
    26  package scheduler
    27  
    28  import (
    29  	"context"
    30  	"os"
    31  	"time"
    32  
    33  	"github.com/VertebrateResequencing/wr/jobqueue"
    34  	jqs "github.com/VertebrateResequencing/wr/jobqueue/scheduler"
    35  	"github.com/inconshreveable/log15"
    36  	"github.com/rs/xid"
    37  	"github.com/wtsi-ssg/wr/clog"
    38  )
    39  
    40  type Error string
    41  
    42  func (e Error) Error() string { return string(e) }
    43  
    44  const errDupJobs = Error("some of the added jobs were duplicates")
    45  
    46  // some consts for the jobs returned by NewJob().
    47  const jobRetries uint8 = 30
    48  const reqRAM = 100
    49  const reqTime = 10 * time.Second
    50  const reqCores = 1
    51  const reqDisk = 1
    52  
// Scheduler can be used to schedule commands to be executed by adding them to
// wr's queue. Create one with New(), and Disconnect() it when done.
type Scheduler struct {
	cwd   string           // working directory set on every job made by NewJob()
	exe   string           // result of os.Executable(), captured by New()
	jq    *jobqueue.Client // client connection made by New(), used to add jobs
	sudo  bool             // if true, NewJob() prefixes commands with "sudo "
	queue string           // if not blank, set as "scheduler_queue" in job requirements
}
    62  
    63  // New returns a Scheduler that is connected to wr manager using the given
    64  // deployment, timeout and logger. If sudo is true, NewJob() will prefix 'sudo'
    65  // to commands. Added jobs will have the given cwd, which matters. If cwd is
    66  // blank, the current working dir is used. If queue is not blank, that queue
    67  // will be used during NewJob().
    68  func New(deployment, cwd, queue string, timeout time.Duration, logger log15.Logger,
    69  	sudo bool) (*Scheduler, error) {
    70  	cwd, err := pickCWD(cwd)
    71  	if err != nil {
    72  		return nil, err
    73  	}
    74  
    75  	jq, err := jobqueue.ConnectUsingConfig(clog.ContextWithLogHandler(context.Background(),
    76  		logger.GetHandler()), deployment, timeout)
    77  	if err != nil {
    78  		return nil, err
    79  	}
    80  
    81  	exe, err := os.Executable()
    82  
    83  	return &Scheduler{
    84  		cwd:   cwd,
    85  		exe:   exe,
    86  		queue: queue,
    87  		jq:    jq,
    88  		sudo:  sudo,
    89  	}, err
    90  }
    91  
    92  // pickCWD checks the given directory exists, returns an error. If the given
    93  // dir is blank, returns the current working directory.
    94  func pickCWD(cwd string) (string, error) {
    95  	if cwd == "" {
    96  		return os.Getwd()
    97  	}
    98  
    99  	_, err := os.Stat(cwd)
   100  
   101  	return cwd, err
   102  }
   103  
// Executable is a convenience function that returns the same as
// os.Executable(), but without the error. The value is the one New() obtained
// when this Scheduler was created; it is not looked up again here.
func (s *Scheduler) Executable() string {
	return s.exe
}
   109  
   110  // DefaultRequirements returns a minimal set of requirments, which is what
   111  // NewJob() will use by default.
   112  func DefaultRequirements() *jqs.Requirements {
   113  	return &jqs.Requirements{
   114  		RAM:   reqRAM,
   115  		Time:  reqTime,
   116  		Cores: reqCores,
   117  		Disk:  reqDisk,
   118  	}
   119  }
   120  
   121  // NewJob is a convenience function for creating Jobs. It sets the job's Cwd
   122  // to the current working directory, sets CwdMatters to true, applies the given
   123  // Requirements, and sets Retries to 3.
   124  //
   125  // If this Scheduler had been made with sudo: true, cmd will be prefixed with
   126  // 'sudo '.
   127  //
   128  // THe supplied depGroup and dep can be blank to not set DepGroups and
   129  // Dependencies.
   130  //
   131  // If req is supplied, sets the job override to 1. Otherwise, req will default
   132  // to a minimal set of requirments, and override will be 0. If this Scheduler
   133  // had been made with a queue override, the requirements will be altered to add
   134  // that queue.
   135  func (s *Scheduler) NewJob(cmd, repGroup, reqGroup, depGroup, dep string, req *jqs.Requirements) *jobqueue.Job {
   136  	if s.sudo {
   137  		cmd = "sudo " + cmd
   138  	}
   139  
   140  	req, override := s.determineOverrideAndReq(req)
   141  
   142  	return &jobqueue.Job{
   143  		Cmd:          cmd,
   144  		Cwd:          s.cwd,
   145  		CwdMatters:   true,
   146  		RepGroup:     repGroup,
   147  		ReqGroup:     reqGroup,
   148  		Requirements: req,
   149  		DepGroups:    createDepGroups(depGroup),
   150  		Dependencies: createDependencies(dep),
   151  		Retries:      jobRetries,
   152  		Override:     override,
   153  	}
   154  }
   155  
   156  // createDepGroups returns the given depGroup inside a string slice, unless
   157  // blank, in which case returns nil slice.
   158  func createDepGroups(depGroup string) []string {
   159  	var depGroups []string
   160  	if depGroup != "" {
   161  		depGroups = []string{depGroup}
   162  	}
   163  
   164  	return depGroups
   165  }
   166  
   167  // createDependencies returns the given dep as a Dependencies if not blank,
   168  // otherwise nil.
   169  func createDependencies(dep string) jobqueue.Dependencies {
   170  	var dependencies jobqueue.Dependencies
   171  	if dep != "" {
   172  		dependencies = jobqueue.Dependencies{{DepGroup: dep}}
   173  	}
   174  
   175  	return dependencies
   176  }
   177  
   178  // determineOverrideAndReq returns the given req and an override of 1 if req is
   179  // not nil, otherwise returns a default req and override of 0.
   180  func (s *Scheduler) determineOverrideAndReq(req *jqs.Requirements) (*jqs.Requirements, uint8) {
   181  	override := 1
   182  
   183  	if req == nil {
   184  		req = DefaultRequirements()
   185  		override = 0
   186  	}
   187  
   188  	if s.queue != "" {
   189  		other := req.Other
   190  		if other == nil {
   191  			other = make(map[string]string)
   192  		}
   193  
   194  		other["scheduler_queue"] = s.queue
   195  		req.Other = other
   196  	}
   197  
   198  	return req, uint8(override)
   199  }
   200  
   201  // SubmitJobs adds the given jobs to wr's queue, passing through current
   202  // environment variables.
   203  //
   204  // Previously added identical jobs that have since been archived will get added
   205  // again.
   206  //
   207  // If any duplicate jobs were added, an error will be returned.
   208  func (s *Scheduler) SubmitJobs(jobs []*jobqueue.Job) error {
   209  	inserts, _, err := s.jq.Add(jobs, os.Environ(), false)
   210  	if err != nil {
   211  		return err
   212  	}
   213  
   214  	if inserts != len(jobs) {
   215  		return errDupJobs
   216  	}
   217  
   218  	return nil
   219  }
   220  
// Disconnect disconnects from the manager by calling Disconnect() on this
// Scheduler's jobqueue client, returning whatever error that gives. You should
// defer this after New().
func (s *Scheduler) Disconnect() error {
	return s.jq.Disconnect()
}
   225  
// UniqueString returns a unique string that could be useful for supplying as
// depGroup values to NewJob() etc. The length is always 20 characters. The
// value is the string form of a newly generated xid.
func UniqueString() string {
	return xid.New().String()
}