github.com/cozy/cozy-stack@v0.0.0-20240603063001-31110fa4cae1/model/job/redis_broker.go (about)

     1  package job
     2  
     3  import (
     4  	"context"
     5  	"errors"
     6  	"fmt"
     7  	"math/rand"
     8  	"strconv"
     9  	"strings"
    10  	"sync/atomic"
    11  	"time"
    12  
    13  	"github.com/cozy/cozy-stack/pkg/config/config"
    14  	"github.com/cozy/cozy-stack/pkg/limits"
    15  	"github.com/cozy/cozy-stack/pkg/logger"
    16  	"github.com/cozy/cozy-stack/pkg/prefixer"
    17  	multierror "github.com/hashicorp/go-multierror"
    18  	"github.com/redis/go-redis/v9"
    19  )
    20  
const (
	// redisPrefix is the prefix of the redis keys used for the jobs queues.
	// The worker type is appended to it to build the key of a queue.
	redisPrefix = "j/"
	// redisHighPrioritySuffix is the suffix appended to the key of a queue to
	// get the key of its prioritized queue (the one where manual jobs are
	// pushed).
	redisHighPrioritySuffix = "/p0"
)
    27  
// redisBroker is the Broker returned by NewRedisBroker. It pushes jobs to
// redis lists (LPUSH) and pops them from there (BRPOP) to feed its workers.
type redisBroker struct {
	// client is the redis client used for every queue command.
	client redis.UniversalClient
	// ctx is the context given to every redis command.
	ctx context.Context
	// workers lists every worker created by StartWorkers, whatever its
	// concurrency.
	workers []*Worker
	// workersRunning lists the workers with a positive concurrency, i.e. the
	// ones that StartWorkers starts and gives a poll loop.
	workersRunning []*Worker
	// workersTypes lists the worker types given to StartWorkers.
	workersTypes []string
	// running is 1 between StartWorkers and ShutdownWorkers, 0 otherwise. It
	// is read and written with the atomic package.
	running uint32
	// closed receives one value each time a poll loop returns.
	closed chan struct{}
}
    37  
    38  // NewRedisBroker creates a new broker that will use redis to distribute
    39  // the jobs among several cozy-stack processes.
    40  func NewRedisBroker(client redis.UniversalClient) Broker {
    41  	return &redisBroker{
    42  		client: client,
    43  		ctx:    context.Background(),
    44  		closed: make(chan struct{}),
    45  	}
    46  }
    47  
    48  // StartWorkers polling jobs from redis queues
    49  func (b *redisBroker) StartWorkers(ws WorkersList) error {
    50  	if !atomic.CompareAndSwapUint32(&b.running, 0, 1) {
    51  		return ErrClosed
    52  	}
    53  
    54  	for _, conf := range ws {
    55  		b.workersTypes = append(b.workersTypes, conf.WorkerType)
    56  		w := NewWorker(conf)
    57  		b.workers = append(b.workers, w)
    58  		if conf.Concurrency <= 0 {
    59  			continue
    60  		}
    61  		b.workersRunning = append(b.workersRunning, w)
    62  		ch := make(chan *Job)
    63  		if err := w.Start(ch); err != nil {
    64  			return err
    65  		}
    66  		go b.pollLoop(redisPrefix+conf.WorkerType, ch)
    67  	}
    68  
    69  	if len(b.workersRunning) > 0 {
    70  		joblog.Infof("Started redis broker for %d workers type", len(b.workersRunning))
    71  	}
    72  
    73  	// XXX for retro-compat
    74  	if slots := config.GetConfig().Jobs.NbWorkers; len(b.workersRunning) > 0 && slots > 0 {
    75  		joblog.Warnf("Limiting the number of total concurrent workers to %d", slots)
    76  		joblog.Warnf("Please update your configuration file to avoid a hard limit")
    77  		setNbSlots(slots)
    78  	}
    79  
    80  	return nil
    81  }
    82  
    83  func (b *redisBroker) WorkersTypes() []string {
    84  	return b.workersTypes
    85  }
    86  
    87  func (b *redisBroker) ShutdownWorkers(ctx context.Context) error {
    88  	if !atomic.CompareAndSwapUint32(&b.running, 1, 0) {
    89  		return ErrClosed
    90  	}
    91  	if len(b.workersRunning) == 0 {
    92  		return nil
    93  	}
    94  
    95  	fmt.Print("  shutting down redis broker...")
    96  	defer b.client.Close()
    97  
    98  	for i := 0; i < len(b.workersRunning); i++ {
    99  		select {
   100  		case <-ctx.Done():
   101  			fmt.Println("failed:", ctx.Err())
   102  			return ctx.Err()
   103  		case <-b.closed:
   104  		}
   105  	}
   106  
   107  	errs := make(chan error)
   108  	for _, w := range b.workersRunning {
   109  		go func(w *Worker) { errs <- w.Shutdown(ctx) }(w)
   110  	}
   111  
   112  	var errm error
   113  	for i := 0; i < len(b.workersRunning); i++ {
   114  		if err := <-errs; err != nil {
   115  			errm = multierror.Append(errm, err)
   116  		}
   117  	}
   118  
   119  	if errm != nil {
   120  		fmt.Println("failed: ", errm)
   121  	} else {
   122  		fmt.Println("ok")
   123  	}
   124  	return errm
   125  }
   126  
   127  var redisBRPopTimeout = 10 * time.Second
   128  
   129  // SetRedisTimeoutForTest is used by unit test to avoid waiting 10 seconds on
   130  // cleanup.
   131  func SetRedisTimeoutForTest() {
   132  	redisBRPopTimeout = 1 * time.Second
   133  }
   134  
   135  func (b *redisBroker) pollLoop(key string, ch chan<- *Job) {
   136  	defer func() {
   137  		b.closed <- struct{}{}
   138  	}()
   139  
   140  	rng := rand.New(rand.NewSource(time.Now().UnixNano()))
   141  	for {
   142  		if atomic.LoadUint32(&b.running) == 0 {
   143  			return
   144  		}
   145  
   146  		// The brpop redis command will always take elements in priority from the
   147  		// first key containing elements at the call. By always priorizing the
   148  		// manual queue, this would cause a starvation for our main queue if too
   149  		// many "manual" jobs are pushed. By randomizing the order we make sure we
   150  		// avoid such starvation. For one in three call, the main queue is
   151  		// selected.
   152  		keyP0 := key + redisHighPrioritySuffix
   153  		keyP1 := key
   154  		if rng.Intn(3) == 0 {
   155  			keyP1, keyP0 = keyP0, keyP1
   156  		}
   157  		results, err := b.client.BRPop(b.ctx, redisBRPopTimeout, keyP0, keyP1).Result()
   158  		if err != nil || len(results) < 2 {
   159  			time.Sleep(100 * time.Millisecond)
   160  			continue
   161  		}
   162  
   163  		key, val := results[0], results[1]
   164  		if len(key) < len(redisPrefix) {
   165  			joblog.Warnf("Invalid key %s", key)
   166  			continue
   167  		}
   168  
   169  		parts := strings.SplitN(val, "/", 2)
   170  		if len(parts) != 2 {
   171  			joblog.Warnf("Invalid val %s", val)
   172  			continue
   173  		}
   174  
   175  		jobID := parts[1]
   176  		parts = strings.SplitN(parts[0], "%", 2)
   177  		prefix := parts[0]
   178  		var cluster int
   179  		if len(parts) > 1 {
   180  			cluster, _ = strconv.Atoi(parts[1])
   181  		}
   182  		job, err := Get(prefixer.NewPrefixer(cluster, "", prefix), jobID)
   183  		if err != nil {
   184  			joblog.Warnf("Cannot find job %s on domain %s (%d): %s",
   185  				jobID, prefix, cluster, err)
   186  			continue
   187  		}
   188  
   189  		ch <- job
   190  	}
   191  }
   192  
   193  // PushJob will produce a new Job with the given options and enqueue the job in
   194  // the proper queue.
   195  func (b *redisBroker) PushJob(db prefixer.Prefixer, req *JobRequest) (*Job, error) {
   196  	if atomic.LoadUint32(&b.running) == 0 {
   197  		return nil, ErrClosed
   198  	}
   199  
   200  	var worker *Worker
   201  	for _, w := range b.workers {
   202  		if w.Type == req.WorkerType {
   203  			worker = w
   204  			break
   205  		}
   206  	}
   207  	if worker == nil && req.WorkerType != "client" {
   208  		return nil, ErrUnknownWorker
   209  	}
   210  
   211  	// Check for limits
   212  	ct, err := GetCounterTypeFromWorkerType(req.WorkerType)
   213  	if err == nil {
   214  		err := config.GetRateLimiter().CheckRateLimit(db, ct)
   215  		if errors.Is(err, limits.ErrRateLimitReached) {
   216  			joblog.WithFields(logger.Fields{
   217  				"worker_type": req.WorkerType,
   218  				"instance":    db.DomainName(),
   219  			}).Warn(err.Error())
   220  			return nil, err
   221  		}
   222  		if limits.IsLimitReachedOrExceeded(err) {
   223  			return nil, err
   224  		}
   225  	}
   226  
   227  	job := NewJob(db, req)
   228  	if worker != nil && worker.Conf.BeforeHook != nil {
   229  		ok, err := worker.Conf.BeforeHook(job)
   230  		if err != nil {
   231  			return nil, err
   232  		}
   233  		if !ok {
   234  			return job, nil
   235  		}
   236  	}
   237  
   238  	if err := job.Create(); err != nil {
   239  		return nil, err
   240  	}
   241  
   242  	// For client jobs, we don't need to enqueue the job in redis.
   243  	if worker == nil {
   244  		return job, nil
   245  	}
   246  
   247  	key := redisPrefix + job.WorkerType
   248  	prefix := job.DBPrefix()
   249  	if cluster := job.DBCluster(); cluster > 0 {
   250  		prefix = fmt.Sprintf("%s%%%d", prefix, cluster)
   251  	}
   252  	val := prefix + "/" + job.JobID
   253  
   254  	// When the job is manual, it is being pushed in a specific prioritized
   255  	// queue.
   256  	if job.Manual {
   257  		key += redisHighPrioritySuffix
   258  	}
   259  
   260  	if err := b.client.LPush(b.ctx, key, val).Err(); err != nil {
   261  		return nil, err
   262  	}
   263  
   264  	return job, nil
   265  }
   266  
   267  // QueueLen returns the size of the number of elements in queue of the
   268  // specified worker type.
   269  func (b *redisBroker) WorkerQueueLen(workerType string) (int, error) {
   270  	key := redisPrefix + workerType
   271  	l1, err := b.client.LLen(b.ctx, key).Result()
   272  	if err != nil {
   273  		return 0, err
   274  	}
   275  	l2, err := b.client.LLen(b.ctx, key+redisHighPrioritySuffix).Result()
   276  	if err != nil {
   277  		return 0, err
   278  	}
   279  	return int(l1 + l2), nil
   280  }
   281  
   282  func (b *redisBroker) WorkerIsReserved(workerType string) (bool, error) {
   283  	for _, w := range b.workers {
   284  		if w.Type == workerType {
   285  			return w.Conf.Reserved, nil
   286  		}
   287  	}
   288  	return false, ErrUnknownWorker
   289  }