github.com/treeverse/lakefs@v1.24.1-0.20240520134607-95648127bfb0/pkg/batch/executor.go (about)

     1  package batch
     2  
     3  import (
     4  	"context"
     5  	"time"
     6  
     7  	"github.com/treeverse/lakefs/pkg/logging"
     8  )
     9  
// RequestBufferSize is the amount of requests users can dispatch that haven't been processed yet before
// dispatching new ones would start blocking.
// (1 << 17 == 131,072; used as the buffer size of both Executor channels.)
const RequestBufferSize = 1 << 17
    13  
// Executer is a single unit of work that can be batched: one callback
// producing a value and an error.
// NOTE(review): the name is a misspelling of "Executor", but it is an
// exported interface so renaming would break callers.
type Executer interface {
	Execute() (interface{}, error)
}
    17  
// ExecuterFunc adapts a plain function to the Executer interface,
// in the style of http.HandlerFunc.
type ExecuterFunc func() (interface{}, error)

// Execute implements Executer by calling the underlying function.
func (b ExecuterFunc) Execute() (interface{}, error) {
	return b()
}
    23  
// Tracker is an optional interface an Executer may implement in order
// to be notified when its request is coalesced into an existing batch
// rather than starting a new one.
type Tracker interface {
	// Batched is called when a request is added to an existing batch.
	Batched()
}
    28  
    29  type DelayFn func(dur time.Duration)
    30  
// Batcher runs an Executer and returns its result, possibly sharing one
// execution among concurrent calls that use the same key.
type Batcher interface {
	BatchFor(ctx context.Context, key string, dur time.Duration, exec Executer) (interface{}, error)
}
    34  
// NoOpBatchingExecutor implements Batcher with no batching at all:
// every request is executed immediately and independently.
type NoOpBatchingExecutor struct{}

// contextKey used to keep values on context.Context
type contextKey string

// SkipBatchContextKey existence on a context will eliminate the request batching
const SkipBatchContextKey contextKey = "skip_batch"
    42  
// BatchFor ignores the context, key and duration and simply runs exec
// synchronously in the caller's goroutine.
func (n *NoOpBatchingExecutor) BatchFor(_ context.Context, _ string, _ time.Duration, exec Executer) (interface{}, error) {
	return exec.Execute()
}
    46  
// ConditionalExecutor will batch requests only if SkipBatchContextKey is not on the context
// of the batch request.
type ConditionalExecutor struct {
	// executor performs the actual batching when it is not skipped.
	executor *Executor
}
    52  
    53  func NewConditionalExecutor(logger logging.Logger) *ConditionalExecutor {
    54  	return &ConditionalExecutor{executor: NewExecutor(logger)}
    55  }
    56  
// Run drives the underlying Executor's event loop; it blocks until ctx
// is cancelled.
func (c *ConditionalExecutor) Run(ctx context.Context) {
	c.executor.Run(ctx)
}
    60  
    61  func (c *ConditionalExecutor) BatchFor(ctx context.Context, key string, timeout time.Duration, exec Executer) (interface{}, error) {
    62  	if ctx.Value(SkipBatchContextKey) != nil {
    63  		return exec.Execute()
    64  	}
    65  	return c.executor.BatchFor(ctx, key, timeout, exec)
    66  }
    67  
// response carries the outcome of one batched execution back to a
// waiting caller.
type response struct {
	v   interface{} // value returned by Executer.Execute
	err error       // error returned by Executer.Execute
}
    72  
// request is one inbound unit of work waiting to be batched and run.
type request struct {
	key        string         // batching key: concurrent requests sharing a key are coalesced
	timeout    time.Duration  // how long to collect further requests before executing
	exec       Executer       // the work to perform
	onResponse chan *response // where the (shared) result is delivered
}
    79  
// Executor is a batching Batcher: concurrent BatchFor calls that share
// a key within that key's timeout window are coalesced into a single
// Execute call, whose result is fanned out to every waiter.
type Executor struct {
	// requests is the channel accepting inbound requests
	requests chan *request
	// execs is the internal channel used to dispatch the callback functions.
	// Several requests with the same key in a given duration will trigger a single write to exec said key.
	execs chan string
	// waitingOnKey accumulates requests per key until the key's window
	// fires; it is touched only by the Run goroutine, so no lock is used.
	waitingOnKey map[string][]*request
	Logger       logging.Logger
	// Delay waits out a key's batching window; NewExecutor sets it to
	// time.Sleep, and it can be replaced (e.g. by tests).
	Delay DelayFn
}
    90  
// NopExecutor returns a Batcher that performs no batching whatsoever.
func NopExecutor() *NoOpBatchingExecutor {
	return &NoOpBatchingExecutor{}
}
    94  
    95  func NewExecutor(logger logging.Logger) *Executor {
    96  	return &Executor{
    97  		requests:     make(chan *request, RequestBufferSize),
    98  		execs:        make(chan string, RequestBufferSize),
    99  		waitingOnKey: make(map[string][]*request),
   100  		Logger:       logger,
   101  		Delay:        time.Sleep,
   102  	}
   103  }
   104  
   105  func (e *Executor) BatchFor(_ context.Context, key string, timeout time.Duration, exec Executer) (interface{}, error) {
   106  	cb := make(chan *response)
   107  	e.requests <- &request{
   108  		key:        key,
   109  		timeout:    timeout,
   110  		exec:       exec,
   111  		onResponse: cb,
   112  	}
   113  	res := <-cb
   114  	return res.v, res.err
   115  }
   116  
// Run is the Executor's event loop; it must be running for BatchFor to
// make progress. It is the sole goroutine that reads or writes
// waitingOnKey, which is why no locking is needed. Run returns when
// ctx is cancelled; requests still buffered at that point are never
// answered.
func (e *Executor) Run(ctx context.Context) {
	for {
		select {
		case <-ctx.Done():
			return
		case req := <-e.requests:
			// see if we have it scheduled already
			if _, exists := e.waitingOnKey[req.key]; !exists {
				e.waitingOnKey[req.key] = []*request{req}
				// this is a new key, let's fire a timer for it
				go func(req *request) {
					e.Delay(req.timeout)
					e.execs <- req.key
				}(req)
			} else {
				// joined an existing batch: notify the Executer if it
				// tracks batching, then queue it behind the first request.
				if b, ok := req.exec.(Tracker); ok {
					b.Batched()
				}
				e.waitingOnKey[req.key] = append(e.waitingOnKey[req.key], req)
			}
		case execKey := <-e.execs:
			// a key's batching window elapsed: take ownership of all its
			// waiters so new arrivals for the same key start a fresh batch
			waiters := e.waitingOnKey[execKey]
			delete(e.waitingOnKey, execKey)
			go func(key string) {
				// execute and call all mapped callbacks; only the first
				// request's Executer runs, and every waiter receives its result
				v, err := waiters[0].exec.Execute()
				if e.Logger.IsTracing() {
					e.Logger.WithFields(logging.Fields{
						"waiters": len(waiters),
						"key":     key,
					}).Trace("dispatched execute result")
				}
				for _, waiter := range waiters {
					waiter.onResponse <- &response{v, err}
				}
			}(execKey)
		}
	}
}