github.com/SaurabhDubey-Groww/go-cloud@v0.0.0-20221124105541-b26c29285fd8/pubsub/batcher/batcher.go (about)

     1  // Copyright 2018 The Go Cloud Development Kit Authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     https://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package batcher supports batching of items. Create a Batcher with a handler and
    16  // add items to it. Items are accumulated while handler calls are in progress; when
    17  // the handler returns, it will be called again with items accumulated since the last
    18  // call. Multiple concurrent calls to the handler are supported.
    19  package batcher // import "gocloud.dev/pubsub/batcher"
    20  
    21  import (
    22  	"context"
    23  	"errors"
    24  	"reflect"
    25  	"sync"
    26  )
    27  
    28  // Split determines how to split n (representing n items) into batches based on
    29  // opts. It returns a slice of batch sizes.
    30  //
    31  // For example, Split(10) might return [10], [5, 5], or [2, 2, 2, 2, 2]
    32  // depending on opts. opts may be nil to accept defaults.
    33  //
    34  // Split will return nil if n is less than o.MinBatchSize.
    35  //
    36  // The sum of returned batches may be less than n (e.g., if n is 10x larger
    37  // than o.MaxBatchSize, but o.MaxHandlers is less than 10).
    38  func Split(n int, opts *Options) []int {
    39  	o := newOptionsWithDefaults(opts)
    40  	if n < o.MinBatchSize {
    41  		// No batch yet.
    42  		return nil
    43  	}
    44  	if o.MaxBatchSize == 0 {
    45  		// One batch is fine.
    46  		return []int{n}
    47  	}
    48  
    49  	// TODO(rvangent): Consider trying to even out the batch sizes.
    50  	// For example, n=10 with MaxBatchSize 9 and MaxHandlers 2 will Split
    51  	// to [9, 1]; it could be [5, 5].
    52  	var batches []int
    53  	for n >= o.MinBatchSize && len(batches) < o.MaxHandlers {
    54  		b := o.MaxBatchSize
    55  		if b > n {
    56  			b = n
    57  		}
    58  		batches = append(batches, b)
    59  		n -= b
    60  	}
    61  	return batches
    62  }
    63  
// A Batcher batches items. Items handed to Add or AddNoWait are queued in
// pending and passed to handler in slices. opts holds the options with
// defaults applied by New; handler is the function called with each batch.
type Batcher struct {
	opts          Options
	handler       func(interface{}) error
	itemSliceZero reflect.Value  // nil (zero value) for slice of items
	wg            sync.WaitGroup // tracks running handler goroutines; Shutdown waits on it

	// mu guards the fields below.
	mu        sync.Mutex
	pending   []waiter // items waiting to be handled
	nHandlers int      // number of currently running handler goroutines
	shutdown  bool     // set by Shutdown; AddNoWait rejects items once it is true
}
    76  
// ErrMessageTooLarge is the error delivered for an item whose ByteSize
// exceeds Options.MaxBatchByteSize. Such an item is rejected by AddNoWait and
// is never added to a batch.
var ErrMessageTooLarge = errors.New("batcher: message too large")
    79  
// sizableItem is implemented by items that can report their own size. When
// Options.MaxBatchByteSize is set, items implementing it are checked against
// that limit; items that do not implement it count as zero bytes when a batch
// is assembled.
type sizableItem interface {
	// ByteSize returns the size of the item, in the same unit as
	// Options.MaxBatchByteSize.
	ByteSize() int
}
    83  
// waiter pairs a pending item with the channel on which the result of
// handling that item is reported.
type waiter struct {
	item interface{} // the value passed to AddNoWait
	errc chan error  // receives the handler's return value for the item's batch
}
    88  
// Options sets options for Batcher. The zero value of each field selects the
// default described on that field.
type Options struct {
	// Maximum number of concurrent handlers. Defaults to 1.
	MaxHandlers int
	// Minimum size of a batch, in items. Defaults to 1. No batch is started
	// while fewer than this many items are pending.
	MinBatchSize int
	// Maximum size of a batch, in items. 0 means no limit.
	MaxBatchSize int
	// Maximum bytesize of a batch. 0 means no limit. Only items that have a
	// ByteSize() int method count towards it; a single item larger than this
	// limit is rejected with ErrMessageTooLarge.
	MaxBatchByteSize int
}
   100  
   101  // newOptionsWithDefaults returns Options with defaults applied to opts.
   102  // opts may be nil to accept all defaults.
   103  func newOptionsWithDefaults(opts *Options) Options {
   104  	var o Options
   105  	if opts != nil {
   106  		o = *opts
   107  	}
   108  	if o.MaxHandlers == 0 {
   109  		o.MaxHandlers = 1
   110  	}
   111  	if o.MinBatchSize == 0 {
   112  		o.MinBatchSize = 1
   113  	}
   114  	return o
   115  }
   116  
   117  // newMergedOptions returns o merged with opts.
   118  func (o *Options) NewMergedOptions(opts *Options) *Options {
   119  	maxH := o.MaxHandlers
   120  	if opts.MaxHandlers != 0 && (maxH == 0 || opts.MaxHandlers < maxH) {
   121  		maxH = opts.MaxHandlers
   122  	}
   123  	minB := o.MinBatchSize
   124  	if opts.MinBatchSize != 0 && (minB == 0 || opts.MinBatchSize > minB) {
   125  		minB = opts.MinBatchSize
   126  	}
   127  	maxB := o.MaxBatchSize
   128  	if opts.MaxBatchSize != 0 && (maxB == 0 || opts.MaxBatchSize < maxB) {
   129  		maxB = opts.MaxBatchSize
   130  	}
   131  	maxBB := o.MaxBatchByteSize
   132  	if opts.MaxBatchByteSize != 0 && (maxBB == 0 || opts.MaxBatchByteSize < maxBB) {
   133  		maxBB = opts.MaxBatchByteSize
   134  	}
   135  	c := &Options{
   136  		MaxHandlers:      maxH,
   137  		MinBatchSize:     minB,
   138  		MaxBatchSize:     maxB,
   139  		MaxBatchByteSize: maxBB,
   140  	}
   141  	return c
   142  }
   143  
   144  // New creates a new Batcher.
   145  //
   146  // itemType is type that will be batched. For example, if you
   147  // want to create batches of *Entry, pass reflect.TypeOf(&Entry{}) for itemType.
   148  //
   149  // opts can be nil to accept defaults.
   150  //
   151  // handler is a function that will be called on each bundle. If itemExample is
   152  // of type T, the argument to handler is of type []T.
   153  func New(itemType reflect.Type, opts *Options, handler func(interface{}) error) *Batcher {
   154  	return &Batcher{
   155  		opts:          newOptionsWithDefaults(opts),
   156  		handler:       handler,
   157  		itemSliceZero: reflect.Zero(reflect.SliceOf(itemType)),
   158  	}
   159  }
   160  
   161  // Add adds an item to the batcher. It blocks until the handler has
   162  // processed the item and reports the error that the handler returned.
   163  // If Shutdown has been called, Add immediately returns an error.
   164  func (b *Batcher) Add(ctx context.Context, item interface{}) error {
   165  	c := b.AddNoWait(item)
   166  	// Wait until either our result is ready or the context is done.
   167  	select {
   168  	case err := <-c:
   169  		return err
   170  	case <-ctx.Done():
   171  		return ctx.Err()
   172  	}
   173  }
   174  
   175  // AddNoWait adds an item to the batcher and returns immediately. When the handler is
   176  // called on the item, the handler's error return value will be sent to the channel
   177  // returned from AddNoWait.
   178  func (b *Batcher) AddNoWait(item interface{}) <-chan error {
   179  	b.mu.Lock()
   180  	defer b.mu.Unlock()
   181  
   182  	// Create a channel to receive the error from the handler.
   183  	c := make(chan error, 1)
   184  	if b.shutdown {
   185  		c <- errors.New("batcher: shut down")
   186  		return c
   187  	}
   188  
   189  	if b.opts.MaxBatchByteSize > 0 {
   190  		if sizable, ok := item.(sizableItem); ok {
   191  			if sizable.ByteSize() > b.opts.MaxBatchByteSize {
   192  				c <- ErrMessageTooLarge
   193  				return c
   194  			}
   195  		}
   196  	}
   197  
   198  	// Add the item to the pending list.
   199  	b.pending = append(b.pending, waiter{item, c})
   200  	if b.nHandlers < b.opts.MaxHandlers {
   201  		// If we can start a handler, do so with the item just added and any others that are pending.
   202  		batch := b.nextBatch()
   203  		if batch != nil {
   204  			b.wg.Add(1)
   205  			go func() {
   206  				b.callHandler(batch)
   207  				b.wg.Done()
   208  			}()
   209  			b.nHandlers++
   210  		}
   211  	}
   212  	// If we can't start a handler, then one of the currently running handlers will
   213  	// take our item.
   214  	return c
   215  }
   216  
   217  // nextBatch returns the batch to process, and updates b.pending.
   218  // It returns nil if there's no batch ready for processing.
   219  // b.mu must be held.
   220  func (b *Batcher) nextBatch() []waiter {
   221  	if len(b.pending) < b.opts.MinBatchSize {
   222  		return nil
   223  	}
   224  
   225  	if b.opts.MaxBatchByteSize == 0 && (b.opts.MaxBatchSize == 0 || len(b.pending) <= b.opts.MaxBatchSize) {
   226  		// Send it all!
   227  		batch := b.pending
   228  		b.pending = nil
   229  		return batch
   230  	}
   231  
   232  	batch := make([]waiter, 0, len(b.pending))
   233  	batchByteSize := 0
   234  	for _, msg := range b.pending {
   235  		itemByteSize := 0
   236  		if sizable, ok := msg.item.(sizableItem); ok {
   237  			itemByteSize = sizable.ByteSize()
   238  		}
   239  		reachedMaxSize := b.opts.MaxBatchSize > 0 && len(batch)+1 > b.opts.MaxBatchSize
   240  		reachedMaxByteSize := b.opts.MaxBatchByteSize > 0 && batchByteSize+itemByteSize > b.opts.MaxBatchByteSize
   241  
   242  		if reachedMaxSize || reachedMaxByteSize {
   243  			break
   244  		}
   245  		batch = append(batch, msg)
   246  		batchByteSize = batchByteSize + itemByteSize
   247  	}
   248  
   249  	b.pending = b.pending[len(batch):]
   250  	return batch
   251  }
   252  
   253  func (b *Batcher) callHandler(batch []waiter) {
   254  	for batch != nil {
   255  
   256  		// Collect the items into a slice of the example type.
   257  		items := b.itemSliceZero
   258  		for _, m := range batch {
   259  			items = reflect.Append(items, reflect.ValueOf(m.item))
   260  		}
   261  		// Call the handler and report the result to all waiting
   262  		// callers of Add.
   263  		err := b.handler(items.Interface())
   264  		for _, m := range batch {
   265  			m.errc <- err
   266  		}
   267  		b.mu.Lock()
   268  		// If there is more work, keep running; otherwise exit. Take the new batch
   269  		// and decrement the handler count atomically, so that newly added items will
   270  		// always get to run.
   271  		batch = b.nextBatch()
   272  		if batch == nil {
   273  			b.nHandlers--
   274  		}
   275  		b.mu.Unlock()
   276  	}
   277  }
   278  
   279  // Shutdown waits for all active calls to Add to finish, then
   280  // returns. After Shutdown is called, all subsequent calls to Add fail.
   281  // Shutdown should be called only once.
   282  func (b *Batcher) Shutdown() {
   283  	b.mu.Lock()
   284  	b.shutdown = true
   285  	b.mu.Unlock()
   286  	b.wg.Wait()
   287  }