github.com/thiagoyeds/go-cloud@v0.26.0/pubsub/batcher/batcher.go

// Copyright 2018 The Go Cloud Development Kit Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package batcher supports batching of items. Create a Batcher with a handler and
// add items to it. Items are accumulated while handler calls are in progress; when
// the handler returns, it will be called again with items accumulated since the last
// call. Multiple concurrent calls to the handler are supported.
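//
// A minimal usage sketch (editorial example; the item type Entry and the
// per-item work inside the handler are placeholders, not part of this package):
//
//	b := batcher.New(reflect.TypeOf(&Entry{}), nil, func(items interface{}) error {
//		for _, e := range items.([]*Entry) {
//			process(e) // hypothetical per-item work
//		}
//		return nil
//	})
//	if err := b.Add(context.Background(), &Entry{}); err != nil {
//		// err is the handler's error, ctx.Err(), or a shutdown error.
//	}
//	b.Shutdown()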
package batcher // import "gocloud.dev/pubsub/batcher"

import (
	"context"
	"errors"
	"reflect"
	"sync"
)

// Split determines how to split n (representing n items) into batches based on
// opts. It returns a slice of batch sizes.
//
// For example, Split(10) might return [10], [5, 5], or [2, 2, 2, 2, 2]
// depending on opts. opts may be nil to accept defaults.
//
// Split will return nil if n is less than opts.MinBatchSize.
//
// The sum of the returned batch sizes may be less than n (e.g., if n is 10x
// larger than opts.MaxBatchSize, but opts.MaxHandlers is less than 10).
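//
// Some concrete cases (editorial examples, implied by the rules above):
//
//	Split(10, nil)                                        // [10]: no MaxBatchSize, so one batch
//	Split(10, &Options{MaxBatchSize: 9, MaxHandlers: 2})  // [9, 1]
//	Split(10, &Options{MaxBatchSize: 4, MaxHandlers: 2})  // [4, 4]: two items left unassigned
//	Split(3, &Options{MinBatchSize: 5})                   // nil: not enough items for a batch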
func Split(n int, opts *Options) []int {
	o := newOptionsWithDefaults(opts)
	if n < o.MinBatchSize {
		// No batch yet.
		return nil
	}
	if o.MaxBatchSize == 0 {
		// One batch is fine.
		return []int{n}
	}

	// TODO(rvangent): Consider trying to even out the batch sizes.
	// For example, n=10 with MaxBatchSize 9 and MaxHandlers 2 will Split
	// to [9, 1]; it could be [5, 5].
	var batches []int
	for n >= o.MinBatchSize && len(batches) < o.MaxHandlers {
		b := o.MaxBatchSize
		if b > n {
			b = n
		}
		batches = append(batches, b)
		n -= b
	}
	return batches
}

// A Batcher batches items.
type Batcher struct {
	opts          Options
	handler       func(interface{}) error
	itemSliceZero reflect.Value  // nil (zero value) for slice of items
	wg            sync.WaitGroup // tracks running handler goroutines, so Shutdown can wait for them

	mu        sync.Mutex
	pending   []waiter // items waiting to be handled
	nHandlers int      // number of currently running handler goroutines
	shutdown  bool
}

// ErrMessageTooLarge is returned when a single item's byte size exceeds
// Options.MaxBatchByteSize.
var ErrMessageTooLarge = errors.New("batcher: message too large")

// sizableItem is implemented by items that can report their own byte size,
// which is used to enforce Options.MaxBatchByteSize.
type sizableItem interface {
	ByteSize() int
}
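
// As an editorial sketch (not part of this file): an item type opts in to the
// byte-size limit by implementing ByteSize, e.g.
//
//	type sizedMsg struct{ body []byte } // hypothetical item type
//
//	func (m *sizedMsg) ByteSize() int { return len(m.body) }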

// waiter pairs a pending item with the channel on which the handler's
// result for that item will be delivered.
type waiter struct {
	item interface{}
	errc chan error
}

// Options sets options for Batcher.
type Options struct {
	// Maximum number of concurrent handlers. Defaults to 1.
	MaxHandlers int
	// Minimum size of a batch. Defaults to 1.
	MinBatchSize int
	// Maximum size of a batch. 0 means no limit.
	MaxBatchSize int
	// Maximum byte size of a batch, summed over items that implement
	// ByteSize. 0 means no limit.
	MaxBatchByteSize int
}
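
// For instance (editorial example), a client could allow two concurrent
// handler calls, each receiving at most 100 items and at most 1 MiB of
// sizable items per batch:
//
//	opts := &batcher.Options{
//		MaxHandlers:      2,
//		MaxBatchSize:     100,
//		MaxBatchByteSize: 1 << 20,
//	}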

// newOptionsWithDefaults returns Options with defaults applied to opts.
// opts may be nil to accept all defaults.
func newOptionsWithDefaults(opts *Options) Options {
	var o Options
	if opts != nil {
		o = *opts
	}
	if o.MaxHandlers == 0 {
		o.MaxHandlers = 1
	}
	if o.MinBatchSize == 0 {
		o.MinBatchSize = 1
	}
	return o
}

// New creates a new Batcher.
//
// itemType is the type of the items that will be batched. For example, if you
// want to create batches of *Entry, pass reflect.TypeOf(&Entry{}) for itemType.
//
// opts can be nil to accept defaults.
//
// handler is a function that will be called on each batch. If itemType
// represents T, the argument to handler is of type []T.
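//
// An editorial sketch of that type relationship (Entry and save are placeholders):
//
//	b := batcher.New(reflect.TypeOf(&Entry{}), nil, func(items interface{}) error {
//		entries := items.([]*Entry) // safe: each batch is built as a []*Entry
//		return save(entries)        // hypothetical bulk operation
//	})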
func New(itemType reflect.Type, opts *Options, handler func(interface{}) error) *Batcher {
	return &Batcher{
		opts:          newOptionsWithDefaults(opts),
		handler:       handler,
		itemSliceZero: reflect.Zero(reflect.SliceOf(itemType)),
	}
}

// Add adds an item to the batcher. It blocks until the handler has
// processed the item, and reports the error that the handler returned.
// If ctx is done first, Add returns ctx.Err(); the item may still be
// processed by the handler afterwards. If Shutdown has been called, Add
// immediately returns an error.
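//
// For example (editorial sketch), to bound how long a caller blocks:
//
//	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
//	defer cancel()
//	if err := b.Add(ctx, item); err != nil {
//		// Either the handler's error, ctx.Err(), or a shutdown error.
//	}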
func (b *Batcher) Add(ctx context.Context, item interface{}) error {
	c := b.AddNoWait(item)
	// Wait until either our result is ready or the context is done.
	select {
	case err := <-c:
		return err
	case <-ctx.Done():
		return ctx.Err()
	}
}

// AddNoWait adds an item to the batcher and returns immediately. When the handler is
// called on the item, the handler's error return value will be sent to the channel
// returned from AddNoWait.
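//
// An editorial sketch of asynchronous use:
//
//	errc := b.AddNoWait(item)
//	// ...do other work while the batch is processed...
//	if err := <-errc; err != nil {
//		log.Printf("batch handler failed: %v", err)
//	}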
func (b *Batcher) AddNoWait(item interface{}) <-chan error {
	b.mu.Lock()
	defer b.mu.Unlock()

	// Create a buffered channel to receive the error from the handler, so
	// the handler never blocks on the send.
	c := make(chan error, 1)
	if b.shutdown {
		c <- errors.New("batcher: shut down")
		return c
	}

	// Reject items that could never fit into a batch on their own.
	if b.opts.MaxBatchByteSize > 0 {
		if sizable, ok := item.(sizableItem); ok {
			if sizable.ByteSize() > b.opts.MaxBatchByteSize {
				c <- ErrMessageTooLarge
				return c
			}
		}
	}

	// Add the item to the pending list.
	b.pending = append(b.pending, waiter{item, c})
	if b.nHandlers < b.opts.MaxHandlers {
		// If we can start a handler, do so with the item just added and any others that are pending.
		batch := b.nextBatch()
		if batch != nil {
			b.wg.Add(1)
			go func() {
				b.callHandler(batch)
				b.wg.Done()
			}()
			b.nHandlers++
		}
	}
	// If we can't start a handler, then one of the currently running handlers will
	// take our item.
	return c
}

// nextBatch returns the batch to process, and updates b.pending.
// It returns nil if there's no batch ready for processing.
// b.mu must be held.
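//
// A worked example (editorial): with MinBatchSize 1, MaxBatchByteSize 250, and
// five pending items of 100 bytes each, the returned batch holds the first two
// items (200 bytes; a third would make 300), and three items stay pending.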
func (b *Batcher) nextBatch() []waiter {
	if len(b.pending) < b.opts.MinBatchSize {
		return nil
	}

	if b.opts.MaxBatchByteSize == 0 && (b.opts.MaxBatchSize == 0 || len(b.pending) <= b.opts.MaxBatchSize) {
		// Send it all!
		batch := b.pending
		b.pending = nil
		return batch
	}

	// Apply the item-count and byte-size limits, taking pending items in order.
	batch := make([]waiter, 0, len(b.pending))
	batchByteSize := 0
	for _, msg := range b.pending {
		itemByteSize := 0
		if sizable, ok := msg.item.(sizableItem); ok {
			itemByteSize = sizable.ByteSize()
		}
		reachedMaxSize := b.opts.MaxBatchSize > 0 && len(batch)+1 > b.opts.MaxBatchSize
		reachedMaxByteSize := b.opts.MaxBatchByteSize > 0 && batchByteSize+itemByteSize > b.opts.MaxBatchByteSize

		if reachedMaxSize || reachedMaxByteSize {
			break
		}
		batch = append(batch, msg)
		batchByteSize += itemByteSize
	}

	b.pending = b.pending[len(batch):]
	return batch
}

// callHandler calls b.handler on batch, sends the result to every waiter in
// the batch, and keeps looping for as long as nextBatch returns more work.
func (b *Batcher) callHandler(batch []waiter) {
	for batch != nil {
		// Collect the items into a slice of the item type.
		items := b.itemSliceZero
		for _, m := range batch {
			items = reflect.Append(items, reflect.ValueOf(m.item))
		}
		// Call the handler and report the result to all waiting
		// callers of Add.
		err := b.handler(items.Interface())
		for _, m := range batch {
			m.errc <- err
		}
		b.mu.Lock()
		// If there is more work, keep running; otherwise exit. Take the new batch
		// and decrement the handler count atomically, so that newly added items will
		// always get to run.
		batch = b.nextBatch()
		if batch == nil {
			b.nHandlers--
		}
		b.mu.Unlock()
	}
}

// Shutdown waits for all active calls to Add to finish, then
// returns. After Shutdown is called, all subsequent calls to Add fail.
// Shutdown should be called only once.
func (b *Batcher) Shutdown() {
	b.mu.Lock()
	b.shutdown = true
	b.mu.Unlock()
	b.wg.Wait()
}