github.com/thiagoyeds/go-cloud@v0.26.0/pubsub/batcher/batcher.go

// Copyright 2018 The Go Cloud Development Kit Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package batcher supports batching of items. Create a Batcher with a handler and
// add items to it. Items are accumulated while handler calls are in progress; when
// the handler returns, it will be called again with items accumulated since the last
// call. Multiple concurrent calls to the handler are supported.
package batcher // import "gocloud.dev/pubsub/batcher"

import (
	"context"
	"errors"
	"reflect"
	"sync"
)

// Split determines how to split n (representing n items) into batches based on
// opts. It returns a slice of batch sizes.
//
// For example, Split(10) might return [10], [5, 5], or [2, 2, 2, 2, 2]
// depending on opts. opts may be nil to accept defaults.
//
// Split returns nil if n is less than opts.MinBatchSize (after defaults are
// applied).
//
// The sum of the returned batch sizes may be less than n (e.g., if n is 10x
// larger than opts.MaxBatchSize but opts.MaxHandlers is less than 10).
func Split(n int, opts *Options) []int {
	o := newOptionsWithDefaults(opts)
	if n < o.MinBatchSize {
		// No batch yet.
		return nil
	}
	if o.MaxBatchSize == 0 {
		// One batch is fine.
		return []int{n}
	}

	// TODO(rvangent): Consider trying to even out the batch sizes.
	// For example, n=10 with MaxBatchSize 9 and MaxHandlers 2 will Split
	// to [9, 1]; it could be [5, 5].
	var batches []int
	for n >= o.MinBatchSize && len(batches) < o.MaxHandlers {
		b := o.MaxBatchSize
		if b > n {
			b = n
		}
		batches = append(batches, b)
		n -= b
	}
	return batches
}

// A Batcher batches items.
type Batcher struct {
	opts          Options
	handler       func(interface{}) error
	itemSliceZero reflect.Value  // nil (zero value) for slice of items
	wg            sync.WaitGroup // tracks active Add calls

	mu        sync.Mutex
	pending   []waiter // items waiting to be handled
	nHandlers int      // number of currently running handler goroutines
	shutdown  bool
}

// ErrMessageTooLarge is returned when a message is larger than the maximum
// batch byte size.
var ErrMessageTooLarge = errors.New("batcher: message too large")

type sizableItem interface {
	ByteSize() int
}

type waiter struct {
	item interface{}
	errc chan error
}

// Options sets options for Batcher.
type Options struct {
	// Maximum number of concurrent handlers. Defaults to 1.
	MaxHandlers int
	// Minimum size of a batch. Defaults to 1.
	MinBatchSize int
	// Maximum size of a batch. 0 means no limit.
	MaxBatchSize int
	// Maximum byte size of a batch. 0 means no limit.
	MaxBatchByteSize int
}
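
// Illustrative sketch (not part of the original file): the Options above
// compose. For example, with MaxBatchSize 4 and MaxHandlers 2, Split stops
// after two batches even though items remain:
//
//	Split(10, &Options{MaxBatchSize: 4, MaxHandlers: 2}) // returns [4, 4]; 2 items left over
//
// MaxBatchByteSize applies only to items whose type implements
// ByteSize() int (the unexported sizableItem interface above). A
// hypothetical caller-side item type could opt in like this:
//
//	type sizedItem struct {
//		payload []byte
//	}
//
//	// ByteSize reports how many bytes this item contributes to a batch.
//	func (s *sizedItem) ByteSize() int { return len(s.payload) }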

// newOptionsWithDefaults returns Options with defaults applied to opts.
// opts may be nil to accept all defaults.
func newOptionsWithDefaults(opts *Options) Options {
	var o Options
	if opts != nil {
		o = *opts
	}
	if o.MaxHandlers == 0 {
		o.MaxHandlers = 1
	}
	if o.MinBatchSize == 0 {
		o.MinBatchSize = 1
	}
	return o
}

// New creates a new Batcher.
//
// itemType is the type of the items that will be batched. For example, if you
// want to create batches of *Entry, pass reflect.TypeOf(&Entry{}) for itemType.
//
// opts can be nil to accept defaults.
//
// handler is a function that will be called on each bundle. If itemType
// corresponds to type T, the argument to handler is of type []T.
func New(itemType reflect.Type, opts *Options, handler func(interface{}) error) *Batcher {
	return &Batcher{
		opts:          newOptionsWithDefaults(opts),
		handler:       handler,
		itemSliceZero: reflect.Zero(reflect.SliceOf(itemType)),
	}
}

// Add adds an item to the batcher. It blocks until the handler has
// processed the item, and reports the error that the handler returned.
// If ctx is done before the item is handled, Add returns ctx.Err().
// If Shutdown has been called, Add immediately returns an error.
func (b *Batcher) Add(ctx context.Context, item interface{}) error {
	c := b.AddNoWait(item)
	// Wait until either our result is ready or the context is done.
	select {
	case err := <-c:
		return err
	case <-ctx.Done():
		return ctx.Err()
	}
}

// AddNoWait adds an item to the batcher and returns immediately. When the handler is
// called on the item, the handler's error return value will be sent to the channel
// returned from AddNoWait.
func (b *Batcher) AddNoWait(item interface{}) <-chan error {
	b.mu.Lock()
	defer b.mu.Unlock()

	// Create a channel to receive the error from the handler.
	c := make(chan error, 1)
	if b.shutdown {
		c <- errors.New("batcher: shut down")
		return c
	}

	if b.opts.MaxBatchByteSize > 0 {
		if sizable, ok := item.(sizableItem); ok {
			if sizable.ByteSize() > b.opts.MaxBatchByteSize {
				c <- ErrMessageTooLarge
				return c
			}
		}
	}

	// Add the item to the pending list.
	b.pending = append(b.pending, waiter{item, c})
	if b.nHandlers < b.opts.MaxHandlers {
		// If we can start a handler, do so with the item just added
		// and any others that are pending.
		batch := b.nextBatch()
		if batch != nil {
			b.wg.Add(1)
			go func() {
				b.callHandler(batch)
				b.wg.Done()
			}()
			b.nHandlers++
		}
	}
	// If we can't start a handler, then one of the currently running
	// handlers will take our item.
	return c
}
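
// A minimal caller-side sketch (not from the original file) of the
// fire-and-forget pattern AddNoWait enables: enqueue several items without
// blocking, then collect the handler's result for each. It assumes b is a
// *Batcher created with itemType reflect.TypeOf(""), so the handler receives
// a []string. Each returned channel is buffered, so reading them later is safe.
//
//	var errcs []<-chan error
//	for _, s := range []string{"a", "b", "c"} {
//		errcs = append(errcs, b.AddNoWait(s))
//	}
//	for _, c := range errcs {
//		if err := <-c; err != nil {
//			// Handle the error the handler returned for this item's batch.
//		}
//	}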

// nextBatch returns the batch to process, and updates b.pending.
// It returns nil if there's no batch ready for processing.
// b.mu must be held.
func (b *Batcher) nextBatch() []waiter {
	if len(b.pending) < b.opts.MinBatchSize {
		return nil
	}

	if b.opts.MaxBatchByteSize == 0 && (b.opts.MaxBatchSize == 0 || len(b.pending) <= b.opts.MaxBatchSize) {
		// Send it all!
		batch := b.pending
		b.pending = nil
		return batch
	}

	batch := make([]waiter, 0, len(b.pending))
	batchByteSize := 0
	for _, msg := range b.pending {
		itemByteSize := 0
		if sizable, ok := msg.item.(sizableItem); ok {
			itemByteSize = sizable.ByteSize()
		}
		reachedMaxSize := b.opts.MaxBatchSize > 0 && len(batch)+1 > b.opts.MaxBatchSize
		reachedMaxByteSize := b.opts.MaxBatchByteSize > 0 && batchByteSize+itemByteSize > b.opts.MaxBatchByteSize

		if reachedMaxSize || reachedMaxByteSize {
			break
		}
		batch = append(batch, msg)
		batchByteSize += itemByteSize
	}

	b.pending = b.pending[len(batch):]
	return batch
}

func (b *Batcher) callHandler(batch []waiter) {
	for batch != nil {
		// Collect the items into a slice of the example type.
		items := b.itemSliceZero
		for _, m := range batch {
			items = reflect.Append(items, reflect.ValueOf(m.item))
		}
		// Call the handler and report the result to all waiting
		// callers of Add.
		err := b.handler(items.Interface())
		for _, m := range batch {
			m.errc <- err
		}
		b.mu.Lock()
		// If there is more work, keep running; otherwise exit. Take the new batch
		// and decrement the handler count atomically, so that newly added items will
		// always get to run.
		batch = b.nextBatch()
		if batch == nil {
			b.nHandlers--
		}
		b.mu.Unlock()
	}
}

// Shutdown waits for all active calls to Add to finish, then
// returns. After Shutdown is called, all subsequent calls to Add fail.
// Shutdown should be called only once.
func (b *Batcher) Shutdown() {
	b.mu.Lock()
	b.shutdown = true
	b.mu.Unlock()
	b.wg.Wait()
}
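
// End-to-end usage sketch (illustrative only; not part of the original file),
// written from a caller's perspective. It creates a Batcher of *int with a
// batch-size cap, adds items from concurrent goroutines, and shuts down. The
// handler's argument is []*int because itemType is reflect.TypeOf((*int)(nil)).
//
//	handler := func(items interface{}) error {
//		for _, p := range items.([]*int) {
//			_ = *p // process the item
//		}
//		return nil
//	}
//	b := batcher.New(reflect.TypeOf((*int)(nil)), &batcher.Options{MaxBatchSize: 10}, handler)
//	var wg sync.WaitGroup
//	for i := 0; i < 100; i++ {
//		i := i
//		wg.Add(1)
//		go func() {
//			defer wg.Done()
//			if err := b.Add(context.Background(), &i); err != nil {
//				// Handle the handler's (or context's) error.
//			}
//		}()
//	}
//	wg.Wait()
//	b.Shutdown() // all Adds have finished; later Adds would fail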