github.com/kaleido-io/firefly@v0.0.0-20210622132723-8b4b6aacb971/internal/batch/batch_processor.go

// Copyright © 2021 Kaleido, Inc.
//
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package batch

import (
	"context"
	"crypto/sha256"
	"database/sql/driver"
	"encoding/binary"
	"fmt"
	"time"

	"github.com/kaleido-io/firefly/internal/log"
	"github.com/kaleido-io/firefly/internal/retry"
	"github.com/kaleido-io/firefly/pkg/database"
	"github.com/kaleido-io/firefly/pkg/fftypes"
)

type batchWork struct {
	msg        *fftypes.Message
	data       []*fftypes.Data
	dispatched chan *batchDispatch
	abandoned  bool
}

type batchDispatch struct {
	msg     *fftypes.Message
	batchID *fftypes.UUID
}

type batchProcessorConf struct {
	Options
	namespace          string
	author             string
	group              *fftypes.Bytes32
	dispatch           DispatchHandler
	processorQuiescing func()
}

type batchProcessor struct {
	ctx         context.Context
	database    database.Plugin
	name        string
	cancelCtx   func()
	closed      bool
	newWork     chan *batchWork
	persistWork chan *batchWork
	sealBatch   chan bool
	batchSealed chan bool
	retry       *retry.Retry
	conf        *batchProcessorConf
}

func newBatchProcessor(ctx context.Context, di database.Plugin, conf *batchProcessorConf, retry *retry.Retry) *batchProcessor {
	pCtx := log.WithLogField(ctx, "role", fmt.Sprintf("batchproc-%s:%s", conf.namespace, conf.author))
	pCtx, cancelCtx := context.WithCancel(pCtx)
	bp := &batchProcessor{
		ctx:         pCtx,
		cancelCtx:   cancelCtx,
		database:    di,
		name:        fmt.Sprintf("%s:%s", conf.namespace, conf.author),
		newWork:     make(chan *batchWork),
		persistWork: make(chan *batchWork, conf.BatchMaxSize),
		sealBatch:   make(chan bool),
		batchSealed: make(chan bool),
		retry:       retry,
		conf:        conf,
	}
	go bp.assemblyLoop()
	go bp.persistenceLoop()
	return bp
}

// The assemblyLoop accepts work into the pipe as quickly as possible.
// It dispatches work asynchronously to the persistenceLoop, which is responsible for
// calling back each piece of work once it has been persisted into a batch
// (it does not wait until that batch is sealed/dispatched).
// The assemblyLoop seals a batch when it is full, or when the batch timeout expires.
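// After DisposeTimeout of inactivity it notifies the manager via processorQuiescing, and
// once quiescing with no work outstanding it exits, disposing of the processor.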
func (bp *batchProcessor) assemblyLoop() {
	defer bp.close()
	defer close(bp.sealBatch) // close the persistenceLoop when we exit
	l := log.L(bp.ctx)
	var batchSize uint
	var lastBatchSealed = time.Now()
	var quiescing bool
	for {
		// We timeout waiting at the point we think we're ready for disposal,
		// unless we've started a batch in which case we wait for what's left
		// of the batch timeout
		timeToWait := bp.conf.DisposeTimeout
		if quiescing {
			timeToWait = 100 * time.Millisecond
		} else if batchSize > 0 {
			timeToWait = bp.conf.BatchTimeout - time.Since(lastBatchSealed)
		}
		timeout := time.NewTimer(timeToWait)

		// Wait for work, the timeout, or close
		var timedOut, closed bool
		select {
		case <-timeout.C:
			timedOut = true
		case work, ok := <-bp.newWork:
			if ok && !work.abandoned {
				batchSize++
				bp.persistWork <- work
			} else {
				closed = true
			}
		}

		// Don't include the sealing time in the duration
		batchFull := batchSize >= bp.conf.BatchMaxSize
		l.Debugf("Assembly batch loop: Size=%d Full=%t", batchSize, batchFull)

		batchDuration := time.Since(lastBatchSealed)
		if quiescing && batchSize == 0 {
			l.Debugf("Batch assembler disposed after %.2fs of inactivity", float64(batchDuration)/float64(time.Second))
			return
		}

		if closed || batchDuration > bp.conf.DisposeTimeout {
			bp.conf.processorQuiescing()
			quiescing = true
		}

		if (quiescing || timedOut || batchFull) && batchSize > 0 {
			bp.sealBatch <- true
			<-bp.batchSealed
			l.Debugf("Assembly batch sealed")
			lastBatchSealed = time.Now()
			batchSize = 0
		}

	}
}

func (bp *batchProcessor) createOrAddToBatch(batch *fftypes.Batch, newWork []*batchWork, seal bool) *fftypes.Batch {
	l := log.L(bp.ctx)
	if batch == nil {
		batchID := fftypes.NewUUID()
		l.Debugf("New batch %s", batchID)
		batch = &fftypes.Batch{
			ID:        batchID,
			Namespace: bp.conf.namespace,
			Author:    bp.conf.author,
			Group:     bp.conf.group,
			Payload:   fftypes.BatchPayload{},
			Created:   fftypes.Now(),
		}
	}
	for _, w := range newWork {
		if w.msg != nil {
			w.msg.BatchID = batch.ID
			w.msg.Local = false
			batch.Payload.Messages = append(batch.Payload.Messages, w.msg)
		}
		batch.Payload.Data = append(batch.Payload.Data, w.data...)
	}
	if seal {
		// Generate a new Transaction reference, which will be used to record the status of the associated transaction as it happens
		batch.Payload.TX = fftypes.TransactionRef{
			Type: fftypes.TransactionTypeBatchPin,
			ID:   fftypes.NewUUID(),
		}
		batch.Hash = batch.Payload.Hash()
		l.Debugf("Batch %s sealed. Hash=%s", batch.ID, batch.Hash)
	}
	return batch
}

func (bp *batchProcessor) maskContext(ctx context.Context, msg *fftypes.Message, topic string) (contextOrPin *fftypes.Bytes32, err error) {

	hashBuilder := sha256.New()
	hashBuilder.Write([]byte(topic))

	// For broadcast we do not need to mask the context, which is just the hash
	// of the topic. There would be no way to unmask it if we did, because we don't have
	// the full list of senders to know what their next hashes should be.
	if msg.Header.Group == nil {
		return fftypes.HashResult(hashBuilder), nil
	}

	// For private groups, we need to make the topic specific to the group (which is
	// a salt for the hash as it is not on chain)
	hashBuilder.Write((*msg.Header.Group)[:])

	// The combination of the topic and group is the context
	contextHash := fftypes.HashResult(hashBuilder)

	// Get the next nonce for this context - we're the authority in the network on this,
	// as we are the sender.
	gc := &fftypes.Nonce{
		Context: contextHash,
		Group:   msg.Header.Group,
		Topic:   topic,
	}
	err = bp.database.UpsertNonceNext(ctx, gc)
	if err != nil {
		return nil, err
	}

	// Now combine our sending identity, and this nonce, to produce the hash that should
	// be expected by all members of the group as the next nonce from us on this topic.
	hashBuilder.Write([]byte(msg.Header.Author))
	nonceBytes := make([]byte, 8)
	binary.BigEndian.PutUint64(nonceBytes, uint64(gc.Nonce))
	hashBuilder.Write(nonceBytes)

	return fftypes.HashResult(hashBuilder), err
}

func (bp *batchProcessor) maskContexts(ctx context.Context, batch *fftypes.Batch) ([]*fftypes.Bytes32, error) {
	// Calculate the sequence hashes
	contextsOrPins := make([]*fftypes.Bytes32, 0, len(batch.Payload.Messages))
	for _, msg := range batch.Payload.Messages {
		for _, topic := range msg.Header.Topics {
			contextOrPin, err := bp.maskContext(ctx, msg, topic)
			if err != nil {
				return nil, err
			}
			contextsOrPins = append(contextsOrPins, contextOrPin)
			if msg.Header.Group != nil {
				msg.Pins = append(msg.Pins, contextOrPin.String())
			}
		}
	}
	return contextsOrPins, nil
}

func (bp *batchProcessor) dispatchBatch(batch *fftypes.Batch, pins []*fftypes.Bytes32) {
	// Call the dispatcher to do the heavy lifting - will only exit if we're closed
	_ = bp.retry.Do(bp.ctx, "batch dispatch", func(attempt int) (retry bool, err error) {
		err = bp.conf.dispatch(bp.ctx, batch, pins)
		if err != nil {
			return !bp.closed, err
		}
		return false, nil
	})
}

func (bp *batchProcessor) persistBatch(batch *fftypes.Batch, newWork []*batchWork, seal bool) (contexts []*fftypes.Bytes32, err error) {
	err = bp.retry.Do(bp.ctx, "batch persist", func(attempt int) (retry bool, err error) {
		err = bp.database.RunAsGroup(bp.ctx, func(ctx context.Context) (err error) {
			// Update all the messages in the batch with the batch ID
			if len(newWork) > 0 {
				msgIDs := make([]driver.Value, 0, len(newWork))
				for _, w := range newWork {
					if w.msg != nil {
						msgIDs = append(msgIDs, w.msg.Header.ID)
					}
				}
				filter := database.MessageQueryFactory.NewFilter(ctx).In("id", msgIDs)
				update := database.MessageQueryFactory.NewUpdate(ctx).
					Set("batch", batch.ID).
270 Set("group", batch.Group) 271 err = bp.database.UpdateMessages(ctx, filter, update) 272 } 273 if err == nil && seal { 274 contexts, err = bp.maskContexts(bp.ctx, batch) 275 } 276 if err == nil { 277 // Persist the batch itself 278 err = bp.database.UpsertBatch(ctx, batch, true, seal /* we set the hash as it seals */) 279 } 280 return err 281 }) 282 if err != nil { 283 return !bp.closed, err 284 } 285 return false, nil 286 }) 287 return contexts, err 288 } 289 290 func (bp *batchProcessor) persistenceLoop() { 291 defer close(bp.batchSealed) 292 l := log.L(bp.ctx) 293 var currentBatch *fftypes.Batch 294 var batchSize = 0 295 for !bp.closed { 296 var seal bool 297 newWork := make([]*batchWork, 0, bp.conf.BatchMaxSize) 298 299 // Block waiting for work, or a batch sealing request 300 select { 301 case w := <-bp.persistWork: 302 newWork = append(newWork, w) 303 case <-bp.sealBatch: 304 seal = true 305 } 306 307 // Drain everything currently in the pipe waiting for dispatch 308 // This means we batch the writing to the database, which has to happen before 309 // we can callback the work with a persisted batch ID. 310 // We drain both the message queue, and the seal, because there's no point 311 // going round the loop (persisting twice) if the batch has just filled 312 var drained bool 313 for !drained { 314 select { 315 case _, ok := <-bp.sealBatch: 316 seal = true 317 if !ok { 318 return // Closed by termination of assemblyLoop 319 } 320 case w := <-bp.persistWork: 321 newWork = append(newWork, w) 322 default: 323 drained = true 324 } 325 } 326 327 batchSize += len(newWork) 328 currentBatch = bp.createOrAddToBatch(currentBatch, newWork, seal) 329 l.Debugf("Adding %d entries to batch %s. Size=%d Seal=%t", len(newWork), currentBatch.ID, batchSize, seal) 330 331 // Persist the batch - indefinite retry (unless we close, or context is cancelled) 332 contexts, err := bp.persistBatch(currentBatch, newWork, seal) 333 if err != nil { 334 return 335 } 336 337 // Inform all the work in this batch of the batch they have been persisted 338 // into. At this point they can carry on processing, because we won't lose 339 // the work - it's tracked in a batch ready to go 340 for _, w := range newWork { 341 w.dispatched <- &batchDispatch{ 342 w.msg, 343 currentBatch.ID, 344 } 345 } 346 347 if seal { 348 // At this point the batch is sealed, and the assember can start 349 // queing up the next batch. We only let them get one batch ahead 350 // (due to the size of the channel being the maxBatchSize) before 351 // they start blocking waiting for us to complete database of 352 // the current batch. 353 bp.batchSealed <- true 354 355 // Synchronously dispatch the batch. Must be last thing we do in the loop, as we 356 // will break out of the retry in the case that we close 357 bp.dispatchBatch(currentBatch, contexts) 358 359 // Move onto the next batch 360 currentBatch = nil 361 batchSize = 0 362 } 363 364 } 365 } 366 367 func (bp *batchProcessor) close() { 368 if !bp.closed { 369 // We don't cancel the context here, as we use close during quiesce and don't want the 370 // persistence loop to have its context cancelled, and fail to perform DB operations 371 close(bp.newWork) 372 bp.closed = true 373 } 374 } 375 376 func (bp *batchProcessor) waitClosed() { 377 <-bp.sealBatch 378 <-bp.batchSealed 379 }