github.com/kaleido-io/firefly@v0.0.0-20210622132723-8b4b6aacb971/kat/src/lib/batch-processor.ts

// Copyright © 2021 Kaleido, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import { clearTimeout, setTimeout } from 'timers';
import { promisify } from 'util';
import { v4 as uuidV4 } from 'uuid';
import * as database from '../clients/database';
import { IBatchRecord, IDBBatch } from './interfaces';
import * as utils from './utils';

const delay = promisify(setTimeout);

const log = utils.getLogger('lib/batch-processor.ts');

export interface IBatchProcessorConfig {
  addTimeoutMS: number;
  batchTimeoutArrivallMS: number;
  batchTimeoutOverallMS: number;
  batchMaxRecords: number;
  retryInitialDelayMS: number;
  retryMaxDelayMS: number;
  retryMultiplier: number;
}

interface BatchAssemblyTask {
  timestamp: number;
  record: IBatchRecord;
  resolve: (batchID: string) => void;
  reject: (err: Error) => void;
}

/**
 * A singleton of these should be created for each batch type + author combination.
 *
 * A persistent batch implementation, which:
 * - Is safe for calling concurrently on many async contexts
 * - Guarantees to persist batch updates to the database before returning from add
 * - Blocks the caller from getting more than one batch ahead
 * - Protects against the caller of add (such as a REST API) giving up on a timeout before their record is accepted
 * - Recovers in-flight batches on initialization
 * - Pipelines the processing of one batch with the building of the next
 * - Retries accepted batches indefinitely
 */
export class BatchProcessor {

  private assemblyList: BatchAssemblyTask[];
  private assembling: boolean;
  private assemblyBatch?: IDBBatch;
  private dispatchTimeout?: NodeJS.Timeout;
  private batchInFlight?: Promise<void>;
  public config: IBatchProcessorConfig;

  constructor(
    private author: string,
    private type: string,
    private processBatchCallback: (batch: IDBBatch) => Promise<void>,
    private processorCompleteCallback: (author: string, type: string) => void,
    config?: Partial<IBatchProcessorConfig>,
  ) {
    this.assemblyList = [];
    this.assembling = false;
    this.config = {
      addTimeoutMS: utils.constants.BATCH_ADD_TIMEOUT_MILLIS,
      batchTimeoutArrivallMS: utils.constants.BATCH_TIMEOUT_ARRIVAL_MILLIS,
      batchTimeoutOverallMS: utils.constants.BATCH_TIMEOUT_OVERALL_MILLIS,
      batchMaxRecords: utils.constants.BATCH_MAX_RECORDS,
      retryInitialDelayMS: utils.constants.BATCH_RETRY_INITIAL_DELAY_MILLIS,
      retryMaxDelayMS: utils.constants.BATCH_RETRY_MAX_DELAY_MILLIS,
      retryMultiplier: utils.constants.BATCH_RETRY_MULTIPLIER,
      ...config,
    };
  }

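  /**
   * Recovers in-flight batches on startup, by re-queuing dispatch of any persisted
   * batches that had not completed when the processor last shut down.
   * @param incompleteBatches the incomplete batches previously persisted for this author+type
   */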
  public async init(incompleteBatches: IDBBatch[]) {
    // Treat the stored batches just as we would treat filled batches.
    // This logic blocks startup until we have queued dispatch of all persisted batches
    // (there should be a maximum of two, for the author+type combination)
    while (incompleteBatches.length) {
      this.assemblyBatch = incompleteBatches.shift();
      await this.dispatchBatch();
    }
  }

  /**
   * Blocks until the requested record has been assigned to a batch, and its inclusion
   * in that batch has been persisted to our local database.
   * @param record the record to add to a batch
   * @returns {string} the batchID the add was persisted into
   */
  public async add(record: IBatchRecord): Promise<string> {
    return new Promise<string>((resolve, reject) => {
      // Add our record to the assembler queue, to resolve the parent promise later
      this.assemblyList.push({ timestamp: Date.now(), record, resolve, reject });
      // Give the assembler a kick, as it might not already be running
      this.assembler();
    });
  }

  protected newBatch(): IDBBatch {
    const timestamp = Date.now();
    return {
      type: this.type,
      author: this.author,
      batchID: uuidV4(),
      created: timestamp,
      completed: null,
      records: [],
    };
  }

  // Safety check to make sure we haven't got work queued into the system
  // from a long time ago, that potentially a REST client has forgotten about.
  // These are rejected at the point they are detected, before we do any active
  // processing on them.
  private rejectAnyStale() {
    const now = Date.now();
    const newAssemblyList: BatchAssemblyTask[] = [];
    for (const a of this.assemblyList) {
      const inFlightTime = now - a.timestamp;
      if (inFlightTime > this.config.addTimeoutMS) {
        a.reject(new Error(`Timed out add of record after ${inFlightTime}ms`));
      } else {
        newAssemblyList.push(a);
      }
    }
    this.assemblyList = newAssemblyList;
    return this.assemblyList;
  }

  private async assembler() {

    // Use each add as an opportunity to check for stales
    this.rejectAnyStale();

    // If we've already got an assembler running, there's nothing more to do
    if (this.assembling) return;

    // We are the assembler - stop a duplicate one running (cleared before return)
    this.assembling = true;
    let chosen: BatchAssemblyTask[] = [];
    while (this.rejectAnyStale().length) {
      try {

        // Create a new assembly batch if we don't currently have one
        if (!this.assemblyBatch) this.assemblyBatch = this.newBatch();
        const batch = this.assemblyBatch;

        // Grab as much capacity as we can out of the assemblyList
        const capacity = this.config.batchMaxRecords - batch.records.length;
        chosen = this.assemblyList.slice(0, capacity);
        this.assemblyList = this.assemblyList.slice(capacity);

        // Add these entries to the in-memory batch object
        for (const a of chosen) {
          batch.records.push(a.record);
        }

        // Persist the batch object to our local database
        log.trace(`${this.type}/${this.author}: added ${chosen.length} records to batch ${batch.batchID}`);
        await database.upsertBatch(batch);

        // Check if the batch is full
        if (batch.records.length >= this.config.batchMaxRecords) {
          // Only one batch can be dispatched at a time, so this is a blocking call if we
          // manage to run more than one batch ahead of the assembler.
          await this.dispatchBatch();
        } else {
          // Set/reset the timer to dispatch this batch
          const now = Date.now();
          if (this.dispatchTimeout) clearTimeout(this.dispatchTimeout);
          this.dispatchTimeout = setTimeout(() => this.dispatchBatch(),
            Math.min(
              // The next record must arrive within batchTimeoutArrivallMS
              this.config.batchTimeoutArrivallMS,
              // The first record in the batch cannot be delayed by more than batchTimeoutOverallMS
              (batch.created + this.config.batchTimeoutOverallMS) - now,
            )
          );
        }

        // ****
        // Note that at this point this.assemblyBatch might be undefined, if we just dispatched it.
        // It is also NOT SAFE to do any async processing here, because the processBatch logic
        // relies on this.assemblyBatch being set again whenever more work is pending, at the
        // point the in-flight batch completes. So we need to go round to `newBatch` again
        // without any async logic.
        // ****

        // We have accepted all the chosen records into a persisted batch, ready for dispatch.
        // This unblocks any callers waiting to know what batch they are in.
        for (const a of chosen) a.resolve(batch.batchID);
      }
      catch (err) {
        log.error(`${this.type}/${this.author}: Batch assembler failed`, err);
        for (const a of chosen) a.reject(err);
      }
    }
    this.assembling = false;
  }

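  /**
   * Closes the current assembly batch (if any) and hands it off for processing.
   * If a previously dispatched batch is still in flight, this call blocks until it
   * completes - which is what stops the assembler running more than one batch ahead.
   */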
  protected async dispatchBatch() {
    if (this.batchInFlight) await this.batchInFlight;
    if (this.dispatchTimeout) clearTimeout(this.dispatchTimeout);
    const batch = this.assemblyBatch;
    delete this.assemblyBatch;
    delete this.dispatchTimeout;
    if (!batch) return; // Covers the possibility of a timer and the assembler loop both firing
    const batchTime = Date.now() - batch.created;
    log.info(`${this.type}/${this.author}: closed batch ${batch.batchID} after ${batchTime}ms with ${batch.records.length} records`);
    // Capture the promise for completion of this batch, to block any further dispatchBatch calls
    this.batchInFlight = this.processBatch(batch);
  }

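  /**
   * Processes a dispatched batch via the supplied callback, retrying indefinitely with
   * a capped exponential backoff between attempts. As an illustration (example values,
   * not the configured defaults): with retryInitialDelayMS=100, retryMultiplier=2 and
   * retryMaxDelayMS=1000, the delays after successive failed attempts would be
   * 100ms, 200ms, 400ms, 800ms, and then 1000ms for every subsequent retry.
   */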
  protected async processBatch(batch: IDBBatch) {
    // We have accepted the batch at this point, and the REST calls to submit it to us have all completed.
    // So we cannot fail to process it, and we must retry the processing indefinitely
    let attempt = 0;
    let complete = false;
    while (!complete) {
      try {
        attempt++;

        // Set the completed time in memory - forms part of uniqueness in the pinning process.
        batch.completed = Date.now();
        await this.processBatchCallback(batch);

        // Update the batch as complete - writes the now final completed timestamp, along with any updates made in processBatchCallback
        await database.upsertBatch(batch);

        // Ok, we're done here.
        complete = true;
      }
      catch (err) {
        let retryDelay = this.config.retryInitialDelayMS;
        for (let i = 1; i < attempt; i++) retryDelay *= this.config.retryMultiplier;
        retryDelay = Math.min(retryDelay, this.config.retryMaxDelayMS);
        log.error(`${this.type}/${this.author}: batch ${batch.batchID} attempt ${attempt} failed (next-retry: ${retryDelay}ms): ${err.stack}`);
        await delay(retryDelay);
      }
    }

    // If there's nothing queued up, we call the completion handler that was passed in,
    // to let the owner unregister this batch processor.
    // This is because there are potentially infinite 'author' addresses that could be used,
    // so leaving ourselves around indefinitely just because someone submitted one transaction
    // would be a memory leak.
    if (!this.assemblyBatch) {
      this.processorCompleteCallback(this.author, this.type);
    }

  }

}
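
// Illustrative usage sketch (not part of the original module). The registry map, the
// pinBatch callback, and the incompleteBatchesForAuthorAndType variable below are
// assumptions, shown only to demonstrate the call pattern described in the class
// comment above: one processor per author+type, init() with any persisted incomplete
// batches, then add() for each record.
//
// const processors = new Map<string, BatchProcessor>();
//
// async function addRecord(author: string, type: string, record: IBatchRecord): Promise<string> {
//   const key = `${author}/${type}`;
//   let processor = processors.get(key);
//   if (!processor) {
//     processor = new BatchProcessor(
//       author,
//       type,
//       async (batch) => { await pinBatch(batch); },    // e.g. pin the completed batch (hypothetical helper)
//       (a, t) => { processors.delete(`${a}/${t}`); },  // unregister once the processor goes idle
//     );
//     processors.set(key, processor);
//     await processor.init(incompleteBatchesForAuthorAndType); // previously persisted, incomplete batches
//   }
//   return processor.add(record); // resolves with the batchID once the record is persisted
// }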