github.com/kaleido-io/firefly@v0.0.0-20210622132723-8b4b6aacb971/kat/src/lib/batch-processor.ts

// Copyright © 2021 Kaleido, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import { clearTimeout, setTimeout } from 'timers';
import { promisify } from 'util';
import { v4 as uuidV4 } from 'uuid';
import * as database from '../clients/database';
import { IBatchRecord, IDBBatch } from './interfaces';
import * as utils from './utils';

const delay = promisify(setTimeout);

const log = utils.getLogger('lib/batch-processor.ts');

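// Every field below has a default taken from utils.constants in the BatchProcessor constructor;
// callers may override any subset via the optional Partial<IBatchProcessorConfig> argument.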
export interface IBatchProcessorConfig {
  addTimeoutMS: number;
  batchTimeoutArrivallMS: number;
  batchTimeoutOverallMS: number;
  batchMaxRecords: number;
  retryInitialDelayMS: number;
  retryMaxDelayMS: number;
  retryMultiplier: number;
}

interface BatchAssemblyTask {
  timestamp: number;
  record: IBatchRecord;
  resolve: (batchID: string) => void;
  reject: (err: Error) => void;
}

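// Illustrative usage sketch (not part of this module): a caller would typically keep one
// BatchProcessor instance per author+type combination. The variable names, callback bodies
// and config override below are assumptions for illustration only.
//
//   const processor = new BatchProcessor(
//     author,                                            // the author identity for this batch stream
//     'message',                                         // the batch type
//     async (batch) => { /* submit/pin the completed batch */ },
//     (author, type) => { /* remove this processor from the in-memory registry */ },
//     { batchMaxRecords: 100 },                          // optional overrides of the defaults
//   );
//   await processor.init(incompleteBatches);             // IDBBatch[] recovered by the caller at startup
//   const batchID = await processor.add(record);         // blocks until the record is persisted into a batch
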
/**
 * A singleton of these should be created for each batch type + author combination.
 *
 * A persistent batch implementation, which:
 * - Is safe for calling concurrently on many async contexts
 * - Guarantees to persist batch updates to the database before returning from add
 * - Blocks the caller from getting more than one batch ahead
 * - Protects against the caller of add (such as a REST API) giving up on a timeout before their record is accepted
 * - Recovers in-flight batches on initialization
 * - Pipelines the processing of one batch, with the building of the next
 * - Retries accepted batches indefinitely
 */
export class BatchProcessor {

  private assemblyList: BatchAssemblyTask[];
  private assembling: boolean;
  private assemblyBatch?: IDBBatch;
  private dispatchTimeout?: NodeJS.Timeout;
  private batchInFlight?: Promise<void>;
  public config: IBatchProcessorConfig;

  constructor(
    private author: string,
    private type: string,
    private processBatchCallback: (batch: IDBBatch) => Promise<void>,
    private processorCompleteCallback: (author: string, type: string) => void,
    config?: Partial<IBatchProcessorConfig>,
  ) {
    this.assemblyList = [];
    this.assembling = false;
    this.config = {
      addTimeoutMS: utils.constants.BATCH_ADD_TIMEOUT_MILLIS,
      batchTimeoutArrivallMS: utils.constants.BATCH_TIMEOUT_ARRIVAL_MILLIS,
      batchTimeoutOverallMS: utils.constants.BATCH_TIMEOUT_OVERALL_MILLIS,
      batchMaxRecords: utils.constants.BATCH_MAX_RECORDS,
      retryInitialDelayMS: utils.constants.BATCH_RETRY_INITIAL_DELAY_MILLIS,
      retryMaxDelayMS: utils.constants.BATCH_RETRY_MAX_DELAY_MILLIS,
      retryMultiplier: utils.constants.BATCH_RETRY_MULTIPLIER,
      ...config,
    };
  }

  public async init(incompleteBatches: IDBBatch[]) {
    // Treat the stored batches just as we would treat filled batches.
    // This logic blocks startup until we have queued dispatch of all persisted batches
    // (there should be a maximum of two, for the author+type combination)
    while (incompleteBatches.length) {
      this.assemblyBatch = incompleteBatches.shift();
      await this.dispatchBatch();
    }
  }

  /**
   * Blocks until the requested record has been assigned to a batch, and its inclusion
   * in that batch has been persisted to our local database.
   * @param record the record to add to a batch
   * @returns {string} the batchID the add was persisted into
   */
  public async add(record: IBatchRecord): Promise<string> {
    return new Promise<string>((resolve, reject) => {
      // Add our record to the assembler queue, to resolve the parent promise
      this.assemblyList.push({ timestamp: Date.now(), record, resolve, reject });
      // Give the assembler a kick, as it might not already be running
      this.assembler();
    });
  }

  protected newBatch(): IDBBatch {
    const timestamp = Date.now();
    return {
      type: this.type,
      author: this.author,
      batchID: uuidV4(),
      created: timestamp,
      completed: null,
      records: [],
    };
  }

  // Safety check to make sure we haven't got work queued into the system
  // from a long time ago, that a REST client has potentially forgotten about.
  // These are rejected at the point they are detected, before we do any active
  // processing on them.
  private rejectAnyStale() {
    const now = Date.now();
    const newAssemblyList = [];
    for (const a of this.assemblyList) {
      const inFlightTime = now - a.timestamp;
      if (inFlightTime > this.config.addTimeoutMS) {
        a.reject(new Error(`Timed out add of record after ${inFlightTime}ms`));
      } else {
        newAssemblyList.push(a);
      }
    }
    this.assemblyList = newAssemblyList;
    return this.assemblyList;
  }

  private async assembler() {

    // Use each add as an opportunity to check for stale entries
    this.rejectAnyStale();

    // If we've already got an assembler running, nothing more to do
    if (this.assembling) return;

    // We are the assembler - stop a duplicate one from running (cleared before return)
    this.assembling = true;
    let chosen: BatchAssemblyTask[] = [];
    while (this.rejectAnyStale().length) {
      try {

        // Create a new assembly batch if we don't currently have one
        if (!this.assemblyBatch) this.assemblyBatch = this.newBatch();
        const batch = this.assemblyBatch;

        // Grab as much capacity as we can out of the assemblyList
        let capacity = this.config.batchMaxRecords - batch.records.length;
        chosen = this.assemblyList.slice(0, capacity);
        this.assemblyList = this.assemblyList.slice(capacity);

        // Add these entries to the in-memory batch object
        for (let a of chosen) {
          batch.records.push(a.record);
        }

        // Persist the batch object to our local database
        log.trace(`${this.type}/${this.author}: added ${chosen.length} records to batch ${batch.batchID}`);
        await database.upsertBatch(batch);

        // Check if the batch is full
        if (batch.records.length >= this.config.batchMaxRecords) {
          // Only one batch can be dispatched at a time, so this is a blocking call if we manage
          // to run more than one batch ahead of the assembler.
          await this.dispatchBatch();
        } else {
          // Set/reset the timer to dispatch this batch
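          // Illustrative example (values are assumptions; the real defaults live in utils.constants):
          // with batchTimeoutArrivallMS=500 and batchTimeoutOverallMS=2000, a batch created 1800ms
          // ago gets a 200ms timer, so the overall deadline wins over the per-record arrival window
          // as it approaches.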
          const now = Date.now();
          if (this.dispatchTimeout) clearTimeout(this.dispatchTimeout);
          this.dispatchTimeout = setTimeout(() => this.dispatchBatch(),
            Math.min(
              // The next record must arrive within the batchTimeoutArrivallMS
              this.config.batchTimeoutArrivallMS,
              // The first record in the batch cannot be delayed by more than the batchTimeoutOverallMS
              (batch.created + this.config.batchTimeoutOverallMS) - now,
            )
          );
        }

        // ****
        // Note that at this point this.assemblyBatch might be undefined, if we just dispatched it.
        // It is also NOT SAFE to do any async processing here, because the processBatch logic
        // relies on us having exited whenever this.assemblyBatch is undefined at the point the
        // batch completes. So we need to go round to `newBatch` again without any async logic.
        // ****

        // We have accepted all the chosen records into a persisted batch, ready for dispatch.
        // This unblocks any callers waiting to know what batch they are in.
        for (let a of chosen) a.resolve(batch.batchID);
      }
      catch(err) {
        log.error(`${this.type}/${this.author}: Batch assembler failed`, err);
        for (let a of chosen) a.reject(err);
      }
    }
    this.assembling = false;
  }

  protected async dispatchBatch() {
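    // If a previously dispatched batch is still being processed, block here first. This is what
    // pipelines the system: one batch can be processing while the next is being assembled, but
    // the assembler (and hence callers of add) can never run more than one batch ahead.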
    if (this.batchInFlight) await this.batchInFlight;
    if (this.dispatchTimeout) clearTimeout(this.dispatchTimeout);
    const batch = this.assemblyBatch;
    delete this.assemblyBatch;
    delete this.dispatchTimeout;
    if (!batch) return; // Covers the possibility of a timer and the assembler loop both firing
    const batchTime = Date.now() - batch.created;
    log.info(`${this.type}/${this.author}: closed batch ${batch.batchID} after ${batchTime}ms with ${batch.records.length} records`);
    // Capture the promise for completion of this batch, to block any further dispatchBatch calls
    this.batchInFlight = this.processBatch(batch);
  }

  protected async processBatch(batch: IDBBatch) {
    // We have accepted the batch at this point, and the REST calls to submit it to us have all completed.
    // So we cannot fail to process it, and we must retry the processing indefinitely
    let attempt = 0;
    let complete = false;
    while (!complete) {
      try {
        attempt++;

        // Set the completed time in memory - forms part of uniqueness in the pinning process.
        batch.completed = Date.now();
        await this.processBatchCallback(batch);

        // Update the batch as complete - writes the now final completed timestamp, along with any updates made in processBatchCallback
        await database.upsertBatch(batch);

        // Ok, we're done here.
        complete = true;
      }
      catch(err) {
        let retryDelay = this.config.retryInitialDelayMS;
        for (let i = 1; i < attempt; i++) retryDelay *= this.config.retryMultiplier;
        retryDelay = Math.min(retryDelay, this.config.retryMaxDelayMS);
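        // Exponential backoff: with illustrative values retryInitialDelayMS=100 and retryMultiplier=2,
        // the delays run 100ms, 200ms, 400ms, ... capped at retryMaxDelayMS.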
        log.error(`${this.type}/${this.author}: batch ${batch.batchID} attempt ${attempt} failed (next-retry: ${retryDelay}ms): ${err.stack}`);
        await delay(retryDelay);
      }
    }

    // If there's nothing queued up, we call the completion handler that was passed in,
    // to let the caller unregister this batch processor.
    // This is because there are potentially infinite 'author' addresses that could be used,
    // so leaving ourselves around indefinitely just because someone submitted one transaction
    // would be a memory leak.
    if (!this.assemblyBatch) {
      this.processorCompleteCallback(this.author, this.type);
    }

  }

}