github.com/Oyster-zx/tendermint@v0.34.24-fork/scripts/qa/reporting/latency_throughput.py

#!/usr/bin/env python3
"""
A simple script to parse the CSV output from the loadtime reporting tool (see
https://github.com/tendermint/tendermint/tree/main/test/loadtime/cmd/report).

Produces a plot of average transaction latency vs total transaction throughput
according to the number of load testing tool WebSocket connections to the
Tendermint node.
"""

import argparse
import csv
import logging
import sys
import matplotlib.pyplot as plt
import numpy as np

DEFAULT_TITLE = "Tendermint latency vs throughput"


def main():
    parser = argparse.ArgumentParser(
        description="Renders a latency vs throughput diagram "
        "for a set of transactions provided by the loadtime reporting tool",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-t',
                        '--title',
                        default=DEFAULT_TITLE,
                        help='Plot title')
    parser.add_argument('output_image',
                        help='Output image file (in PNG format)')
    parser.add_argument(
        'input_csv_file',
        nargs='+',
        help="CSV input file from which to read transaction data "
        "- must have been generated by the loadtime reporting tool")
    args = parser.parse_args()

    logging.basicConfig(format='%(levelname)s\t%(message)s',
                        stream=sys.stdout,
                        level=logging.INFO)
    plot_latency_vs_throughput(args.input_csv_file,
                               args.output_image,
                               title=args.title)


def plot_latency_vs_throughput(input_files, output_image, title=DEFAULT_TITLE):
    avg_latencies, throughput_rates = process_input_files(input_files)

    fig, ax = plt.subplots()

    connections = sorted(avg_latencies.keys())
    for c in connections:
        tr = np.array(throughput_rates[c])
        al = np.array(avg_latencies[c])
        label = '%d connection%s' % (c, '' if c == 1 else 's')
        ax.plot(tr, al, 'o-', label=label)

    ax.set_title(title)
    ax.set_xlabel('Throughput rate (tx/s)')
    ax.set_ylabel('Average transaction latency (s)')

    plt.legend(loc='upper left')
    plt.savefig(output_image)


def process_input_files(input_files):
    # Experimental data from which we will derive the latency vs throughput
    # statistics
    experiments = {}

    for input_file in input_files:
        logging.info('Reading %s...' % input_file)

        with open(input_file, 'rt') as inf:
            reader = csv.DictReader(inf)
            for tx in reader:
                experiments = process_tx(experiments, tx)

    return compute_experiments_stats(experiments)


def process_tx(experiments, tx):
    exp_id = tx['experiment_id']
    # Block time is nanoseconds from the epoch - convert to seconds
    block_time = float(tx['block_time']) / (10**9)
    # Duration is also in nanoseconds - convert to seconds
    duration = float(tx['duration_ns']) / (10**9)
    connections = int(tx['connections'])
    rate = int(tx['rate'])

    if exp_id not in experiments:
        experiments[exp_id] = {
            'connections': connections,
            'rate': rate,
            'block_time_min': block_time,
            # We keep track of the latency associated with the minimum block
            # time to estimate the start time of the experiment
            'block_time_min_duration': duration,
            'block_time_max': block_time,
            'total_latencies': duration,
            'tx_count': 1,
        }
        logging.info('Found experiment %s with rate=%d, connections=%d' %
                     (exp_id, rate, connections))
    else:
        # Validation
        for field in ['connections', 'rate']:
            val = int(tx[field])
            if val != experiments[exp_id][field]:
                raise Exception(
                    'Found multiple distinct values for field '
                    '"%s" for the same experiment (%s): %d and %d' %
                    (field, exp_id, val, experiments[exp_id][field]))

        if block_time < experiments[exp_id]['block_time_min']:
            experiments[exp_id]['block_time_min'] = block_time
            experiments[exp_id]['block_time_min_duration'] = duration
        if block_time > experiments[exp_id]['block_time_max']:
            experiments[exp_id]['block_time_max'] = block_time

        experiments[exp_id]['total_latencies'] += duration
        experiments[exp_id]['tx_count'] += 1

    return experiments


def compute_experiments_stats(experiments):
    """Compute average latency vs throughput rate statistics from the given
    experiments"""
    stats = {}

    # Compute average latency and throughput rate for each experiment
    for exp_id, exp in experiments.items():
        conns = exp['connections']
        avg_latency = exp['total_latencies'] / exp['tx_count']
        exp_start_time = exp['block_time_min'] - exp['block_time_min_duration']
        exp_duration = exp['block_time_max'] - exp_start_time
        throughput_rate = exp['tx_count'] / exp_duration
        if conns not in stats:
            stats[conns] = []

        stats[conns].append({
            'avg_latency': avg_latency,
            'throughput_rate': throughput_rate,
        })

    # Sort stats for each number of connections in order of increasing
    # throughput rate, and then extract average latencies and throughput rates
    # as separate data series.
    conns = sorted(stats.keys())
    avg_latencies = {}
    throughput_rates = {}
    for c in conns:
        stats[c] = sorted(stats[c], key=lambda s: s['throughput_rate'])
        avg_latencies[c] = []
        throughput_rates[c] = []
        for s in stats[c]:
            avg_latencies[c].append(s['avg_latency'])
            throughput_rates[c].append(s['throughput_rate'])
            logging.info('For %d connection(s): '
                         'throughput rate = %.6f tx/s\t'
                         'average latency = %.6fs' %
                         (c, s['throughput_rate'], s['avg_latency']))

    return (avg_latencies, throughput_rates)


if __name__ == "__main__":
    main()
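# ---------------------------------------------------------------------------
# Usage sketch (informational comment, not part of the script's logic): the
# example below shows how the CLI defined above is typically invoked. The CSV
# file names are hypothetical; each file must come from the loadtime
# reporting tool and contain the columns read by process_tx():
# experiment_id, block_time (ns since epoch), duration_ns, connections, rate.
#
#   ./latency_throughput.py -t 'Tendermint latency vs throughput' \
#       latency_vs_throughput.png report_c1.csv report_c2.csv
#
# The resulting PNG contains one line per distinct `connections` value, with
# throughput rate (tx/s) on the x-axis and average latency (s) on the y-axis.
# ---------------------------------------------------------------------------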