"""Parse and combine JSON results from multiple aisloader benchmark outputs.

Reads one aisloader output file per benchmark host, merges the per-host
latency and throughput statistics, and prints a cluster-wide summary for
each (benchmark type, bucket) combination.
"""

import argparse
import json
from pathlib import Path

import humanize
import yaml


def load_hosts(hosts_file, hosts_group="aisloader_hosts"):
    """Return the benchmark host names from an ansible inventory file.

    Args:
        hosts_file: Path to the ansible inventory YAML file.
        hosts_group: Top-level inventory group whose ``hosts`` mapping
            lists the aisloader hosts. Defaults to ``"aisloader_hosts"``
            (the key the original code hard-coded).

    Returns:
        A dict keys view of host names.
    """
    # NOTE(review): the --aisloader_hosts CLI option (default "dgx_nodes")
    # was never threaded through to this lookup; callers can now override
    # the group via hosts_group, but the default behavior is unchanged.
    with open(hosts_file, "r", encoding="utf-8") as file:
        data = yaml.safe_load(file)
    return data.get(hosts_group).get("hosts").keys()


def read_results(host_list, bench_type, bucket):
    """Load each host's aisloader JSON output for one benchmark run.

    Expects files named ``{bucket}-{bench_type}-aistore{host}.json`` under
    ``output/{bench_type}/{bucket}``.

    Args:
        host_list: Iterable of host names (as produced by ``load_hosts``).
        bench_type: Benchmark type, e.g. ``"get"`` or ``"put"``.
        bucket: Bucket name used in the benchmark run.

    Returns:
        Dict mapping host name -> ``{"results": <parsed JSON list>}``.

    Raises:
        FileNotFoundError: if a host's output file is missing.
        json.JSONDecodeError: if an output file is not valid JSON.
    """
    out_dir = Path("output") / bench_type / bucket
    host_results = {}
    for host in host_list:
        outfile = out_dir / f"{bucket}-{bench_type}-aistore{host}.json"
        with open(outfile, "r", encoding="utf-8") as file:
            host_results[host] = {"results": json.load(file)}
    return host_results


def get_latencies(bench_res, bench_type):
    """Return (min, avg, max) latency from the final stats entry.

    ``bench_res`` is the parsed JSON list for one host; the last element
    holds the end-of-run statistics keyed by benchmark type.
    """
    final_stats = bench_res[-1].get(bench_type)
    return (
        final_stats.get("min_latency"),
        final_stats.get("latency"),
        final_stats.get("max_latency"),
    )


def get_final_throughput(bench_res, bench_type):
    """Return the throughput from the final stats entry for one host."""
    final_stats = bench_res[-1].get(bench_type)
    return final_stats.get("throughput")


def combine_results(result_dict, bench_type):
    """Aggregate per-host stats into cluster-wide values.

    Args:
        result_dict: Mapping host -> ``{"results": ...}`` from ``read_results``.
        bench_type: Benchmark type key into each host's final stats.

    Returns:
        Tuple ``(min_latency, avg_latency, max_latency, total_throughput)``
        where ``avg_latency`` is the mean of the per-host average latencies
        and ``total_throughput`` is the sum across hosts.

    Raises:
        ZeroDivisionError: if ``result_dict`` is empty (as before).
    """
    # float("inf") replaces the original arbitrary 10**16 sentinel.
    total_lat_min = float("inf")
    total_lat_max = 0
    lats = []
    tputs = []
    for host_values in result_dict.values():
        host_res = host_values.get("results")
        min_lat, avg_lat, max_lat = get_latencies(host_res, bench_type)
        total_lat_min = min(total_lat_min, min_lat)
        total_lat_max = max(total_lat_max, max_lat)
        lats.append(int(avg_lat))
        tputs.append(int(get_final_throughput(host_res, bench_type)))
    avg_lat = sum(lats) / len(lats)
    total_tput = sum(tputs)
    return total_lat_min, avg_lat, total_lat_max, total_tput


def get_natural_time(raw_time):
    """Format a duration (presumably nanoseconds — TODO confirm against
    aisloader output) as a human-readable string, e.g. ``"12.34 ms"``."""
    units = ("ns", "µs", "ms", "s")
    unit_index = 0
    while raw_time >= 1000 and unit_index < len(units) - 1:
        raw_time /= 1000
        unit_index += 1
    return f"{raw_time:.2f} {units[unit_index]}"


def pretty_print_res(bench_type, bucket, res, total_drives):
    """Print a human-readable summary of one benchmark's combined results.

    Args:
        bench_type: Benchmark type, used only for display.
        bucket: Bucket name, used only for display.
        res: Tuple from ``combine_results``.
        total_drives: Drive count used to derive per-drive throughput.
    """
    lat_min, avg_lat, lat_max, total_tput = res
    print(f"Benchmark results for benchmark type '{bench_type}' on bucket '{bucket}'")
    print("Latencies: ")
    print(
        f"min: {get_natural_time(lat_min)}, avg: {get_natural_time(avg_lat)}, max: {get_natural_time(lat_max)}"
    )
    # NOTE(review): total_tput is a SUM across hosts, though the label says
    # "average"; label kept verbatim to preserve output.
    print(
        f"Cluster average throughput: {humanize.naturalsize(total_tput, binary=True)}/s ({humanize.naturalsize(total_tput/total_drives, binary=True)}/s per drive)"
    )
    print()


def main(configs, args):
    """Aggregate and print results for each (bench_type, bucket) config."""
    # The inventory is the same for every config; load it once instead of
    # re-reading the YAML file on each iteration.
    host_list = load_hosts(args.host_file)
    for bench_type, bucket in configs:
        results = read_results(host_list, bench_type, bucket)
        combined_results = combine_results(results, bench_type)
        pretty_print_res(
            bench_type, bucket, combined_results, total_drives=args.total_drives
        )


if __name__ == "__main__":
    bench_runs = [
        ("get", "bench_1MB"),
        ("put", "bench_1MB"),
    ]

    parser = argparse.ArgumentParser(
        description="Parses and combines results from multiple aisloader outputs"
    )
    parser.add_argument(
        "--host_file",
        default="inventory/inventory.yaml",
        help="Filename containing ansible hosts",
    )
    parser.add_argument(
        "--total_drives",
        type=int,
        default=30,
        help="Number of drives on the AIS cluster being tested",
    )
    # NOTE(review): this option is currently unused by main(); see load_hosts.
    parser.add_argument(
        "--aisloader_hosts",
        default="dgx_nodes",
        help="Name of hosts running the aisloader benchmark",
    )

    run_args = parser.parse_args()
    main(bench_runs, run_args)