github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/bench/tools/aisloader-composer/consolidate_results.py (about)

     1  """
     2  Python script designed to analyze and amalgamate the data from 
     3  AIS Loader-generated files into a cohesive report
     4  
     5  Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
     6  """
     7  
     8  import os
     9  import re
    10  import sys
    11  
    12  # Specify the folder where your files are located
    13  if len(sys.argv) != 2:
    14      FOLDER_PATH = "/path/to/directory"
    15  else:
    16      FOLDER_PATH = sys.argv[1]
    17  
    18  MIN_LATENCY = float("inf")
    19  MAX_LATENCY = 0
    20  SUM_AVG_LATENCY = 0
    21  SUM_THROUGHPUT = 0
    22  SUM_ERRORS = 0
    23  COUNT = 0
    24  
    25  # Regular expression to match various time components in the input string
    26  time_regex = re.compile(
    27      r"(?:(?P<hours>\d+)h)?"  # Capture hours
    28      r"(?:(?P<minutes>\d+)m)?"  # Capture minutes
    29      r"(?:(?P<seconds>\d+(?:\.\d*)?)s)?"  # Capture seconds
    30      r"(?:(?P<milliseconds>\d+(?:\.\d*)?)ms)?"  # Capture milliseconds
    31      r"(?:(?P<microseconds>\d+(?:\.\d*)?)(?:µs|us|μs))?"
    32  )
    33  
    34  # Conversion factors to milliseconds
    35  time_units = {
    36      "hours": 3600000,
    37      "minutes": 60000,
    38      "seconds": 1000,
    39      "milliseconds": 1,
    40      "microseconds": 0.001,
    41      "microseconds_alt": 0.001,
    42  }
    43  
    44  
    45  def convert_to_ms(time_str):
    46      """
    47      Interprets the time string from the report and converts it to
    48      milliseconds (ms).
    49      """
    50      # Normalize the input string
    51      normalized_time_str = time_str.strip().replace("us", "μs")
    52  
    53      # Match the input string against the regex pattern
    54      match = time_regex.fullmatch(normalized_time_str)
    55  
    56      if not match:
    57          raise ValueError(f"Invalid time format: {time_str}")
    58  
    59      # Initialize total milliseconds
    60      total_ms = 0.0
    61  
    62      # Iterate through matched groups and calculate total milliseconds
    63      for unit, value in match.groupdict(default="").items():
    64          if value:
    65              total_ms += float(value) * time_units[unit]
    66  
    67      return total_ms
    68  
    69  
    70  # Regular expression to match throughput strings
    71  throughput_regex = re.compile(r"(\d+\.?\d*)\s*(\w+)")
    72  
    73  # Conversion factors from various units to GiB/s
    74  throughput_units = {
    75      "GiB": 1,
    76      "MiB": 1 / 1024,
    77      "KiB": 1 / (1024**2),
    78  }
    79  
    80  
    81  def convert_to_gib_per_second(throughput_str):
    82      """
    83      Transforms the throughput value reported as a string into a
    84      floating-point number representing gibibytes per second (GiB/s).
    85      """
    86      match = throughput_regex.match(throughput_str)
    87      if not match:
    88          raise ValueError(f"Invalid throughput format: {throughput_str}")
    89  
    90      # Extract number and unit from the match
    91      number, unit = match.groups()
    92      number = float(number)
    93  
    94      # Check if the unit is valid
    95      if unit not in throughput_units:
    96          raise ValueError(f"Invalid throughput unit: {unit}")
    97  
    98      # Perform the conversion
    99      return number * throughput_units[unit]
   100  
   101  
   102  # Create or open the results file for writing
   103  with open("results.txt", "w", encoding="utf-8"):
   104      # List files in the folder
   105      file_list = list(os.listdir(FOLDER_PATH))
   106  
   107      # Process each file
   108      for file_name in file_list:
   109          if file_name.startswith("."):
   110              continue
   111          file_path = os.path.join(FOLDER_PATH, file_name)
   112  
   113          # Read the data from the file
   114          with open(file_path, "r", encoding="utf-8") as current_file:
   115              lines = current_file.readlines()
   116  
   117          summary = lines[-1]
   118  
   119          data = summary.split()
   120          MIN_LATENCY = min(MIN_LATENCY, convert_to_ms(data[4]))
   121          SUM_AVG_LATENCY += convert_to_ms(data[5])
   122          MAX_LATENCY = max(MAX_LATENCY, convert_to_ms(data[6]))
   123          SUM_THROUGHPUT += convert_to_gib_per_second(data[7])
   124          SUM_ERRORS += int(data[8])
   125          COUNT += 1
   126  
   127  # Calculate the average of average latencies
   128  avg_of_avg_latency = SUM_AVG_LATENCY / COUNT
   129  
   130  # Print the results
   131  print(f"Minimum Latency (ms): {MIN_LATENCY:.3f}")
   132  print(f"Average of Average Latencies (ms): {avg_of_avg_latency:.3f}")
   133  print(f"Maximum Latency (ms): {MAX_LATENCY:.3f}")
   134  print(f"Summation of all Throughputs (GiB/s): {SUM_THROUGHPUT:.2f}")
   135  print(f"Summation of all Errors: {SUM_ERRORS}")