"""
Python script designed to analyze and amalgamate the data from
AIS Loader-generated files into a cohesive report

Usage: consolidate_results.py <folder-with-aisloader-output-files>

The last non-blank line of every regular, non-hidden file in the folder is
treated as that run's summary line. The consolidated report is printed to
stdout and also written to ``results.txt`` in the current directory.

Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
"""

import os
import re
import sys

# Folder holding the AIS Loader output files: the single command-line
# argument when given, otherwise the placeholder below (edit as needed).
if len(sys.argv) != 2:
    FOLDER_PATH = "/path/to/directory"
else:
    FOLDER_PATH = sys.argv[1]

# File (relative to the current working directory) receiving the report
RESULTS_FILE = "results.txt"

# Minimum number of whitespace-separated fields in a summary line; fields
# 4..8 (0-based) are min/avg/max latency, throughput and error count.
SUMMARY_FIELD_COUNT = 9

# Regular expression to match various time components in the input string.
# Every component is optional, so callers must reject an all-empty match.
time_regex = re.compile(
    r"(?:(?P<hours>\d+)h)?"  # Capture hours
    r"(?:(?P<minutes>\d+)m)?"  # Capture minutes
    r"(?:(?P<seconds>\d+(?:\.\d*)?)s)?"  # Capture seconds
    r"(?:(?P<milliseconds>\d+(?:\.\d*)?)ms)?"  # Capture milliseconds
    # Capture microseconds: ASCII "us", micro sign (U+00B5) or Greek mu
    # (U+03BC); escapes are used so the pattern survives re-encoding.
    r"(?:(?P<microseconds>\d+(?:\.\d*)?)[u\u00b5\u03bc]s)?"
)

# Conversion factors to milliseconds, keyed by regex group name
time_units = {
    "hours": 3600000,
    "minutes": 60000,
    "seconds": 1000,
    "milliseconds": 1,
    "microseconds": 0.001,
}


def convert_to_ms(time_str):
    """
    Interprets the time string from the report and converts it to
    milliseconds (ms).

    Accepts compound values such as "1h2m3.5s", "250ms" or "12us".
    Raises ValueError when the string is empty or not a valid duration.
    """
    match = time_regex.fullmatch(time_str.strip())
    if not match:
        raise ValueError(f"Invalid time format: {time_str}")

    components = {
        unit: value
        for unit, value in match.groupdict().items()
        if value is not None
    }
    # All groups are optional, so an empty string "matches"; treat a match
    # without any component as invalid instead of silently returning 0.
    if not components:
        raise ValueError(f"Invalid time format: {time_str}")

    total_ms = 0.0
    for unit, value in components.items():
        total_ms += float(value) * time_units[unit]
    return total_ms


# Regular expression to match throughput strings: a number followed by a
# unit; anything after the unit (e.g. "/s") is ignored.
throughput_regex = re.compile(r"(\d+\.?\d*)\s*(\w+)")

# Conversion factors from various units to GiB/s
throughput_units = {
    "TiB": 1024,
    "GiB": 1,
    "MiB": 1 / 1024,
    "KiB": 1 / (1024**2),
    "B": 1 / (1024**3),
}


def convert_to_gib_per_second(throughput_str):
    """
    Transforms the throughput value reported as a string into a
    floating-point number representing gibibytes per second (GiB/s).

    Raises ValueError when the string has no leading number/unit pair or
    the unit is not one of the keys of ``throughput_units``.
    """
    match = throughput_regex.match(throughput_str.strip())
    if not match:
        raise ValueError(f"Invalid throughput format: {throughput_str}")

    number, unit = match.groups()
    if unit not in throughput_units:
        raise ValueError(f"Invalid throughput unit: {unit}")

    return float(number) * throughput_units[unit]


def parse_summary(summary):
    """
    Parses one summary line into a tuple
    ``(min_latency_ms, avg_latency_ms, max_latency_ms, gib_per_s, errors)``.

    Raises ValueError when the line has too few fields or a field cannot
    be converted.
    """
    fields = summary.split()
    if len(fields) < SUMMARY_FIELD_COUNT:
        raise ValueError(
            f"Invalid summary line (expected at least {SUMMARY_FIELD_COUNT} "
            f"fields, got {len(fields)}): {summary.strip()!r}"
        )

    try:
        errors = int(fields[8])
    except ValueError as err:
        raise ValueError(f"Invalid error count: {fields[8]}") from err

    return (
        convert_to_ms(fields[4]),
        convert_to_ms(fields[5]),
        convert_to_ms(fields[6]),
        convert_to_gib_per_second(fields[7]),
        errors,
    )


def aggregate(records):
    """
    Combines tuples produced by ``parse_summary`` into a dict with the
    overall minimum/maximum latency, the average of the average latencies,
    the summed throughput, the summed errors and the record count.

    Raises ValueError when there are no records (nothing to average).
    """
    records = list(records)
    if not records:
        raise ValueError("No summary records to aggregate")

    return {
        "min_latency": min(record[0] for record in records),
        "avg_of_avg_latency": sum(record[1] for record in records)
        / len(records),
        "max_latency": max(record[2] for record in records),
        "sum_throughput": sum(record[3] for record in records),
        "sum_errors": sum(record[4] for record in records),
        "count": len(records),
    }


def format_report(stats):
    """Renders the dict returned by ``aggregate`` as a list of report lines."""
    return [
        f"Minimum Latency (ms): {stats['min_latency']:.3f}",
        f"Average of Average Latencies (ms): {stats['avg_of_avg_latency']:.3f}",
        f"Maximum Latency (ms): {stats['max_latency']:.3f}",
        f"Summation of all Throughputs (GiB/s): {stats['sum_throughput']:.2f}",
        f"Summation of all Errors: {stats['sum_errors']}",
    ]


def read_summaries(folder_path):
    """
    Returns ``(file_path, summary_line)`` pairs for every input file in
    ``folder_path``, where the summary is the file's last non-blank line.

    Hidden entries, anything that is not a regular file and the report file
    itself are skipped; empty files are skipped with a warning on stderr.
    Raises OSError when the folder or a file cannot be read.
    """
    # The report may live in the scanned folder; never parse it as input.
    results_path = os.path.abspath(RESULTS_FILE)
    summaries = []

    # Sorted so that runs over the same folder are reproducible
    for file_name in sorted(os.listdir(folder_path)):
        if file_name.startswith("."):
            continue
        file_path = os.path.join(folder_path, file_name)
        if not os.path.isfile(file_path):
            continue
        if os.path.abspath(file_path) == results_path:
            continue

        summary = None
        with open(file_path, "r", encoding="utf-8") as current_file:
            # Stream the file and remember only the last non-blank line
            for line in current_file:
                if line.strip():
                    summary = line

        if summary is None:
            print(f"Skipping empty file: {file_path}", file=sys.stderr)
            continue
        summaries.append((file_path, summary))

    return summaries


def main():
    """Consolidates all results in FOLDER_PATH, then prints and saves them."""
    try:
        summaries = read_summaries(FOLDER_PATH)
    except OSError as err:
        sys.exit(f"Cannot read results from {FOLDER_PATH}: {err}")

    records = []
    for file_path, summary in summaries:
        try:
            records.append(parse_summary(summary))
        except ValueError as err:
            # Name the offending file; a bare parse error is hard to trace
            sys.exit(f"{file_path}: {err}")

    if not records:
        sys.exit(f"No AIS Loader result files found in {FOLDER_PATH}")

    report = format_report(aggregate(records))

    # Print the results
    for line in report:
        print(line)

    # Written only after a successful run so a failure cannot leave a
    # truncated or empty report behind.
    with open(RESULTS_FILE, "w", encoding="utf-8") as results_file:
        results_file.write("\n".join(report) + "\n")


if __name__ == "__main__":
    main()