"""Parse and combine JSON results from multiple aisloader benchmark outputs.

Reads one aisloader output file per benchmark host, merges the per-host
latency and throughput statistics, and prints a cluster-wide summary for
each (benchmark type, bucket) combination.
"""

import argparse
import json
from pathlib import Path

import humanize
import yaml


def load_hosts(hosts_file, hosts_group="aisloader_hosts"):
    """Return the benchmark host names from an ansible inventory file.

    Args:
        hosts_file: Path to the ansible inventory YAML file.
        hosts_group: Top-level inventory group whose ``hosts`` mapping
            lists the aisloader hosts. Defaults to ``"aisloader_hosts"``
            (the key the original code hard-coded).

    Returns:
        A dict keys view of host names.
    """
    # NOTE(review): the --aisloader_hosts CLI option (default "dgx_nodes")
    # was never threaded through to this lookup; callers can now override
    # the group via hosts_group, but the default behavior is unchanged.
    with open(hosts_file, "r", encoding="utf-8") as file:
        data = yaml.safe_load(file)
    return data.get(hosts_group).get("hosts").keys()


def read_results(host_list, bench_type, bucket):
    """Load each host's aisloader JSON output for one benchmark run.

    Expects files named ``{bucket}-{bench_type}-aistore{host}.json`` under
    ``output/{bench_type}/{bucket}``.

    Args:
        host_list: Iterable of host names (as produced by ``load_hosts``).
        bench_type: Benchmark type, e.g. ``"get"`` or ``"put"``.
        bucket: Bucket name used in the benchmark run.

    Returns:
        Dict mapping host name -> ``{"results": <parsed JSON list>}``.

    Raises:
        FileNotFoundError: if a host's output file is missing.
        json.JSONDecodeError: if an output file is not valid JSON.
    """
    out_dir = Path("output") / bench_type / bucket
    host_results = {}
    for host in host_list:
        outfile = out_dir / f"{bucket}-{bench_type}-aistore{host}.json"
        with open(outfile, "r", encoding="utf-8") as file:
            host_results[host] = {"results": json.load(file)}
    return host_results


def get_latencies(bench_res, bench_type):
    """Return (min, avg, max) latency from the final stats entry.

    ``bench_res`` is the parsed JSON list for one host; the last element
    holds the end-of-run statistics keyed by benchmark type.
    """
    final_stats = bench_res[-1].get(bench_type)
    return (
        final_stats.get("min_latency"),
        final_stats.get("latency"),
        final_stats.get("max_latency"),
    )


def get_final_throughput(bench_res, bench_type):
    """Return the throughput from the final stats entry for one host."""
    final_stats = bench_res[-1].get(bench_type)
    return final_stats.get("throughput")


def combine_results(result_dict, bench_type):
    """Aggregate per-host stats into cluster-wide values.

    Args:
        result_dict: Mapping host -> ``{"results": ...}`` from ``read_results``.
        bench_type: Benchmark type key into each host's final stats.

    Returns:
        Tuple ``(min_latency, avg_latency, max_latency, total_throughput)``
        where ``avg_latency`` is the mean of the per-host average latencies
        and ``total_throughput`` is the sum across hosts.

    Raises:
        ZeroDivisionError: if ``result_dict`` is empty (as before).
    """
    # float("inf") replaces the original arbitrary 10**16 sentinel.
    total_lat_min = float("inf")
    total_lat_max = 0
    lats = []
    tputs = []
    for host_values in result_dict.values():
        host_res = host_values.get("results")
        min_lat, avg_lat, max_lat = get_latencies(host_res, bench_type)
        total_lat_min = min(total_lat_min, min_lat)
        total_lat_max = max(total_lat_max, max_lat)
        lats.append(int(avg_lat))
        tputs.append(int(get_final_throughput(host_res, bench_type)))
    avg_lat = sum(lats) / len(lats)
    total_tput = sum(tputs)
    return total_lat_min, avg_lat, total_lat_max, total_tput


def get_natural_time(raw_time):
    """Format a duration (presumably nanoseconds — TODO confirm against
    aisloader output) as a human-readable string, e.g. ``"12.34 ms"``."""
    units = ("ns", "µs", "ms", "s")
    unit_index = 0
    while raw_time >= 1000 and unit_index < len(units) - 1:
        raw_time /= 1000
        unit_index += 1
    return f"{raw_time:.2f} {units[unit_index]}"


def pretty_print_res(bench_type, bucket, res, total_drives):
    """Print a human-readable summary of one benchmark's combined results.

    Args:
        bench_type: Benchmark type, used only for display.
        bucket: Bucket name, used only for display.
        res: Tuple from ``combine_results``.
        total_drives: Drive count used to derive per-drive throughput.
    """
    lat_min, avg_lat, lat_max, total_tput = res
    print(f"Benchmark results for benchmark type '{bench_type}' on bucket '{bucket}'")
    print("Latencies: ")
    print(
        f"min: {get_natural_time(lat_min)}, avg: {get_natural_time(avg_lat)}, max: {get_natural_time(lat_max)}"
    )
    # NOTE(review): total_tput is a SUM across hosts, though the label says
    # "average"; label kept verbatim to preserve output.
    print(
        f"Cluster average throughput: {humanize.naturalsize(total_tput, binary=True)}/s ({humanize.naturalsize(total_tput/total_drives, binary=True)}/s per drive)"
    )
    print()


def main(configs, args):
    """Aggregate and print results for each (bench_type, bucket) config."""
    # The inventory is the same for every config; load it once instead of
    # re-reading the YAML file on each iteration.
    host_list = load_hosts(args.host_file)
    for bench_type, bucket in configs:
        results = read_results(host_list, bench_type, bucket)
        combined_results = combine_results(results, bench_type)
        pretty_print_res(
            bench_type, bucket, combined_results, total_drives=args.total_drives
        )


if __name__ == "__main__":
    bench_runs = [
        ("get", "bench_1MB"),
        ("put", "bench_1MB"),
    ]

    parser = argparse.ArgumentParser(
        description="Parses and combines results from multiple aisloader outputs"
    )
    parser.add_argument(
        "--host_file",
        default="inventory/inventory.yaml",
        help="Filename containing ansible hosts",
    )
    parser.add_argument(
        "--total_drives",
        type=int,
        default=30,
        help="Number of drives on the AIS cluster being tested",
    )
    # NOTE(review): this option is currently unused by main(); see load_hosts.
    parser.add_argument(
        "--aisloader_hosts",
        default="dgx_nodes",
        help="Name of hosts running the aisloader benchmark",
    )

    run_args = parser.parse_args()
    main(bench_runs, run_args)