github.com/dmaizel/tests@v0.0.0-20210728163746-cae6a2d9cee8/metrics/storage/fio-k8s/scripts/dax-compare-test/report/fio.py

github.com/dmaizel/tests@v0.0.0-20210728163746-cae6a2d9cee8/metrics/storage/fio-k8s/scripts/dax-compare-test/report/fio.py (about)

     1  
     2  # Copyright (c) 2021 Intel Corporation
     3  #
     4  # SPDX-License-Identifier: Apache-2.0
     5  import pandas as pd
     6  import os
     7  import re
     8  import io
     9  import glob
    10  from IPython.display import display, Markdown
    11  import matplotlib.pyplot as plt
    12  
    13  #Compare the tests results group by fio job.
    14  #Input:
    15  # df: dataset from `import_data()`
    16  # metric: string of metrics provided in `df`
    17  def compare_tests_group_by_fio_job(df, metric):
    18      test_names, metric_df = group_metrics_group_by_testname(df, metric)
    19      show_df(metric_df)
    20      plot_df(metric_df,test_names)
    21  
    22  # Given a metric return results per test group by fio job.
    23  # input:
    24  #    df: dataset from `import_data()`
    25  #    metric: string with the name of the metric to filter.
    26  # output:
    27  #    dataset with fomat:
    28  #      'workload' , 'name[0]' , ... , 'name[n]'
    29  #
    30  def group_metrics_group_by_testname(df, metric):
    31    #name of each tests from results
    32    names = set()
    33    # Rows of new data set
    34    rows = []
    35    # map:
    36    # keys: name of fio job
    37    # value: dict[k]:v where k: name of a test, v: value of test for  metric`
    38    workload = {}
    39  
    40    for k, row in df.iterrows():
    41      # name of a fio job
    42      w = row['WORKLOAD']
    43      # name of tests
    44      tname = row['NAME']
    45      names.add(tname)
    46      # given a fio job name get dict of values
    47      # if not previous values init empty dict
    48      dict_values = workload.get(w, {})
    49      # For a given metric, add it into as value of dict_values[testname]=val
    50      #e.g
    51      # dict_values["test-name"] = row["IOPS"]
    52      dict_values[tname] = row[metric]
    53      workload[w] = dict_values
    54  
    55    names = list(names)
    56    cols = ['WORKLOAD'] + list(names)
    57    rdf = pd.DataFrame(workload,columns = cols)
    58  
    59    for k in workload:
    60      d = workload[k]
    61  
    62      if not d[names[0]] == 0:
    63        d["WORKLOAD"] = k;
    64        rdf = rdf.append(d,ignore_index=True)
    65    rdf = rdf.dropna()
    66    return names, rdf
    67  
    68  def plot_df(df, names,sort_key=""):
    69    if sort_key != "":
    70      df.sort_values(sort_key, ascending=False)
    71    df.plot(kind='bar',x="WORKLOAD",y=names,  figsize=(30, 10))
    72    plt.show()
    73  
    74  
    75  def import_data():
    76      frames = []
    77      for f in glob.glob('./results/*/results.csv'):
    78          print("reading:" + f)
    79          df = pd.read_csv(f)
    80          frames.append(df)
    81      return pd.concat(frames)
    82  
    83  def show_df(df):
    84      pd.set_option('display.max_rows', df.shape[0]+1)
    85      print(df)
    86  
    87  def print_md(s):
    88       display(Markdown(s))
    89  
    90  #notebook entrypoint
    91  def generate_report():
    92      #Load the all test results in a single dataset
    93      df_results = import_data()
    94      print_md("Show all data from results")
    95      show_df(df_results)
    96      print_md("### Compare the tests results group by fio job. The metric used to compare is write bandwidth")
    97      compare_tests_group_by_fio_job(df_results, 'bw_w')
    98      print_md("### Compare the tests results group by fio job. The metric used to compare is read bandwidth")
    99      compare_tests_group_by_fio_job(df_results, 'bw_r')
   100      print_md("### Compare the tests results group by fio job. The metric used to compare is write IOPS(Input/Output Operations Per Second)")
   101      compare_tests_group_by_fio_job(df_results, 'IOPS_w')
   102      print_md("### Compare the tests results group by fio job. The metric used to compare is read IOPS(Input/Output Operations Per Second)")
   103      compare_tests_group_by_fio_job(df_results, 'IOPS_r')