github.com/kata-containers/tests@v0.0.0-20240307153542-772105b56064/metrics/report/report_dockerfile/fio-reads.R (about)

     1  #!/usr/bin/env Rscript
     2  # Copyright (c) 2018 Intel Corporation
     3  #
     4  # SPDX-License-Identifier: Apache-2.0
     5  
     6  # Display details for `fio` random read storage IO tests.
     7  
     8  
     9  library(ggplot2)	# ability to plot nicely
    10  library(gridExtra)	# So we can plot multiple graphs together
    11  suppressMessages(suppressWarnings(library(ggpubr)))	# for ggtexttable
    12  suppressMessages(library(jsonlite))			# to load the data
    13  suppressMessages(suppressWarnings(library(tidyr)))	# for gather
    14  library(tibble)
    15  
    16  testnames=c(
    17  	"fio-randread-128",
    18  	"fio-randread-256",
    19  	"fio-randread-512",
    20  	"fio-randread-1k",
    21  	"fio-randread-2k",
    22  	"fio-randread-4k",
    23  	"fio-randread-8k",
    24  	"fio-randread-16k",
    25  	"fio-randread-32k",
    26  	"fio-randread-64k"
    27  	)
    28  
    29  data2=c()
    30  all_ldata=c()
    31  all_ldata2=c()
    32  stats=c()
    33  rstats=c()
    34  rstats_names=c()
    35  
    36  # Where to store up the stats for the tables
    37  read_bw_stats=c()
    38  read_iops_stats=c()
    39  read_lat95_stats=c()
    40  read_lat99_stats=c()
    41  
    42  # For each set of results
    43  for (currentdir in resultdirs) {
    44  	bw_dirstats=c()
    45  	iops_dirstats=c()
    46  	lat95_dirstats=c()
    47  	lat99_dirstats=c()
    48  	# Derive the name from the test result dirname
    49  	datasetname=basename(currentdir)
    50  
    51  	for (testname in testnames) {
    52  		fname=paste(inputdir, currentdir, testname, '.json', sep="")
    53  		if ( !file.exists(fname)) {
    54  			#warning(paste("Skipping non-existent file: ", fname))
    55  			next
    56  		}
    57  
    58  		# Import the data
    59  		fdata=fromJSON(fname)
    60  		# De-ref the test named unique data
    61  		fdata=fdata[[testname]]
    62  
    63  		blocksize=fdata$Raw$'global options'$bs
    64  
    65  		# Extract the latency data - it comes as a table of percentiles, so
    66  		# we have to do a little work...
    67  		clat=data.frame(clat_ns=fdata$Raw$jobs[[1]]$read$clat_ns$percentile)
    68  
    69  		# Generate a clat data set with 'clean' percentile numbers so
    70  		# we can sensibly plot it later on.
    71  		clat2=clat
    72  		colnames(clat2)<-sub("clat_ns.", "", colnames(clat2))
    73  		colnames(clat2)<-sub("0000", "", colnames(clat2))
    74  		ldata2=gather(clat2)
    75  		colnames(ldata2)[colnames(ldata2)=="key"] <- "percentile"
    76  		colnames(ldata2)[colnames(ldata2)=="value"] <- "ms"
    77  		ldata2$ms=ldata2$ms/1000000	#ns->ms
    78  		ldata2=cbind(ldata2, runtime=rep(datasetname, length(ldata2$percentile)))
    79  		ldata2=cbind(ldata2, blocksize=rep(blocksize, length(ldata2$percentile)))
    80  
    81  		# Pull the 95 and 99 percentile numbers for the boxplot
    82  		# Plotting all values for all runtimes and blocksizes is just way too
    83  		# noisy to make a meaninful picture, so we use this subset.
    84  		# Our values fall more in the range of ms...
    85  		pc95data=tibble(percentile=clat$clat_ns.95.000000/1000000)
    86  		pc95data=cbind(pc95data, runtime=rep(paste(datasetname, "95pc", sep="-"), length(pc95data$percentile)))
    87  		pc99data=tibble(percentile=clat$clat_ns.99.000000/1000000)
    88  		pc99data=cbind(pc99data, runtime=rep(paste(datasetname, "99pc", sep="-"), length(pc95data$percentile)))
    89  		ldata=rbind(pc95data, pc99data)
    90  		ldata=cbind(ldata, blocksize=rep(blocksize, length(ldata$percentile)))
    91  
    92  		# We want total bandwidth, so that is the sum of the bandwidths
    93  		# from all the read 'jobs'.
    94  		mdata=data.frame(read_bw_mps=as.numeric(sum(fdata$Raw$jobs[[1]]$read$bw)/1024))
    95  		mdata=cbind(mdata, iops_tot=as.numeric(sum(fdata$Raw$jobs[[1]]$read$iops)))
    96  		mdata=cbind(mdata, runtime=rep(datasetname, length(mdata[, "read_bw_mps"]) ))
    97  		mdata=cbind(mdata, blocksize=rep(blocksize, length(mdata[, "read_bw_mps"]) ))
    98  
    99  		# Extract the stats tables
   100  		bw_dirstats=rbind(bw_dirstats, round(mdata$read_bw_mps, digits=1))
   101  		# Rowname hack to get the blocksize recorded
   102  		rownames(bw_dirstats)[nrow(bw_dirstats)]=blocksize
   103  
   104  		iops_dirstats=rbind(iops_dirstats, round(mdata$iops_tot, digits=1))
   105  		rownames(iops_dirstats)[nrow(iops_dirstats)]=blocksize
   106  
   107  		# And do the 95 and 99 percentiles as tables as well
   108  		lat95_dirstats=rbind(lat95_dirstats, round(mean(clat$clat_ns.95.000000)/1000000, digits=1))
   109  		rownames(lat95_dirstats)[nrow(lat95_dirstats)]=blocksize
   110  		lat99_dirstats=rbind(lat99_dirstats, round(mean(clat$clat_ns.99.000000)/1000000, digits=1))
   111  		rownames(lat99_dirstats)[nrow(lat99_dirstats)]=blocksize
   112  
   113  		# Collect up as sets across all files and runtimes.
   114  		data2=rbind(data2, mdata)
   115  		all_ldata=rbind(all_ldata, ldata)
   116  		all_ldata2=rbind(all_ldata2, ldata2)
   117  	}
   118  
   119  	# Collect up for each dir we process into a column
   120  	read_bw_stats=cbind(read_bw_stats, bw_dirstats)
   121  	colnames(read_bw_stats)[ncol(read_bw_stats)]=datasetname
   122  
   123  	read_iops_stats=cbind(read_iops_stats, iops_dirstats)
   124  	colnames(read_iops_stats)[ncol(read_iops_stats)]=datasetname
   125  
   126  	read_lat95_stats=cbind(read_lat95_stats, lat95_dirstats)
   127  	colnames(read_lat95_stats)[ncol(read_lat95_stats)]=datasetname
   128  	read_lat99_stats=cbind(read_lat99_stats, lat99_dirstats)
   129  	colnames(read_lat99_stats)[ncol(read_lat99_stats)]=datasetname
   130  }
   131  
   132  # To get a nice looking table, we need to extract the rownames into their
   133  # own column
   134  read_bw_stats=cbind(Bandwidth=rownames(read_bw_stats), read_bw_stats)
   135  read_bw_stats=cbind(read_bw_stats, Units=rep("MB/s", nrow(read_bw_stats)))
   136  
   137  read_iops_stats=cbind(IOPS=rownames(read_iops_stats), read_iops_stats)
   138  read_iops_stats=cbind(read_iops_stats, Units=rep("IOP/s", nrow(read_iops_stats)))
   139  
   140  read_lat95_stats=cbind('lat 95pc'=rownames(read_lat95_stats), read_lat95_stats)
   141  read_lat95_stats=cbind(read_lat95_stats, Units=rep("ms", nrow(read_lat95_stats)))
   142  read_lat99_stats=cbind('lat 99pc'=rownames(read_lat99_stats), read_lat99_stats)
   143  read_lat99_stats=cbind(read_lat99_stats, Units=rep("ms", nrow(read_lat99_stats)))
   144  
   145  # Bandwidth line plot
   146  read_bw_line_plot <- ggplot() +
   147  	geom_line( data=data2, aes(blocksize, read_bw_mps, group=runtime, color=runtime)) +
   148  	ylim(0, NA) +
   149  	ggtitle("Random Read total bandwidth") +
   150  	xlab("Blocksize") +
   151  	ylab("Bandwidth (MiB/s)") +
   152  	theme(
   153  		axis.text.x=element_text(angle=90),
   154  		legend.position=c(0.35,0.8),
   155  		legend.title=element_text(size=5),
   156  		legend.text=element_text(size=5),
   157  		legend.background = element_rect(fill=alpha('blue', 0.2))
   158  	)
   159  
   160  # IOPS line plot
   161  read_iops_line_plot <- ggplot() +
   162  	geom_line( data=data2, aes(blocksize, iops_tot, group=runtime, color=runtime)) +
   163  	ylim(0, NA) +
   164  	ggtitle("Random Read total IOPS") +
   165  	xlab("Blocksize") +
   166  	ylab("IOPS") +
   167  	theme(
   168  		axis.text.x=element_text(angle=90),
   169  		legend.position=c(0.35,0.8),
   170  		legend.title=element_text(size=5),
   171  		legend.text=element_text(size=5),
   172  		legend.background = element_rect(fill=alpha('blue', 0.2))
   173  	)
   174  
   175  # 95 and 99 percentile box plot
   176  read_clat_box_plot <- ggplot() +
   177  	geom_boxplot( data=all_ldata, aes(blocksize, percentile, color=runtime)) +
   178  	stat_summary( data=all_ldata, aes(blocksize, percentile, group=runtime, color=runtime), fun.y=mean, geom="line") +
   179  	ylim(0, NA) +
   180  	ggtitle("Random Read completion latency", subtitle="95&99 percentiles, boxplot over jobs") +
   181  	xlab("Blocksize") +
   182  	ylab("Latency (ms)") +
   183  	theme(axis.text.x=element_text(angle=90)) +
   184  	# Use the 'paired' colour matrix as we are setting these up as pairs of
   185  	# 95 and 99 percentiles, and it is much easier to visually group those to
   186  	# each runtime if we use this colourmap.
   187  	scale_colour_brewer(palette="Paired")
   188  #	it would be nice to use the same legend theme as the other plots on this
   189  #	page, but because of the number of entries it tends to flow off the picture.
   190  #	theme(
   191  #		axis.text.x=element_text(angle=90),
   192  #		legend.position=c(0.35,0.8),
   193  #		legend.title=element_text(size=5),
   194  #		legend.text=element_text(size=5),
   195  #		legend.background = element_rect(fill=alpha('blue', 0.2))
   196  #	)
   197  
   198  # As the boxplot is actually quite hard to interpret, also show a linegraph
   199  # of all the percentiles for a single blocksize.
   200  which_blocksize='4k'
   201  clat_line_subtitle=paste("For blocksize", which_blocksize, sep=" ")
   202  single_blocksize=subset(all_ldata2, blocksize==which_blocksize)
   203  clat_line=aggregate(
   204  	single_blocksize$ms,
   205  	by=list(
   206  		percentile=single_blocksize$percentile,
   207  		blocksize=single_blocksize$blocksize,
   208  		runtime=single_blocksize$runtime
   209  	),
   210  	FUN=mean
   211  )
   212  
   213  clat_line$percentile=as.numeric(clat_line$percentile)
   214  
   215  read_clat_line_plot <- ggplot() +
   216  	geom_line( data=clat_line, aes(percentile, x, group=runtime, color=runtime)) +
   217  	ylim(0, NA) +
   218  	ggtitle("Random Read completion latency percentiles", subtitle=clat_line_subtitle) +
   219  	xlab("Percentile") +
   220  	ylab("Time (ms)") +
   221  	theme(
   222  		axis.text.x=element_text(angle=90),
   223  		legend.position=c(0.35,0.8),
   224  		legend.title=element_text(size=5),
   225  		legend.text=element_text(size=5),
   226  		legend.background = element_rect(fill=alpha('blue', 0.2))
   227  	)
   228  
   229  # Output the pretty pictures
   230  graphics_plot = grid.arrange(
   231  	read_bw_line_plot,
   232  	read_iops_line_plot,
   233  	read_clat_box_plot,
   234  	read_clat_line_plot,
   235  	nrow=2,
   236  	ncol=2 )
   237  
   238  # A bit of an odd tweak to force a pagebreak between the pictures and
   239  # the tables. This only works because we have a `results='asis'` in the Rmd
   240  # R fragment.
   241  cat("\n\n\\pagebreak\n")
   242  
   243  read_bw_stats_plot = suppressWarnings(ggtexttable(read_bw_stats,
   244  	theme=ttheme(base_size=10),
   245  	rows=NULL
   246  	))
   247  
   248  read_iops_stats_plot = suppressWarnings(ggtexttable(read_iops_stats,
   249  	theme=ttheme(base_size=10),
   250  	rows=NULL
   251  	))
   252  
   253  read_lat95_stats_plot = suppressWarnings(ggtexttable(read_lat95_stats,
   254  	theme=ttheme(base_size=10),
   255  	rows=NULL
   256  	))
   257  read_lat99_stats_plot = suppressWarnings(ggtexttable(read_lat99_stats,
   258  	theme=ttheme(base_size=10),
   259  	rows=NULL
   260  	))
   261  
   262  # and then the statistics tables
   263  stats_plot = grid.arrange(
   264  	read_bw_stats_plot,
   265  	read_iops_stats_plot,
   266  	read_lat95_stats_plot,
   267  	read_lat99_stats_plot,
   268  	nrow=4,
   269  	ncol=1 )