#!/usr/bin/env Rscript
# Copyright (c) 2018 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0

# Display details for `fio` random read storage IO tests.
#
# This script reads `resultdirs` and `inputdir`, neither of which is defined
# here; presumably they are set by the report that sources this file
# (NOTE(review): confirm against the including Rmd).


library(ggplot2)                                    # ability to plot nicely
library(gridExtra)                                  # So we can plot multiple graphs together
suppressMessages(suppressWarnings(library(ggpubr))) # for ggtexttable
suppressMessages(library(jsonlite))                 # to load the data
suppressMessages(suppressWarnings(library(tidyr)))  # for gather
library(tibble)

# One JSON result file is expected per test name, in each result directory.
testnames <- c(
  "fio-randread-128",
  "fio-randread-256",
  "fio-randread-512",
  "fio-randread-1k",
  "fio-randread-2k",
  "fio-randread-4k",
  "fio-randread-8k",
  "fio-randread-16k",
  "fio-randread-32k",
  "fio-randread-64k"
)

# Accumulators, grown one result file at a time:
#   data2      - bandwidth/IOPS totals per (runtime, blocksize)
#   all_ldata  - 95th/99th percentile latencies, for the boxplot
#   all_ldata2 - every latency percentile, for the single-blocksize line plot
data2 <- c()
all_ldata <- c()
all_ldata2 <- c()
# NOTE(review): stats, rstats and rstats_names are never read in this script;
# they are kept in case the including report relies on them being reset.
stats <- c()
rstats <- c()
rstats_names <- c()

# Where to store up the stats for the tables
read_bw_stats <- c()
read_iops_stats <- c()
read_lat95_stats <- c()
read_lat99_stats <- c()

# For each set of results
for (currentdir in resultdirs) {
  bw_dirstats <- c()
  iops_dirstats <- c()
  lat95_dirstats <- c()
  lat99_dirstats <- c()
  # Derive the name from the test result dirname
  datasetname <- basename(currentdir)

  for (testname in testnames) {
    fname <- paste0(inputdir, currentdir, testname, ".json")
    if (!file.exists(fname)) {
      # warning(paste("Skipping non-existent file: ", fname))
      next
    }

    # Import the data
    fdata <- fromJSON(fname)
    # De-ref the test named unique data
    fdata <- fdata[[testname]]

    blocksize <- fdata$Raw[["global options"]]$bs

    # Extract the latency data - it comes as a table of percentiles, so
    # we have to do a little work...
    clat <- data.frame(clat_ns = fdata$Raw$jobs[[1]]$read$clat_ns$percentile)

    # Generate a clat data set with 'clean' percentile numbers so
    # we can sensibly plot it later on.
    clat2 <- clat
    colnames(clat2) <- sub("clat_ns.", "", colnames(clat2))
    colnames(clat2) <- sub("0000", "", colnames(clat2))
    ldata2 <- gather(clat2)
    colnames(ldata2)[colnames(ldata2) == "key"] <- "percentile"
    colnames(ldata2)[colnames(ldata2) == "value"] <- "ms"
    ldata2$ms <- ldata2$ms / 1000000 # ns->ms
    ldata2 <- cbind(ldata2, runtime = rep(datasetname, nrow(ldata2)))
    ldata2 <- cbind(ldata2, blocksize = rep(blocksize, nrow(ldata2)))

    # Pull the 95 and 99 percentile numbers for the boxplot
    # Plotting all values for all runtimes and blocksizes is just way too
    # noisy to make a meaningful picture, so we use this subset.
    # Our values fall more in the range of ms...
    pc95data <- tibble(percentile = clat$clat_ns.95.000000 / 1000000)
    pc95data <- cbind(
      pc95data,
      runtime = rep(paste(datasetname, "95pc", sep = "-"), nrow(pc95data))
    )
    pc99data <- tibble(percentile = clat$clat_ns.99.000000 / 1000000)
    # Sized from pc99data itself (the original borrowed pc95data's length).
    pc99data <- cbind(
      pc99data,
      runtime = rep(paste(datasetname, "99pc", sep = "-"), nrow(pc99data))
    )
    ldata <- rbind(pc95data, pc99data)
    ldata <- cbind(ldata, blocksize = rep(blocksize, nrow(ldata)))

    # We want total bandwidth, so that is the sum of the bandwidths
    # from all the read 'jobs'.
    mdata <- data.frame(
      read_bw_mps = as.numeric(sum(fdata$Raw$jobs[[1]]$read$bw) / 1024)
    )
    mdata <- cbind(
      mdata,
      iops_tot = as.numeric(sum(fdata$Raw$jobs[[1]]$read$iops))
    )
    mdata <- cbind(mdata, runtime = rep(datasetname, nrow(mdata)))
    mdata <- cbind(mdata, blocksize = rep(blocksize, nrow(mdata)))

    # Extract the stats tables
    bw_dirstats <- rbind(bw_dirstats, round(mdata$read_bw_mps, digits = 1))
    # Rowname hack to get the blocksize recorded
    rownames(bw_dirstats)[nrow(bw_dirstats)] <- blocksize

    iops_dirstats <- rbind(iops_dirstats, round(mdata$iops_tot, digits = 1))
    rownames(iops_dirstats)[nrow(iops_dirstats)] <- blocksize

    # And do the 95 and 99 percentiles as tables as well
    lat95_dirstats <- rbind(
      lat95_dirstats,
      round(mean(clat$clat_ns.95.000000) / 1000000, digits = 1)
    )
    rownames(lat95_dirstats)[nrow(lat95_dirstats)] <- blocksize
    lat99_dirstats <- rbind(
      lat99_dirstats,
      round(mean(clat$clat_ns.99.000000) / 1000000, digits = 1)
    )
    rownames(lat99_dirstats)[nrow(lat99_dirstats)] <- blocksize

    # Collect up as sets across all files and runtimes.
    data2 <- rbind(data2, mdata)
    all_ldata <- rbind(all_ldata, ldata)
    all_ldata2 <- rbind(all_ldata2, ldata2)
  }

  # Collect up for each dir we process into a column
  read_bw_stats <- cbind(read_bw_stats, bw_dirstats)
  colnames(read_bw_stats)[ncol(read_bw_stats)] <- datasetname

  read_iops_stats <- cbind(read_iops_stats, iops_dirstats)
  colnames(read_iops_stats)[ncol(read_iops_stats)] <- datasetname

  read_lat95_stats <- cbind(read_lat95_stats, lat95_dirstats)
  colnames(read_lat95_stats)[ncol(read_lat95_stats)] <- datasetname
  read_lat99_stats <- cbind(read_lat99_stats, lat99_dirstats)
  colnames(read_lat99_stats)[ncol(read_lat99_stats)] <- datasetname
}

# Keep the blocksizes in the order they were processed (128, 256, ... 64k).
# Since R 4.0 cbind()/data.frame() leave strings as character, and ggplot2
# places character values on a discrete axis alphabetically (128, 16k, 1k...),
# so pin the order explicitly with a factor.
if (is.data.frame(data2)) {
  blocksize_levels <- unique(as.character(data2$blocksize))
  data2$blocksize <- factor(data2$blocksize, levels = blocksize_levels)
  all_ldata$blocksize <- factor(all_ldata$blocksize, levels = blocksize_levels)
  all_ldata2$blocksize <- factor(
    all_ldata2$blocksize,
    levels = blocksize_levels
  )
}

# To get a nice looking table, we need to extract the rownames into their
# own column
read_bw_stats <- cbind(Bandwidth = rownames(read_bw_stats), read_bw_stats)
# The value is bw/1024 and the bandwidth plot axis is labelled MiB/s, so use
# the same unit here (was "MB/s").
read_bw_stats <- cbind(
  read_bw_stats,
  Units = rep("MiB/s", nrow(read_bw_stats))
)

read_iops_stats <- cbind(IOPS = rownames(read_iops_stats), read_iops_stats)
read_iops_stats <- cbind(
  read_iops_stats,
  Units = rep("IOP/s", nrow(read_iops_stats))
)

read_lat95_stats <- cbind(
  "lat 95pc" = rownames(read_lat95_stats),
  read_lat95_stats
)
read_lat95_stats <- cbind(
  read_lat95_stats,
  Units = rep("ms", nrow(read_lat95_stats))
)
read_lat99_stats <- cbind(
  "lat 99pc" = rownames(read_lat99_stats),
  read_lat99_stats
)
read_lat99_stats <- cbind(
  read_lat99_stats,
  Units = rep("ms", nrow(read_lat99_stats))
)

# Bandwidth line plot
read_bw_line_plot <- ggplot() +
  geom_line(
    data = data2,
    aes(blocksize, read_bw_mps, group = runtime, color = runtime)
  ) +
  ylim(0, NA) +
  ggtitle("Random Read total bandwidth") +
  xlab("Blocksize") +
  ylab("Bandwidth (MiB/s)") +
  theme(
    axis.text.x = element_text(angle = 90),
    legend.position = c(0.35, 0.8),
    legend.title = element_text(size = 5),
    legend.text = element_text(size = 5),
    legend.background = element_rect(fill = alpha("blue", 0.2))
  )

# IOPS line plot
read_iops_line_plot <- ggplot() +
  geom_line(
    data = data2,
    aes(blocksize, iops_tot, group = runtime, color = runtime)
  ) +
  ylim(0, NA) +
  ggtitle("Random Read total IOPS") +
  xlab("Blocksize") +
  ylab("IOPS") +
  theme(
    axis.text.x = element_text(angle = 90),
    legend.position = c(0.35, 0.8),
    legend.title = element_text(size = 5),
    legend.text = element_text(size = 5),
    legend.background = element_rect(fill = alpha("blue", 0.2))
  )

# 95 and 99 percentile box plot
# NOTE(review): `fun.y` is kept for compatibility with older ggplot2; newer
# releases name this argument `fun` - switch once the minimum version is known.
read_clat_box_plot <- ggplot() +
  geom_boxplot(data = all_ldata, aes(blocksize, percentile, color = runtime)) +
  stat_summary(
    data = all_ldata,
    aes(blocksize, percentile, group = runtime, color = runtime),
    fun.y = mean,
    geom = "line"
  ) +
  ylim(0, NA) +
  ggtitle(
    "Random Read completion latency",
    subtitle = "95&99 percentiles, boxplot over jobs"
  ) +
  xlab("Blocksize") +
  ylab("Latency (ms)") +
  theme(axis.text.x = element_text(angle = 90)) +
  # Use the 'paired' colour matrix as we are setting these up as pairs of
  # 95 and 99 percentiles, and it is much easier to visually group those to
  # each runtime if we use this colourmap.
  scale_colour_brewer(palette = "Paired")
# it would be nice to use the same legend theme as the other plots on this
# page, but because of the number of entries it tends to flow off the picture.
# theme(
#   axis.text.x=element_text(angle=90),
#   legend.position=c(0.35,0.8),
#   legend.title=element_text(size=5),
#   legend.text=element_text(size=5),
#   legend.background = element_rect(fill=alpha('blue', 0.2))
# )

# As the boxplot is actually quite hard to interpret, also show a linegraph
# of all the percentiles for a single blocksize.
# Blocksize whose full percentile curve is drawn.
which_blocksize <- "4k"
clat_line_subtitle <- paste("For blocksize", which_blocksize)

# Keep only the rows for that blocksize, then average the latency for each
# (percentile, blocksize, runtime) combination. aggregate() names the
# averaged column `x`, which is what the plot below refers to.
single_blocksize <- subset(all_ldata2, blocksize == which_blocksize)
clat_line <- with(
  single_blocksize,
  aggregate(
    ms,
    by = list(
      percentile = percentile,
      blocksize = blocksize,
      runtime = runtime
    ),
    FUN = mean
  )
)

# Percentiles need to be numeric so they form a continuous x axis.
clat_line$percentile <- as.numeric(clat_line$percentile)

read_clat_line_plot <- ggplot() +
  geom_line(
    data = clat_line,
    aes(percentile, x, group = runtime, color = runtime)
  ) +
  ylim(0, NA) +
  ggtitle(
    "Random Read completion latency percentiles",
    subtitle = clat_line_subtitle
  ) +
  xlab("Percentile") +
  ylab("Time (ms)") +
  theme(
    axis.text.x = element_text(angle = 90),
    legend.position = c(0.35, 0.8),
    legend.title = element_text(size = 5),
    legend.text = element_text(size = 5),
    legend.background = element_rect(fill = alpha("blue", 0.2))
  )

# Output the pretty pictures
graphics_plot <- grid.arrange(
  read_bw_line_plot,
  read_iops_line_plot,
  read_clat_box_plot,
  read_clat_line_plot,
  nrow = 2,
  ncol = 2
)

# A bit of an odd tweak to force a pagebreak between the pictures and
# the tables. This only works because we have a `results='asis'` in the Rmd
# R fragment.
# Emit the page break that separates the graphs from the tables.
cat("\n\n\\pagebreak\n")

# Render one stats matrix as a text table: 10pt base font, no row-name
# column. Warnings raised while building the table are suppressed.
render_stats_table <- function(stats_table) {
  suppressWarnings(
    ggtexttable(
      stats_table,
      theme = ttheme(base_size = 10),
      rows = NULL
    )
  )
}

read_bw_stats_plot <- render_stats_table(read_bw_stats)
read_iops_stats_plot <- render_stats_table(read_iops_stats)
read_lat95_stats_plot <- render_stats_table(read_lat95_stats)
read_lat99_stats_plot <- render_stats_table(read_lat99_stats)

# and then the statistics tables
stats_plot <- grid.arrange(
  read_bw_stats_plot,
  read_iops_stats_plot,
  read_lat95_stats_plot,
  read_lat99_stats_plot,
  nrow = 4,
  ncol = 1
)