github.com/grailbio/base@v0.0.11/file/filebench/filebench.go (about) 1 package filebench 2 3 import ( 4 "context" 5 "fmt" 6 "io" 7 "log" 8 "math/rand" 9 "sort" 10 "strings" 11 "sync/atomic" 12 "text/tabwriter" 13 "time" 14 15 "github.com/grailbio/base/file" 16 "github.com/grailbio/base/ioctx" 17 "github.com/grailbio/base/must" 18 "github.com/grailbio/base/traverse" 19 ) 20 21 // ReadSizes are the parameters for a benchmark run. 22 type ReadSizes struct { 23 ChunkBytes []int 24 ContiguousChunks []int 25 MaxReadBytes int 26 MaxReplicates int 27 } 28 29 // ReplicateTargetBytes limits the number of replicates of a single benchmark condition. 30 const ReplicateTargetBytes int = 1e9 31 32 // DefaultReadSizes constructs ReadSizes with the default range of parameters. 33 func DefaultReadSizes() ReadSizes { 34 return ReadSizes{ 35 ChunkBytes: []int{ 36 1 << 10, 37 1 << 20, 38 1 << 23, 39 1 << 24, 40 1 << 25, 41 1 << 27, 42 1 << 29, 43 1 << 30, 44 1 << 32, 45 }, 46 ContiguousChunks: []int{ 47 1, 48 1 << 3, 49 1 << 6, 50 1 << 9, 51 }, 52 MaxReadBytes: 1 << 32, 53 MaxReplicates: 10, 54 } 55 } 56 57 func (r ReadSizes) MinFileSize() int { 58 size := maxInts(r.ChunkBytes) * maxInts(r.ContiguousChunks) 59 if size < r.MaxReadBytes { 60 return size 61 } 62 return r.MaxReadBytes 63 } 64 65 func (r ReadSizes) sort() { 66 must.True(len(r.ChunkBytes) > 0) 67 must.True(len(r.ContiguousChunks) > 0) 68 sort.Ints(r.ChunkBytes) 69 sort.Ints(r.ContiguousChunks) 70 } 71 72 type Prefix struct { 73 Path string 74 // MaxReadBytes optionally overrides ReadSizes.MaxReadBytes (only to become smaller). 75 // Useful if one prefix (like FUSE) is slower than others. 76 MaxReadBytes int 77 } 78 79 // RunAndPrint executes the benchmark cases and prints a human-readable summary to out. 80 // pathPrefixes is typically s3:// or a FUSE mount point. Results are reported for each one. 81 // pathSuffix* are at least one S3-relative path (like bucket/some/file.txt) to a large file to read 82 // during benchmarking. If there are multiple, reads are spread across them (not multiplied for each 83 // suffix). Caller may want to pass multiple to try to reduce throttling when several benchmark 84 // tasks are running in parallel (see Bigmachine.RunAndPrint). 85 func (r ReadSizes) RunAndPrint( 86 ctx context.Context, 87 out io.Writer, 88 pathPrefixes []Prefix, 89 pathSuffix0 string, 90 pathSuffixes ...string, 91 ) { 92 minFileSize := r.MinFileSize() 93 r.sort() // Make sure table is easy to read. 94 95 pathSuffixes = append([]string{pathSuffix0}, pathSuffixes...) 96 type fileOption struct { 97 file.File 98 Info file.Info 99 } 100 files := make([][]fileOption, len(pathPrefixes)) 101 for prefixIdx, prefix := range pathPrefixes { 102 files[prefixIdx] = make([]fileOption, len(pathSuffixes)) 103 for suffixIdx, suffix := range pathSuffixes { 104 f, err := file.Open(ctx, file.Join(prefix.Path, suffix)) 105 must.Nil(err) 106 defer func() { must.Nil(f.Close(ctx)) }() 107 o := &files[prefixIdx][suffixIdx] 108 o.File = f 109 110 o.Info, err = f.Stat(ctx) 111 must.Nil(err) 112 must.True(o.Info.Size() >= int64(minFileSize), "file too small", f.Name()) 113 } 114 } 115 116 type ( 117 condition struct { 118 prefixIdx, chunkBytesIdx, contiguousChunksIdx int 119 parallel bool 120 } 121 result struct { 122 totalBytes int 123 totalTime time.Duration 124 } 125 ) 126 var ( 127 tasks []condition 128 results = make([][][][]result, len(pathPrefixes)) 129 ) 130 for prefixIdx, prefix := range pathPrefixes { 131 results[prefixIdx] = make([][][]result, len(r.ChunkBytes)) 132 for chunkBytesIdx, chunkBytes := range r.ChunkBytes { 133 results[prefixIdx][chunkBytesIdx] = make([][]result, len(r.ContiguousChunks)) 134 for contiguousChunksIdx, contiguousChunks := range r.ContiguousChunks { 135 results[prefixIdx][chunkBytesIdx][contiguousChunksIdx] = make([]result, 2) 136 totalReadBytes := chunkBytes * contiguousChunks 137 maxReadBytes := r.MaxReadBytes 138 if 0 < prefix.MaxReadBytes && prefix.MaxReadBytes < maxReadBytes { 139 maxReadBytes = prefix.MaxReadBytes 140 } 141 if totalReadBytes > maxReadBytes { 142 continue 143 } 144 replicates := 1 145 if totalReadBytes < ReplicateTargetBytes { 146 replicates = (ReplicateTargetBytes - 1 + totalReadBytes) / totalReadBytes 147 if replicates > r.MaxReplicates { 148 replicates = r.MaxReplicates 149 } 150 } 151 for _, parallel := range []bool{false, true} { 152 for ri := 0; ri < replicates; ri++ { 153 tasks = append(tasks, condition{ 154 prefixIdx: prefixIdx, 155 chunkBytesIdx: chunkBytesIdx, 156 contiguousChunksIdx: contiguousChunksIdx, 157 parallel: parallel, 158 }) 159 } 160 } 161 } 162 } 163 } 164 165 var ( 166 reproducibleRandom = rand.New(rand.NewSource(1)) 167 ephemeralRandom = rand.New(rand.NewSource(time.Now().UnixNano())) 168 ) 169 // While benchmarking is running, it's easy to compare the current task index from different 170 // benchmarking machines to judge their relative progress. 171 reproducibleRandom.Shuffle(len(tasks), func(i, j int) { 172 tasks[i], tasks[j] = tasks[j], tasks[i] 173 }) 174 175 var ( 176 currentTaskIdx int32 177 cancelled = make(chan struct{}) 178 ) 179 go func() { 180 ticker := time.NewTicker(10 * time.Second) 181 defer ticker.Stop() 182 for { 183 select { 184 case <-ticker.C: 185 taskIdx := atomic.LoadInt32(¤tTaskIdx) 186 c := tasks[taskIdx] 187 prefix := pathPrefixes[c.prefixIdx] 188 chunkBytes := r.ChunkBytes[c.chunkBytesIdx] 189 contiguousChunks := r.ContiguousChunks[c.contiguousChunksIdx] 190 log.Printf("done %d of %d tasks, current: %dB * %d on %s", 191 taskIdx, len(tasks), chunkBytes, contiguousChunks, prefix.Path) 192 case <-cancelled: 193 break 194 } 195 } 196 }() 197 defer close(cancelled) 198 199 dst := make([]byte, r.MaxReadBytes) 200 for taskIdx, c := range tasks { 201 atomic.StoreInt32(¤tTaskIdx, int32(taskIdx)) 202 203 chunkBytes := r.ChunkBytes[c.chunkBytesIdx] 204 contiguousChunks := r.ContiguousChunks[c.contiguousChunksIdx] 205 206 // Vary read locations non-reproducibly to try to spread load and avoid S3 throttling. 207 // There's a tradeoff here: we're also likely introducing variance in benchmark results 208 // if S3 read performance varies between objects and over time, which it probably does [1]. 209 // For now, empirically, it seems like throttling is the bigger problem, especially because 210 // our benchmark runs are relatively brief (compared to large batch workloads) and thus 211 // significantly affected by some throttling. We may revisit this in the future if a 212 // different choice helps make the benchmark a better guide for optimization. 213 // 214 // [1] https://web.archive.org/web/20221220192142/https://docs.aws.amazon.com/AmazonS3/latest/userguide/optimizing-performance.html 215 f := files[c.prefixIdx][ephemeralRandom.Intn(len(pathSuffixes))] 216 offset := ephemeralRandom.Int63n(f.Info.Size() - int64(chunkBytes*contiguousChunks) + 1) 217 218 parIdx := 0 219 start := time.Now() 220 func() { 221 var ( 222 traverser traverse.T 223 chunks = make([]struct { 224 r io.Reader 225 dst []byte 226 }, contiguousChunks) 227 ) 228 if c.parallel { 229 parIdx = 1 230 for i := range chunks { 231 chunkOffset := i * chunkBytes 232 rc := f.OffsetReader(offset + int64(chunkOffset)) 233 defer func() { must.Nil(rc.Close(ctx)) }() 234 chunks[i].r = ioctx.ToStdReader(ctx, rc) 235 chunks[i].dst = dst[chunkOffset : chunkOffset+chunkBytes] 236 } 237 } else { 238 traverser.Limit = 1 239 rc := ioctx.ToStdReadCloser(ctx, f.OffsetReader(offset)) 240 defer func() { must.Nil(rc.Close()) }() 241 for i := range chunks { 242 chunks[i].r = rc 243 chunks[i].dst = dst[:chunkBytes] 244 } 245 } 246 _ = traverser.Each(contiguousChunks, func(i int) error { 247 n, err := io.ReadFull(chunks[i].r, chunks[i].dst) 248 must.Nil(err) 249 must.True(n == chunkBytes) 250 return nil 251 }) 252 }() 253 elapsed := time.Since(start) 254 255 results[c.prefixIdx][c.chunkBytesIdx][c.contiguousChunksIdx][parIdx].totalBytes += chunkBytes * contiguousChunks 256 results[c.prefixIdx][c.chunkBytesIdx][c.contiguousChunksIdx][parIdx].totalTime += elapsed 257 } 258 259 tw := tabwriter.NewWriter(out, 0, 4, 4, ' ', 0) 260 mustPrintf := func(format string, args ...interface{}) { 261 _, err := fmt.Fprintf(tw, format, args...) 262 must.Nil(err) 263 } 264 mustPrintf("\t") 265 for _, prefix := range pathPrefixes { 266 mustPrintf("%s%s", prefix.Path, strings.Repeat("\t", 2*len(r.ContiguousChunks))) 267 } 268 mustPrintf("\n") 269 for range files { 270 for _, parLabel := range []string{"", "p"} { 271 for _, contiguousChunks := range r.ContiguousChunks { 272 mustPrintf("\t%s%d", parLabel, contiguousChunks) 273 } 274 } 275 } 276 mustPrintf("\n") 277 for chunkBytesIdx, chunkBytes := range r.ChunkBytes { 278 mustPrintf("%d", chunkBytes/(1<<20)) 279 for prefixIdx := range files { 280 for _, parIdx := range []int{0, 1} { 281 for contiguousChunksIdx := range r.ContiguousChunks { 282 s := results[prefixIdx][chunkBytesIdx][contiguousChunksIdx][parIdx] 283 mustPrintf("\t") 284 if s.totalTime > 0 { 285 mibs := float64(s.totalBytes) / s.totalTime.Seconds() / float64(1<<20) 286 mustPrintf("%.f", mibs) 287 } 288 } 289 } 290 } 291 mustPrintf("\n") 292 } 293 must.Nil(tw.Flush()) 294 } 295 296 func maxInts(ints []int) int { 297 if len(ints) == 0 { 298 return 0 // OK for our purposes. 299 } 300 max := ints[0] 301 for _, i := range ints[1:] { 302 if i > max { 303 max = i 304 } 305 } 306 return max 307 }