github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/bench/tools/frandread/frandread.go (about) 1 // Package frandread is a file-reading benchmark that makes a special effort to visit the files randomly and equally. 2 /* 3 * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved. 4 */ 5 package main 6 7 import ( 8 "bufio" 9 "flag" 10 "fmt" 11 "io" 12 "math" 13 "math/rand" 14 "os" 15 "os/signal" 16 "path/filepath" 17 "strings" 18 "sync" 19 "sync/atomic" 20 "syscall" 21 "time" 22 23 "github.com/NVIDIA/aistore/cmn/mono" 24 ) 25 26 type ( 27 cliVars struct { 28 fileList string // name of the file that contains filenames to read 29 dirs string // comma-separated list of directories to read 30 pattern string // filename matching wildcard; used when reading directories ind gnored with -l 31 pctPut int // percentage of PUTs in the generated workload 32 minSize int // minimum size of the object which will be created during PUT 33 maxSize int // maximum size of the object which will be created during PUT 34 seed int64 // random seed; current time (nanoseconds) if omitted 35 maxTime time.Duration // max time to run (run forever if both -t and -e (epochs) are not defined 36 numWorkers int // number of concurrently reading goroutines (workers) 37 numEpochs uint // number of "epochs" whereby each epoch entails a full random pass through all filenames 38 verbose bool // super-verbose output 39 usage bool // print usage and exit 40 } 41 statsVars struct { 42 sizeEpoch int64 43 sizeTotal int64 44 timeTotal time.Duration 45 } 46 47 bench struct { 48 sema chan struct{} 49 wg *sync.WaitGroup 50 rnd *rand.Rand 51 52 fileNames []string 53 perm []int 54 } 55 ) 56 57 var ( 58 cliv = &cliVars{} 59 stats = &statsVars{} 60 ) 61 62 func main() { 63 flag.StringVar(&cliv.fileList, "l", "files.txt", "name of the file that lists filenames to read") 64 flag.StringVar(&cliv.dirs, "d", "", "comma-separated list of directories to read (an alternative to list (-l) option)") 65 flag.StringVar(&cliv.pattern, "p", "", "filename matching wildcard when reading directories (ignored when -l is used)") 66 flag.IntVar(&cliv.pctPut, "pctput", 0, "percentage of PUTs in the generated workload") 67 flag.IntVar(&cliv.minSize, "minsize", 1024, "minimum size of the object which will be created during PUT") 68 flag.IntVar(&cliv.maxSize, "maxsize", 10*1024*1024, "maximum size of the object which will be created during PUT") 69 flag.Int64Var(&cliv.seed, "s", 0, "random seed; current time (nanoseconds) if omitted") 70 flag.DurationVar(&cliv.maxTime, "t", 0, "max time to run (run forever if both -t and -e (epochs) are not defined)") 71 flag.IntVar(&cliv.numWorkers, "w", 8, "number of concurrently reading goroutines (workers)") 72 flag.UintVar(&cliv.numEpochs, "e", 0, "number of \"epochs\" to run whereby each epoch entails a full random pass through all filenames") 73 flag.BoolVar(&cliv.verbose, "v", false, "verbose output") 74 flag.BoolVar(&cliv.usage, "h", false, "print usage and exit") 75 flag.Parse() 76 77 if cliv.usage || len(os.Args[1:]) == 0 { 78 flag.Usage() 79 fmt.Println("Build:") 80 fmt.Println("\tgo install frandread.go") 81 fmt.Println("Examples:") 82 fmt.Printf("\tfrandread -h\t\t\t\t\t- show usage\n") 83 fmt.Printf("\tfrandread -d /tmp/work -t 10m\t\t\t- read from /tmp/work, run for 10 minutes\n") 84 fmt.Printf("\tfrandread -d /tmp/work -v -t 10m -p *.tgz\t- filter by tgz extension\n") 85 fmt.Printf("\tfrandread -d /tmp/a,/tmp/work/b -e 999\t\t- read two directories, run for 999 epochs\n") 86 fmt.Printf("\tfrandread -d ~/smth -pctput 1\t\t\t- put files into ~/smth directory") 87 fmt.Println() 88 os.Exit(0) 89 } 90 if cliv.fileList != "files.txt" && cliv.dirs != "" { 91 panic("invalid command-line: -l and -d cannot be used together") 92 } 93 if cliv.numEpochs == 0 { 94 cliv.numEpochs = math.MaxUint32 95 } 96 97 sigCh := make(chan os.Signal, 1) 98 signal.Notify(sigCh, syscall.SIGHUP) 99 100 fileNames := make([]string, 0, 1024) 101 if cliv.pctPut == 0 { 102 // 1. open and read prepared list of file names 103 if cliv.dirs != "" { 104 dirs := strings.Split(cliv.dirs, ",") 105 for _, dir := range dirs { 106 fileNames = fileNamesFromDir(dir, fileNames) 107 } 108 } else { 109 fileNames = fileNamesFromList(fileNames) 110 } 111 } else { 112 if cliv.dirs == "" { 113 panic("In PUT mode one needs to specify directory to which files will be written") 114 } 115 116 for range 1024 { 117 fileNames = append(fileNames, randString(10)) 118 } 119 } 120 121 // 2. read them all at a given concurrency 122 now := time.Now() 123 fmt.Printf("Starting to run: %d filenames, %d workers\n", len(fileNames), cliv.numWorkers) 124 125 b := newBench(fileNames) 126 127 en := uint(1) 128 ml: 129 for ; en <= cliv.numEpochs; en++ { 130 b.reset() 131 started := time.Now() 132 b.epoch() 133 134 epochWritten := atomic.LoadInt64(&stats.sizeEpoch) 135 stats.sizeTotal += epochWritten 136 epochTime := time.Since(started) 137 stats.timeTotal += epochTime 138 139 b.cleanup() 140 141 sthr := formatThroughput(epochWritten, epochTime) 142 fmt.Printf("Epoch #%d:\t%s\n", en, sthr) 143 144 if cliv.maxTime != 0 { 145 if time.Since(now) > cliv.maxTime { 146 break 147 } 148 } 149 select { 150 case <-sigCh: 151 break ml 152 default: 153 break 154 } 155 } 156 elapsed := time.Since(now) 157 sthr := formatThroughput(stats.sizeTotal, stats.timeTotal) // total-bytes / total-effective-time 158 fmt.Println("ok", elapsed) 159 fmt.Printf("%-12s%-18s%-30s\n", "Epochs", "Time", "Average Throughput") 160 fmt.Printf("%-12d%-18v%-30s\n", en, stats.timeTotal, sthr) 161 } 162 163 func newBench(fileNames []string) *bench { 164 if cliv.seed == 0 { 165 cliv.seed = mono.NanoTime() 166 } 167 rnd := rand.New(rand.NewSource(cliv.seed)) 168 return &bench{ 169 rnd: rnd, 170 sema: make(chan struct{}, cliv.numWorkers), 171 wg: &sync.WaitGroup{}, 172 173 fileNames: fileNames, 174 } 175 } 176 177 func formatThroughput(bytes int64, duration time.Duration) (sthr string) { 178 var ( 179 gbs float64 180 mbs = float64(bytes) / 1024 / 1024 181 suffix = "MiB/s" 182 thr = mbs * float64(time.Second) / float64(duration) 183 ) 184 if duration == 0 { 185 return "-" 186 } 187 if thr > 1024 { 188 gbs = float64(bytes) / 1024 / 1024 / 1024 189 suffix = "GiB/s" 190 thr = gbs * float64(time.Second) / float64(duration) 191 } 192 sthr = fmt.Sprintf("%.3f%s", thr, suffix) 193 return 194 } 195 196 func (b *bench) reset() { 197 b.perm = b.rnd.Perm(len(b.fileNames)) 198 } 199 200 func (b *bench) epoch() { 201 atomic.StoreInt64(&stats.sizeEpoch, 0) 202 for _, idx := range b.perm { 203 fname := b.fileNames[idx] 204 b.wg.Add(1) 205 b.sema <- struct{}{} 206 go func(fname string) { 207 defer func() { 208 <-b.sema 209 b.wg.Done() 210 }() 211 212 if cliv.pctPut > 0 { 213 // PUT 214 f, err := os.Create(filepath.Join(cliv.dirs, fname)) 215 if err != nil { 216 panic(err) 217 } 218 219 size := b.rnd.Intn(cliv.maxSize-cliv.minSize) + cliv.minSize 220 r := io.LimitReader(&nopReadCloser{}, int64(size)) 221 written, err := io.Copy(f, r) 222 if err != nil { 223 panic(err) 224 } 225 atomic.AddInt64(&stats.sizeEpoch, written) 226 227 f.Close() 228 } else { 229 // GET 230 f, err := os.Open(fname) 231 if err != nil { 232 panic(err) 233 } 234 read, err := io.Copy(io.Discard, f) // drain the reader 235 if err != nil { 236 panic(err) 237 } 238 atomic.AddInt64(&stats.sizeEpoch, read) 239 f.Close() 240 } 241 242 if cliv.verbose { 243 fmt.Println("\t", fname) 244 } 245 }(fname) 246 } 247 248 b.wg.Wait() 249 } 250 251 func (b *bench) cleanup() { 252 if cliv.pctPut > 0 { 253 for _, fname := range b.fileNames { 254 os.Remove(filepath.Join(cliv.dirs, fname)) 255 } 256 } 257 } 258 259 func fileNamesFromList(fileNames []string) []string { 260 list, err := os.Open(cliv.fileList) 261 if err != nil { 262 panic(err) 263 } 264 scanner := bufio.NewScanner(list) 265 for scanner.Scan() { 266 fileNames = append(fileNames, scanner.Text()) 267 } 268 list.Close() 269 return fileNames 270 } 271 272 func fileNamesFromDir(dir string, fileNames []string) []string { 273 dentries, err := os.ReadDir(dir) 274 if err != nil { 275 panic(err) 276 } 277 for _, dent := range dentries { 278 if dent.IsDir() || !dent.Type().IsRegular() { 279 continue 280 } 281 if cliv.pattern != "" { 282 if matched, _ := filepath.Match(cliv.pattern, filepath.Base(dent.Name())); !matched { 283 continue 284 } 285 } 286 fname := filepath.Join(dir, dent.Name()) 287 fileNames = append(fileNames, fname) 288 } 289 return fileNames 290 } 291 292 func randString(n int) string { 293 letterRunes := []rune("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ") 294 b := make([]rune, n) 295 for i := range b { 296 b[i] = letterRunes[rand.Intn(len(letterRunes))] 297 } 298 return string(b) 299 } 300 301 type nopReadCloser struct{} 302 303 func (*nopReadCloser) Read(p []byte) (n int, err error) { return len(p), nil } 304 func (*nopReadCloser) Close() error { return nil }