github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/bench/tools/frandread/frandread.go (about)

     1  // Package frandread is a file-reading benchmark that makes a special effort to visit the files randomly and equally.
     2  /*
     3   * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved.
     4   */
     5  package main
     6  
     7  import (
     8  	"bufio"
     9  	"flag"
    10  	"fmt"
    11  	"io"
    12  	"math"
    13  	"math/rand"
    14  	"os"
    15  	"os/signal"
    16  	"path/filepath"
    17  	"strings"
    18  	"sync"
    19  	"sync/atomic"
    20  	"syscall"
    21  	"time"
    22  
    23  	"github.com/NVIDIA/aistore/cmn/mono"
    24  )
    25  
    26  type (
    27  	cliVars struct {
    28  		fileList   string        // name of the file that contains filenames to read
    29  		dirs       string        // comma-separated list of directories to read
    30  		pattern    string        // filename matching wildcard; used when reading directories ind gnored with -l
    31  		pctPut     int           // percentage of PUTs in the generated workload
    32  		minSize    int           // minimum size of the object which will be created during PUT
    33  		maxSize    int           // maximum size of the object which will be created during PUT
    34  		seed       int64         // random seed; current time (nanoseconds) if omitted
    35  		maxTime    time.Duration // max time to run (run forever if both -t and -e (epochs) are not defined
    36  		numWorkers int           // number of concurrently reading goroutines (workers)
    37  		numEpochs  uint          // number of "epochs" whereby each epoch entails a full random pass through all filenames
    38  		verbose    bool          // super-verbose output
    39  		usage      bool          // print usage and exit
    40  	}
    41  	statsVars struct {
    42  		sizeEpoch int64
    43  		sizeTotal int64
    44  		timeTotal time.Duration
    45  	}
    46  
    47  	bench struct {
    48  		sema chan struct{}
    49  		wg   *sync.WaitGroup
    50  		rnd  *rand.Rand
    51  
    52  		fileNames []string
    53  		perm      []int
    54  	}
    55  )
    56  
    57  var (
    58  	cliv  = &cliVars{}
    59  	stats = &statsVars{}
    60  )
    61  
    62  func main() {
    63  	flag.StringVar(&cliv.fileList, "l", "files.txt", "name of the file that lists filenames to read")
    64  	flag.StringVar(&cliv.dirs, "d", "", "comma-separated list of directories to read (an alternative to list (-l) option)")
    65  	flag.StringVar(&cliv.pattern, "p", "", "filename matching wildcard when reading directories (ignored when -l is used)")
    66  	flag.IntVar(&cliv.pctPut, "pctput", 0, "percentage of PUTs in the generated workload")
    67  	flag.IntVar(&cliv.minSize, "minsize", 1024, "minimum size of the object which will be created during PUT")
    68  	flag.IntVar(&cliv.maxSize, "maxsize", 10*1024*1024, "maximum size of the object which will be created during PUT")
    69  	flag.Int64Var(&cliv.seed, "s", 0, "random seed; current time (nanoseconds) if omitted")
    70  	flag.DurationVar(&cliv.maxTime, "t", 0, "max time to run (run forever if both -t and -e (epochs) are not defined)")
    71  	flag.IntVar(&cliv.numWorkers, "w", 8, "number of concurrently reading goroutines (workers)")
    72  	flag.UintVar(&cliv.numEpochs, "e", 0, "number of \"epochs\" to run whereby each epoch entails a full random pass through all filenames")
    73  	flag.BoolVar(&cliv.verbose, "v", false, "verbose output")
    74  	flag.BoolVar(&cliv.usage, "h", false, "print usage and exit")
    75  	flag.Parse()
    76  
    77  	if cliv.usage || len(os.Args[1:]) == 0 {
    78  		flag.Usage()
    79  		fmt.Println("Build:")
    80  		fmt.Println("\tgo install frandread.go")
    81  		fmt.Println("Examples:")
    82  		fmt.Printf("\tfrandread -h\t\t\t\t\t- show usage\n")
    83  		fmt.Printf("\tfrandread -d /tmp/work -t 10m\t\t\t- read from /tmp/work, run for 10 minutes\n")
    84  		fmt.Printf("\tfrandread -d /tmp/work -v -t 10m -p *.tgz\t- filter by tgz extension\n")
    85  		fmt.Printf("\tfrandread -d /tmp/a,/tmp/work/b -e 999\t\t- read two directories, run for 999 epochs\n")
    86  		fmt.Printf("\tfrandread -d ~/smth -pctput 1\t\t\t- put files into ~/smth directory")
    87  		fmt.Println()
    88  		os.Exit(0)
    89  	}
    90  	if cliv.fileList != "files.txt" && cliv.dirs != "" {
    91  		panic("invalid command-line: -l and -d cannot be used together")
    92  	}
    93  	if cliv.numEpochs == 0 {
    94  		cliv.numEpochs = math.MaxUint32
    95  	}
    96  
    97  	sigCh := make(chan os.Signal, 1)
    98  	signal.Notify(sigCh, syscall.SIGHUP)
    99  
   100  	fileNames := make([]string, 0, 1024)
   101  	if cliv.pctPut == 0 {
   102  		// 1. open and read prepared list of file names
   103  		if cliv.dirs != "" {
   104  			dirs := strings.Split(cliv.dirs, ",")
   105  			for _, dir := range dirs {
   106  				fileNames = fileNamesFromDir(dir, fileNames)
   107  			}
   108  		} else {
   109  			fileNames = fileNamesFromList(fileNames)
   110  		}
   111  	} else {
   112  		if cliv.dirs == "" {
   113  			panic("In PUT mode one needs to specify directory to which files will be written")
   114  		}
   115  
   116  		for range 1024 {
   117  			fileNames = append(fileNames, randString(10))
   118  		}
   119  	}
   120  
   121  	// 2. read them all at a given concurrency
   122  	now := time.Now()
   123  	fmt.Printf("Starting to run: %d filenames, %d workers\n", len(fileNames), cliv.numWorkers)
   124  
   125  	b := newBench(fileNames)
   126  
   127  	en := uint(1)
   128  ml:
   129  	for ; en <= cliv.numEpochs; en++ {
   130  		b.reset()
   131  		started := time.Now()
   132  		b.epoch()
   133  
   134  		epochWritten := atomic.LoadInt64(&stats.sizeEpoch)
   135  		stats.sizeTotal += epochWritten
   136  		epochTime := time.Since(started)
   137  		stats.timeTotal += epochTime
   138  
   139  		b.cleanup()
   140  
   141  		sthr := formatThroughput(epochWritten, epochTime)
   142  		fmt.Printf("Epoch #%d:\t%s\n", en, sthr)
   143  
   144  		if cliv.maxTime != 0 {
   145  			if time.Since(now) > cliv.maxTime {
   146  				break
   147  			}
   148  		}
   149  		select {
   150  		case <-sigCh:
   151  			break ml
   152  		default:
   153  			break
   154  		}
   155  	}
   156  	elapsed := time.Since(now)
   157  	sthr := formatThroughput(stats.sizeTotal, stats.timeTotal) // total-bytes / total-effective-time
   158  	fmt.Println("ok", elapsed)
   159  	fmt.Printf("%-12s%-18s%-30s\n", "Epochs", "Time", "Average Throughput")
   160  	fmt.Printf("%-12d%-18v%-30s\n", en, stats.timeTotal, sthr)
   161  }
   162  
   163  func newBench(fileNames []string) *bench {
   164  	if cliv.seed == 0 {
   165  		cliv.seed = mono.NanoTime()
   166  	}
   167  	rnd := rand.New(rand.NewSource(cliv.seed))
   168  	return &bench{
   169  		rnd:  rnd,
   170  		sema: make(chan struct{}, cliv.numWorkers),
   171  		wg:   &sync.WaitGroup{},
   172  
   173  		fileNames: fileNames,
   174  	}
   175  }
   176  
   177  func formatThroughput(bytes int64, duration time.Duration) (sthr string) {
   178  	var (
   179  		gbs    float64
   180  		mbs    = float64(bytes) / 1024 / 1024
   181  		suffix = "MiB/s"
   182  		thr    = mbs * float64(time.Second) / float64(duration)
   183  	)
   184  	if duration == 0 {
   185  		return "-"
   186  	}
   187  	if thr > 1024 {
   188  		gbs = float64(bytes) / 1024 / 1024 / 1024
   189  		suffix = "GiB/s"
   190  		thr = gbs * float64(time.Second) / float64(duration)
   191  	}
   192  	sthr = fmt.Sprintf("%.3f%s", thr, suffix)
   193  	return
   194  }
   195  
   196  func (b *bench) reset() {
   197  	b.perm = b.rnd.Perm(len(b.fileNames))
   198  }
   199  
   200  func (b *bench) epoch() {
   201  	atomic.StoreInt64(&stats.sizeEpoch, 0)
   202  	for _, idx := range b.perm {
   203  		fname := b.fileNames[idx]
   204  		b.wg.Add(1)
   205  		b.sema <- struct{}{}
   206  		go func(fname string) {
   207  			defer func() {
   208  				<-b.sema
   209  				b.wg.Done()
   210  			}()
   211  
   212  			if cliv.pctPut > 0 {
   213  				// PUT
   214  				f, err := os.Create(filepath.Join(cliv.dirs, fname))
   215  				if err != nil {
   216  					panic(err)
   217  				}
   218  
   219  				size := b.rnd.Intn(cliv.maxSize-cliv.minSize) + cliv.minSize
   220  				r := io.LimitReader(&nopReadCloser{}, int64(size))
   221  				written, err := io.Copy(f, r)
   222  				if err != nil {
   223  					panic(err)
   224  				}
   225  				atomic.AddInt64(&stats.sizeEpoch, written)
   226  
   227  				f.Close()
   228  			} else {
   229  				// GET
   230  				f, err := os.Open(fname)
   231  				if err != nil {
   232  					panic(err)
   233  				}
   234  				read, err := io.Copy(io.Discard, f) // drain the reader
   235  				if err != nil {
   236  					panic(err)
   237  				}
   238  				atomic.AddInt64(&stats.sizeEpoch, read)
   239  				f.Close()
   240  			}
   241  
   242  			if cliv.verbose {
   243  				fmt.Println("\t", fname)
   244  			}
   245  		}(fname)
   246  	}
   247  
   248  	b.wg.Wait()
   249  }
   250  
   251  func (b *bench) cleanup() {
   252  	if cliv.pctPut > 0 {
   253  		for _, fname := range b.fileNames {
   254  			os.Remove(filepath.Join(cliv.dirs, fname))
   255  		}
   256  	}
   257  }
   258  
   259  func fileNamesFromList(fileNames []string) []string {
   260  	list, err := os.Open(cliv.fileList)
   261  	if err != nil {
   262  		panic(err)
   263  	}
   264  	scanner := bufio.NewScanner(list)
   265  	for scanner.Scan() {
   266  		fileNames = append(fileNames, scanner.Text())
   267  	}
   268  	list.Close()
   269  	return fileNames
   270  }
   271  
   272  func fileNamesFromDir(dir string, fileNames []string) []string {
   273  	dentries, err := os.ReadDir(dir)
   274  	if err != nil {
   275  		panic(err)
   276  	}
   277  	for _, dent := range dentries {
   278  		if dent.IsDir() || !dent.Type().IsRegular() {
   279  			continue
   280  		}
   281  		if cliv.pattern != "" {
   282  			if matched, _ := filepath.Match(cliv.pattern, filepath.Base(dent.Name())); !matched {
   283  				continue
   284  			}
   285  		}
   286  		fname := filepath.Join(dir, dent.Name())
   287  		fileNames = append(fileNames, fname)
   288  	}
   289  	return fileNames
   290  }
   291  
   292  func randString(n int) string {
   293  	letterRunes := []rune("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")
   294  	b := make([]rune, n)
   295  	for i := range b {
   296  		b[i] = letterRunes[rand.Intn(len(letterRunes))]
   297  	}
   298  	return string(b)
   299  }
   300  
   301  type nopReadCloser struct{}
   302  
   303  func (*nopReadCloser) Read(p []byte) (n int, err error) { return len(p), nil }
   304  func (*nopReadCloser) Close() error                     { return nil }