github.com/petermattis/pebble@v0.0.0-20190905164901-ab51a2166067/cmd/pebble/ycsb.go (about)

     1  // Copyright 2019 The LevelDB-Go and Pebble Authors. All rights reserved. Use
     2  // of this source code is governed by a BSD-style license that can be found in
     3  // the LICENSE file.
     4  
     5  package main
     6  
     7  import (
     8  	"context"
     9  	"fmt"
    10  	"log"
    11  	"strconv"
    12  	"strings"
    13  	"sync"
    14  	"sync/atomic"
    15  	"time"
    16  
    17  	"github.com/petermattis/pebble"
    18  	"github.com/petermattis/pebble/internal/ackseq"
    19  	"github.com/petermattis/pebble/internal/randvar"
    20  	"github.com/petermattis/pebble/internal/rate"
    21  	"github.com/spf13/cobra"
    22  	"golang.org/x/exp/rand"
    23  )
    24  
// Operation identifiers for the YCSB benchmark. The values double as indexes
// into ycsbWeights and into the per-operation arrays on the ycsb struct.
const (
	// ycsbInsert writes a batch of newly allocated keys.
	ycsbInsert = iota
	// ycsbRead seeks to a single key.
	ycsbRead
	// ycsbScan performs a forward scan.
	ycsbScan
	// ycsbReverseScan performs a reverse scan.
	ycsbReverseScan
	// ycsbUpdate overwrites a batch of keys drawn from the key distribution.
	ycsbUpdate
	// ycsbNumOps is the number of operation kinds; it is not an operation.
	ycsbNumOps
)
    33  
// ycsbConfig holds the values of the ycsb command-line flags.
var ycsbConfig struct {
	// batch is the --batch spec: batch size distribution.
	batch string
	// keys is the --keys value: "latest", "uniform", or "zipf".
	keys string
	// initialKeys is the number of keys inserted before the workload begins.
	initialKeys int
	// prepopulatedKeys is the number of keys previously inserted into the
	// database.
	prepopulatedKeys int
	// numOps is the maximum number of operations; 0 means unlimited.
	numOps uint64
	// scans is the --scans spec: scan length distribution.
	scans string
	// values is the --values spec: value size distribution with an optional
	// target compression suffix.
	values string
	// workload is the --workload value: a standard workload (A-F) or an
	// op=weight spec.
	workload string
}
    44  
    45  var ycsbCmd = &cobra.Command{
    46  	Use:   "ycsb <dir>",
    47  	Short: "run customizable YCSB benchmark",
    48  	Long: `
    49  Run a customizable YCSB workload. The workload is specified by the --workload
    50  flag which can take either one of the standard workload mixes (A-F), or
    51  customizable workload fixes specified as a command separated list of op=weight
    52  pairs. For example, --workload=read=50,update=50 performs a workload composed
    53  of 50% reads and 50% updates. This is identical to the standard workload A.
    54  
    55  The --batch, --scans, and --values flags take the specification for a random
    56  variable: [<type>:]<min>[-<max>]. The <type> parameter must be one of "uniform"
    57  or "zipf". If <type> is omitted, a uniform distribution is used. If <max> is
    58  omitted it is set to the same value as <min>. The specification "1000" results
    59  in a constant 1000. The specification "10-100" results in a uniformly random
    60  variable in the range [10,100). The specification "zipf(10,100)" results in a
    61  zipf distribution with a minimum value of 10 and a maximum value of 100.
    62  
    63  The --batch flag controls the size of batches used for insert and update
    64  operations. The --scans flag controls the number of iterations performed by a
    65  scan operation. Read operations always read a single key.
    66  
    67  The --values flag provides for an optional "/<target-compression-ratio>"
    68  suffix. The default target compression ratio is 1.0 (i.e. incompressible random
    69  data). A value of 2 will cause random data to be generated that should compress
    70  to 50% of its uncompressed size.
    71  
    72  Standard workloads:
    73  
    74    A:  50% reads   /  50% updates
    75    B:  95% reads   /   5% updates
    76    C: 100% reads
    77    D:  95% reads   /   5% inserts
    78    E:  95% scans   /   5% inserts
    79    F: 100% inserts
    80  `,
    81  	Args: cobra.ExactArgs(1),
    82  	RunE: runYcsb,
    83  }
    84  
    85  func init() {
    86  	ycsbCmd.Flags().StringVar(
    87  		&ycsbConfig.batch, "batch", "1",
    88  		"batch size distribution [{zipf,uniform}:]min[-max]")
    89  	ycsbCmd.Flags().StringVar(
    90  		&ycsbConfig.keys, "keys", "zipf", "latest, uniform, or zipf")
    91  	ycsbCmd.Flags().IntVar(
    92  		&ycsbConfig.initialKeys, "initial-keys", 10000,
    93  		"initial number of keys to insert before beginning workload")
    94  	ycsbCmd.Flags().IntVar(
    95  		&ycsbConfig.prepopulatedKeys, "prepopulated-keys", 0,
    96  		"number of keys that were previously inserted into the database")
    97  	ycsbCmd.Flags().Uint64VarP(
    98  		&ycsbConfig.numOps, "num-ops", "n", 0,
    99  		"maximum number of operations (0 means unlimited)")
   100  	ycsbCmd.Flags().StringVar(
   101  		&ycsbConfig.scans, "scans", "zipf:1-1000",
   102  		"scan length distribution [{zipf,uniform}:]min[-max]")
   103  	ycsbCmd.Flags().StringVar(
   104  		&ycsbConfig.workload, "workload", "B",
   105  		"workload type (A-F) or spec (read=X,update=Y,...)")
   106  	ycsbCmd.Flags().StringVar(
   107  		&ycsbConfig.values, "values", "1000",
   108  		"value size distribution [{zipf,uniform}:]min[-max][/<target-compression>]")
   109  }
   110  
   111  type ycsbWeights []float64
   112  
   113  func (w ycsbWeights) get(i int) float64 {
   114  	if i >= len(w) {
   115  		return 0
   116  	}
   117  	return w[i]
   118  }
   119  
   120  var ycsbWorkloads = map[string]ycsbWeights{
   121  	"A": ycsbWeights{
   122  		ycsbRead:   0.5,
   123  		ycsbUpdate: 0.5,
   124  	},
   125  	"B": ycsbWeights{
   126  		ycsbRead:   0.95,
   127  		ycsbUpdate: 0.05,
   128  	},
   129  	"C": ycsbWeights{
   130  		ycsbRead: 1.0,
   131  	},
   132  	"D": ycsbWeights{
   133  		ycsbInsert: 0.05,
   134  		ycsbRead:   0.95,
   135  		// TODO(peter): default to skewed-latest distribution.
   136  	},
   137  	"E": ycsbWeights{
   138  		ycsbInsert: 0.05,
   139  		ycsbScan:   0.95,
   140  	},
   141  	"F": ycsbWeights{
   142  		ycsbInsert: 1.0,
   143  		// TODO(peter): the real workload is read-modify-write.
   144  	},
   145  }
   146  
   147  func ycsbParseWorkload(w string) (ycsbWeights, error) {
   148  	if weights := ycsbWorkloads[w]; weights != nil {
   149  		return weights, nil
   150  	}
   151  	iWeights := make([]int, 4)
   152  	for _, p := range strings.Split(w, ",") {
   153  		parts := strings.Split(p, "=")
   154  		if len(parts) != 2 {
   155  			return nil, fmt.Errorf("malformed weights: %s", w)
   156  		}
   157  		weight, err := strconv.Atoi(parts[1])
   158  		if err != nil {
   159  			return nil, err
   160  		}
   161  		switch parts[0] {
   162  		case "insert":
   163  			iWeights[ycsbInsert] = weight
   164  		case "read":
   165  			iWeights[ycsbRead] = weight
   166  		case "scan":
   167  			iWeights[ycsbScan] = weight
   168  		case "rscan":
   169  			iWeights[ycsbReverseScan] = weight
   170  		case "update":
   171  			iWeights[ycsbUpdate] = weight
   172  		}
   173  	}
   174  
   175  	var sum int
   176  	for _, w := range iWeights {
   177  		sum += w
   178  	}
   179  	if sum == 0 {
   180  		return nil, fmt.Errorf("zero weight specified: %s", w)
   181  	}
   182  
   183  	weights := make(ycsbWeights, 4)
   184  	for i := range weights {
   185  		weights[i] = float64(iWeights[i]) / float64(sum)
   186  	}
   187  	return weights, nil
   188  }
   189  
   190  func ycsbParseKeyDist(d string) (randvar.Dynamic, error) {
   191  	totalKeys := uint64(ycsbConfig.initialKeys + ycsbConfig.prepopulatedKeys)
   192  	switch strings.ToLower(d) {
   193  	case "latest":
   194  		return randvar.NewDefaultSkewedLatest(nil)
   195  	case "uniform":
   196  		return randvar.NewUniform(nil, 1, totalKeys), nil
   197  	case "zipf":
   198  		return randvar.NewZipf(nil, 1, totalKeys, 0.99)
   199  	default:
   200  		return nil, fmt.Errorf("unknown distribution: %s", d)
   201  	}
   202  }
   203  
   204  func runYcsb(cmd *cobra.Command, args []string) error {
   205  	if wipe && ycsbConfig.prepopulatedKeys > 0 {
   206  		return fmt.Errorf("--wipe and --prepopulated-keys both specified which is nonsensical")
   207  	}
   208  
   209  	weights, err := ycsbParseWorkload(ycsbConfig.workload)
   210  	if err != nil {
   211  		return err
   212  	}
   213  
   214  	keyDist, err := ycsbParseKeyDist(ycsbConfig.keys)
   215  	if err != nil {
   216  		return err
   217  	}
   218  
   219  	batchDist, err := parseRandVarSpec(ycsbConfig.batch)
   220  	if err != nil {
   221  		return err
   222  	}
   223  
   224  	scanDist, err := parseRandVarSpec(ycsbConfig.scans)
   225  	if err != nil {
   226  		return err
   227  	}
   228  
   229  	valueDist, targetCompression, err := parseValuesSpec(ycsbConfig.values)
   230  	if err != nil {
   231  		return err
   232  	}
   233  
   234  	y := newYcsb(weights, keyDist, batchDist, scanDist, valueDist, targetCompression)
   235  	runTest(args[0], test{
   236  		init: y.init,
   237  		tick: y.tick,
   238  		done: y.done,
   239  	})
   240  	return nil
   241  }
   242  
// ycsb holds the state of a running YCSB benchmark. It is created by newYcsb
// and shared by all of the worker goroutines started from init.
type ycsb struct {
	// writeOpts is passed to every batch Commit. newYcsb sets it to
	// pebble.Sync, or pebble.NoSync when disableWAL is set.
	writeOpts *pebble.WriteOptions
	// reg is the registry holding the per-operation latency histograms.
	reg *histogramRegistry
	// ops is the weighted random variable run uses to pick the next operation.
	ops *randvar.Weighted
	// keyDist generates the key numbers used by nextReadKey; insert raises
	// its maximum as inserted keys are acknowledged.
	keyDist randvar.Dynamic
	// batchDist generates the number of keys per insert or update batch.
	batchDist randvar.Static
	// scanDist generates the count passed to db.Scan.
	scanDist randvar.Static
	// valueDist generates the length of each generated value.
	valueDist randvar.Static
	// targetCompression is passed to randomBlock when generating values.
	targetCompression float64
	// keyNum hands out key numbers to insert and tracks their
	// acknowledgement. It is created in init.
	keyNum *ackseq.S
	// numOps counts completed operations. It is updated atomically, and only
	// when ycsbConfig.numOps is non-zero.
	numOps uint64
	// numKeys counts keys processed, indexed by operation. It is updated
	// atomically.
	numKeys [ycsbNumOps]uint64
	// prevNumKeys holds the numKeys values seen by the previous tick so that
	// tick can report a per-interval rate.
	prevNumKeys [ycsbNumOps]uint64
	// opsMap maps a registered histogram name back to its operation index.
	opsMap map[string]int
	// latency holds the histogram for each operation, indexed by operation.
	// Entries for operations with zero weight are nil.
	latency [ycsbNumOps]*namedHistogram
	// limiter is waited on before each operation. It is created in init.
	limiter *rate.Limiter
}
   260  
   261  func newYcsb(
   262  	weights ycsbWeights,
   263  	keyDist randvar.Dynamic,
   264  	batchDist, scanDist, valueDist randvar.Static,
   265  	targetCompression float64,
   266  ) *ycsb {
   267  	y := &ycsb{
   268  		reg:               newHistogramRegistry(),
   269  		ops:               randvar.NewWeighted(nil, weights...),
   270  		keyDist:           keyDist,
   271  		batchDist:         batchDist,
   272  		scanDist:          scanDist,
   273  		valueDist:         valueDist,
   274  		targetCompression: targetCompression,
   275  	}
   276  	y.writeOpts = pebble.Sync
   277  	if disableWAL {
   278  		y.writeOpts = pebble.NoSync
   279  	}
   280  
   281  	y.opsMap = make(map[string]int)
   282  	maybeRegister := func(op int, name string) *namedHistogram {
   283  		w := weights.get(op)
   284  		if w == 0 {
   285  			return nil
   286  		}
   287  		wstr := fmt.Sprint(int(100 * w))
   288  		fill := strings.Repeat("_", 3-len(wstr))
   289  		if fill == "" {
   290  			fill = "_"
   291  		}
   292  		fullName := fmt.Sprintf("%s%s%s", name, fill, wstr)
   293  		y.opsMap[fullName] = op
   294  		return y.reg.Register(fullName)
   295  	}
   296  
   297  	y.latency[ycsbInsert] = maybeRegister(ycsbInsert, "insert")
   298  	y.latency[ycsbRead] = maybeRegister(ycsbRead, "read")
   299  	y.latency[ycsbScan] = maybeRegister(ycsbScan, "scan")
   300  	y.latency[ycsbReverseScan] = maybeRegister(ycsbReverseScan, "rscan")
   301  	y.latency[ycsbUpdate] = maybeRegister(ycsbUpdate, "update")
   302  	return y
   303  }
   304  
   305  func (y *ycsb) init(db DB, wg *sync.WaitGroup) {
   306  	if ycsbConfig.initialKeys > 0 {
   307  		rng := randvar.NewRand()
   308  
   309  		b := db.NewBatch()
   310  		for i := 1; i <= ycsbConfig.initialKeys; i++ {
   311  			if len(b.Repr()) >= 1<<20 {
   312  				if err := b.Commit(y.writeOpts); err != nil {
   313  					log.Fatal(err)
   314  				}
   315  				b = db.NewBatch()
   316  			}
   317  			_ = b.Set(y.makeKey(uint64(i+ycsbConfig.prepopulatedKeys)), y.randBytes(rng), nil)
   318  		}
   319  		if err := b.Commit(y.writeOpts); err != nil {
   320  			log.Fatal(err)
   321  		}
   322  		fmt.Printf("inserted keys [%d-%d)\n",
   323  			1+ycsbConfig.prepopulatedKeys,
   324  			1+ycsbConfig.prepopulatedKeys+ycsbConfig.initialKeys)
   325  	}
   326  	y.keyNum = ackseq.New(uint64(ycsbConfig.initialKeys + ycsbConfig.prepopulatedKeys))
   327  
   328  	var err error
   329  	y.limiter, err = newFluctuatingRateLimiter(maxOpsPerSec)
   330  	if err != nil {
   331  		fmt.Println(err)
   332  		return
   333  	}
   334  	wg.Add(concurrency)
   335  	for i := 0; i < concurrency; i++ {
   336  		go y.run(db, wg)
   337  	}
   338  }
   339  
   340  func (y *ycsb) run(db DB, wg *sync.WaitGroup) {
   341  	defer wg.Done()
   342  
   343  	rng := randvar.NewRand()
   344  	for {
   345  		y.limiter.Wait(context.Background())
   346  		start := time.Now()
   347  
   348  		op := y.ops.Int()
   349  		switch op {
   350  		case ycsbInsert:
   351  			y.insert(db, rng)
   352  		case ycsbRead:
   353  			y.read(db, rng)
   354  		case ycsbScan:
   355  			y.scan(db, rng, false /* reverse */)
   356  		case ycsbReverseScan:
   357  			y.scan(db, rng, true /* reverse */)
   358  		case ycsbUpdate:
   359  			y.update(db, rng)
   360  		default:
   361  			panic("not reached")
   362  		}
   363  
   364  		y.latency[op].Record(time.Since(start))
   365  		if ycsbConfig.numOps > 0 &&
   366  			atomic.AddUint64(&y.numOps, 1) >= ycsbConfig.numOps {
   367  			break
   368  		}
   369  	}
   370  }
   371  
   372  func (y *ycsb) hashKey(key uint64) uint64 {
   373  	// Inlined version of fnv.New64 + Write.
   374  	const offset64 = 14695981039346656037
   375  	const prime64 = 1099511628211
   376  
   377  	h := uint64(offset64)
   378  	for i := 0; i < 8; i++ {
   379  		h *= prime64
   380  		h ^= uint64(key & 0xff)
   381  		key >>= 8
   382  	}
   383  	return h
   384  }
   385  
   386  func (y *ycsb) makeKey(keyNum uint64) []byte {
   387  	key := make([]byte, 4, 24+10)
   388  	copy(key, "user")
   389  	key = strconv.AppendUint(key, y.hashKey(keyNum), 10)
   390  	// Use the MVCC encoding for keys. This appends a timestamp with
   391  	// walltime=1. That knowledge is utilized by rocksDB.Scan.
   392  	key = append(key, '\x00', '\x00', '\x00', '\x00', '\x00',
   393  		'\x00', '\x00', '\x00', '\x01', '\x09')
   394  	return key
   395  }
   396  
   397  func (y *ycsb) nextReadKey() []byte {
   398  	// NB: the range of values returned by keyDist is tied to the range returned
   399  	// by keyNum.Base. See how these are both incremented by ycsb.insert().
   400  	keyNum := y.keyDist.Uint64()
   401  	return y.makeKey(keyNum)
   402  }
   403  
   404  func (y *ycsb) randBytes(rng *rand.Rand) []byte {
   405  	length := int(y.valueDist.Uint64())
   406  	return randomBlock(rng, length, y.targetCompression)
   407  }
   408  
   409  func (y *ycsb) insert(db DB, rng *rand.Rand) {
   410  	count := y.batchDist.Uint64()
   411  	keyNums := make([]uint64, count)
   412  
   413  	b := db.NewBatch()
   414  	for i := range keyNums {
   415  		keyNums[i] = y.keyNum.Next()
   416  		_ = b.Set(y.makeKey(keyNums[i]), y.randBytes(rng), nil)
   417  	}
   418  	if err := b.Commit(y.writeOpts); err != nil {
   419  		log.Fatal(err)
   420  	}
   421  	atomic.AddUint64(&y.numKeys[ycsbInsert], uint64(len(keyNums)))
   422  
   423  	for i := range keyNums {
   424  		delta, err := y.keyNum.Ack(keyNums[i])
   425  		if err != nil {
   426  			log.Fatal(err)
   427  		}
   428  		if delta > 0 {
   429  			y.keyDist.IncMax(delta)
   430  		}
   431  	}
   432  }
   433  
   434  func (y *ycsb) read(db DB, rng *rand.Rand) {
   435  	key := y.nextReadKey()
   436  	iter := db.NewIter(nil)
   437  	iter.SeekGE(key)
   438  	if err := iter.Close(); err != nil {
   439  		log.Fatal(err)
   440  	}
   441  	atomic.AddUint64(&y.numKeys[ycsbRead], 1)
   442  }
   443  
   444  func (y *ycsb) scan(db DB, rng *rand.Rand, reverse bool) {
   445  	count := y.scanDist.Uint64()
   446  	key := y.nextReadKey()
   447  	if err := db.Scan(key, int64(count), reverse); err != nil {
   448  		log.Fatal(err)
   449  	}
   450  	atomic.AddUint64(&y.numKeys[ycsbScan], count)
   451  }
   452  
   453  func (y *ycsb) update(db DB, rng *rand.Rand) {
   454  	count := int(y.batchDist.Uint64())
   455  	b := db.NewBatch()
   456  	for i := 0; i < count; i++ {
   457  		_ = b.Set(y.nextReadKey(), y.randBytes(rng), nil)
   458  	}
   459  	if err := b.Commit(y.writeOpts); err != nil {
   460  		log.Fatal(err)
   461  	}
   462  	atomic.AddUint64(&y.numKeys[ycsbUpdate], uint64(count))
   463  }
   464  
   465  func (y *ycsb) tick(elapsed time.Duration, i int) {
   466  	if i%20 == 0 {
   467  		fmt.Println("____optype__elapsed____ops/sec___keys/sec__p50(ms)__p95(ms)__p99(ms)_pMax(ms)")
   468  	}
   469  	y.reg.Tick(func(tick histogramTick) {
   470  		op := y.opsMap[tick.Name]
   471  		numKeys := atomic.LoadUint64(&y.numKeys[op])
   472  		h := tick.Hist
   473  
   474  		fmt.Printf("%10s %8s %10.1f %10.1f %8.1f %8.1f %8.1f %8.1f\n",
   475  			tick.Name,
   476  			time.Duration(elapsed.Seconds()+0.5)*time.Second,
   477  			float64(h.TotalCount())/tick.Elapsed.Seconds(),
   478  			float64(numKeys-y.prevNumKeys[op])/tick.Elapsed.Seconds(),
   479  			time.Duration(h.ValueAtQuantile(50)).Seconds()*1000,
   480  			time.Duration(h.ValueAtQuantile(95)).Seconds()*1000,
   481  			time.Duration(h.ValueAtQuantile(99)).Seconds()*1000,
   482  			time.Duration(h.ValueAtQuantile(100)).Seconds()*1000,
   483  		)
   484  
   485  		y.prevNumKeys[op] = numKeys
   486  	})
   487  }
   488  
   489  func (y *ycsb) done(elapsed time.Duration) {
   490  	fmt.Println("\n____optype__elapsed_____ops(total)___ops/sec(cum)__keys/sec(cum)__avg(ms)__p50(ms)__p95(ms)__p99(ms)_pMax(ms)")
   491  	y.reg.Tick(func(tick histogramTick) {
   492  		op := y.opsMap[tick.Name]
   493  		numKeys := atomic.LoadUint64(&y.numKeys[op])
   494  		h := tick.Cumulative
   495  
   496  		fmt.Printf("%10s %7.1fs %14d %14.1f %14.1f %8.1f %8.1f %8.1f %8.1f %8.1f\n",
   497  			tick.Name, elapsed.Seconds(), h.TotalCount(),
   498  			float64(h.TotalCount())/elapsed.Seconds(),
   499  			float64(numKeys)/elapsed.Seconds(),
   500  			time.Duration(h.Mean()).Seconds()*1000,
   501  			time.Duration(h.ValueAtQuantile(50)).Seconds()*1000,
   502  			time.Duration(h.ValueAtQuantile(95)).Seconds()*1000,
   503  			time.Duration(h.ValueAtQuantile(99)).Seconds()*1000,
   504  			time.Duration(h.ValueAtQuantile(100)).Seconds()*1000)
   505  	})
   506  	fmt.Println()
   507  }