github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/workload/kv/kv.go

// Copyright 2017 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package kv

import (
	"context"
	"crypto/sha1"
	"encoding/binary"
	"fmt"
	"hash"
	"math"
	"math/rand"
	"strconv"
	"strings"
	"sync/atomic"

	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
	"github.com/cockroachdb/cockroach/pkg/workload"
	"github.com/cockroachdb/cockroach/pkg/workload/histogram"
	"github.com/cockroachdb/errors"
	"github.com/spf13/pflag"
)

const (
	kvSchema = `(
		k BIGINT NOT NULL PRIMARY KEY,
		v BYTES NOT NULL
	)`
	kvSchemaWithIndex = `(
		k BIGINT NOT NULL PRIMARY KEY,
		v BYTES NOT NULL,
		INDEX (v)
	)`
	// TODO(ajwerner): Change this to use the "easier" hash sharded index syntax once that
	// is in.
	shardedKvSchema = `(
		k BIGINT NOT NULL,
		v BYTES NOT NULL,
		shard INT4 AS (mod(k, %d)) STORED CHECK (%s),
		PRIMARY KEY (shard, k)
	)`
	shardedKvSchemaWithIndex = `(
		k BIGINT NOT NULL,
		v BYTES NOT NULL,
		shard INT4 AS (mod(k, %d)) STORED CHECK (%s),
		PRIMARY KEY (shard, k),
		INDEX (v)
	)`
)

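// For illustration (this expansion is computed in Tables below, not stored
// here): with --num-shards=2, shardedKvSchema renders as
//
//	(
//		k BIGINT NOT NULL,
//		v BYTES NOT NULL,
//		shard INT4 AS (mod(k, 2)) STORED CHECK (shard IN (0,1)),
//		PRIMARY KEY (shard, k)
//	)
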
// kv is a workload that issues point reads and writes (and, optionally,
// full-table span queries) against keys spread across the cluster.
type kv struct {
	flags     workload.Flags
	connFlags *workload.ConnFlags

	batchSize                            int
	minBlockSizeBytes, maxBlockSizeBytes int
	cycleLength                          int64
	readPercent                          int
	spanPercent                          int
	seed                                 int64
	writeSeq                             string
	sequential                           bool
	zipfian                              bool
	splits                               int
	secondaryIndex                       bool
	shards                               int
	targetCompressionRatio               float64
}

func init() {
	workload.Register(kvMeta)
}

var kvMeta = workload.Meta{
	Name:        `kv`,
	Description: `KV reads and writes to keys spread randomly across the cluster.`,
	Details: `
	By default, keys are picked uniformly at random across the cluster.
	--concurrency workers alternate between doing selects and upserts (according
	to a --read-percent ratio). Each select/upsert reads/writes a batch of --batch
	rows. The write keys are randomly generated in a deterministic fashion (or
	sequentially if --sequential is specified). Reads select a random batch of ids
	out of the ones previously written.
	--write-seq can be used to incorporate data produced by a previous run into
	the current run.
	`,
	Version:      `1.0.0`,
	PublicFacing: true,
	New: func() workload.Generator {
		g := &kv{}
		g.flags.FlagSet = pflag.NewFlagSet(`kv`, pflag.ContinueOnError)
		g.flags.Meta = map[string]workload.FlagMeta{
			`batch`: {RuntimeOnly: true},
		}
		g.flags.IntVar(&g.batchSize, `batch`, 1,
			`Number of blocks to read/insert in a single SQL statement.`)
		g.flags.IntVar(&g.minBlockSizeBytes, `min-block-bytes`, 1,
			`Minimum amount of raw data written with each insertion.`)
		g.flags.IntVar(&g.maxBlockSizeBytes, `max-block-bytes`, 1,
			`Maximum amount of raw data written with each insertion.`)
		g.flags.Int64Var(&g.cycleLength, `cycle-length`, math.MaxInt64,
			`Number of keys repeatedly accessed by each writer through upserts.`)
		g.flags.IntVar(&g.readPercent, `read-percent`, 0,
			`Percent (0-100) of operations that are reads of existing keys.`)
		g.flags.IntVar(&g.spanPercent, `span-percent`, 0,
			`Percent (0-100) of operations that are spanning queries of all ranges.`)
		g.flags.Int64Var(&g.seed, `seed`, 1, `Key hash seed.`)
		g.flags.BoolVar(&g.zipfian, `zipfian`, false,
			`Pick keys in a zipfian distribution instead of randomly.`)
		g.flags.BoolVar(&g.sequential, `sequential`, false,
			`Pick keys sequentially instead of randomly.`)
		g.flags.StringVar(&g.writeSeq, `write-seq`, "",
			`Initial write sequence value. Can be used to incorporate data produced by a `+
				`previous run. It has to be of the form (R|S)<number>, where S implies that it `+
				`was taken from a previous --sequential run and R implies a previous random run.`)
		g.flags.IntVar(&g.splits, `splits`, 0,
			`Number of splits to perform before starting normal operations.`)
		g.flags.BoolVar(&g.secondaryIndex, `secondary-index`, false,
			`Add a secondary index to the schema.`)
		g.flags.IntVar(&g.shards, `num-shards`, 0,
			`Number of shards to create on the primary key.`)
		g.flags.Float64Var(&g.targetCompressionRatio, `target-compression-ratio`, 1.0,
			`Target compression ratio for data blocks. Must be >= 1.0.`)
		g.connFlags = workload.NewConnFlags(&g.flags)
		return g
	},
}

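// An illustrative invocation consuming the registration above (the exact
// binary name and connection URL depend on the deployment):
//
//	cockroach workload run kv --concurrency=16 --read-percent=95 \
//	  --min-block-bytes=1 --max-block-bytes=1024 '<connection url>'
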
// Meta implements the Generator interface.
func (*kv) Meta() workload.Meta { return kvMeta }

// Flags implements the Flagser interface.
func (w *kv) Flags() workload.Flags { return w.flags }

// Hooks implements the Hookser interface.
func (w *kv) Hooks() workload.Hooks {
	return workload.Hooks{
		Validate: func() error {
			if w.maxBlockSizeBytes < w.minBlockSizeBytes {
				return errors.Errorf("Value of 'max-block-bytes' (%d) must be greater than or equal to value of 'min-block-bytes' (%d)",
					w.maxBlockSizeBytes, w.minBlockSizeBytes)
			}
			if w.sequential && w.splits > 0 {
				return errors.New("'sequential' and 'splits' cannot both be enabled")
			}
			if w.sequential && w.zipfian {
				return errors.New("'sequential' and 'zipfian' cannot both be enabled")
			}
			if w.readPercent+w.spanPercent > 100 {
				return errors.New("'read-percent' plus 'span-percent' cannot exceed 100")
			}
			if w.targetCompressionRatio < 1.0 || math.IsNaN(w.targetCompressionRatio) {
				return errors.New("'target-compression-ratio' must be a number >= 1.0")
			}
			return nil
		},
	}
}

// Tables implements the Generator interface.
func (w *kv) Tables() []workload.Table {
	table := workload.Table{
		Name: `kv`,
		// TODO(dan): Support initializing kv with data.
		Splits: workload.Tuples(
			w.splits,
			func(splitIdx int) []interface{} {
				// Split points are spaced evenly between math.MinInt64 and
				// cycleLength.
				stride := (float64(w.cycleLength) - float64(math.MinInt64)) / float64(w.splits+1)
				splitPoint := int(math.MinInt64 + float64(splitIdx+1)*stride)
				return []interface{}{splitPoint}
			},
		),
	}
	if w.shards > 0 {
		schema := shardedKvSchema
		if w.secondaryIndex {
			schema = shardedKvSchemaWithIndex
		}
		// Build the "shard IN (0,1,...,shards-1)" check constraint expected
		// by the sharded schema templates.
		checkConstraint := strings.Builder{}
		checkConstraint.WriteString(`shard IN (`)
		for i := 0; i < w.shards; i++ {
			if i != 0 {
				checkConstraint.WriteString(",")
			}
			fmt.Fprintf(&checkConstraint, "%d", i)
		}
		checkConstraint.WriteString(")")
		table.Schema = fmt.Sprintf(schema, w.shards, checkConstraint.String())
	} else {
		if w.secondaryIndex {
			table.Schema = kvSchemaWithIndex
		} else {
			table.Schema = kvSchema
		}
	}
	return []workload.Table{table}
}

// Ops implements the Opser interface.
func (w *kv) Ops(urls []string, reg *histogram.Registry) (workload.QueryLoad, error) {
	writeSeq := 0
	if w.writeSeq != "" {
		first := w.writeSeq[0]
		if len(w.writeSeq) < 2 || (first != 'R' && first != 'S') {
			return workload.QueryLoad{}, fmt.Errorf("--write-seq has to be of the form '(R|S)<num>'")
		}
		rest := w.writeSeq[1:]
		var err error
		writeSeq, err = strconv.Atoi(rest)
		if err != nil {
			return workload.QueryLoad{}, fmt.Errorf("--write-seq has to be of the form '(R|S)<num>'")
		}
		if first == 'R' && w.sequential {
			return workload.QueryLoad{}, fmt.Errorf("--sequential incompatible with a Random --write-seq")
		}
		if first == 'S' && !w.sequential {
			return workload.QueryLoad{}, fmt.Errorf(
				"--sequential=false incompatible with a Sequential --write-seq")
		}
	}

	ctx := context.Background()
	sqlDatabase, err := workload.SanitizeUrls(w, w.connFlags.DBOverride, urls)
	if err != nil {
		return workload.QueryLoad{}, err
	}
	cfg := workload.MultiConnPoolCfg{
		MaxTotalConnections: w.connFlags.Concurrency + 1,
	}
	mcp, err := workload.NewMultiConnPool(cfg, urls...)
	if err != nil {
		return workload.QueryLoad{}, err
	}

	// Read statement.
	var buf strings.Builder
	if w.shards == 0 {
		buf.WriteString(`SELECT k, v FROM kv WHERE k IN (`)
		for i := 0; i < w.batchSize; i++ {
			if i > 0 {
				buf.WriteString(", ")
			}
			fmt.Fprintf(&buf, `$%d`, i+1)
		}
	} else {
		// TODO(ajwerner): We're currently manually plumbing down the computed shard column
		// since the optimizer doesn't yet support deriving values of computed columns
		// when all the columns they reference are available. See
		// https://github.com/cockroachdb/cockroach/issues/39340#issuecomment-535338071
		// for details. Remove this once that functionality is added.
		buf.WriteString(`SELECT k, v FROM kv WHERE (shard, k) IN (`)
		for i := 0; i < w.batchSize; i++ {
			if i > 0 {
				buf.WriteString(", ")
			}
			fmt.Fprintf(&buf, `(mod($%d, %d), $%d)`, i+1, w.shards, i+1)
		}
	}
	buf.WriteString(`)`)
	readStmtStr := buf.String()

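	// For illustration, with --batch=3 and no sharding readStmtStr comes
	// out as:
	//
	//	SELECT k, v FROM kv WHERE k IN ($1, $2, $3)
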
	// Write statement.
	buf.Reset()
	buf.WriteString(`UPSERT INTO kv (k, v) VALUES`)
	for i := 0; i < w.batchSize; i++ {
		j := i * 2
		if i > 0 {
			buf.WriteString(", ")
		}
		fmt.Fprintf(&buf, ` ($%d, $%d)`, j+1, j+2)
	}
	writeStmtStr := buf.String()

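	// Likewise, with --batch=2 writeStmtStr comes out as (modulo spacing):
	//
	//	UPSERT INTO kv (k, v) VALUES ($1, $2), ($3, $4)
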
	// Span statement.
	spanStmtStr := "SELECT count(v) FROM kv"

	ql := workload.QueryLoad{SQLDatabase: sqlDatabase}
	seq := &sequence{config: w, val: int64(writeSeq)}
	numEmptyResults := new(int64)
	for i := 0; i < w.connFlags.Concurrency; i++ {
		op := &kvOp{
			config:          w,
			hists:           reg.GetHandle(),
			numEmptyResults: numEmptyResults,
		}
		op.readStmt = op.sr.Define(readStmtStr)
		op.writeStmt = op.sr.Define(writeStmtStr)
		op.spanStmt = op.sr.Define(spanStmtStr)
		if err := op.sr.Init(ctx, "kv", mcp, w.connFlags); err != nil {
			return workload.QueryLoad{}, err
		}
		if w.sequential {
			op.g = newSequentialGenerator(seq)
		} else if w.zipfian {
			op.g = newZipfianGenerator(seq)
		} else {
			op.g = newHashGenerator(seq)
		}
		ql.WorkerFns = append(ql.WorkerFns, op.run)
		ql.Close = op.close
	}
	return ql, nil
}

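// kvOp is the per-worker state: prepared statement handles, latency
// histograms, and a key generator driven by the shared sequence.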
type kvOp struct {
	config          *kv
	hists           *histogram.Histograms
	sr              workload.SQLRunner
	readStmt        workload.StmtHandle
	writeStmt       workload.StmtHandle
	spanStmt        workload.StmtHandle
	g               keyGenerator
	numEmptyResults *int64 // accessed atomically
}

func (o *kvOp) run(ctx context.Context) error {
	statementProbability := o.g.rand().Intn(100) // Determines which statement is executed.
	if statementProbability < o.config.readPercent {
		args := make([]interface{}, o.config.batchSize)
		for i := 0; i < o.config.batchSize; i++ {
			args[i] = o.g.readKey()
		}
		start := timeutil.Now()
		rows, err := o.readStmt.Query(ctx, args...)
		if err != nil {
			return err
		}
		// Drain the result set, tracking whether any row came back.
		empty := true
		for rows.Next() {
			empty = false
		}
		if empty {
			atomic.AddInt64(o.numEmptyResults, 1)
		}
		elapsed := timeutil.Since(start)
		o.hists.Get(`read`).Record(elapsed)
		return rows.Err()
	}
	// Since we know the statement is not a read, we recalibrate
	// statementProbability to only consider the other statements.
	statementProbability -= o.config.readPercent
	if statementProbability < o.config.spanPercent {
		start := timeutil.Now()
		_, err := o.spanStmt.Exec(ctx)
		elapsed := timeutil.Since(start)
		o.hists.Get(`span`).Record(elapsed)
		return err
	}
	// Otherwise, write a batch of (key, random block) pairs.
	const argCount = 2
	args := make([]interface{}, argCount*o.config.batchSize)
	for i := 0; i < o.config.batchSize; i++ {
		j := i * argCount
		args[j+0] = o.g.writeKey()
		args[j+1] = randomBlock(o.config, o.g.rand())
	}
	start := timeutil.Now()
	_, err := o.writeStmt.Exec(ctx, args...)
	elapsed := timeutil.Since(start)
	o.hists.Get(`write`).Record(elapsed)
	return err
}

func (o *kvOp) close(context.Context) {
	if empty := atomic.LoadInt64(o.numEmptyResults); empty != 0 {
		fmt.Printf("Number of reads that didn't return any results: %d.\n", empty)
	}
	seq := o.g.sequence()
	var ch string
	if o.config.sequential {
		ch = "S"
	} else {
		ch = "R"
	}
	fmt.Printf("Highest sequence written: %d. Can be passed as --write-seq=%s%d to the next run.\n",
		seq, ch, seq)
}

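// sequence is a monotonic counter shared by all workers: write hands out the
// next key index (modulo cycleLength) and read observes the most recently
// handed-out index.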
type sequence struct {
	config *kv
	val    int64
}

// write returns the next key index to be written, wrapping around at
// cycleLength.
func (s *sequence) write() int64 {
	return (atomic.AddInt64(&s.val, 1) - 1) % s.config.cycleLength
}

// read returns the last key index that has been written. Note that the returned
// index might not actually have been written yet, so a read operation cannot
// require that the key is present.
func (s *sequence) read() int64 {
	return atomic.LoadInt64(&s.val) % s.config.cycleLength
}

// keyGenerator generates read and write keys. Read keys may not yet exist and
// write keys may already exist.
type keyGenerator interface {
	writeKey() int64
	readKey() int64
	rand() *rand.Rand
	sequence() int64
}

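// hashGenerator scatters the shared sequence across the int64 key space by
// hashing it, producing keys that are deterministic (given --seed) yet
// effectively uniformly distributed.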
type hashGenerator struct {
	seq    *sequence
	random *rand.Rand
	hasher hash.Hash
	buf    [sha1.Size]byte
}

func newHashGenerator(seq *sequence) *hashGenerator {
	return &hashGenerator{
		seq:    seq,
		random: rand.New(rand.NewSource(timeutil.Now().UnixNano())),
		hasher: sha1.New(),
	}
}

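// hash folds the configured seed into v, feeds both through SHA-1, and
// returns the first 8 bytes of the digest reinterpreted as an int64.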
func (g *hashGenerator) hash(v int64) int64 {
	binary.BigEndian.PutUint64(g.buf[:8], uint64(v))
	binary.BigEndian.PutUint64(g.buf[8:16], uint64(g.seq.config.seed))
	g.hasher.Reset()
	_, _ = g.hasher.Write(g.buf[:16])
	g.hasher.Sum(g.buf[:0])
	return int64(binary.BigEndian.Uint64(g.buf[:8]))
}

func (g *hashGenerator) writeKey() int64 {
	return g.hash(g.seq.write())
}

func (g *hashGenerator) readKey() int64 {
	v := g.seq.read()
	if v == 0 {
		return 0
	}
	return g.hash(g.random.Int63n(v))
}

func (g *hashGenerator) rand() *rand.Rand {
	return g.random
}

func (g *hashGenerator) sequence() int64 {
	return atomic.LoadInt64(&g.seq.val)
}

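// sequentialGenerator writes keys in strictly increasing order and reads
// uniformly at random from the keys written so far.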
type sequentialGenerator struct {
	seq    *sequence
	random *rand.Rand
}

func newSequentialGenerator(seq *sequence) *sequentialGenerator {
	return &sequentialGenerator{
		seq:    seq,
		random: rand.New(rand.NewSource(timeutil.Now().UnixNano())),
	}
}

func (g *sequentialGenerator) writeKey() int64 {
	return g.seq.write()
}

func (g *sequentialGenerator) readKey() int64 {
	v := g.seq.read()
	if v == 0 {
		return 0
	}
	return g.random.Int63n(v)
}

func (g *sequentialGenerator) rand() *rand.Rand {
	return g.random
}

func (g *sequentialGenerator) sequence() int64 {
	return atomic.LoadInt64(&g.seq.val)
}

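// zipfGenerator maps the shared sequence through a zipfian distribution, so a
// small set of keys absorbs most of the reads and writes.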
type zipfGenerator struct {
	seq    *sequence
	random *rand.Rand
	zipf   *zipf
}

// newZipfianGenerator creates a new zipfian generator.
func newZipfianGenerator(seq *sequence) *zipfGenerator {
	random := rand.New(rand.NewSource(timeutil.Now().UnixNano()))
	return &zipfGenerator{
		seq:    seq,
		random: random,
		zipf:   newZipf(1.1, 1, uint64(math.MaxInt64)),
	}
}

// zipfian returns a zipf-distributed random number seeded by seed.
func (g *zipfGenerator) zipfian(seed int64) int64 {
	randomWithSeed := rand.New(rand.NewSource(seed))
	return int64(g.zipf.Uint64(randomWithSeed))
}

// writeKey returns a zipf-distributed write key.
func (g *zipfGenerator) writeKey() int64 {
	return g.zipfian(g.seq.write())
}

// readKey returns a zipf-distributed read key.
func (g *zipfGenerator) readKey() int64 {
	v := g.seq.read()
	if v == 0 {
		return 0
	}
	return g.zipfian(g.random.Int63n(v))
}

func (g *zipfGenerator) rand() *rand.Rand {
	return g.random
}

func (g *zipfGenerator) sequence() int64 {
	return atomic.LoadInt64(&g.seq.val)
}

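// randomBlock generates a block of random size in [minBlockSizeBytes,
// maxBlockSizeBytes]. To approximate --target-compression-ratio, only the
// first blockSize/ratio bytes are unique; the remainder repeats that prefix.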
func randomBlock(config *kv, r *rand.Rand) []byte {
	blockSize := r.Intn(config.maxBlockSizeBytes-config.minBlockSizeBytes+1) + config.minBlockSizeBytes
	blockData := make([]byte, blockSize)
	uniqueSize := int(float64(blockSize) / config.targetCompressionRatio)
	if uniqueSize < 1 {
		uniqueSize = 1
	}
	for i := range blockData {
		if i >= uniqueSize {
			blockData[i] = blockData[i-uniqueSize]
		} else {
			blockData[i] = byte(r.Int() & 0xff)
		}
	}
	return blockData
}