github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/workload/jsonload/json.go (about)

     1  // Copyright 2018 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package jsonload
    12  
    13  import (
    14  	"bytes"
    15  	"context"
    16  	"crypto/sha1"
    17  	gosql "database/sql"
    18  	"encoding/binary"
    19  	"fmt"
    20  	"hash"
    21  	"math"
    22  	"math/rand"
    23  	"strings"
    24  	"sync/atomic"
    25  
    26  	"github.com/cockroachdb/cockroach/pkg/util/json"
    27  	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
    28  	"github.com/cockroachdb/cockroach/pkg/workload"
    29  	"github.com/cockroachdb/cockroach/pkg/workload/histogram"
    30  	"github.com/cockroachdb/errors"
    31  	"github.com/spf13/pflag"
    32  )
    33  
// Candidate schemas for table `j`. Tables picks one of them based on the
// `inverted` and `computed` flags.
const (
	// jsonSchema is the default: an explicit BIGINT primary key `k` and a JSONB
	// payload column `v`.
	jsonSchema                  = `(k BIGINT NOT NULL PRIMARY KEY, v JSONB NOT NULL)`
	// jsonSchemaWithInvertedIndex is jsonSchema plus an inverted index on `v`.
	jsonSchemaWithInvertedIndex = `(k BIGINT NOT NULL PRIMARY KEY, v JSONB NOT NULL, INVERTED INDEX (v))`
	// jsonSchemaWithComputed makes `k` a stored computed column extracted from
	// the 'key' field of `v`, so writers supply only `v`.
	jsonSchemaWithComputed      = `(k BIGINT AS (v->>'key')::BIGINT STORED PRIMARY KEY, v JSONB NOT NULL)`
)
    39  
// jsonLoad is the generator for the `json` workload. Its configuration fields
// are bound to command-line flags in jsonLoadMeta.New.
type jsonLoad struct {
	flags     workload.Flags
	connFlags *workload.ConnFlags

	// batchSize is the number of keys per read statement and the number of rows
	// per write statement (`batch` flag).
	batchSize      int
	// cycleLength is the modulus applied to the write sequence (`cycle-length`
	// flag).
	cycleLength    int64
	// readPercent is the percentage of operations that are reads
	// (`read-percent` flag).
	readPercent    int
	// writeSeq is the initial write sequence value (`write-seq` flag); seed is
	// the key hash seed and also seeds the generators' random sources (`seed`
	// flag).
	writeSeq, seed int64
	// sequential selects sequential instead of hashed keys (`sequential` flag).
	sequential     bool
	// splits is the number of split points requested for the table (`splits`
	// flag).
	splits         int
	// complexity is passed to json.Random when generating values
	// (`complexity` flag).
	complexity     int
	// inverted selects the schema with an inverted index (`inverted` flag).
	inverted       bool
	// computed selects the schema with a computed primary key (`computed`
	// flag). Hooks rejects combining it with inverted.
	computed       bool
}
    54  
// init registers the json workload's metadata with the workload package.
func init() {
	workload.Register(jsonLoadMeta)
}
    58  
    59  var jsonLoadMeta = workload.Meta{
    60  	Name: `json`,
    61  	Description: `JSON reads and writes to keys spread (by default, uniformly` +
    62  		` at random) across the cluster`,
    63  	Version: `1.0.0`,
    64  	New: func() workload.Generator {
    65  		g := &jsonLoad{}
    66  		g.flags.FlagSet = pflag.NewFlagSet(`json`, pflag.ContinueOnError)
    67  		g.flags.Meta = map[string]workload.FlagMeta{
    68  			`batch`: {RuntimeOnly: true},
    69  		}
    70  		g.flags.IntVar(&g.batchSize, `batch`, 1, `Number of blocks to insert in a single SQL statement`)
    71  		g.flags.Int64Var(&g.cycleLength, `cycle-length`, math.MaxInt64, `Number of keys repeatedly accessed by each writer`)
    72  		g.flags.IntVar(&g.readPercent, `read-percent`, 0, `Percent (0-100) of operations that are reads of existing keys`)
    73  		g.flags.Int64Var(&g.writeSeq, `write-seq`, 0, `Initial write sequence value.`)
    74  		g.flags.Int64Var(&g.seed, `seed`, 1, `Key hash seed.`)
    75  		g.flags.BoolVar(&g.sequential, `sequential`, false, `Pick keys sequentially instead of randomly`)
    76  		g.flags.IntVar(&g.splits, `splits`, 0, `Number of splits to perform before starting normal operations`)
    77  		g.flags.IntVar(&g.complexity, `complexity`, 20, `Complexity of generated JSON data`)
    78  		g.flags.BoolVar(&g.inverted, `inverted`, false, `Whether to include an inverted index`)
    79  		g.flags.BoolVar(&g.computed, `computed`, false, `Whether to use a computed primary key`)
    80  		g.connFlags = workload.NewConnFlags(&g.flags)
    81  		return g
    82  	},
    83  }
    84  
    85  // Meta implements the Generator interface.
    86  func (*jsonLoad) Meta() workload.Meta { return jsonLoadMeta }
    87  
    88  // Flags implements the Flagser interface.
    89  func (w *jsonLoad) Flags() workload.Flags { return w.flags }
    90  
    91  // Hooks implements the Hookser interface.
    92  func (w *jsonLoad) Hooks() workload.Hooks {
    93  	return workload.Hooks{
    94  		Validate: func() error {
    95  			if w.computed && w.inverted {
    96  				return errors.Errorf("computed and inverted cannot be used together")
    97  			}
    98  			return nil
    99  		},
   100  	}
   101  }
   102  
   103  // Tables implements the Generator interface.
   104  func (w *jsonLoad) Tables() []workload.Table {
   105  	schema := jsonSchema
   106  	if w.inverted {
   107  		schema = jsonSchemaWithInvertedIndex
   108  	} else if w.computed {
   109  		schema = jsonSchemaWithComputed
   110  	}
   111  	table := workload.Table{
   112  		Name:   `j`,
   113  		Schema: schema,
   114  		Splits: workload.Tuples(
   115  			w.splits,
   116  			func(splitIdx int) []interface{} {
   117  				rng := rand.New(rand.NewSource(w.seed + int64(splitIdx)))
   118  				g := newHashGenerator(&sequence{config: w, val: w.writeSeq})
   119  				return []interface{}{
   120  					int(g.hash(rng.Int63())),
   121  				}
   122  			},
   123  		),
   124  	}
   125  	return []workload.Table{table}
   126  }
   127  
   128  // Ops implements the Opser interface.
   129  func (w *jsonLoad) Ops(urls []string, reg *histogram.Registry) (workload.QueryLoad, error) {
   130  	sqlDatabase, err := workload.SanitizeUrls(w, w.connFlags.DBOverride, urls)
   131  	if err != nil {
   132  		return workload.QueryLoad{}, err
   133  	}
   134  	db, err := gosql.Open(`cockroach`, strings.Join(urls, ` `))
   135  	if err != nil {
   136  		return workload.QueryLoad{}, err
   137  	}
   138  	// Allow a maximum of concurrency+1 connections to the database.
   139  	db.SetMaxOpenConns(w.connFlags.Concurrency + 1)
   140  	db.SetMaxIdleConns(w.connFlags.Concurrency + 1)
   141  
   142  	var buf bytes.Buffer
   143  	buf.WriteString(`SELECT k, v FROM j WHERE k IN (`)
   144  	for i := 0; i < w.batchSize; i++ {
   145  		if i > 0 {
   146  			buf.WriteString(", ")
   147  		}
   148  		fmt.Fprintf(&buf, `$%d`, i+1)
   149  	}
   150  	buf.WriteString(`)`)
   151  	readStmt, err := db.Prepare(buf.String())
   152  	if err != nil {
   153  		return workload.QueryLoad{}, err
   154  	}
   155  
   156  	buf.Reset()
   157  	if w.computed {
   158  		buf.WriteString(`UPSERT INTO j (v) VALUES`)
   159  	} else {
   160  		buf.WriteString(`UPSERT INTO j (k, v) VALUES`)
   161  	}
   162  
   163  	for i := 0; i < w.batchSize; i++ {
   164  		j := i * 2
   165  		if i > 0 {
   166  			buf.WriteString(", ")
   167  		}
   168  		if w.computed {
   169  			fmt.Fprintf(&buf, ` ($%d)`, i+1)
   170  		} else {
   171  			fmt.Fprintf(&buf, ` ($%d, $%d)`, j+1, j+2)
   172  		}
   173  	}
   174  
   175  	writeStmt, err := db.Prepare(buf.String())
   176  	if err != nil {
   177  		return workload.QueryLoad{}, err
   178  	}
   179  
   180  	ql := workload.QueryLoad{SQLDatabase: sqlDatabase}
   181  	for i := 0; i < w.connFlags.Concurrency; i++ {
   182  		op := jsonOp{
   183  			config:    w,
   184  			hists:     reg.GetHandle(),
   185  			db:        db,
   186  			readStmt:  readStmt,
   187  			writeStmt: writeStmt,
   188  		}
   189  		seq := &sequence{config: w, val: w.writeSeq}
   190  		if w.sequential {
   191  			op.g = newSequentialGenerator(seq)
   192  		} else {
   193  			op.g = newHashGenerator(seq)
   194  		}
   195  		ql.WorkerFns = append(ql.WorkerFns, op.run)
   196  	}
   197  	return ql, nil
   198  }
   199  
// jsonOp holds the state of one worker. Ops creates one jsonOp per unit of
// concurrency and registers its run method as the worker function.
type jsonOp struct {
	// config is the workload configuration shared by all workers.
	config    *jsonLoad
	// hists receives the latency of each read and write operation.
	hists     *histogram.Histograms
	// db is the connection pool the prepared statements were created on.
	db        *gosql.DB
	// readStmt and writeStmt are the batched statements prepared in Ops.
	readStmt  *gosql.Stmt
	writeStmt *gosql.Stmt
	// g produces the keys to read and write, and the worker's random source.
	g         keyGenerator
}
   208  
   209  func (o *jsonOp) run(ctx context.Context) error {
   210  	if o.g.rand().Intn(100) < o.config.readPercent {
   211  		args := make([]interface{}, o.config.batchSize)
   212  		for i := 0; i < o.config.batchSize; i++ {
   213  			args[i] = o.g.readKey()
   214  		}
   215  		start := timeutil.Now()
   216  		rows, err := o.readStmt.Query(args...)
   217  		if err != nil {
   218  			return err
   219  		}
   220  		for rows.Next() {
   221  		}
   222  		elapsed := timeutil.Since(start)
   223  		o.hists.Get(`read`).Record(elapsed)
   224  		return rows.Err()
   225  	}
   226  	argCount := 2
   227  	if o.config.computed {
   228  		argCount = 1
   229  	}
   230  	args := make([]interface{}, argCount*o.config.batchSize)
   231  	for i := 0; i < o.config.batchSize*argCount; i += argCount {
   232  		j := i
   233  		if !o.config.computed {
   234  			args[j] = o.g.writeKey()
   235  			j++
   236  		}
   237  		js, err := json.Random(o.config.complexity, o.g.rand())
   238  		if err != nil {
   239  			return err
   240  		}
   241  		if o.config.computed {
   242  			builder := json.NewObjectBuilder(2)
   243  			builder.Add("key", json.FromInt64(o.g.writeKey()))
   244  			builder.Add("data", js)
   245  			js = builder.Build()
   246  		}
   247  		args[j] = js.String()
   248  	}
   249  	start := timeutil.Now()
   250  	_, err := o.writeStmt.Exec(args...)
   251  	elapsed := timeutil.Since(start)
   252  	o.hists.Get(`write`).Record(elapsed)
   253  	return err
   254  }
   255  
// sequence is a counter of write operations from which key indexes are
// derived. val is only accessed through sync/atomic operations.
type sequence struct {
	// config supplies cycleLength, the modulus applied to val.
	config *jsonLoad
	// val is the number of the next write; it starts at the value given at
	// construction and is incremented by write.
	val    int64
}
   260  
   261  func (s *sequence) write() int64 {
   262  	return (atomic.AddInt64(&s.val, 1) - 1) % s.config.cycleLength
   263  }
   264  
   265  // read returns the last key index that has been written. Note that the returned
   266  // index might not actually have been written yet, so a read operation cannot
   267  // require that the key is present.
   268  func (s *sequence) read() int64 {
   269  	return atomic.LoadInt64(&s.val) % s.config.cycleLength
   270  }
   271  
// keyGenerator generates read and write keys. Read keys may not yet exist and
// write keys may already exist.
type keyGenerator interface {
	// writeKey returns the key for the next write.
	writeKey() int64
	// readKey returns a key to read.
	readKey() int64
	// rand returns the generator's random source.
	rand() *rand.Rand
}
   279  
// hashGenerator is a keyGenerator whose keys are SHA-1 based hashes of
// sequence indexes combined with the configured seed.
type hashGenerator struct {
	// seq supplies the indexes to hash and, via its config, the seed.
	seq    *sequence
	// random picks the index to hash for reads.
	random *rand.Rand
	// hasher and buf are scratch state reused by hash on every call.
	hasher hash.Hash
	buf    [sha1.Size]byte
}
   286  
   287  func newHashGenerator(seq *sequence) *hashGenerator {
   288  	return &hashGenerator{
   289  		seq:    seq,
   290  		random: rand.New(rand.NewSource(seq.config.seed)),
   291  		hasher: sha1.New(),
   292  	}
   293  }
   294  
   295  func (g *hashGenerator) hash(v int64) int64 {
   296  	binary.BigEndian.PutUint64(g.buf[:8], uint64(v))
   297  	binary.BigEndian.PutUint64(g.buf[8:16], uint64(g.seq.config.seed))
   298  	g.hasher.Reset()
   299  	_, _ = g.hasher.Write(g.buf[:16])
   300  	g.hasher.Sum(g.buf[:0])
   301  	return int64(binary.BigEndian.Uint64(g.buf[:8]))
   302  }
   303  
   304  func (g *hashGenerator) writeKey() int64 {
   305  	return g.hash(g.seq.write())
   306  }
   307  
   308  func (g *hashGenerator) readKey() int64 {
   309  	v := g.seq.read()
   310  	if v == 0 {
   311  		return 0
   312  	}
   313  	return g.hash(g.random.Int63n(v))
   314  }
   315  
   316  func (g *hashGenerator) rand() *rand.Rand {
   317  	return g.random
   318  }
   319  
// sequentialGenerator is a keyGenerator that uses sequence indexes directly as
// keys, without hashing.
type sequentialGenerator struct {
	// seq supplies the key indexes.
	seq    *sequence
	// random picks the index for reads.
	random *rand.Rand
}
   324  
   325  func newSequentialGenerator(seq *sequence) *sequentialGenerator {
   326  	return &sequentialGenerator{
   327  		seq:    seq,
   328  		random: rand.New(rand.NewSource(seq.config.seed)),
   329  	}
   330  }
   331  
   332  func (g *sequentialGenerator) writeKey() int64 {
   333  	return g.seq.write()
   334  }
   335  
   336  func (g *sequentialGenerator) readKey() int64 {
   337  	v := g.seq.read()
   338  	if v == 0 {
   339  		return 0
   340  	}
   341  	return g.random.Int63n(v)
   342  }
   343  
   344  func (g *sequentialGenerator) rand() *rand.Rand {
   345  	return g.random
   346  }