// github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/workload/kv/kv.go

// Copyright 2017 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package kv

import (
	"context"
	"crypto/sha1"
	"encoding/binary"
	"fmt"
	"hash"
	"math"
	"math/rand"
	"strconv"
	"strings"
	"sync/atomic"

	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
	"github.com/cockroachdb/cockroach/pkg/workload"
	"github.com/cockroachdb/cockroach/pkg/workload/histogram"
	"github.com/cockroachdb/errors"
	"github.com/spf13/pflag"
)

const (
	kvSchema = `(
		k BIGINT NOT NULL PRIMARY KEY,
		v BYTES NOT NULL
	)`
	kvSchemaWithIndex = `(
		k BIGINT NOT NULL PRIMARY KEY,
		v BYTES NOT NULL,
		INDEX (v)
	)`
	// TODO(ajwerner): Change this to use the "easier" hash sharded index syntax once that
	// is in.
	shardedKvSchema = `(
		k BIGINT NOT NULL,
		v BYTES NOT NULL,
		shard INT4 AS (mod(k, %d)) STORED CHECK (%s),
		PRIMARY KEY (shard, k)
	)`
	shardedKvSchemaWithIndex = `(
		k BIGINT NOT NULL,
		v BYTES NOT NULL,
		shard INT4 AS (mod(k, %d)) STORED CHECK (%s),
		PRIMARY KEY (shard, k),
		INDEX (v)
	)`
)

type kv struct {
	flags     workload.Flags
	connFlags *workload.ConnFlags

	batchSize                            int
	minBlockSizeBytes, maxBlockSizeBytes int
	cycleLength                          int64
	readPercent                          int
	spanPercent                          int
	seed                                 int64
	writeSeq                             string
	sequential                           bool
	zipfian                              bool
	splits                               int
	secondaryIndex                       bool
	shards                               int
	targetCompressionRatio               float64
}

func init() {
	workload.Register(kvMeta)
}

var kvMeta = workload.Meta{
	Name:        `kv`,
	Description: `KV reads and writes to keys spread randomly across the cluster.`,
	Details: `
	By default, keys are picked uniformly at random across the cluster.
	--concurrency workers alternate between doing selects and upserts (according
	to a --read-percent ratio). Each select/upsert reads/writes a batch of --batch
	rows. The write keys are randomly generated in a deterministic fashion (or
	sequentially if --sequential is specified). Reads select a random batch of ids
	out of the ones previously written.
	--write-seq can be used to incorporate data produced by a previous run into
	the current run.
	`,
	Version:      `1.0.0`,
	PublicFacing: true,
	New: func() workload.Generator {
		g := &kv{}
		g.flags.FlagSet = pflag.NewFlagSet(`kv`, pflag.ContinueOnError)
		g.flags.Meta = map[string]workload.FlagMeta{
			`batch`: {RuntimeOnly: true},
		}
		g.flags.IntVar(&g.batchSize, `batch`, 1,
			`Number of blocks to read/insert in a single SQL statement.`)
		g.flags.IntVar(&g.minBlockSizeBytes, `min-block-bytes`, 1,
			`Minimum amount of raw data written with each insertion.`)
		g.flags.IntVar(&g.maxBlockSizeBytes, `max-block-bytes`, 1,
			`Maximum amount of raw data written with each insertion.`)
		g.flags.Int64Var(&g.cycleLength, `cycle-length`, math.MaxInt64,
			`Number of keys repeatedly accessed by each writer through upserts.`)
		g.flags.IntVar(&g.readPercent, `read-percent`, 0,
			`Percent (0-100) of operations that are reads of existing keys.`)
		g.flags.IntVar(&g.spanPercent, `span-percent`, 0,
			`Percent (0-100) of operations that are spanning queries of all ranges.`)
		g.flags.Int64Var(&g.seed, `seed`, 1, `Key hash seed.`)
		g.flags.BoolVar(&g.zipfian, `zipfian`, false,
			`Pick keys in a zipfian distribution instead of randomly.`)
		g.flags.BoolVar(&g.sequential, `sequential`, false,
			`Pick keys sequentially instead of randomly.`)
		g.flags.StringVar(&g.writeSeq, `write-seq`, "",
			`Initial write sequence value. Can be used to use the data produced by a previous run. `+
				`It has to be of the form (R|S)<number>, where S implies that it was taken from a `+
				`previous --sequential run and R implies a previous random run.`)
		g.flags.IntVar(&g.splits, `splits`, 0,
			`Number of splits to perform before starting normal operations.`)
		g.flags.BoolVar(&g.secondaryIndex, `secondary-index`, false,
			`Add a secondary index to the schema.`)
		g.flags.IntVar(&g.shards, `num-shards`, 0,
			`Number of shards to create on the primary key.`)
		g.flags.Float64Var(&g.targetCompressionRatio, `target-compression-ratio`, 1.0,
			`Target compression ratio for data blocks. Must be >= 1.0.`)
		g.connFlags = workload.NewConnFlags(&g.flags)
		return g
	},
}
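
// For illustration, a typical invocation combining the flags defined above
// might look like the following (a sketch; the exact command depends on how
// the workload tool is built and run, and <pgurl> stands for a connection
// string):
//
//	cockroach workload run kv --concurrency=16 --read-percent=95 --batch=10 \
//		--min-block-bytes=256 --max-block-bytes=1024 '<pgurl>'
//
// This runs 16 workers issuing 95% batched reads and 5% batched upserts of
// random 256-1024 byte values.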

// Meta implements the Generator interface.
func (*kv) Meta() workload.Meta { return kvMeta }

// Flags implements the Flagser interface.
func (w *kv) Flags() workload.Flags { return w.flags }

// Hooks implements the Hookser interface.
func (w *kv) Hooks() workload.Hooks {
	return workload.Hooks{
		Validate: func() error {
			if w.maxBlockSizeBytes < w.minBlockSizeBytes {
				return errors.Errorf("Value of 'max-block-bytes' (%d) must be greater than or equal to value of 'min-block-bytes' (%d)",
					w.maxBlockSizeBytes, w.minBlockSizeBytes)
			}
			if w.sequential && w.splits > 0 {
				return errors.New("'sequential' and 'splits' cannot both be enabled")
			}
			if w.sequential && w.zipfian {
				return errors.New("'sequential' and 'zipfian' cannot both be enabled")
			}
			if w.readPercent+w.spanPercent > 100 {
				return errors.New("sum of 'read-percent' and 'span-percent' must not exceed 100")
			}
			if w.targetCompressionRatio < 1.0 || math.IsNaN(w.targetCompressionRatio) {
				return errors.New("'target-compression-ratio' must be a number >= 1.0")
			}
			return nil
		},
	}
}

// Tables implements the Generator interface.
func (w *kv) Tables() []workload.Table {
	table := workload.Table{
		Name: `kv`,
		// TODO(dan): Support initializing kv with data.
		Splits: workload.Tuples(
			w.splits,
			func(splitIdx int) []interface{} {
				stride := (float64(w.cycleLength) - float64(math.MinInt64)) / float64(w.splits+1)
				splitPoint := int(math.MinInt64 + float64(splitIdx+1)*stride)
				return []interface{}{splitPoint}
			},
		),
	}
	if w.shards > 0 {
		schema := shardedKvSchema
		if w.secondaryIndex {
			schema = shardedKvSchemaWithIndex
		}
		checkConstraint := strings.Builder{}
		checkConstraint.WriteString(`shard IN (`)
		for i := 0; i < w.shards; i++ {
			if i != 0 {
				checkConstraint.WriteString(",")
			}
			fmt.Fprintf(&checkConstraint, "%d", i)
		}
		checkConstraint.WriteString(")")
		table.Schema = fmt.Sprintf(schema, w.shards, checkConstraint.String())
	} else {
		if w.secondaryIndex {
			table.Schema = kvSchemaWithIndex
		} else {
			table.Schema = kvSchema
		}
	}
	return []workload.Table{table}
}
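
// For illustration (derived from Tables above), --num-shards=3 without
// --secondary-index yields the following schema for the kv table:
//
//	(
//		k BIGINT NOT NULL,
//		v BYTES NOT NULL,
//		shard INT4 AS (mod(k, 3)) STORED CHECK (shard IN (0,1,2)),
//		PRIMARY KEY (shard, k)
//	)
//
// The enumerated CHECK constraint is presumably what allows the optimizer to
// constrain scans over the computed shard column; see the related TODO in
// Ops below.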

// Ops implements the Opser interface.
func (w *kv) Ops(urls []string, reg *histogram.Registry) (workload.QueryLoad, error) {
	writeSeq := 0
	if w.writeSeq != "" {
		first := w.writeSeq[0]
		if len(w.writeSeq) < 2 || (first != 'R' && first != 'S') {
			return workload.QueryLoad{}, fmt.Errorf("--write-seq has to be of the form '(R|S)<num>'")
		}
		rest := w.writeSeq[1:]
		var err error
		writeSeq, err = strconv.Atoi(rest)
		if err != nil {
			return workload.QueryLoad{}, fmt.Errorf("--write-seq has to be of the form '(R|S)<num>'")
		}
		if first == 'R' && w.sequential {
			return workload.QueryLoad{}, fmt.Errorf("--sequential incompatible with a Random --write-seq")
		}
		if first == 'S' && !w.sequential {
			return workload.QueryLoad{}, fmt.Errorf(
				"--sequential=false incompatible with a Sequential --write-seq")
		}
	}

	ctx := context.Background()
	sqlDatabase, err := workload.SanitizeUrls(w, w.connFlags.DBOverride, urls)
	if err != nil {
		return workload.QueryLoad{}, err
	}
	cfg := workload.MultiConnPoolCfg{
		MaxTotalConnections: w.connFlags.Concurrency + 1,
	}
	mcp, err := workload.NewMultiConnPool(cfg, urls...)
	if err != nil {
		return workload.QueryLoad{}, err
	}

	// Read statement
	var buf strings.Builder
	if w.shards == 0 {
		buf.WriteString(`SELECT k, v FROM kv WHERE k IN (`)
		for i := 0; i < w.batchSize; i++ {
			if i > 0 {
				buf.WriteString(", ")
			}
			fmt.Fprintf(&buf, `$%d`, i+1)
		}
	} else {
		// TODO(ajwerner): We're currently manually plumbing down the computed shard column
		// since the optimizer doesn't yet support deriving values of computed columns
		// when all the columns they reference are available. See
		// https://github.com/cockroachdb/cockroach/issues/39340#issuecomment-535338071
		// for details. Remove this once that functionality is added.
		buf.WriteString(`SELECT k, v FROM kv WHERE (shard, k) in (`)
		for i := 0; i < w.batchSize; i++ {
			if i > 0 {
				buf.WriteString(", ")
			}
			fmt.Fprintf(&buf, `(mod($%d, %d), $%d)`, i+1, w.shards, i+1)
		}
	}
	buf.WriteString(`)`)
	readStmtStr := buf.String()

	// Write statement
	buf.Reset()
	buf.WriteString(`UPSERT INTO kv (k, v) VALUES`)
	for i := 0; i < w.batchSize; i++ {
		j := i * 2
		if i > 0 {
			buf.WriteString(", ")
		}
		fmt.Fprintf(&buf, ` ($%d, $%d)`, j+1, j+2)
	}
	writeStmtStr := buf.String()

	// Span statement
	spanStmtStr := "SELECT count(v) FROM kv"
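
	// For illustration (derived from the statement builders above), with
	// --batch=2 and --num-shards=0 the generated statements are:
	//
	//	read:  SELECT k, v FROM kv WHERE k IN ($1, $2)
	//	write: UPSERT INTO kv (k, v) VALUES ($1, $2), ($3, $4)
	//
	// and with --num-shards=3 the read statement instead becomes:
	//
	//	SELECT k, v FROM kv WHERE (shard, k) in ((mod($1, 3), $1), (mod($2, 3), $2))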

	ql := workload.QueryLoad{SQLDatabase: sqlDatabase}
	seq := &sequence{config: w, val: int64(writeSeq)}
	numEmptyResults := new(int64)
	for i := 0; i < w.connFlags.Concurrency; i++ {
		op := &kvOp{
			config:          w,
			hists:           reg.GetHandle(),
			numEmptyResults: numEmptyResults,
		}
		op.readStmt = op.sr.Define(readStmtStr)
		op.writeStmt = op.sr.Define(writeStmtStr)
		op.spanStmt = op.sr.Define(spanStmtStr)
		if err := op.sr.Init(ctx, "kv", mcp, w.connFlags); err != nil {
			return workload.QueryLoad{}, err
		}
		if w.sequential {
			op.g = newSequentialGenerator(seq)
		} else if w.zipfian {
			op.g = newZipfianGenerator(seq)
		} else {
			op.g = newHashGenerator(seq)
		}
		ql.WorkerFns = append(ql.WorkerFns, op.run)
		ql.Close = op.close
	}
	return ql, nil
}

type kvOp struct {
	config          *kv
	hists           *histogram.Histograms
	sr              workload.SQLRunner
	readStmt        workload.StmtHandle
	writeStmt       workload.StmtHandle
	spanStmt        workload.StmtHandle
	g               keyGenerator
	numEmptyResults *int64 // accessed atomically
}

func (o *kvOp) run(ctx context.Context) error {
	statementProbability := o.g.rand().Intn(100) // Determines what statement is executed.
	if statementProbability < o.config.readPercent {
		args := make([]interface{}, o.config.batchSize)
		for i := 0; i < o.config.batchSize; i++ {
			args[i] = o.g.readKey()
		}
		start := timeutil.Now()
		rows, err := o.readStmt.Query(ctx, args...)
		if err != nil {
			return err
		}
		empty := true
		for rows.Next() {
			empty = false
		}
		if empty {
			atomic.AddInt64(o.numEmptyResults, 1)
		}
		elapsed := timeutil.Since(start)
		o.hists.Get(`read`).Record(elapsed)
		return rows.Err()
	}
	// Since we know the statement is not a read, we recalibrate
	// statementProbability to only consider the other statements.
	statementProbability -= o.config.readPercent
	if statementProbability < o.config.spanPercent {
		start := timeutil.Now()
		_, err := o.spanStmt.Exec(ctx)
		elapsed := timeutil.Since(start)
		o.hists.Get(`span`).Record(elapsed)
		return err
	}
	const argCount = 2
	args := make([]interface{}, argCount*o.config.batchSize)
	for i := 0; i < o.config.batchSize; i++ {
		j := i * argCount
		args[j+0] = o.g.writeKey()
		args[j+1] = randomBlock(o.config, o.g.rand())
	}
	start := timeutil.Now()
	_, err := o.writeStmt.Exec(ctx, args...)
	elapsed := timeutil.Since(start)
	o.hists.Get(`write`).Record(elapsed)
	return err
}
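
// For illustration of the probability split in run above: with
// --read-percent=80 and --span-percent=10, the uniform draw in [0, 100)
// lands in [0, 80) for a read, [80, 90) for a span query, and [90, 100) for
// a write, i.e. an 80/10/10 read/span/write mix.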

func (o *kvOp) close(context.Context) {
	if empty := atomic.LoadInt64(o.numEmptyResults); empty != 0 {
		fmt.Printf("Number of reads that didn't return any results: %d.\n", empty)
	}
	seq := o.g.sequence()
	var ch string
	if o.config.sequential {
		ch = "S"
	} else {
		ch = "R"
	}
	fmt.Printf("Highest sequence written: %d. Can be passed as --write-seq=%s%d to the next run.\n",
		seq, ch, seq)
}

type sequence struct {
	config *kv
	val    int64
}

func (s *sequence) write() int64 {
	return (atomic.AddInt64(&s.val, 1) - 1) % s.config.cycleLength
}

// read returns the last key index that has been written. Note that the returned
// index might not actually have been written yet, so a read operation cannot
// require that the key is present.
func (s *sequence) read() int64 {
	return atomic.LoadInt64(&s.val) % s.config.cycleLength
}

// keyGenerator generates read and write keys. Read keys may not yet exist and
// write keys may already exist.
type keyGenerator interface {
	writeKey() int64
	readKey() int64
	rand() *rand.Rand
	sequence() int64
}

type hashGenerator struct {
	seq    *sequence
	random *rand.Rand
	hasher hash.Hash
	buf    [sha1.Size]byte
}

func newHashGenerator(seq *sequence) *hashGenerator {
	return &hashGenerator{
		seq:    seq,
		random: rand.New(rand.NewSource(timeutil.Now().UnixNano())),
		hasher: sha1.New(),
	}
}

func (g *hashGenerator) hash(v int64) int64 {
	binary.BigEndian.PutUint64(g.buf[:8], uint64(v))
	binary.BigEndian.PutUint64(g.buf[8:16], uint64(g.seq.config.seed))
	g.hasher.Reset()
	_, _ = g.hasher.Write(g.buf[:16])
	g.hasher.Sum(g.buf[:0])
	return int64(binary.BigEndian.Uint64(g.buf[:8]))
}

func (g *hashGenerator) writeKey() int64 {
	return g.hash(g.seq.write())
}

func (g *hashGenerator) readKey() int64 {
	v := g.seq.read()
	if v == 0 {
		return 0
	}
	return g.hash(g.random.Int63n(v))
}

func (g *hashGenerator) rand() *rand.Rand {
	return g.random
}

func (g *hashGenerator) sequence() int64 {
	return atomic.LoadInt64(&g.seq.val)
}
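
// Note on determinism (a consequence of hash above): the key for a given
// sequence index depends only on that index and --seed, so two runs with the
// same seed generate the same keys. This is what makes --write-seq useful: a
// later run can read back keys written by an earlier run by resuming from
// the sequence value reported by close.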

type sequentialGenerator struct {
	seq    *sequence
	random *rand.Rand
}

func newSequentialGenerator(seq *sequence) *sequentialGenerator {
	return &sequentialGenerator{
		seq:    seq,
		random: rand.New(rand.NewSource(timeutil.Now().UnixNano())),
	}
}

func (g *sequentialGenerator) writeKey() int64 {
	return g.seq.write()
}

func (g *sequentialGenerator) readKey() int64 {
	v := g.seq.read()
	if v == 0 {
		return 0
	}
	return g.random.Int63n(v)
}

func (g *sequentialGenerator) rand() *rand.Rand {
	return g.random
}

func (g *sequentialGenerator) sequence() int64 {
	return atomic.LoadInt64(&g.seq.val)
}

type zipfGenerator struct {
	seq    *sequence
	random *rand.Rand
	zipf   *zipf
}

// newZipfianGenerator creates a new zipfian generator.
func newZipfianGenerator(seq *sequence) *zipfGenerator {
	random := rand.New(rand.NewSource(timeutil.Now().UnixNano()))
	return &zipfGenerator{
		seq:    seq,
		random: random,
		zipf:   newZipf(1.1, 1, uint64(math.MaxInt64)),
	}
}

// zipfian returns a random number that follows the zipfian distribution,
// seeded deterministically by seed.
func (g *zipfGenerator) zipfian(seed int64) int64 {
	randomWithSeed := rand.New(rand.NewSource(seed))
	return int64(g.zipf.Uint64(randomWithSeed))
}

// writeKey returns a zipfian-distributed write key.
func (g *zipfGenerator) writeKey() int64 {
	return g.zipfian(g.seq.write())
}

// readKey returns a zipfian-distributed read key.
func (g *zipfGenerator) readKey() int64 {
	v := g.seq.read()
	if v == 0 {
		return 0
	}
	return g.zipfian(g.random.Int63n(v))
}

func (g *zipfGenerator) rand() *rand.Rand {
	return g.random
}

func (g *zipfGenerator) sequence() int64 {
	return atomic.LoadInt64(&g.seq.val)
}

func randomBlock(config *kv, r *rand.Rand) []byte {
	blockSize := r.Intn(config.maxBlockSizeBytes-config.minBlockSizeBytes+1) + config.minBlockSizeBytes
	blockData := make([]byte, blockSize)
	uniqueSize := int(float64(blockSize) / config.targetCompressionRatio)
	if uniqueSize < 1 {
		uniqueSize = 1
	}
	for i := range blockData {
		if i >= uniqueSize {
			blockData[i] = blockData[i-uniqueSize]
		} else {
			blockData[i] = byte(r.Int() & 0xff)
		}
	}
	return blockData
}
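
// For illustration of randomBlock: with --min-block-bytes=16,
// --max-block-bytes=16, and --target-compression-ratio=4.0, blockSize is 16
// and uniqueSize is int(16/4.0) = 4, so the first 4 bytes are random and the
// remaining 12 repeat them with period 4 (e.g. a1 b2 c3 d4 a1 b2 c3 d4 ...),
// yielding a block that compresses by roughly the target ratio of 4x.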