github.com/petermattis/pebble@v0.0.0-20190905164901-ab51a2166067/cmd/pebble/ycsb.go (about) 1 // Copyright 2019 The LevelDB-Go and Pebble Authors. All rights reserved. Use 2 // of this source code is governed by a BSD-style license that can be found in 3 // the LICENSE file. 4 5 package main 6 7 import ( 8 "context" 9 "fmt" 10 "log" 11 "strconv" 12 "strings" 13 "sync" 14 "sync/atomic" 15 "time" 16 17 "github.com/petermattis/pebble" 18 "github.com/petermattis/pebble/internal/ackseq" 19 "github.com/petermattis/pebble/internal/randvar" 20 "github.com/petermattis/pebble/internal/rate" 21 "github.com/spf13/cobra" 22 "golang.org/x/exp/rand" 23 ) 24 25 const ( 26 ycsbInsert = iota 27 ycsbRead 28 ycsbScan 29 ycsbReverseScan 30 ycsbUpdate 31 ycsbNumOps 32 ) 33 34 var ycsbConfig struct { 35 batch string 36 keys string 37 initialKeys int 38 prepopulatedKeys int 39 numOps uint64 40 scans string 41 values string 42 workload string 43 } 44 45 var ycsbCmd = &cobra.Command{ 46 Use: "ycsb <dir>", 47 Short: "run customizable YCSB benchmark", 48 Long: ` 49 Run a customizable YCSB workload. The workload is specified by the --workload 50 flag which can take either one of the standard workload mixes (A-F), or 51 customizable workload fixes specified as a command separated list of op=weight 52 pairs. For example, --workload=read=50,update=50 performs a workload composed 53 of 50% reads and 50% updates. This is identical to the standard workload A. 54 55 The --batch, --scans, and --values flags take the specification for a random 56 variable: [<type>:]<min>[-<max>]. The <type> parameter must be one of "uniform" 57 or "zipf". If <type> is omitted, a uniform distribution is used. If <max> is 58 omitted it is set to the same value as <min>. The specification "1000" results 59 in a constant 1000. The specification "10-100" results in a uniformly random 60 variable in the range [10,100). The specification "zipf(10,100)" results in a 61 zipf distribution with a minimum value of 10 and a maximum value of 100. 62 63 The --batch flag controls the size of batches used for insert and update 64 operations. The --scans flag controls the number of iterations performed by a 65 scan operation. Read operations always read a single key. 66 67 The --values flag provides for an optional "/<target-compression-ratio>" 68 suffix. The default target compression ratio is 1.0 (i.e. incompressible random 69 data). A value of 2 will cause random data to be generated that should compress 70 to 50% of its uncompressed size. 71 72 Standard workloads: 73 74 A: 50% reads / 50% updates 75 B: 95% reads / 5% updates 76 C: 100% reads 77 D: 95% reads / 5% inserts 78 E: 95% scans / 5% inserts 79 F: 100% inserts 80 `, 81 Args: cobra.ExactArgs(1), 82 RunE: runYcsb, 83 } 84 85 func init() { 86 ycsbCmd.Flags().StringVar( 87 &ycsbConfig.batch, "batch", "1", 88 "batch size distribution [{zipf,uniform}:]min[-max]") 89 ycsbCmd.Flags().StringVar( 90 &ycsbConfig.keys, "keys", "zipf", "latest, uniform, or zipf") 91 ycsbCmd.Flags().IntVar( 92 &ycsbConfig.initialKeys, "initial-keys", 10000, 93 "initial number of keys to insert before beginning workload") 94 ycsbCmd.Flags().IntVar( 95 &ycsbConfig.prepopulatedKeys, "prepopulated-keys", 0, 96 "number of keys that were previously inserted into the database") 97 ycsbCmd.Flags().Uint64VarP( 98 &ycsbConfig.numOps, "num-ops", "n", 0, 99 "maximum number of operations (0 means unlimited)") 100 ycsbCmd.Flags().StringVar( 101 &ycsbConfig.scans, "scans", "zipf:1-1000", 102 "scan length distribution [{zipf,uniform}:]min[-max]") 103 ycsbCmd.Flags().StringVar( 104 &ycsbConfig.workload, "workload", "B", 105 "workload type (A-F) or spec (read=X,update=Y,...)") 106 ycsbCmd.Flags().StringVar( 107 &ycsbConfig.values, "values", "1000", 108 "value size distribution [{zipf,uniform}:]min[-max][/<target-compression>]") 109 } 110 111 type ycsbWeights []float64 112 113 func (w ycsbWeights) get(i int) float64 { 114 if i >= len(w) { 115 return 0 116 } 117 return w[i] 118 } 119 120 var ycsbWorkloads = map[string]ycsbWeights{ 121 "A": ycsbWeights{ 122 ycsbRead: 0.5, 123 ycsbUpdate: 0.5, 124 }, 125 "B": ycsbWeights{ 126 ycsbRead: 0.95, 127 ycsbUpdate: 0.05, 128 }, 129 "C": ycsbWeights{ 130 ycsbRead: 1.0, 131 }, 132 "D": ycsbWeights{ 133 ycsbInsert: 0.05, 134 ycsbRead: 0.95, 135 // TODO(peter): default to skewed-latest distribution. 136 }, 137 "E": ycsbWeights{ 138 ycsbInsert: 0.05, 139 ycsbScan: 0.95, 140 }, 141 "F": ycsbWeights{ 142 ycsbInsert: 1.0, 143 // TODO(peter): the real workload is read-modify-write. 144 }, 145 } 146 147 func ycsbParseWorkload(w string) (ycsbWeights, error) { 148 if weights := ycsbWorkloads[w]; weights != nil { 149 return weights, nil 150 } 151 iWeights := make([]int, 4) 152 for _, p := range strings.Split(w, ",") { 153 parts := strings.Split(p, "=") 154 if len(parts) != 2 { 155 return nil, fmt.Errorf("malformed weights: %s", w) 156 } 157 weight, err := strconv.Atoi(parts[1]) 158 if err != nil { 159 return nil, err 160 } 161 switch parts[0] { 162 case "insert": 163 iWeights[ycsbInsert] = weight 164 case "read": 165 iWeights[ycsbRead] = weight 166 case "scan": 167 iWeights[ycsbScan] = weight 168 case "rscan": 169 iWeights[ycsbReverseScan] = weight 170 case "update": 171 iWeights[ycsbUpdate] = weight 172 } 173 } 174 175 var sum int 176 for _, w := range iWeights { 177 sum += w 178 } 179 if sum == 0 { 180 return nil, fmt.Errorf("zero weight specified: %s", w) 181 } 182 183 weights := make(ycsbWeights, 4) 184 for i := range weights { 185 weights[i] = float64(iWeights[i]) / float64(sum) 186 } 187 return weights, nil 188 } 189 190 func ycsbParseKeyDist(d string) (randvar.Dynamic, error) { 191 totalKeys := uint64(ycsbConfig.initialKeys + ycsbConfig.prepopulatedKeys) 192 switch strings.ToLower(d) { 193 case "latest": 194 return randvar.NewDefaultSkewedLatest(nil) 195 case "uniform": 196 return randvar.NewUniform(nil, 1, totalKeys), nil 197 case "zipf": 198 return randvar.NewZipf(nil, 1, totalKeys, 0.99) 199 default: 200 return nil, fmt.Errorf("unknown distribution: %s", d) 201 } 202 } 203 204 func runYcsb(cmd *cobra.Command, args []string) error { 205 if wipe && ycsbConfig.prepopulatedKeys > 0 { 206 return fmt.Errorf("--wipe and --prepopulated-keys both specified which is nonsensical") 207 } 208 209 weights, err := ycsbParseWorkload(ycsbConfig.workload) 210 if err != nil { 211 return err 212 } 213 214 keyDist, err := ycsbParseKeyDist(ycsbConfig.keys) 215 if err != nil { 216 return err 217 } 218 219 batchDist, err := parseRandVarSpec(ycsbConfig.batch) 220 if err != nil { 221 return err 222 } 223 224 scanDist, err := parseRandVarSpec(ycsbConfig.scans) 225 if err != nil { 226 return err 227 } 228 229 valueDist, targetCompression, err := parseValuesSpec(ycsbConfig.values) 230 if err != nil { 231 return err 232 } 233 234 y := newYcsb(weights, keyDist, batchDist, scanDist, valueDist, targetCompression) 235 runTest(args[0], test{ 236 init: y.init, 237 tick: y.tick, 238 done: y.done, 239 }) 240 return nil 241 } 242 243 type ycsb struct { 244 writeOpts *pebble.WriteOptions 245 reg *histogramRegistry 246 ops *randvar.Weighted 247 keyDist randvar.Dynamic 248 batchDist randvar.Static 249 scanDist randvar.Static 250 valueDist randvar.Static 251 targetCompression float64 252 keyNum *ackseq.S 253 numOps uint64 254 numKeys [ycsbNumOps]uint64 255 prevNumKeys [ycsbNumOps]uint64 256 opsMap map[string]int 257 latency [ycsbNumOps]*namedHistogram 258 limiter *rate.Limiter 259 } 260 261 func newYcsb( 262 weights ycsbWeights, 263 keyDist randvar.Dynamic, 264 batchDist, scanDist, valueDist randvar.Static, 265 targetCompression float64, 266 ) *ycsb { 267 y := &ycsb{ 268 reg: newHistogramRegistry(), 269 ops: randvar.NewWeighted(nil, weights...), 270 keyDist: keyDist, 271 batchDist: batchDist, 272 scanDist: scanDist, 273 valueDist: valueDist, 274 targetCompression: targetCompression, 275 } 276 y.writeOpts = pebble.Sync 277 if disableWAL { 278 y.writeOpts = pebble.NoSync 279 } 280 281 y.opsMap = make(map[string]int) 282 maybeRegister := func(op int, name string) *namedHistogram { 283 w := weights.get(op) 284 if w == 0 { 285 return nil 286 } 287 wstr := fmt.Sprint(int(100 * w)) 288 fill := strings.Repeat("_", 3-len(wstr)) 289 if fill == "" { 290 fill = "_" 291 } 292 fullName := fmt.Sprintf("%s%s%s", name, fill, wstr) 293 y.opsMap[fullName] = op 294 return y.reg.Register(fullName) 295 } 296 297 y.latency[ycsbInsert] = maybeRegister(ycsbInsert, "insert") 298 y.latency[ycsbRead] = maybeRegister(ycsbRead, "read") 299 y.latency[ycsbScan] = maybeRegister(ycsbScan, "scan") 300 y.latency[ycsbReverseScan] = maybeRegister(ycsbReverseScan, "rscan") 301 y.latency[ycsbUpdate] = maybeRegister(ycsbUpdate, "update") 302 return y 303 } 304 305 func (y *ycsb) init(db DB, wg *sync.WaitGroup) { 306 if ycsbConfig.initialKeys > 0 { 307 rng := randvar.NewRand() 308 309 b := db.NewBatch() 310 for i := 1; i <= ycsbConfig.initialKeys; i++ { 311 if len(b.Repr()) >= 1<<20 { 312 if err := b.Commit(y.writeOpts); err != nil { 313 log.Fatal(err) 314 } 315 b = db.NewBatch() 316 } 317 _ = b.Set(y.makeKey(uint64(i+ycsbConfig.prepopulatedKeys)), y.randBytes(rng), nil) 318 } 319 if err := b.Commit(y.writeOpts); err != nil { 320 log.Fatal(err) 321 } 322 fmt.Printf("inserted keys [%d-%d)\n", 323 1+ycsbConfig.prepopulatedKeys, 324 1+ycsbConfig.prepopulatedKeys+ycsbConfig.initialKeys) 325 } 326 y.keyNum = ackseq.New(uint64(ycsbConfig.initialKeys + ycsbConfig.prepopulatedKeys)) 327 328 var err error 329 y.limiter, err = newFluctuatingRateLimiter(maxOpsPerSec) 330 if err != nil { 331 fmt.Println(err) 332 return 333 } 334 wg.Add(concurrency) 335 for i := 0; i < concurrency; i++ { 336 go y.run(db, wg) 337 } 338 } 339 340 func (y *ycsb) run(db DB, wg *sync.WaitGroup) { 341 defer wg.Done() 342 343 rng := randvar.NewRand() 344 for { 345 y.limiter.Wait(context.Background()) 346 start := time.Now() 347 348 op := y.ops.Int() 349 switch op { 350 case ycsbInsert: 351 y.insert(db, rng) 352 case ycsbRead: 353 y.read(db, rng) 354 case ycsbScan: 355 y.scan(db, rng, false /* reverse */) 356 case ycsbReverseScan: 357 y.scan(db, rng, true /* reverse */) 358 case ycsbUpdate: 359 y.update(db, rng) 360 default: 361 panic("not reached") 362 } 363 364 y.latency[op].Record(time.Since(start)) 365 if ycsbConfig.numOps > 0 && 366 atomic.AddUint64(&y.numOps, 1) >= ycsbConfig.numOps { 367 break 368 } 369 } 370 } 371 372 func (y *ycsb) hashKey(key uint64) uint64 { 373 // Inlined version of fnv.New64 + Write. 374 const offset64 = 14695981039346656037 375 const prime64 = 1099511628211 376 377 h := uint64(offset64) 378 for i := 0; i < 8; i++ { 379 h *= prime64 380 h ^= uint64(key & 0xff) 381 key >>= 8 382 } 383 return h 384 } 385 386 func (y *ycsb) makeKey(keyNum uint64) []byte { 387 key := make([]byte, 4, 24+10) 388 copy(key, "user") 389 key = strconv.AppendUint(key, y.hashKey(keyNum), 10) 390 // Use the MVCC encoding for keys. This appends a timestamp with 391 // walltime=1. That knowledge is utilized by rocksDB.Scan. 392 key = append(key, '\x00', '\x00', '\x00', '\x00', '\x00', 393 '\x00', '\x00', '\x00', '\x01', '\x09') 394 return key 395 } 396 397 func (y *ycsb) nextReadKey() []byte { 398 // NB: the range of values returned by keyDist is tied to the range returned 399 // by keyNum.Base. See how these are both incremented by ycsb.insert(). 400 keyNum := y.keyDist.Uint64() 401 return y.makeKey(keyNum) 402 } 403 404 func (y *ycsb) randBytes(rng *rand.Rand) []byte { 405 length := int(y.valueDist.Uint64()) 406 return randomBlock(rng, length, y.targetCompression) 407 } 408 409 func (y *ycsb) insert(db DB, rng *rand.Rand) { 410 count := y.batchDist.Uint64() 411 keyNums := make([]uint64, count) 412 413 b := db.NewBatch() 414 for i := range keyNums { 415 keyNums[i] = y.keyNum.Next() 416 _ = b.Set(y.makeKey(keyNums[i]), y.randBytes(rng), nil) 417 } 418 if err := b.Commit(y.writeOpts); err != nil { 419 log.Fatal(err) 420 } 421 atomic.AddUint64(&y.numKeys[ycsbInsert], uint64(len(keyNums))) 422 423 for i := range keyNums { 424 delta, err := y.keyNum.Ack(keyNums[i]) 425 if err != nil { 426 log.Fatal(err) 427 } 428 if delta > 0 { 429 y.keyDist.IncMax(delta) 430 } 431 } 432 } 433 434 func (y *ycsb) read(db DB, rng *rand.Rand) { 435 key := y.nextReadKey() 436 iter := db.NewIter(nil) 437 iter.SeekGE(key) 438 if err := iter.Close(); err != nil { 439 log.Fatal(err) 440 } 441 atomic.AddUint64(&y.numKeys[ycsbRead], 1) 442 } 443 444 func (y *ycsb) scan(db DB, rng *rand.Rand, reverse bool) { 445 count := y.scanDist.Uint64() 446 key := y.nextReadKey() 447 if err := db.Scan(key, int64(count), reverse); err != nil { 448 log.Fatal(err) 449 } 450 atomic.AddUint64(&y.numKeys[ycsbScan], count) 451 } 452 453 func (y *ycsb) update(db DB, rng *rand.Rand) { 454 count := int(y.batchDist.Uint64()) 455 b := db.NewBatch() 456 for i := 0; i < count; i++ { 457 _ = b.Set(y.nextReadKey(), y.randBytes(rng), nil) 458 } 459 if err := b.Commit(y.writeOpts); err != nil { 460 log.Fatal(err) 461 } 462 atomic.AddUint64(&y.numKeys[ycsbUpdate], uint64(count)) 463 } 464 465 func (y *ycsb) tick(elapsed time.Duration, i int) { 466 if i%20 == 0 { 467 fmt.Println("____optype__elapsed____ops/sec___keys/sec__p50(ms)__p95(ms)__p99(ms)_pMax(ms)") 468 } 469 y.reg.Tick(func(tick histogramTick) { 470 op := y.opsMap[tick.Name] 471 numKeys := atomic.LoadUint64(&y.numKeys[op]) 472 h := tick.Hist 473 474 fmt.Printf("%10s %8s %10.1f %10.1f %8.1f %8.1f %8.1f %8.1f\n", 475 tick.Name, 476 time.Duration(elapsed.Seconds()+0.5)*time.Second, 477 float64(h.TotalCount())/tick.Elapsed.Seconds(), 478 float64(numKeys-y.prevNumKeys[op])/tick.Elapsed.Seconds(), 479 time.Duration(h.ValueAtQuantile(50)).Seconds()*1000, 480 time.Duration(h.ValueAtQuantile(95)).Seconds()*1000, 481 time.Duration(h.ValueAtQuantile(99)).Seconds()*1000, 482 time.Duration(h.ValueAtQuantile(100)).Seconds()*1000, 483 ) 484 485 y.prevNumKeys[op] = numKeys 486 }) 487 } 488 489 func (y *ycsb) done(elapsed time.Duration) { 490 fmt.Println("\n____optype__elapsed_____ops(total)___ops/sec(cum)__keys/sec(cum)__avg(ms)__p50(ms)__p95(ms)__p99(ms)_pMax(ms)") 491 y.reg.Tick(func(tick histogramTick) { 492 op := y.opsMap[tick.Name] 493 numKeys := atomic.LoadUint64(&y.numKeys[op]) 494 h := tick.Cumulative 495 496 fmt.Printf("%10s %7.1fs %14d %14.1f %14.1f %8.1f %8.1f %8.1f %8.1f %8.1f\n", 497 tick.Name, elapsed.Seconds(), h.TotalCount(), 498 float64(h.TotalCount())/elapsed.Seconds(), 499 float64(numKeys)/elapsed.Seconds(), 500 time.Duration(h.Mean()).Seconds()*1000, 501 time.Duration(h.ValueAtQuantile(50)).Seconds()*1000, 502 time.Duration(h.ValueAtQuantile(95)).Seconds()*1000, 503 time.Duration(h.ValueAtQuantile(99)).Seconds()*1000, 504 time.Duration(h.ValueAtQuantile(100)).Seconds()*1000) 505 }) 506 fmt.Println() 507 }