github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/workload/jsonload/json.go

// Copyright 2018 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package jsonload

import (
	"bytes"
	"context"
	"crypto/sha1"
	gosql "database/sql"
	"encoding/binary"
	"fmt"
	"hash"
	"math"
	"math/rand"
	"strings"
	"sync/atomic"

	"github.com/cockroachdb/cockroach/pkg/util/json"
	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
	"github.com/cockroachdb/cockroach/pkg/workload"
	"github.com/cockroachdb/cockroach/pkg/workload/histogram"
	"github.com/cockroachdb/errors"
	"github.com/spf13/pflag"
)

const (
	jsonSchema                  = `(k BIGINT NOT NULL PRIMARY KEY, v JSONB NOT NULL)`
	jsonSchemaWithInvertedIndex = `(k BIGINT NOT NULL PRIMARY KEY, v JSONB NOT NULL, INVERTED INDEX (v))`
	jsonSchemaWithComputed      = `(k BIGINT AS (v->>'key')::BIGINT STORED PRIMARY KEY, v JSONB NOT NULL)`
)

type jsonLoad struct {
	flags     workload.Flags
	connFlags *workload.ConnFlags

	batchSize      int
	cycleLength    int64
	readPercent    int
	writeSeq, seed int64
	sequential     bool
	splits         int
	complexity     int
	inverted       bool
	computed       bool
}

func init() {
	workload.Register(jsonLoadMeta)
}

var jsonLoadMeta = workload.Meta{
	Name: `json`,
	Description: `JSON reads and writes to keys spread (by default, uniformly` +
		` at random) across the cluster`,
	Version: `1.0.0`,
	New: func() workload.Generator {
		g := &jsonLoad{}
		g.flags.FlagSet = pflag.NewFlagSet(`json`, pflag.ContinueOnError)
		g.flags.Meta = map[string]workload.FlagMeta{
			`batch`: {RuntimeOnly: true},
		}
		g.flags.IntVar(&g.batchSize, `batch`, 1, `Number of blocks to insert in a single SQL statement`)
		g.flags.Int64Var(&g.cycleLength, `cycle-length`, math.MaxInt64, `Number of keys repeatedly accessed by each writer`)
		g.flags.IntVar(&g.readPercent, `read-percent`, 0, `Percent (0-100) of operations that are reads of existing keys`)
		g.flags.Int64Var(&g.writeSeq, `write-seq`, 0, `Initial write sequence value.`)
		g.flags.Int64Var(&g.seed, `seed`, 1, `Key hash seed.`)
		g.flags.BoolVar(&g.sequential, `sequential`, false, `Pick keys sequentially instead of randomly`)
		g.flags.IntVar(&g.splits, `splits`, 0, `Number of splits to perform before starting normal operations`)
		g.flags.IntVar(&g.complexity, `complexity`, 20, `Complexity of generated JSON data`)
		g.flags.BoolVar(&g.inverted, `inverted`, false, `Whether to include an inverted index`)
		g.flags.BoolVar(&g.computed, `computed`, false, `Whether to use a computed primary key`)
		g.connFlags = workload.NewConnFlags(&g.flags)
		return g
	},
}

// Meta implements the Generator interface.
func (*jsonLoad) Meta() workload.Meta { return jsonLoadMeta }

// Flags implements the Flagser interface.
func (w *jsonLoad) Flags() workload.Flags { return w.flags }
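// For illustration only: given the flag registrations above, this generator
// might be driven through the workload CLI roughly as follows. The exact
// invocation shape and the connection string are assumptions, not taken from
// this file; --batch is runtime-only (per the FlagMeta above), while --splits
// and --inverted only matter at initialization.
//
//	cockroach workload init json --splits=100 'postgres://root@localhost:26257?sslmode=disable'
//	cockroach workload run json --batch=10 --read-percent=50 'postgres://root@localhost:26257?sslmode=disable'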
// Hooks implements the Hookser interface.
func (w *jsonLoad) Hooks() workload.Hooks {
	return workload.Hooks{
		Validate: func() error {
			if w.computed && w.inverted {
				return errors.Errorf("computed and inverted cannot be used together")
			}
			return nil
		},
	}
}

// Tables implements the Generator interface.
func (w *jsonLoad) Tables() []workload.Table {
	schema := jsonSchema
	if w.inverted {
		schema = jsonSchemaWithInvertedIndex
	} else if w.computed {
		schema = jsonSchemaWithComputed
	}
	table := workload.Table{
		Name:   `j`,
		Schema: schema,
		Splits: workload.Tuples(
			w.splits,
			func(splitIdx int) []interface{} {
				rng := rand.New(rand.NewSource(w.seed + int64(splitIdx)))
				g := newHashGenerator(&sequence{config: w, val: w.writeSeq})
				return []interface{}{
					int(g.hash(rng.Int63())),
				}
			},
		),
	}
	return []workload.Table{table}
}

// Ops implements the Opser interface.
func (w *jsonLoad) Ops(urls []string, reg *histogram.Registry) (workload.QueryLoad, error) {
	sqlDatabase, err := workload.SanitizeUrls(w, w.connFlags.DBOverride, urls)
	if err != nil {
		return workload.QueryLoad{}, err
	}
	db, err := gosql.Open(`cockroach`, strings.Join(urls, ` `))
	if err != nil {
		return workload.QueryLoad{}, err
	}
	// Allow a maximum of concurrency+1 connections to the database.
	db.SetMaxOpenConns(w.connFlags.Concurrency + 1)
	db.SetMaxIdleConns(w.connFlags.Concurrency + 1)

	// Build the batched point-lookup statement:
	// SELECT k, v FROM j WHERE k IN ($1, ..., $batchSize).
	var buf bytes.Buffer
	buf.WriteString(`SELECT k, v FROM j WHERE k IN (`)
	for i := 0; i < w.batchSize; i++ {
		if i > 0 {
			buf.WriteString(", ")
		}
		fmt.Fprintf(&buf, `$%d`, i+1)
	}
	buf.WriteString(`)`)
	readStmt, err := db.Prepare(buf.String())
	if err != nil {
		return workload.QueryLoad{}, err
	}

	// Build the batched UPSERT statement: one placeholder per row for the
	// computed-key schema (k is derived from v), a (k, v) pair otherwise.
	buf.Reset()
	if w.computed {
		buf.WriteString(`UPSERT INTO j (v) VALUES`)
	} else {
		buf.WriteString(`UPSERT INTO j (k, v) VALUES`)
	}

	for i := 0; i < w.batchSize; i++ {
		j := i * 2
		if i > 0 {
			buf.WriteString(", ")
		}
		if w.computed {
			fmt.Fprintf(&buf, ` ($%d)`, i+1)
		} else {
			fmt.Fprintf(&buf, ` ($%d, $%d)`, j+1, j+2)
		}
	}

	writeStmt, err := db.Prepare(buf.String())
	if err != nil {
		return workload.QueryLoad{}, err
	}

	ql := workload.QueryLoad{SQLDatabase: sqlDatabase}
	for i := 0; i < w.connFlags.Concurrency; i++ {
		op := jsonOp{
			config:    w,
			hists:     reg.GetHandle(),
			db:        db,
			readStmt:  readStmt,
			writeStmt: writeStmt,
		}
		seq := &sequence{config: w, val: w.writeSeq}
		if w.sequential {
			op.g = newSequentialGenerator(seq)
		} else {
			op.g = newHashGenerator(seq)
		}
		ql.WorkerFns = append(ql.WorkerFns, op.run)
	}
	return ql, nil
}

type jsonOp struct {
	config    *jsonLoad
	hists     *histogram.Histograms
	db        *gosql.DB
	readStmt  *gosql.Stmt
	writeStmt *gosql.Stmt
	g         keyGenerator
}
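// run executes one batched operation: with probability readPercent/100 it
// issues a single point-lookup query for batchSize keys, and otherwise a
// single upsert of batchSize randomly generated JSON documents, recording
// the latency in the `read` or `write` histogram respectively.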
func (o *jsonOp) run(ctx context.Context) error {
	if o.g.rand().Intn(100) < o.config.readPercent {
		args := make([]interface{}, o.config.batchSize)
		for i := 0; i < o.config.batchSize; i++ {
			args[i] = o.g.readKey()
		}
		start := timeutil.Now()
		rows, err := o.readStmt.Query(args...)
		if err != nil {
			return err
		}
		// Drain the result set; only the latency of the read is measured.
		for rows.Next() {
		}
		elapsed := timeutil.Since(start)
		o.hists.Get(`read`).Record(elapsed)
		return rows.Err()
	}
	// The computed-key schema takes only the JSON value per row; otherwise
	// each row supplies a (k, v) pair.
	argCount := 2
	if o.config.computed {
		argCount = 1
	}
	args := make([]interface{}, argCount*o.config.batchSize)
	for i := 0; i < o.config.batchSize*argCount; i += argCount {
		j := i
		if !o.config.computed {
			args[j] = o.g.writeKey()
			j++
		}
		js, err := json.Random(o.config.complexity, o.g.rand())
		if err != nil {
			return err
		}
		if o.config.computed {
			// Wrap the random document so the computed primary key can be
			// extracted from v->>'key'.
			builder := json.NewObjectBuilder(2)
			builder.Add("key", json.FromInt64(o.g.writeKey()))
			builder.Add("data", js)
			js = builder.Build()
		}
		args[j] = js.String()
	}
	start := timeutil.Now()
	_, err := o.writeStmt.Exec(args...)
	elapsed := timeutil.Since(start)
	o.hists.Get(`write`).Record(elapsed)
	return err
}

type sequence struct {
	config *jsonLoad
	val    int64
}

func (s *sequence) write() int64 {
	return (atomic.AddInt64(&s.val, 1) - 1) % s.config.cycleLength
}

// read returns the last key index that has been written. Note that the
// returned index might not actually have been written yet, so a read
// operation cannot require that the key is present.
func (s *sequence) read() int64 {
	return atomic.LoadInt64(&s.val) % s.config.cycleLength
}

// keyGenerator generates read and write keys. Read keys may not yet exist
// and write keys may already exist.
type keyGenerator interface {
	writeKey() int64
	readKey() int64
	rand() *rand.Rand
}

type hashGenerator struct {
	seq    *sequence
	random *rand.Rand
	hasher hash.Hash
	buf    [sha1.Size]byte
}

func newHashGenerator(seq *sequence) *hashGenerator {
	return &hashGenerator{
		seq:    seq,
		random: rand.New(rand.NewSource(seq.config.seed)),
		hasher: sha1.New(),
	}
}

// hash mixes v with the configured seed and returns the first 8 bytes of
// the SHA-1 digest as an int64.
func (g *hashGenerator) hash(v int64) int64 {
	binary.BigEndian.PutUint64(g.buf[:8], uint64(v))
	binary.BigEndian.PutUint64(g.buf[8:16], uint64(g.seq.config.seed))
	g.hasher.Reset()
	_, _ = g.hasher.Write(g.buf[:16])
	g.hasher.Sum(g.buf[:0])
	return int64(binary.BigEndian.Uint64(g.buf[:8]))
}

func (g *hashGenerator) writeKey() int64 {
	return g.hash(g.seq.write())
}

func (g *hashGenerator) readKey() int64 {
	v := g.seq.read()
	if v == 0 {
		return 0
	}
	return g.hash(g.random.Int63n(v))
}

func (g *hashGenerator) rand() *rand.Rand {
	return g.random
}

type sequentialGenerator struct {
	seq    *sequence
	random *rand.Rand
}

func newSequentialGenerator(seq *sequence) *sequentialGenerator {
	return &sequentialGenerator{
		seq:    seq,
		random: rand.New(rand.NewSource(seq.config.seed)),
	}
}

func (g *sequentialGenerator) writeKey() int64 {
	return g.seq.write()
}

func (g *sequentialGenerator) readKey() int64 {
	v := g.seq.read()
	if v == 0 {
		return 0
	}
	return g.random.Int63n(v)
}

func (g *sequentialGenerator) rand() *rand.Rand {
	return g.random
}
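// Illustrative sketch of the two key-generation strategies (hypothetical
// usage, assuming in-package access to the unexported types above):
//
//	seq := &sequence{config: &jsonLoad{cycleLength: math.MaxInt64, seed: 1}}
//	g := newHashGenerator(seq)
//	k := g.writeKey() // SHA-1 of (0, seed): keys scatter pseudo-randomly
//	_ = g.readKey()   // hash of a random already-issued index, or 0 if none
//
// With --sequential, sequentialGenerator instead returns the raw sequence
// values 0, 1, 2, ..., so writes land on a single ascending key span.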