github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/cmd/roachtest/inverted_index.go (about) 1 // Copyright 2019 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package main 12 13 import ( 14 "context" 15 "fmt" 16 "time" 17 18 "github.com/cockroachdb/cockroach/pkg/util/timeutil" 19 ) 20 21 func registerSchemaChangeInvertedIndex(r *testRegistry) { 22 r.Add(testSpec{ 23 Name: "schemachange/invertedindex", 24 Owner: OwnerSQLSchema, 25 Cluster: makeClusterSpec(5), 26 Run: func(ctx context.Context, t *test, c *cluster) { 27 runSchemaChangeInvertedIndex(ctx, t, c) 28 }, 29 }) 30 } 31 32 // runInvertedIndex tests the correctness and performance of building an 33 // inverted index on randomly generated JSON data (from the JSON workload). 34 func runSchemaChangeInvertedIndex(ctx context.Context, t *test, c *cluster) { 35 crdbNodes := c.Range(1, c.spec.NodeCount-1) 36 workloadNode := c.Node(c.spec.NodeCount) 37 38 c.Put(ctx, cockroach, "./cockroach", crdbNodes) 39 c.Put(ctx, workload, "./workload", workloadNode) 40 c.Start(ctx, t, crdbNodes) 41 42 cmdInit := fmt.Sprintf("./workload init json {pgurl:1}") 43 c.Run(ctx, workloadNode, cmdInit) 44 45 // On a 4-node GCE cluster with the standard configuration, this generates ~10 million rows 46 initialDataDuration := time.Minute * 20 47 indexDuration := time.Hour 48 if c.isLocal() { 49 initialDataDuration = time.Minute 50 indexDuration = time.Minute 51 } 52 53 // First generate random JSON data using the JSON workload. 54 // TODO (lucy): Using a pre-generated test fixture would be much faster 55 m := newMonitor(ctx, c, crdbNodes) 56 57 cmdWrite := fmt.Sprintf( 58 "./workload run json --read-percent=0 --duration %s {pgurl:1-%d} --batch 1000 --sequential", 59 initialDataDuration.String(), c.spec.NodeCount-1, 60 ) 61 m.Go(func(ctx context.Context) error { 62 c.Run(ctx, workloadNode, cmdWrite) 63 64 db := c.Conn(ctx, 1) 65 defer db.Close() 66 67 var count int 68 if err := db.QueryRow(`SELECT count(*) FROM json.j`).Scan(&count); err != nil { 69 t.Fatal(err) 70 } 71 t.l.Printf("finished writing %d rows to table", count) 72 73 return nil 74 }) 75 76 m.Wait() 77 78 // Run the workload (with both reads and writes), and create the index at the same time. 79 m = newMonitor(ctx, c, crdbNodes) 80 81 cmdWriteAndRead := fmt.Sprintf( 82 "./workload run json --read-percent=50 --duration %s {pgurl:1-%d} --sequential", 83 indexDuration.String(), c.spec.NodeCount-1, 84 ) 85 m.Go(func(ctx context.Context) error { 86 c.Run(ctx, workloadNode, cmdWriteAndRead) 87 return nil 88 }) 89 90 m.Go(func(ctx context.Context) error { 91 db := c.Conn(ctx, 1) 92 defer db.Close() 93 94 t.l.Printf("creating index") 95 start := timeutil.Now() 96 if _, err := db.Exec(`CREATE INVERTED INDEX ON json.j (v)`); err != nil { 97 return err 98 } 99 t.l.Printf("index was created, took %v", timeutil.Since(start)) 100 101 return nil 102 }) 103 104 m.Wait() 105 }