github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/cmd/roachtest/inverted_index.go (about)

     1  // Copyright 2019 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package main
    12  
    13  import (
    14  	"context"
    15  	"fmt"
    16  	"time"
    17  
    18  	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
    19  )
    20  
    21  func registerSchemaChangeInvertedIndex(r *testRegistry) {
    22  	r.Add(testSpec{
    23  		Name:    "schemachange/invertedindex",
    24  		Owner:   OwnerSQLSchema,
    25  		Cluster: makeClusterSpec(5),
    26  		Run: func(ctx context.Context, t *test, c *cluster) {
    27  			runSchemaChangeInvertedIndex(ctx, t, c)
    28  		},
    29  	})
    30  }
    31  
    32  // runInvertedIndex tests the correctness and performance of building an
    33  // inverted index on randomly generated JSON data (from the JSON workload).
    34  func runSchemaChangeInvertedIndex(ctx context.Context, t *test, c *cluster) {
    35  	crdbNodes := c.Range(1, c.spec.NodeCount-1)
    36  	workloadNode := c.Node(c.spec.NodeCount)
    37  
    38  	c.Put(ctx, cockroach, "./cockroach", crdbNodes)
    39  	c.Put(ctx, workload, "./workload", workloadNode)
    40  	c.Start(ctx, t, crdbNodes)
    41  
    42  	cmdInit := fmt.Sprintf("./workload init json {pgurl:1}")
    43  	c.Run(ctx, workloadNode, cmdInit)
    44  
    45  	// On a 4-node GCE cluster with the standard configuration, this generates ~10 million rows
    46  	initialDataDuration := time.Minute * 20
    47  	indexDuration := time.Hour
    48  	if c.isLocal() {
    49  		initialDataDuration = time.Minute
    50  		indexDuration = time.Minute
    51  	}
    52  
    53  	// First generate random JSON data using the JSON workload.
    54  	// TODO (lucy): Using a pre-generated test fixture would be much faster
    55  	m := newMonitor(ctx, c, crdbNodes)
    56  
    57  	cmdWrite := fmt.Sprintf(
    58  		"./workload run json --read-percent=0 --duration %s {pgurl:1-%d} --batch 1000 --sequential",
    59  		initialDataDuration.String(), c.spec.NodeCount-1,
    60  	)
    61  	m.Go(func(ctx context.Context) error {
    62  		c.Run(ctx, workloadNode, cmdWrite)
    63  
    64  		db := c.Conn(ctx, 1)
    65  		defer db.Close()
    66  
    67  		var count int
    68  		if err := db.QueryRow(`SELECT count(*) FROM json.j`).Scan(&count); err != nil {
    69  			t.Fatal(err)
    70  		}
    71  		t.l.Printf("finished writing %d rows to table", count)
    72  
    73  		return nil
    74  	})
    75  
    76  	m.Wait()
    77  
    78  	// Run the workload (with both reads and writes), and create the index at the same time.
    79  	m = newMonitor(ctx, c, crdbNodes)
    80  
    81  	cmdWriteAndRead := fmt.Sprintf(
    82  		"./workload run json --read-percent=50 --duration %s {pgurl:1-%d} --sequential",
    83  		indexDuration.String(), c.spec.NodeCount-1,
    84  	)
    85  	m.Go(func(ctx context.Context) error {
    86  		c.Run(ctx, workloadNode, cmdWriteAndRead)
    87  		return nil
    88  	})
    89  
    90  	m.Go(func(ctx context.Context) error {
    91  		db := c.Conn(ctx, 1)
    92  		defer db.Close()
    93  
    94  		t.l.Printf("creating index")
    95  		start := timeutil.Now()
    96  		if _, err := db.Exec(`CREATE INVERTED INDEX ON json.j (v)`); err != nil {
    97  			return err
    98  		}
    99  		t.l.Printf("index was created, took %v", timeutil.Since(start))
   100  
   101  		return nil
   102  	})
   103  
   104  	m.Wait()
   105  }