github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/cmd/roachtest/tpcdsvec.go (about)

     1  // Copyright 2020 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package main
    12  
    13  import (
    14  	"context"
    15  	"fmt"
    16  	"time"
    17  
    18  	"github.com/cockroachdb/cockroach/pkg/cmd/cmpconn"
    19  	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
    20  	"github.com/cockroachdb/cockroach/pkg/workload/tpcds"
    21  	"github.com/cockroachdb/errors"
    22  )
    23  
    24  func registerTPCDSVec(r *testRegistry) {
    25  	const (
    26  		timeout                         = 5 * time.Minute
    27  		withStatsSlowerWarningThreshold = 1.25
    28  	)
    29  
    30  	queriesToSkip := map[int]bool{
    31  		// The plans for these queries contain processors with
    32  		// core.LocalPlanNode which currently cannot be wrapped by the
    33  		// vectorized engine, so 'vectorize' session variable will make no
    34  		// difference.
    35  		1:  true,
    36  		2:  true,
    37  		4:  true,
    38  		11: true,
    39  		23: true,
    40  		24: true,
    41  		30: true,
    42  		31: true,
    43  		39: true,
    44  		45: true,
    45  		47: true,
    46  		57: true,
    47  		59: true,
    48  		64: true,
    49  		74: true,
    50  		75: true,
    51  		81: true,
    52  		95: true,
    53  
    54  		// These queries contain unsupported function 'rollup' (#46280).
    55  		5:  true,
    56  		14: true,
    57  		18: true,
    58  		22: true,
    59  		67: true,
    60  		77: true,
    61  		80: true,
    62  
    63  		// These queries do not finish in 5 minutes.
    64  		7:  true,
    65  		13: true,
    66  		17: true,
    67  		19: true,
    68  		25: true,
    69  		26: true,
    70  		29: true,
    71  		//45: true,
    72  		46: true,
    73  		48: true,
    74  		50: true,
    75  		61: true,
    76  		//64: true,
    77  		66: true,
    78  		68: true,
    79  		72: true,
    80  		84: true,
    81  		85: true,
    82  	}
    83  
    84  	tpcdsTables := []string{
    85  		`call_center`, `catalog_page`, `catalog_returns`, `catalog_sales`,
    86  		`customer`, `customer_address`, `customer_demographics`, `date_dim`,
    87  		`dbgen_version`, `household_demographics`, `income_band`, `inventory`,
    88  		`item`, `promotion`, `reason`, `ship_mode`, `store`, `store_returns`,
    89  		`store_sales`, `time_dim`, `warehouse`, `web_page`, `web_returns`,
    90  		`web_sales`, `web_site`,
    91  	}
    92  
    93  	runTPCDSVec := func(ctx context.Context, t *test, c *cluster) {
    94  		c.Put(ctx, cockroach, "./cockroach", c.All())
    95  		c.Start(ctx, t)
    96  
    97  		clusterConn := c.Conn(ctx, 1)
    98  		disableAutoStats(t, clusterConn)
    99  		t.Status("restoring TPCDS dataset for Scale Factor 1")
   100  		if _, err := clusterConn.Exec(
   101  			`RESTORE DATABASE tpcds FROM 'gs://cockroach-fixtures/workload/tpcds/scalefactor=1/backup';`,
   102  		); err != nil {
   103  			t.Fatal(err)
   104  		}
   105  
   106  		if _, err := clusterConn.Exec("USE tpcds;"); err != nil {
   107  			t.Fatal(err)
   108  		}
   109  		scatterTables(t, clusterConn, tpcdsTables)
   110  		t.Status("waiting for full replication")
   111  		waitForFullReplication(t, clusterConn)
   112  
   113  		// TODO(yuzefovich): it seems like if cmpconn.CompareConns hits a
   114  		// timeout, the query actually keeps on going and the connection
   115  		// becomes kinda stale. To go around it, we set a statement timeout
   116  		// variable on the connections and pass in 3 x timeout into
   117  		// CompareConns hoping that the session variable is better respected.
   118  		// We additionally open fresh connections for each query.
   119  		setStmtTimeout := fmt.Sprintf("SET statement_timeout='%s';", timeout)
   120  		firstNode := c.Node(1)
   121  		firstNodeURL := c.ExternalPGUrl(ctx, firstNode)[0]
   122  		openNewConnections := func() (map[string]cmpconn.Conn, func()) {
   123  			conns := map[string]cmpconn.Conn{}
   124  			vecOffConn, err := cmpconn.NewConn(
   125  				firstNodeURL, setStmtTimeout+"SET vectorize=off; USE tpcds;",
   126  			)
   127  			if err != nil {
   128  				t.Fatal(err)
   129  			}
   130  			conns["vectorize=OFF"] = vecOffConn
   131  			vecOnConn, err := cmpconn.NewConn(
   132  				firstNodeURL, setStmtTimeout+"SET vectorize=on; USE tpcds;",
   133  			)
   134  			if err != nil {
   135  				t.Fatal(err)
   136  			}
   137  			conns["vectorize=ON"] = vecOnConn
   138  			// A sanity check that we have different values of 'vectorize'
   139  			// session variable on two connections and that the comparator will
   140  			// emit an error because of that difference.
   141  			if err := cmpconn.CompareConns(
   142  				ctx, timeout, conns, "", "SHOW vectorize;", false, /* ignoreSQLErrors */
   143  			); err == nil {
   144  				t.Fatal("unexpectedly SHOW vectorize didn't trigger an error on comparison")
   145  			}
   146  			return conns, func() {
   147  				vecOffConn.Close()
   148  				vecOnConn.Close()
   149  			}
   150  		}
   151  
   152  		noStatsRunTimes := make(map[int]float64)
   153  		var errToReport error
   154  		// We will run all queries in two scenarios: without stats and with
   155  		// auto stats. The idea is that the plans are likely to be different,
   156  		// so we will be testing different execution scenarios. We additionally
   157  		// will compare the queries' run times in both scenarios and print out
   158  		// warnings when in presence of stats we seem to be choosing worse
   159  		// plans.
   160  		for _, haveStats := range []bool{false, true} {
   161  			for queryNum := 1; queryNum <= tpcds.NumQueries; queryNum++ {
   162  				if toSkip, ok := queriesToSkip[queryNum]; ok || toSkip {
   163  					continue
   164  				}
   165  				query, ok := tpcds.QueriesByNumber[queryNum]
   166  				if !ok {
   167  					continue
   168  				}
   169  				t.Status(fmt.Sprintf("running query %d\n", queryNum))
   170  				// We will be opening fresh connections for every query to go
   171  				// around issues with cancellation.
   172  				conns, cleanup := openNewConnections()
   173  				defer cleanup()
   174  				start := timeutil.Now()
   175  				if err := cmpconn.CompareConns(
   176  					ctx, 3*timeout, conns, "", query, false, /* ignoreSQLErrors */
   177  				); err != nil {
   178  					t.Status(fmt.Sprintf("encountered an error: %s\n", err))
   179  					errToReport = errors.CombineErrors(errToReport, err)
   180  				} else {
   181  					runTimeInSeconds := timeutil.Since(start).Seconds()
   182  					t.Status(
   183  						fmt.Sprintf("[q%d] took about %.2fs to run on both configs",
   184  							queryNum, runTimeInSeconds),
   185  					)
   186  					if haveStats {
   187  						noStatsRunTime, ok := noStatsRunTimes[queryNum]
   188  						if ok && noStatsRunTime*withStatsSlowerWarningThreshold < runTimeInSeconds {
   189  							t.Status(fmt.Sprintf("WARNING: suboptimal plan when stats are present\n"+
   190  								"no stats: %.2fs\twith stats: %.2fs", noStatsRunTime, runTimeInSeconds))
   191  						}
   192  					} else {
   193  						noStatsRunTimes[queryNum] = runTimeInSeconds
   194  					}
   195  				}
   196  			}
   197  
   198  			if !haveStats {
   199  				createStatsFromTables(t, clusterConn, tpcdsTables)
   200  			}
   201  		}
   202  		if errToReport != nil {
   203  			t.Fatal(errToReport)
   204  		}
   205  	}
   206  
   207  	r.Add(testSpec{
   208  		Name:       "tpcdsvec",
   209  		Owner:      OwnerSQLExec,
   210  		Cluster:    makeClusterSpec(3),
   211  		MinVersion: "v20.1.0",
   212  		Run: func(ctx context.Context, t *test, c *cluster) {
   213  			runTPCDSVec(ctx, t, c)
   214  		},
   215  	})
   216  }