github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/cmd/roachtest/tpc_utils.go (about)

     1  // Copyright 2020 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package main
    12  
    13  import (
    14  	"context"
    15  	gosql "database/sql"
    16  	"fmt"
    17  
    18  	"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode"
    19  	"github.com/cockroachdb/errors"
    20  	"github.com/lib/pq"
    21  )
    22  
    23  // loadTPCHDataset loads a TPC-H dataset for the specific benchmark spec on the
    24  // provided roachNodes. The function is idempotent and first checks whether a
    25  // compatible dataset exists (compatible is defined as a tpch dataset with a
    26  // scale factor at least as large as the provided scale factor), performing an
    27  // expensive dataset restore only if it doesn't.
    28  func loadTPCHDataset(
    29  	ctx context.Context, t *test, c *cluster, sf int, m *monitor, roachNodes nodeListOption,
    30  ) error {
    31  	db := c.Conn(ctx, roachNodes[0])
    32  	defer db.Close()
    33  
    34  	if _, err := db.ExecContext(ctx, `USE tpch`); err == nil {
    35  		t.l.Printf("found existing tpch dataset, verifying scale factor\n")
    36  
    37  		var supplierCardinality int
    38  		if err := db.QueryRowContext(
    39  			ctx, `SELECT count(*) FROM tpch.supplier`,
    40  		).Scan(&supplierCardinality); err != nil {
    41  			if pqErr := (*pq.Error)(nil); !(errors.As(err, &pqErr) && pqErr.Code == pgcode.UndefinedTable) {
    42  				return err
    43  			}
    44  			// Table does not exist. Set cardinality to 0.
    45  			supplierCardinality = 0
    46  		}
    47  
    48  		// Check if a tpch database with the required scale factor exists.
    49  		// 10000 is the number of rows in the supplier table at scale factor 1.
    50  		// supplier is the smallest table whose cardinality scales with the scale
    51  		// factor.
    52  		expectedSupplierCardinality := 10000 * sf
    53  		if supplierCardinality >= expectedSupplierCardinality {
    54  			t.l.Printf("dataset is at least of scale factor %d, continuing", sf)
    55  			return nil
    56  		}
    57  
    58  		// If the scale factor was smaller than the required scale factor, wipe the
    59  		// cluster and restore.
    60  		m.ExpectDeaths(int32(c.spec.NodeCount))
    61  		c.Wipe(ctx, roachNodes)
    62  		c.Start(ctx, t, roachNodes)
    63  		m.ResetDeaths()
    64  	} else if pqErr := (*pq.Error)(nil); !(errors.As(err, &pqErr) &&
    65  		string(pqErr.Code) == pgcode.InvalidCatalogName) {
    66  		return err
    67  	}
    68  
    69  	t.l.Printf("restoring tpch scale factor %d\n", sf)
    70  	tpchURL := fmt.Sprintf("gs://cockroach-fixtures/workload/tpch/scalefactor=%d/backup", sf)
    71  	query := fmt.Sprintf(`CREATE DATABASE IF NOT EXISTS tpch; RESTORE tpch.* FROM '%s' WITH into_db = 'tpch';`, tpchURL)
    72  	_, err := db.ExecContext(ctx, query)
    73  	return err
    74  }
    75  
    76  // scatterTables runs "ALTER TABLE ... SCATTER" statement for every table in
    77  // tableNames. It assumes that conn is already using the target database. If an
    78  // error is encountered, the test is failed.
    79  func scatterTables(t *test, conn *gosql.DB, tableNames []string) {
    80  	t.Status("scattering the data")
    81  	for _, table := range tableNames {
    82  		scatter := fmt.Sprintf("ALTER TABLE %s SCATTER;", table)
    83  		if _, err := conn.Exec(scatter); err != nil {
    84  			t.Fatal(err)
    85  		}
    86  	}
    87  }
    88  
    89  // disableAutoStats disables automatic collection of statistics on the cluster.
    90  func disableAutoStats(t *test, conn *gosql.DB) {
    91  	t.Status("disabling automatic collection of stats")
    92  	if _, err := conn.Exec(
    93  		`SET CLUSTER SETTING sql.stats.automatic_collection.enabled=false;`,
    94  	); err != nil {
    95  		t.Fatal(err)
    96  	}
    97  }
    98  
    99  // createStatsFromTables runs "CREATE STATISTICS" statement for every table in
   100  // tableNames. It assumes that conn is already using the target database. If an
   101  // error is encountered, the test is failed.
   102  func createStatsFromTables(t *test, conn *gosql.DB, tableNames []string) {
   103  	t.Status("collecting stats")
   104  	for _, tableName := range tableNames {
   105  		t.Status(fmt.Sprintf("creating statistics from table %q", tableName))
   106  		if _, err := conn.Exec(
   107  			fmt.Sprintf(`CREATE STATISTICS %s FROM %s;`, tableName, tableName),
   108  		); err != nil {
   109  			t.Fatal(err)
   110  		}
   111  	}
   112  }