github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/opt/bench/bench_test.go (about)

     1  // Copyright 2018 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package bench
    12  
    13  import (
    14  	"bytes"
    15  	"context"
    16  	gosql "database/sql"
    17  	"flag"
    18  	"fmt"
    19  	"os"
    20  	"runtime/pprof"
    21  	"testing"
    22  	"time"
    23  
    24  	"github.com/cockroachdb/cockroach/pkg/base"
    25  	"github.com/cockroachdb/cockroach/pkg/security"
    26  	"github.com/cockroachdb/cockroach/pkg/security/securitytest"
    27  	"github.com/cockroachdb/cockroach/pkg/server"
    28  	"github.com/cockroachdb/cockroach/pkg/settings/cluster"
    29  	"github.com/cockroachdb/cockroach/pkg/sql/opt/exec/execbuilder"
    30  	"github.com/cockroachdb/cockroach/pkg/sql/opt/memo"
    31  	"github.com/cockroachdb/cockroach/pkg/sql/opt/optbuilder"
    32  	"github.com/cockroachdb/cockroach/pkg/sql/opt/testutils/testcat"
    33  	"github.com/cockroachdb/cockroach/pkg/sql/opt/xform"
    34  	"github.com/cockroachdb/cockroach/pkg/sql/parser"
    35  	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
    36  	"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
    37  	"github.com/cockroachdb/cockroach/pkg/testutils/serverutils"
    38  	"github.com/cockroachdb/cockroach/pkg/testutils/sqlutils"
    39  	"github.com/cockroachdb/cockroach/pkg/util/randutil"
    40  	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
    41  )
    42  
    43  type BenchmarkType int
    44  
    45  const (
    46  	// Parse creates the AST.
    47  	Parse BenchmarkType = iota
    48  
    49  	// OptBuild constructs the Memo from the AST. It runs no normalization or
    50  	// exploration rules. OptBuild does not include the time to Parse.
    51  	OptBuild
    52  
    53  	// Normalize constructs the Memo from the AST, but enables all normalization
    54  	// rules, unlike OptBuild. No Explore rules are enabled. Normalize includes
    55  	// the time to OptBuild.
    56  	Normalize
    57  
    58  	// Explore constructs the Memo from the AST and enables all normalization
    59  	// and exploration rules. The Memo is fully optimized. Explore includes the
    60  	// time to OptBuild and Normalize.
    61  	Explore
    62  
    63  	// ExecBuild calls a stub factory to construct a dummy plan from the optimized
    64  	// Memo. Since the factory is not creating a real plan, only a part of the
    65  	// execbuild time is captured.
    66  	ExecBuild
    67  
    68  	// EndToEnd executes the query end-to-end using the cost-based optimizer.
    69  	EndToEnd
    70  )
    71  
    72  var benchmarkTypeStrings = [...]string{
    73  	Parse:     "Parse",
    74  	OptBuild:  "OptBuild",
    75  	Normalize: "Normalize",
    76  	Explore:   "Explore",
    77  	ExecBuild: "ExecBuild",
    78  	EndToEnd:  "EndToEnd",
    79  }
    80  
    81  type benchQuery struct {
    82  	name    string
    83  	query   string
    84  	args    []interface{}
    85  	prepare bool
    86  }
    87  
    88  var schemas = [...]string{
    89  	`CREATE TABLE kv (k BIGINT NOT NULL PRIMARY KEY, v BYTES NOT NULL)`,
    90  	`
    91  	CREATE TABLE customer
    92  	(
    93  		c_id           integer        not null,
    94  		c_d_id         integer        not null,
    95  		c_w_id         integer        not null,
    96  		c_first        varchar(16),
    97  		c_middle       char(2),
    98  		c_last         varchar(16),
    99  		c_street_1     varchar(20),
   100  		c_street_2     varchar(20),
   101  		c_city         varchar(20),
   102  		c_state        char(2),
   103  		c_zip          char(9),
   104  		c_phone        char(16),
   105  		c_since        timestamp,
   106  		c_credit       char(2),
   107  		c_credit_lim   decimal(12,2),
   108  		c_discount     decimal(4,4),
   109  		c_balance      decimal(12,2),
   110  		c_ytd_payment  decimal(12,2),
   111  		c_payment_cnt  integer,
   112  		c_delivery_cnt integer,
   113  		c_data         varchar(500),
   114  		primary key (c_w_id, c_d_id, c_id),
   115  		index customer_idx (c_w_id, c_d_id, c_last, c_first)
   116  	)
   117  	`,
   118  	`
   119  	CREATE TABLE new_order
   120  	(
   121  		no_o_id  integer   not null,
   122  		no_d_id  integer   not null,
   123  		no_w_id  integer   not null,
   124  		primary key (no_w_id, no_d_id, no_o_id DESC)
   125  	)
   126  	`,
   127  	`
   128  	CREATE TABLE stock
   129  	(
   130  		s_i_id       integer       not null,
   131  		s_w_id       integer       not null,
   132  		s_quantity   integer,
   133  		s_dist_01    char(24),
   134  		s_dist_02    char(24),
   135  		s_dist_03    char(24),
   136  		s_dist_04    char(24),
   137  		s_dist_05    char(24),
   138  		s_dist_06    char(24),
   139  		s_dist_07    char(24),
   140  		s_dist_08    char(24),
   141  		s_dist_09    char(24),
   142  		s_dist_10    char(24),
   143  		s_ytd        integer,
   144  		s_order_cnt  integer,
   145  		s_remote_cnt integer,
   146  		s_data       varchar(50),
   147  		primary key (s_w_id, s_i_id),
   148  		index stock_item_fk_idx (s_i_id)
   149  	)
   150  	`,
   151  	`
   152  	CREATE TABLE order_line
   153  	(
   154  		ol_o_id         integer   not null,
   155  		ol_d_id         integer   not null,
   156  		ol_w_id         integer   not null,
   157  		ol_number       integer   not null,
   158  		ol_i_id         integer   not null,
   159  		ol_supply_w_id  integer,
   160  		ol_delivery_d   timestamp,
   161  		ol_quantity     integer,
   162  		ol_amount       decimal(6,2),
   163  		ol_dist_info    char(24),
   164  		primary key (ol_w_id, ol_d_id, ol_o_id DESC, ol_number),
   165  		index order_line_fk (ol_supply_w_id, ol_i_id),
   166  		foreign key (ol_supply_w_id, ol_i_id) references stock (s_w_id, s_i_id)
   167  	)
   168  	`,
   169  	`
   170  	CREATE TABLE j
   171  	(
   172  	  a INT PRIMARY KEY,
   173  	  b INT,
   174  	  INDEX (b)
   175  	)
   176    `,
   177  }
   178  
   179  var queries = [...]benchQuery{
   180  	// 1. Table with small number of columns.
   181  	// 2. Table with no indexes.
   182  	// 3. Very simple query that returns single row based on key filter.
   183  	{
   184  		name:    "kv-read",
   185  		query:   `SELECT k, v FROM kv WHERE k IN ($1)`,
   186  		args:    []interface{}{1},
   187  		prepare: true,
   188  	},
   189  
   190  	// 1. No PREPARE phase, only EXECUTE.
   191  	{
   192  		name:    "kv-read-no-prep",
   193  		query:   `SELECT k, v FROM kv WHERE k IN ($1)`,
   194  		args:    []interface{}{1},
   195  		prepare: false,
   196  	},
   197  
   198  	// 1. PREPARE with constant filter value (no placeholders).
   199  	{
   200  		name:    "kv-read-const",
   201  		query:   `SELECT k, v FROM kv WHERE k IN (1)`,
   202  		args:    []interface{}{},
   203  		prepare: true,
   204  	},
   205  
   206  	// 1. Table with many columns.
   207  	// 2. Multi-column primary key.
   208  	// 3. Mutiple indexes to consider.
   209  	// 4. Multiple placeholder values.
   210  	{
   211  		name: "tpcc-new-order",
   212  		query: `
   213  			SELECT c_discount, c_last, c_credit
   214  			FROM customer
   215  			WHERE c_w_id = $1 AND c_d_id = $2 AND c_id = $3
   216  		`,
   217  		args:    []interface{}{10, 100, 50},
   218  		prepare: true,
   219  	},
   220  
   221  	// 1. ORDER BY clause.
   222  	// 2. LIMIT clause.
   223  	// 3. Best plan requires reverse scan.
   224  	{
   225  		name: "tpcc-delivery",
   226  		query: `
   227  			SELECT no_o_id
   228  			FROM new_order
   229  			WHERE no_w_id = $1 AND no_d_id = $2
   230  			ORDER BY no_o_id ASC
   231  			LIMIT 1
   232  		`,
   233  		args:    []interface{}{10, 100},
   234  		prepare: true,
   235  	},
   236  
   237  	// 1. Count and Distinct aggregate functions.
   238  	// 2. Simple join.
   239  	// 3. Best plan requires lookup join.
   240  	// 4. Placeholders used in larger constant expressions.
   241  	{
   242  		name: "tpcc-stock-level",
   243  		query: `
   244  			SELECT count(DISTINCT s_i_id)
   245  			FROM order_line
   246  			JOIN stock
   247  			ON s_i_id=ol_i_id AND s_w_id=ol_w_id
   248  			WHERE ol_w_id = $1
   249  				AND ol_d_id = $2
   250  				AND ol_o_id BETWEEN $3 - 20 AND $3 - 1
   251  				AND s_quantity < $4
   252  		`,
   253  		args:    []interface{}{10, 100, 1000, 15},
   254  		prepare: true,
   255  	},
   256  }
   257  
   258  func init() {
   259  	security.SetAssetLoader(securitytest.EmbeddedAssets)
   260  	randutil.SeedForTests()
   261  	serverutils.InitTestServerFactory(server.TestServerFactory)
   262  }
   263  
   264  var profileTime = flag.Duration("profile-time", 10*time.Second, "duration of profiling run")
   265  var profileType = flag.String("profile-type", "ExecBuild", "Parse, OptBuild, Normalize, Explore, ExecBuild, EndToEnd")
   266  var profileQuery = flag.String("profile-query", "kv-read", "name of query to run")
   267  
   268  // TestCPUProfile executes the configured profileQuery in a loop in order to
   269  // profile its CPU usage. Rather than allow the Go testing infrastructure to
   270  // start profiling, TestCPUProfile triggers startup, so that it has control over
   271  // when profiling starts. In particular, profiling is only started once the
   272  // server or API has been initialized, so that the profiles don't include
   273  // startup activities.
   274  //
   275  // TestCPUProfile writes the output profile to a cpu.out file in the current
   276  // directory. See the profile flags for ways to configure what is profiled.
   277  func TestCPUProfile(t *testing.T) {
   278  	t.Skip(
   279  		"Remove this when profiling. Use profile flags above to configure. Sample command line: \n" +
   280  			"GOMAXPROCS=1 go test -run TestCPUProfile --logtostderr NONE && go tool pprof bench.test cpu.out",
   281  	)
   282  
   283  	h := newHarness()
   284  	defer h.close()
   285  
   286  	var query benchQuery
   287  	for _, query = range queries {
   288  		if query.name == *profileQuery {
   289  			break
   290  		}
   291  	}
   292  
   293  	var bmType BenchmarkType
   294  	for i, s := range benchmarkTypeStrings {
   295  		if s == *profileType {
   296  			bmType = BenchmarkType(i)
   297  		}
   298  	}
   299  
   300  	h.runForProfiling(t, bmType, query, *profileTime)
   301  }
   302  
   303  // BenchmarkPhases measures the time that each of the optimization phases takes
   304  // to run. See the comments for the BenchmarkType enumeration for more details
   305  // on what each phase includes.
   306  func BenchmarkPhases(b *testing.B) {
   307  	bm := newHarness()
   308  	defer bm.close()
   309  
   310  	for _, query := range queries {
   311  		bm.runForBenchmark(b, Parse, query)
   312  		bm.runForBenchmark(b, OptBuild, query)
   313  		bm.runForBenchmark(b, Normalize, query)
   314  		bm.runForBenchmark(b, Explore, query)
   315  		bm.runForBenchmark(b, ExecBuild, query)
   316  	}
   317  }
   318  
   319  // BenchmarkEndToEnd measures the time to execute a query end-to-end.
   320  func BenchmarkEndToEnd(b *testing.B) {
   321  	h := newHarness()
   322  	defer h.close()
   323  
   324  	for _, query := range queries {
   325  		h.runForBenchmark(b, EndToEnd, query)
   326  	}
   327  }
   328  
   329  type harness struct {
   330  	ctx       context.Context
   331  	semaCtx   tree.SemaContext
   332  	evalCtx   tree.EvalContext
   333  	prepMemo  *memo.Memo
   334  	cat       *testcat.Catalog
   335  	optimizer xform.Optimizer
   336  
   337  	s  serverutils.TestServerInterface
   338  	db *gosql.DB
   339  	sr *sqlutils.SQLRunner
   340  
   341  	bmType   BenchmarkType
   342  	query    benchQuery
   343  	prepared *gosql.Stmt
   344  	ready    bool
   345  }
   346  
   347  func newHarness() *harness {
   348  	return &harness{}
   349  }
   350  
   351  func (h *harness) close() {
   352  	if h.s != nil {
   353  		h.s.Stopper().Stop(context.Background())
   354  	}
   355  }
   356  
   357  func (h *harness) runForProfiling(
   358  	t *testing.T, bmType BenchmarkType, query benchQuery, duration time.Duration,
   359  ) {
   360  	h.bmType = bmType
   361  	h.query = query
   362  	h.prepare(t)
   363  
   364  	f, err := os.Create("cpu.out")
   365  	if err != nil {
   366  		t.Fatalf("%v", err)
   367  	}
   368  	defer f.Close()
   369  
   370  	err = pprof.StartCPUProfile(f)
   371  	if err != nil {
   372  		t.Fatalf("%v", err)
   373  	}
   374  	defer pprof.StopCPUProfile()
   375  
   376  	start := timeutil.Now()
   377  	for {
   378  		now := timeutil.Now()
   379  		if now.Sub(start) > duration {
   380  			break
   381  		}
   382  
   383  		// Minimize overhead of getting timings by iterating 1000 times before
   384  		// checking if done.
   385  		for i := 0; i < 1000; i++ {
   386  			switch bmType {
   387  			case EndToEnd:
   388  				h.runUsingServer(t)
   389  
   390  			default:
   391  				h.runUsingAPI(t, bmType, query.prepare)
   392  			}
   393  		}
   394  	}
   395  }
   396  
   397  func (h *harness) runForBenchmark(b *testing.B, bmType BenchmarkType, query benchQuery) {
   398  	h.bmType = bmType
   399  	h.query = query
   400  	h.prepare(b)
   401  
   402  	b.Run(fmt.Sprintf("%s/%s", query.name, benchmarkTypeStrings[bmType]), func(b *testing.B) {
   403  		switch bmType {
   404  		case EndToEnd:
   405  			for i := 0; i < b.N; i++ {
   406  				h.runUsingServer(b)
   407  			}
   408  
   409  		default:
   410  			for i := 0; i < b.N; i++ {
   411  				h.runUsingAPI(b, bmType, query.prepare)
   412  			}
   413  		}
   414  	})
   415  }
   416  
   417  func (h *harness) prepare(tb testing.TB) {
   418  	switch h.bmType {
   419  	case EndToEnd:
   420  		h.prepareUsingServer(tb)
   421  
   422  	default:
   423  		h.prepareUsingAPI(tb)
   424  	}
   425  }
   426  
   427  func (h *harness) prepareUsingServer(tb testing.TB) {
   428  	if !h.ready {
   429  		// Set up database.
   430  		h.s, h.db, _ = serverutils.StartServer(tb, base.TestServerArgs{UseDatabase: "bench"})
   431  		h.sr = sqlutils.MakeSQLRunner(h.db)
   432  		h.sr.Exec(tb, `CREATE DATABASE bench`)
   433  		for _, schema := range schemas {
   434  			h.sr.Exec(tb, schema)
   435  		}
   436  		h.ready = true
   437  	}
   438  
   439  	if h.query.prepare {
   440  		var err error
   441  		h.prepared, err = h.db.Prepare(h.query.query)
   442  		if err != nil {
   443  			tb.Fatalf("%v", err)
   444  		}
   445  	} else {
   446  		h.prepared = nil
   447  	}
   448  }
   449  
   450  func (h *harness) runUsingServer(tb testing.TB) {
   451  	var err error
   452  	if h.prepared != nil {
   453  		_, err = h.prepared.Exec(h.query.args...)
   454  		if err != nil {
   455  			tb.Fatalf("%v", err)
   456  		}
   457  	} else {
   458  		h.sr.Exec(tb, h.query.query, h.query.args...)
   459  	}
   460  }
   461  
   462  func (h *harness) prepareUsingAPI(tb testing.TB) {
   463  	// Clear any state from previous usage of this harness instance.
   464  	h.ctx = context.Background()
   465  	h.semaCtx = tree.MakeSemaContext()
   466  	h.evalCtx = tree.MakeTestingEvalContext(cluster.MakeTestingClusterSettings())
   467  	h.prepMemo = nil
   468  	h.cat = nil
   469  	h.optimizer = xform.Optimizer{}
   470  
   471  	// Set up the catalog.
   472  	h.cat = testcat.New()
   473  	for _, schema := range schemas {
   474  		_, err := h.cat.ExecuteDDL(schema)
   475  		if err != nil {
   476  			tb.Fatalf("%v", err)
   477  		}
   478  	}
   479  
   480  	if err := h.semaCtx.Placeholders.Init(len(h.query.args), nil /* typeHints */); err != nil {
   481  		tb.Fatal(err)
   482  	}
   483  	if h.query.prepare {
   484  		// Prepare the query by normalizing it (if it has placeholders) or exploring
   485  		// it (if it doesn't have placeholders), and cache the resulting memo so that
   486  		// it can be used during execution.
   487  		if len(h.query.args) > 0 {
   488  			h.runUsingAPI(tb, Normalize, false /* usePrepared */)
   489  		} else {
   490  			h.runUsingAPI(tb, Explore, false /* usePrepared */)
   491  		}
   492  		h.prepMemo = h.optimizer.DetachMemo()
   493  	} else {
   494  		// Run optbuilder to infer any placeholder types.
   495  		h.runUsingAPI(tb, OptBuild, false /* usePrepared */)
   496  	}
   497  
   498  	// Construct placeholder values.
   499  	h.semaCtx.Placeholders.Values = make(tree.QueryArguments, len(h.query.args))
   500  	for i, arg := range h.query.args {
   501  		var parg tree.Expr
   502  		parg, err := parser.ParseExpr(fmt.Sprintf("%v", arg))
   503  		if err != nil {
   504  			tb.Fatalf("%v", err)
   505  		}
   506  
   507  		id := tree.PlaceholderIdx(i)
   508  		typ, _ := h.semaCtx.Placeholders.ValueType(id)
   509  		texpr, err := sqlbase.SanitizeVarFreeExpr(
   510  			context.Background(),
   511  			parg,
   512  			typ,
   513  			"", /* context */
   514  			&h.semaCtx,
   515  			true, /* allowImpure */
   516  		)
   517  		if err != nil {
   518  			tb.Fatalf("%v", err)
   519  		}
   520  
   521  		h.semaCtx.Placeholders.Values[i] = texpr
   522  	}
   523  	h.evalCtx.Placeholders = &h.semaCtx.Placeholders
   524  	h.evalCtx.Annotations = &h.semaCtx.Annotations
   525  }
   526  
   527  func (h *harness) runUsingAPI(tb testing.TB, bmType BenchmarkType, usePrepared bool) {
   528  	var stmt parser.Statement
   529  	var err error
   530  	if !usePrepared {
   531  		stmt, err = parser.ParseOne(h.query.query)
   532  		if err != nil {
   533  			tb.Fatalf("%v", err)
   534  		}
   535  	}
   536  
   537  	if bmType == Parse {
   538  		return
   539  	}
   540  
   541  	h.optimizer.Init(&h.evalCtx, h.cat)
   542  	if bmType == OptBuild {
   543  		h.optimizer.DisableOptimizations()
   544  	}
   545  
   546  	if !usePrepared {
   547  		bld := optbuilder.New(h.ctx, &h.semaCtx, &h.evalCtx, h.cat, h.optimizer.Factory(), stmt.AST)
   548  		if err = bld.Build(); err != nil {
   549  			tb.Fatalf("%v", err)
   550  		}
   551  	} else if h.prepMemo.HasPlaceholders() {
   552  		_ = h.optimizer.Factory().AssignPlaceholders(h.prepMemo)
   553  	}
   554  
   555  	if bmType == OptBuild || bmType == Normalize {
   556  		return
   557  	}
   558  
   559  	var execMemo *memo.Memo
   560  	if usePrepared && !h.prepMemo.HasPlaceholders() {
   561  		execMemo = h.prepMemo
   562  	} else {
   563  		if _, err := h.optimizer.Optimize(); err != nil {
   564  			panic(err)
   565  		}
   566  		execMemo = h.optimizer.Memo()
   567  	}
   568  
   569  	if bmType == Explore {
   570  		return
   571  	}
   572  
   573  	root := execMemo.RootExpr()
   574  	execFactory := stubFactory{}
   575  	eb := execbuilder.New(&execFactory, execMemo, nil /* catalog */, root, &h.evalCtx)
   576  	if _, err = eb.Build(); err != nil {
   577  		tb.Fatalf("%v", err)
   578  	}
   579  }
   580  
   581  func makeChain(size int) benchQuery {
   582  	var buf bytes.Buffer
   583  	buf.WriteString(`SELECT * FROM `)
   584  	comma := ""
   585  	for i := 0; i < size; i++ {
   586  		buf.WriteString(comma)
   587  		fmt.Fprintf(&buf, "j AS tab%d", i+1)
   588  		comma = ", "
   589  	}
   590  
   591  	if size > 1 {
   592  		buf.WriteString(" WHERE ")
   593  	}
   594  
   595  	comma = ""
   596  	for i := 0; i < size-1; i++ {
   597  		buf.WriteString(comma)
   598  		fmt.Fprintf(&buf, "tab%d.a = tab%d.b", i+1, i+2)
   599  		comma = " AND "
   600  	}
   601  
   602  	return benchQuery{
   603  		name:  fmt.Sprintf("chain-%d", size),
   604  		query: buf.String(),
   605  	}
   606  }
   607  
   608  // BenchmarkChain benchmarks the planning of a "chain" query, where
   609  // some number of tables are joined together, with there being a
   610  // predicate joining the first and second, second and third, third
   611  // and fourth, etc.
   612  //
   613  // For example, a 5-chain looks like:
   614  //
   615  //   SELECT * FROM a, b, c, d, e
   616  //   WHERE a.x = b.y
   617  //     AND b.x = c.y
   618  //     AND c.x = d.y
   619  //     AND d.x = e.y
   620  //
   621  func BenchmarkChain(b *testing.B) {
   622  	h := newHarness()
   623  	defer h.close()
   624  
   625  	for i := 1; i < 20; i++ {
   626  		q := makeChain(i)
   627  		for i := 0; i < b.N; i++ {
   628  			h.runForBenchmark(b, Explore, q)
   629  		}
   630  	}
   631  }