github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/opt/optbuilder/builder.go (about)

     1  // Copyright 2018 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package optbuilder
    12  
    13  import (
    14  	"context"
    15  
    16  	"github.com/cockroachdb/cockroach/pkg/sql/delegate"
    17  	"github.com/cockroachdb/cockroach/pkg/sql/opt"
    18  	"github.com/cockroachdb/cockroach/pkg/sql/opt/cat"
    19  	"github.com/cockroachdb/cockroach/pkg/sql/opt/norm"
    20  	"github.com/cockroachdb/cockroach/pkg/sql/opt/optgen/exprgen"
    21  	"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode"
    22  	"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror"
    23  	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
    24  	"github.com/cockroachdb/cockroach/pkg/sql/types"
    25  	"github.com/cockroachdb/cockroach/pkg/util/errorutil"
    26  	"github.com/cockroachdb/cockroach/pkg/util/errorutil/unimplemented"
    27  	"github.com/cockroachdb/errors"
    28  )
    29  
// Builder holds the context needed for building a memo structure from a SQL
// statement. Builder.Build() is the top-level function to perform this build
// process. As part of the build process, it performs name resolution and
// type checking on the expressions within Builder.stmt.
//
// The memo structure is the primary data structure used for query optimization,
// so building the memo is the first step required to optimize a query. The memo
// is maintained inside Builder.factory, which exposes methods to construct
// expression groups inside the memo. Once the expression tree has been built,
// the builder calls SetRoot on the memo to indicate the root memo group, as
// well as the set of physical properties (e.g., row and column ordering) that
// at least one expression in the root group must satisfy.
//
// A memo is essentially a compact representation of a forest of logically-
// equivalent query trees. Each tree is either a logical or a physical plan
// for executing the SQL query. After the build process is complete, the memo
// forest will contain exactly one tree: the logical query plan corresponding
// to the AST of the original SQL statement with some number of "normalization"
// transformations applied. Normalization transformations include heuristics
// such as predicate push-down that should always be applied. They do not
// include "exploration" transformations whose benefit must be evaluated with
// the optimizer's cost model (e.g., join reordering).
//
// See the Factory type in the opt/norm package, and the memo it exposes through
// its Memo() method, for more details about the memo structure.
type Builder struct {

	// -- Control knobs --
	//
	// These fields can be set before calling Build to control various aspects of
	// the building process.

	// KeepPlaceholders is a control knob: if set, optbuilder will never replace
	// a placeholder operator with its assigned value, even when it is available.
	// This is used when re-preparing invalidated queries.
	KeepPlaceholders bool

	// -- Results --
	//
	// These fields are set during the building process and can be used after
	// Build is called.

	// HadPlaceholders is set to true if we replaced any placeholders with their
	// values.
	HadPlaceholders bool

	// DisableMemoReuse is set to true if we encountered a statement that is not
	// safe to cache the memo for. This is the case for various DDL and SHOW
	// statements, as well as for any statement that was rewritten through the
	// delegate mechanism or built by an opaque handler (see buildStmt).
	DisableMemoReuse bool

	// factory constructs the memo expressions and gives access to the memo
	// itself (via factory.Memo()). stmt is the parsed SQL statement that Build
	// converts into a memo. Both are supplied to New.
	factory *norm.Factory
	stmt    tree.Statement

	// ctx, semaCtx, evalCtx and catalog are supplied to New and kept for the
	// duration of the build.
	//
	// scopeAlloc is a small pool of pre-allocated scope structs that allocScope
	// hands out one at a time, refilling the pool when it runs empty.
	//
	// cteStack holds the CTE frames that are currently open. Build pushes a
	// frame (pushWithFrame) before building the statement, pops it afterwards
	// (popWithFrame), and treats a non-empty stack at that point as an
	// assertion failure.
	ctx        context.Context
	semaCtx    *tree.SemaContext
	evalCtx    *tree.EvalContext
	catalog    cat.Catalog
	scopeAlloc []scope
	cteStack   [][]cteSource

	// If set, the planner will skip checking for the SELECT privilege when
	// resolving data sources (tables, views, etc). This is used when compiling
	// views and the view SELECT privilege has already been checked. This should
	// be used with care.
	skipSelectPrivilegeChecks bool

	// views contains a cache of views that have already been parsed, in case they
	// are referenced multiple times in the same query.
	views map[cat.View]*tree.Select

	// subquery contains a pointer to the subquery which is currently being built
	// (if any).
	subquery *subquery

	// If set, we are processing a view definition; in this case, catalog caches
	// are disabled and certain statements (like mutations) are disallowed. The
	// list of disallowed statements is enforced at the top of buildStmt.
	insideViewDef bool

	// If set, we are collecting view dependencies in viewDeps. This can only
	// happen inside view definitions.
	//
	// When a view depends on another view, we only want to track the dependency
	// on the inner view itself, and not the transitive dependencies (so
	// trackViewDeps would be false inside that inner view).
	trackViewDeps bool
	viewDeps      opt.ViewDeps

	// If set, the data source names in the AST are rewritten to the fully
	// qualified version (after resolution). Used to construct the strings for
	// CREATE VIEW and CREATE TABLE AS queries.
	// TODO(radu): modifying the AST in-place is hacky; we will need to switch to
	// using AST annotations.
	qualifyDataSourceNamesInAST bool

	// isCorrelated is set to true if we already reported to telemetry that the
	// query contains a correlated subquery.
	isCorrelated bool
}
   129  
   130  // New creates a new Builder structure initialized with the given
   131  // parsed SQL statement.
   132  func New(
   133  	ctx context.Context,
   134  	semaCtx *tree.SemaContext,
   135  	evalCtx *tree.EvalContext,
   136  	catalog cat.Catalog,
   137  	factory *norm.Factory,
   138  	stmt tree.Statement,
   139  ) *Builder {
   140  	return &Builder{
   141  		factory: factory,
   142  		stmt:    stmt,
   143  		ctx:     ctx,
   144  		semaCtx: semaCtx,
   145  		evalCtx: evalCtx,
   146  		catalog: catalog,
   147  	}
   148  }
   149  
   150  // Build is the top-level function to build the memo structure inside
   151  // Builder.factory from the parsed SQL statement in Builder.stmt. See the
   152  // comment above the Builder type declaration for details.
   153  //
   154  // If any subroutines panic with a non-runtime error as part of the build
   155  // process, the panic is caught here and returned as an error.
   156  func (b *Builder) Build() (err error) {
   157  	defer func() {
   158  		if r := recover(); r != nil {
   159  			// This code allows us to propagate errors without adding lots of checks
   160  			// for `if err != nil` throughout the construction code. This is only
   161  			// possible because the code does not update shared state and does not
   162  			// manipulate locks.
   163  			if ok, e := errorutil.ShouldCatch(r); ok {
   164  				err = e
   165  			} else {
   166  				panic(r)
   167  			}
   168  		}
   169  	}()
   170  
   171  	// Special case for CannedOptPlan.
   172  	if canned, ok := b.stmt.(*tree.CannedOptPlan); ok {
   173  		b.factory.DisableOptimizations()
   174  		_, err := exprgen.Build(b.catalog, b.factory, canned.Plan)
   175  		return err
   176  	}
   177  
   178  	b.pushWithFrame()
   179  
   180  	// Build the memo, and call SetRoot on the memo to indicate the root group
   181  	// and physical properties.
   182  	outScope := b.buildStmtAtRoot(b.stmt, nil /* desiredTypes */, b.allocScope())
   183  
   184  	b.popWithFrame(outScope)
   185  	if len(b.cteStack) > 0 {
   186  		panic(errors.AssertionFailedf("dangling CTE stack frames"))
   187  	}
   188  
   189  	physical := outScope.makePhysicalProps()
   190  	b.factory.Memo().SetRoot(outScope.expr, physical)
   191  	return nil
   192  }
   193  
   194  // unimplementedWithIssueDetailf formats according to a format
   195  // specifier and returns a Postgres error with the
   196  // pg code FeatureNotSupported.
   197  func unimplementedWithIssueDetailf(issue int, detail, format string, args ...interface{}) error {
   198  	return unimplemented.NewWithIssueDetailf(issue, detail, format, args...)
   199  }
   200  
   201  // buildStmtAtRoot builds a statement, beginning a new conceptual query
   202  // "context".
   203  func (b *Builder) buildStmtAtRoot(
   204  	stmt tree.Statement, desiredTypes []*types.T, inScope *scope,
   205  ) (outScope *scope) {
   206  	defer func(prevAtRoot bool) {
   207  		inScope.atRoot = prevAtRoot
   208  	}(inScope.atRoot)
   209  	inScope.atRoot = true
   210  
   211  	return b.buildStmt(stmt, desiredTypes, inScope)
   212  }
   213  
   214  // buildStmt builds a set of memo groups that represent the given SQL
   215  // statement.
   216  //
   217  // NOTE: The following descriptions of the inScope parameter and outScope
   218  //       return value apply for all buildXXX() functions in this directory.
   219  //       Note that some buildXXX() functions pass outScope as a parameter
   220  //       rather than a return value so its scopeColumns can be built up
   221  //       incrementally across several function calls.
   222  //
   223  // inScope   This parameter contains the name bindings that are visible for this
   224  //           statement/expression (e.g., passed in from an enclosing statement).
   225  //
   226  // outScope  This return value contains the newly bound variables that will be
   227  //           visible to enclosing statements, as well as a pointer to any
   228  //           "parent" scope that is still visible. The top-level memo expression
   229  //           for the built statement/expression is returned in outScope.expr.
   230  func (b *Builder) buildStmt(
   231  	stmt tree.Statement, desiredTypes []*types.T, inScope *scope,
   232  ) (outScope *scope) {
   233  	if b.insideViewDef {
   234  		// A black list of statements that can't be used from inside a view.
   235  		switch stmt := stmt.(type) {
   236  		case *tree.Delete, *tree.Insert, *tree.Update, *tree.CreateTable, *tree.CreateView,
   237  			*tree.Split, *tree.Unsplit, *tree.Relocate,
   238  			*tree.ControlJobs, *tree.CancelQueries, *tree.CancelSessions:
   239  			panic(pgerror.Newf(
   240  				pgcode.Syntax, "%s cannot be used inside a view definition", stmt.StatementTag(),
   241  			))
   242  		}
   243  	}
   244  
   245  	switch stmt := stmt.(type) {
   246  	case *tree.Select:
   247  		return b.buildSelect(stmt, noRowLocking, desiredTypes, inScope)
   248  
   249  	case *tree.ParenSelect:
   250  		return b.buildSelect(stmt.Select, noRowLocking, desiredTypes, inScope)
   251  
   252  	case *tree.Delete:
   253  		return b.processWiths(stmt.With, inScope, func(inScope *scope) *scope {
   254  			return b.buildDelete(stmt, inScope)
   255  		})
   256  
   257  	case *tree.Insert:
   258  		return b.processWiths(stmt.With, inScope, func(inScope *scope) *scope {
   259  			return b.buildInsert(stmt, inScope)
   260  		})
   261  
   262  	case *tree.Update:
   263  		return b.processWiths(stmt.With, inScope, func(inScope *scope) *scope {
   264  			return b.buildUpdate(stmt, inScope)
   265  		})
   266  
   267  	case *tree.CreateTable:
   268  		return b.buildCreateTable(stmt, inScope)
   269  
   270  	case *tree.CreateView:
   271  		return b.buildCreateView(stmt, inScope)
   272  
   273  	case *tree.Explain:
   274  		return b.buildExplain(stmt, inScope)
   275  
   276  	case *tree.ShowTraceForSession:
   277  		return b.buildShowTrace(stmt, inScope)
   278  
   279  	case *tree.Split:
   280  		return b.buildAlterTableSplit(stmt, inScope)
   281  
   282  	case *tree.Unsplit:
   283  		return b.buildAlterTableUnsplit(stmt, inScope)
   284  
   285  	case *tree.Relocate:
   286  		return b.buildAlterTableRelocate(stmt, inScope)
   287  
   288  	case *tree.ControlJobs:
   289  		return b.buildControlJobs(stmt, inScope)
   290  
   291  	case *tree.CancelQueries:
   292  		return b.buildCancelQueries(stmt, inScope)
   293  
   294  	case *tree.CancelSessions:
   295  		return b.buildCancelSessions(stmt, inScope)
   296  
   297  	case *tree.Export:
   298  		return b.buildExport(stmt, inScope)
   299  
   300  	case *tree.ExplainAnalyzeDebug:
   301  		// This statement should have been handled by the executor.
   302  		panic(errors.Errorf("%s can only be used as a top-level statement", stmt.StatementTag()))
   303  
   304  	default:
   305  		// See if this statement can be rewritten to another statement using the
   306  		// delegate functionality.
   307  		newStmt, err := delegate.TryDelegate(b.ctx, b.catalog, b.evalCtx, stmt)
   308  		if err != nil {
   309  			panic(err)
   310  		}
   311  		if newStmt != nil {
   312  			// Many delegate implementations resolve objects. It would be tedious to
   313  			// register all those dependencies with the metadata (for cache
   314  			// invalidation). We don't care about caching plans for these statements.
   315  			b.DisableMemoReuse = true
   316  			return b.buildStmt(newStmt, desiredTypes, inScope)
   317  		}
   318  
   319  		// See if we have an opaque handler registered for this statement type.
   320  		if outScope := b.tryBuildOpaque(stmt, inScope); outScope != nil {
   321  			// The opaque handler may resolve objects; we don't care about caching
   322  			// plans for these statements.
   323  			b.DisableMemoReuse = true
   324  			return outScope
   325  		}
   326  		panic(errors.AssertionFailedf("unexpected statement: %T", stmt))
   327  	}
   328  }
   329  
   330  func (b *Builder) allocScope() *scope {
   331  	if len(b.scopeAlloc) == 0 {
   332  		// scope is relatively large (~250 bytes), so only allocate in small
   333  		// chunks.
   334  		b.scopeAlloc = make([]scope, 4)
   335  	}
   336  	r := &b.scopeAlloc[0]
   337  	b.scopeAlloc = b.scopeAlloc[1:]
   338  	r.builder = b
   339  	return r
   340  }