github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/opt/props/logical.go

github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/opt/props/logical.go (about)

     1  // Copyright 2018 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package props
    12  
    13  import (
    14  	"github.com/cockroachdb/cockroach/pkg/sql/opt"
    15  	"github.com/cockroachdb/cockroach/pkg/sql/opt/constraint"
    16  )
    17  
    18  // AvailableRuleProps is a bit set with one flag per lazily-populated Rule
    19  // property; a set flag means that property is initialized and ready for use.
    20  type AvailableRuleProps int
    21  
    22  const (
    23  	// PruneCols is set when the Relational.Rule.PruneCols field is populated.
    24  	PruneCols AvailableRuleProps = 1 << iota // each later constant takes the next bit
    25  
    26  	// RejectNullCols is set when the Relational.Rule.RejectNullCols field is
    27  	// populated.
    28  	RejectNullCols
    29  
    30  	// InterestingOrderings is set when the Relational.Rule.InterestingOrderings
    31  	// field is populated.
    32  	InterestingOrderings
    33  
    34  	// HasHoistableSubquery is set when the Scalar.Rule.HasHoistableSubquery
    35  	// field is populated.
    36  	HasHoistableSubquery
    37  
    38  	// JoinSize is set when the Relational.Rule.JoinSize field is populated.
    39  	JoinSize
    40  
    41  	// MultiplicityProps is set when the Relational.Rule.MultiplicityProps
    42  	// field is populated.
    43  	MultiplicityProps
    44  
    45  	// WithUses is set when the Shared.Rule.WithUses field is populated.
    46  	WithUses
    47  )
    48  
    49  // Shared are properties that are shared by both relational and scalar
    50  // expressions.
    51  type Shared struct {
    52  	// Populated is set to true once the properties have been built for the
    53  	// operator.
    54  	Populated bool
    55  
    56  	// OuterCols is the set of columns that are referenced by variables within
    57  	// this sub-expression, but are not bound within the scope of the expression.
    58  	// For example:
    59  	//
    60  	//   SELECT *
    61  	//   FROM a
    62  	//   WHERE EXISTS(SELECT * FROM b WHERE b.x = a.x AND b.y = 5)
    63  	//
    64  	// For the EXISTS expression, a.x is an outer column, meaning that it is
    65  	// defined "outside" the EXISTS expression (hence the name "outer"). The
    66  	// SELECT expression binds the b.x and b.y references, so they are not part
    67  	// of the outer column set. The outer SELECT binds the a.x column, and so
    68  	// its outer column set is empty.
    69  	//
    70  	// Note that what constitutes an "outer column" is dependent on an
    71  	// expression's location in the query. For example, while the b.x and b.y
    72  	// columns are not outer columns on the EXISTS expression, they *are* outer
    73  	// columns on the inner WHERE condition.
    74  	OuterCols opt.ColSet
    75  
    76  	// HasSubquery is true if the subtree rooted at this node contains a subquery.
    77  	// The subquery can be a Subquery, Exists, Any, or ArrayFlatten expression.
    78  	// Subqueries are the only place where a relational node can be nested within a
    79  	// scalar expression.
    80  	HasSubquery bool
    81  
    82  	// HasCorrelatedSubquery is true if the scalar expression tree contains a
    83  	// subquery having one or more outer columns. The subquery can be a Subquery,
    84  	// Exists, or Any operator. These operators usually need to be hoisted out of
    85  	// scalar expression trees and turned into top-level apply joins. This
    86  	// property makes detection fast and easy so that the hoister doesn't waste
    87  	// time searching subtrees that don't contain subqueries.
    88  	HasCorrelatedSubquery bool
    89  
    90  	// VolatilitySet contains the set of volatilities contained in the expression.
    91  	VolatilitySet VolatilitySet
    92  
    93  	// CanHaveSideEffects is true if the expression modifies state outside its
    94  	// own scope, or if it depends upon state that may change across evaluations.
    95  	// An expression can have side effects if it can do any of the following:
    96  	//
    97  	//   1. Trigger a run-time error
    98  	//        10 / col                          -- division by zero error possible
    99  	//        crdb_internal.force_error('', '') -- triggers run-time error
   100  	//
   101  	//   2. Modify outside session or database state
   102  	//        nextval(seq)               -- modifies database sequence value
   103  	//        SELECT * FROM [INSERT ...] -- inserts rows into database
   104  	//
   105  	//   3. Return different results when repeatedly called with same input
   106  	//        ORDER BY random()      -- random can return different values
   107  	//        ts < clock_timestamp() -- clock_timestamp can return different vals
   108  	//
   109  	// The optimizer makes *only* the following side-effect related guarantees:
   110  	//
   111  	//   1. CASE/IF branches are only evaluated if the branch condition is true.
   112  	//      Therefore, the following is guaranteed to never raise a divide by
   113  	//      zero error, regardless of how cleverly the optimizer rewrites the
   114  	//      expression:
   115  	//
   116  	//        CASE WHEN divisor<>0 THEN dividend / divisor ELSE NULL END
   117  	//
   118  	//      While this example is trivial, a more complex example might have
   119  	//      correlated subqueries that cannot be hoisted outside the CASE
   120  	//      expression in the usual way, since that would trigger premature
   121  	//      evaluation.
   122  	//
   123  	//   2. Expressions with side effects are never treated as constant
   124  	//      expressions, even though they do not depend on other columns in the
   125  	//      query:
   126  	//
   127  	//        SELECT * FROM xy ORDER BY random()
   128  	//
   129  	//      If the random() expression were treated as a constant, then the ORDER
   130  	//      BY could be dropped by the optimizer, since ordering by a constant is
   131  	//      a no-op. Instead, the optimizer treats it like it would an expression
   132  	//      that depends upon a column.
   133  	//
   134  	//   3. A common table expression (CTE) with side effects will only be
   135  	//      evaluated one time. This will typically prevent inlining of the CTE
   136  	//      into the query body. For example:
   137  	//
   138  	//        WITH a AS (INSERT ... RETURNING ...) SELECT * FROM a, a
   139  	//
   140  	//      Although the "a" CTE is referenced twice, it must be evaluated only
   141  	//      one time (and its results cached to satisfy the second reference).
   142  	//
   143  	// As long as the optimizer provides these guarantees, it is free to rewrite,
   144  	// reorder, duplicate, and eliminate as if no side effects were present. As an
   145  	// example, the optimizer is free to eliminate the unused "nextval" column in
   146  	// this query:
   147  	//
   148  	//   SELECT x FROM (SELECT nextval(seq), x FROM xy)
   149  	//   =>
   150  	//   SELECT x FROM xy
   151  	//
   152  	// It's also allowed to duplicate side-effecting expressions during predicate
   153  	// pushdown:
   154  	//
   155  	//   SELECT * FROM xy INNER JOIN xz ON xy.x=xz.x WHERE xy.x=random()
   156  	//   =>
   157  	//   SELECT *
   158  	//   FROM (SELECT * FROM xy WHERE xy.x=random())
   159  	//   INNER JOIN (SELECT * FROM xz WHERE xz.x=random())
   160  	//   ON xy.x=xz.x
   161  	//
   162  	CanHaveSideEffects bool
   163  
   164  	// CanMutate is true if the subtree rooted at this expression contains at
   165  	// least one operator that modifies schema (like CreateTable) or writes or
   166  	// deletes rows (like Insert).
   167  	CanMutate bool
   168  
   169  	// HasPlaceholder is true if the subtree rooted at this expression contains
   170  	// at least one Placeholder operator.
   171  	HasPlaceholder bool
   172  
   173  	// Rule props are lazily calculated and typically only apply to a single
   174  	// rule. See the comment above Relational.Rule for more details.
   175  	Rule struct {
   176  		// WithUses tracks information about the WithScans inside the given
   177  		// expression which reference WithIDs outside of that expression.
   178  		WithUses WithUsesMap
   179  	}
   180  }
   181  
   182  // WithUsesMap maps each outside WithID to aggregate information about the
   183  // WithScans that reference it.
   184  type WithUsesMap map[opt.WithID]WithUseInfo
   185  
   186  // WithUseInfo summarizes how a specific WithID is used by WithScan operators.
   187  type WithUseInfo struct {
   188  	// Count is the number of WithScan operators which reference this WithID.
   189  	Count int
   190  
   191  	// UsedCols is the union of columns used by all WithScan operators which
   192  	// reference this WithID.
   193  	UsedCols opt.ColSet
   194  }
   195  
   196  // Relational properties describe the content and characteristics of relational
   197  // data returned by all expression variants within a memo group. While each
   198  // expression in the group may return rows or columns in a different order, or
   199  // compute the result using different algorithms, the same set of data is
   200  // returned and can then be transformed into whatever layout or presentation
   201  // format that is desired, according to the required physical properties.
   202  type Relational struct {
   203  	Shared
   204  
   205  	// OutputCols is the set of columns that can be projected by the expression.
   206  	// Ordering, naming, and duplication of columns is not representable by this
   207  	// property; those are physical properties.
   208  	OutputCols opt.ColSet
   209  
   210  	// NotNullCols is the subset of output columns which cannot be NULL. The
   211  	// nullability of columns flows from the inputs and can also be derived from
   212  	// filters that reject nulls.
   213  	NotNullCols opt.ColSet
   214  
   215  	// Cardinality is the number of rows that can be returned from this relational
   216  	// expression. The number of rows will always be between the inclusive Min and
   217  	// Max bounds. If Max=math.MaxUint32, then there is no limit to the number of
   218  	// rows returned by the expression.
   219  	Cardinality Cardinality
   220  
   221  	// FuncDeps is a set of functional dependencies (FDs) that encode useful
   222  	// relationships between columns in a base or derived relation. Given two sets
   223  	// of columns A and B, a functional dependency A-->B holds if A uniquely
   224  	// determines B. In other words, if two different rows have equal values for
   225  	// columns in A, then those two rows will also have equal values for columns
   226  	// in B. For example:
   227  	//
   228  	//   a1 a2 b1
   229  	//   --------
   230  	//   1  2  5
   231  	//   1  2  5
   232  	//
   233  	// FDs assist the optimizer in proving useful properties about query results.
   234  	// This information powers many optimizations, including eliminating
   235  	// unnecessary DISTINCT operators, simplifying ORDER BY columns, removing
   236  	// Max1Row operators, and mapping semi-joins to inner-joins.
   237  	//
   238  	// The methods that are most useful for optimizations are:
   239  	//   Key: extract a candidate key for the relation
   240  	//   ColsAreStrictKey: determine if a set of columns uniquely identifies rows
   241  	//   ReduceCols: discard redundant columns to create a candidate key
   242  	//
   243  	// For more details, see the header comment for FuncDepSet.
   244  	FuncDeps FuncDepSet
   245  
   246  	// Stats is the set of statistics that apply to this relational expression.
   247  	// See statistics.go and memo/statistics_builder.go for more details.
   248  	Stats Statistics
   249  
   250  	// Rule encapsulates the set of properties that are maintained to assist
   251  	// with specific sets of transformation rules. They are not intended to be
   252  	// general purpose in nature. Typically, they're used by rules which need to
   253  	// decide whether to push operators down into the tree. These properties
   254  	// "bubble up" information about the subtree which can aid in that decision.
   255  	//
   256  	// Whereas the other logical relational properties are filled in by the memo
   257  	// package upon creation of a new memo group, the rules properties are filled
   258  	// in by one of the transformation packages, since deriving rule properties
   259  	// is so closely tied with maintenance of the rules that depend upon them.
   260  	// For example, the PruneCols set is connected to the PruneCols normalization
   261  	// rules. The decision about which columns to add to PruneCols depends upon
   262  	// what works best for those rules. Neither the rules nor their properties
   263  	// can be considered in isolation, without considering the other.
   264  	Rule struct {
   265  		// Available contains bits that indicate whether lazily-populated Rule
   266  		// properties have been initialized. For example, if the PruneCols bit
   267  		// is set, then the Rule.PruneCols field has been initialized and is
   268  		// ready for use.
   269  		Available AvailableRuleProps
   270  
   271  		// PruneCols is the subset of output columns that can potentially be
   272  		// eliminated by one of the PruneCols normalization rules. Those rules
   273  		// operate by pushing a Project operator down the tree that discards
   274  		// unused columns. For example:
   275  		//
   276  		//   SELECT y FROM xyz WHERE x=1 ORDER BY y LIMIT 1
   277  		//
   278  		// The z column is never referenced, either by the filter or by the
   279  		// limit, and would be part of the PruneCols set for the Limit operator.
   280  		// The final Project operator could then push down a pruning Project
   281  		// operator that eliminated the z column from its subtree.
   282  		//
   283  		// PruneCols is built bottom-up. It typically starts out containing the
   284  		// complete set of output columns in a leaf expression, but quickly
   285  		// empties out at higher levels of the expression tree as the columns
   286  		// are referenced. Drawing from the example above:
   287  		//
   288  		//   Limit PruneCols : [z]
   289  		//   Select PruneCols: [y, z]
   290  		//   Scan PruneCols  : [x, y, z]
   291  		//
   292  		// Only a small number of relational operators are capable of pruning
   293  		// columns (e.g. Scan, Project). A pruning Project operator pushed down
   294  		// the tree must journey downwards until it finds a pruning-capable
   295  		// operator. If a column is part of PruneCols, then it is guaranteed that
   296  		// such an operator exists at the end of the journey. Operators that are
   297  		// not capable of filtering columns (like Explain) will not add any of
   298  		// their columns to this set.
   299  		//
   300  		// PruneCols is lazily populated by rules in prune_cols.opt. It is
   301  		// only valid once the Rule.Available.PruneCols bit has been set.
   302  		PruneCols opt.ColSet
   303  
   304  		// RejectNullCols is the subset of nullable output columns that can
   305  		// potentially be made not-null by one of the RejectNull normalization
   306  		// rules. Those rules work in concert with the predicate pushdown rules
   307  		// to synthesize a "col IS NOT NULL" filter and push it down the tree.
   308  		// See the header comments for the reject_nulls.opt file for more
   309  		// information and an example.
   310  		//
   311  		// RejectNullCols is built bottom-up by rulePropsBuilder, and only contains
   312  		// nullable outer join columns that can be simplified. The columns can be
   313  		// propagated up through multiple operators, giving higher levels of the
   314  		// tree a window into the structure of the tree several layers down. In
   315  		// particular, the null rejection rules use this property to determine when
   316  		// it's advantageous to synthesize a new "IS NOT NULL" filter. Without this
   317  		// information, the rules can clutter the tree with extraneous and
   318  		// marginally useful null filters.
   319  		//
   320  		// RejectNullCols is lazily populated by rules in reject_nulls.opt. It is
   321  		// only valid once the Rule.Available.RejectNullCols bit has been set.
   322  		RejectNullCols opt.ColSet
   323  
   324  		// InterestingOrderings is a list of orderings that potentially could be
   325  		// provided by the operator without sorting. Interesting orderings normally
   326  		// come from scans (index orders) and are bubbled up through some operators.
   327  		//
   328  		// Note that all prefixes of an interesting order are "interesting"; the
   329  		// list doesn't need to contain orderings that are prefixes of some other
   330  		// ordering in the list.
   331  		//
   332  		// InterestingOrderings is lazily populated by interesting_orderings.go.
   333  		// It is only valid once the Rule.Available.InterestingOrderings bit has
   334  		// been set.
   335  		InterestingOrderings opt.OrderingSet
   336  
   337  		// JoinSize is the number of relations being *inner* joined underneath
   338  		// this node. It is used to only reorder joins via AssociateJoin up to
   339  		// a certain limit. Only valid once the Rule.Available.JoinSize bit is set.
   340  		JoinSize int
   341  
   342  		// MultiplicityProps is a struct that describes how rows from the input of
   343  		// a join are affected by the join. Rows from the left or right input are
   344  		// described as being duplicated and/or filtered.
   345  		// MultiplicityProps also contains a ColSet that contains columns from base
   346  		// tables that are guaranteed not to have been filtered. This ColSet is used
   347  		// in non-join operators as well.
   348  		//
   349  		// MultiplicityProps is lazily populated by multiplicity_builder.go. It is
   350  		// only valid once the Rule.Available.MultiplicityProps bit has been set.
   351  		MultiplicityProps JoinMultiplicity
   352  	}
   353  }
   354  
   355  // Scalar properties are logical properties that are computed for scalar
   356  // expressions that return primitive-valued types. Scalar properties are
   357  // lazily populated on request.
   358  type Scalar struct {
   359  	Shared
   360  
   361  	// Constraints is the set of constraints deduced from a boolean expression.
   362  	// For the expression to be true, all constraints in the set must be
   363  	// satisfied.
   364  	Constraints *constraint.Set
   365  
   366  	// TightConstraints is true if the expression is exactly equivalent to the
   367  	// constraints. If it is false, the constraints are weaker than the
   368  	// expression.
   369  	TightConstraints bool
   370  
   371  	// FuncDeps is a set of functional dependencies (FDs) inferred from a
   372  	// boolean expression. This field is only populated for Filters expressions.
   373  	//
   374  	//  - Constant column FDs such as ()-->(1,2) from conjuncts such as
   375  	//    x = 5 AND y = 10.
   376  	//  - Equivalent column FDs such as (1)==(2), (2)==(1) from conjuncts such
   377  	//    as x = y.
   378  	//
   379  	// It is useful to calculate FDs on Filters expressions, because it allows
   380  	// additional filters to be inferred for push-down. For example, consider
   381  	// the query:
   382  	//
   383  	//   SELECT * FROM a, b WHERE a.x = b.x AND a.x > 5;
   384  	//
   385  	// By adding the equivalency FD for a.x = b.x, we can infer an additional
   386  	// filter, b.x > 5. This allows us to rewrite the query as:
   387  	//
   388  	//   SELECT * FROM (SELECT * FROM a WHERE a.x > 5) AS a,
   389  	//     (SELECT * FROM b WHERE b.x > 5) AS b WHERE a.x = b.x;
   390  	//
   391  	// For more details, see the header comment for FuncDepSet.
   392  	FuncDeps FuncDepSet
   393  
   394  	// Rule encapsulates the set of properties that are maintained to assist
   395  	// with specific sets of transformation rules. See the Relational.Rule
   396  	// comment for more details.
   397  	Rule struct {
   398  		// Available contains bits that indicate whether lazily-populated Rule
   399  		// properties have been initialized. For example, if the
   400  		// HasHoistableSubquery bit is set, then the Rule.HasHoistableSubquery
   401  		// field has been initialized and is ready for use.
   402  		Available AvailableRuleProps
   403  
   404  		// HasHoistableSubquery is true if the scalar expression tree contains a
   405  		// subquery having one or more outer columns, and if the subquery needs
   406  		// to be hoisted up into its parent query as part of query decorrelation.
   407  		// The subquery can be a Subquery, Exists, or Any operator. These operators
   408  		// need to be hoisted out of scalar expression trees and turned into
   409  		// top-level apply joins. This property makes detection fast and easy so
   410  		// that the hoister doesn't waste time searching subtrees that don't
   411  		// contain subqueries.
   412  		//
   413  		// HasHoistableSubquery is lazily populated by rules in decorrelate.opt.
   414  		// It is only valid once the Rule.Available.HasHoistableSubquery bit has
   415  		// been set.
   416  		HasHoistableSubquery bool
   417  	}
   418  }
   419  
   420  // IsAvailable reports whether any of the rule property bits in p is marked
   421  // as populated in these relational properties (r.Rule.Available).
   422  func (r *Relational) IsAvailable(p AvailableRuleProps) bool {
   423  	return r.Rule.Available&p != 0
   424  }
   425  
   426  // SetAvailable marks the rule properties in p as populated by turning on
   427  // their bits in r.Rule.Available; bits that are already set stay set.
   428  func (r *Relational) SetAvailable(p AvailableRuleProps) {
   429  	r.Rule.Available = r.Rule.Available | p
   430  }
   431  
   432  // IsAvailable reports whether any of the rule property bits in p is marked
   433  // as populated in these scalar properties (s.Rule.Available).
   434  func (s *Scalar) IsAvailable(p AvailableRuleProps) bool {
   435  	return s.Rule.Available&p != 0
   436  }
   437  
   438  // SetAvailable marks the rule properties in p as populated by turning on
   439  // their bits in s.Rule.Available; bits that are already set stay set.
   440  func (s *Scalar) SetAvailable(p AvailableRuleProps) {
   441  	s.Rule.Available = s.Rule.Available | p
   442  }