github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/opt/xform/physical_props.go (about)

     1  // Copyright 2018 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package xform
    12  
    13  import (
    14  	"math"
    15  
    16  	"github.com/cockroachdb/cockroach/pkg/sql/opt"
    17  	"github.com/cockroachdb/cockroach/pkg/sql/opt/memo"
    18  	"github.com/cockroachdb/cockroach/pkg/sql/opt/ordering"
    19  	"github.com/cockroachdb/cockroach/pkg/sql/opt/props/physical"
    20  	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
    21  	"github.com/cockroachdb/errors"
    22  )
    23  
    24  // CanProvidePhysicalProps returns true if the given expression can provide the
    25  // required physical properties. The optimizer uses this to determine whether an
    26  // expression provides a required physical property. If it does not, then the
    27  // optimizer inserts an enforcer operator that is able to provide it.
    28  //
    29  // Some operators, like Select and Project, may not directly provide a required
    30  // physical property, but do "pass through" the requirement to their input.
    31  // Operators that do this should return true from the appropriate canProvide
    32  // method and then pass through that property in the buildChildPhysicalProps
    33  // method.
    34  func CanProvidePhysicalProps(e memo.RelExpr, required *physical.Required) bool {
    35  	// All operators can provide the Presentation and LimitHint properties, so no
    36  	// need to check for that.
    37  	return e.Op() == opt.SortOp || ordering.CanProvide(e, &required.Ordering)
    38  }
    39  
    40  // BuildChildPhysicalProps returns the set of physical properties required of
    41  // the nth child, based upon the properties required of the parent. For example,
    42  // the Project operator passes through any ordering requirement to its child,
    43  // but provides any presentation requirement.
    44  //
    45  // The childProps argument is allocated once by the caller and can be reused
    46  // repeatedly as physical properties are derived for each child. On each call,
    47  // buildChildPhysicalProps updates the childProps argument.
    48  func BuildChildPhysicalProps(
    49  	mem *memo.Memo, parent memo.RelExpr, nth int, parentProps *physical.Required,
    50  ) *physical.Required {
    51  	var childProps physical.Required
    52  
    53  	// ScalarExprs don't support required physical properties; don't build
    54  	// physical properties for them.
    55  	if _, ok := parent.Child(nth).(opt.ScalarExpr); ok {
    56  		return mem.InternPhysicalProps(&childProps)
    57  	}
    58  
    59  	// Most operations don't require a presentation of their input; these are the
    60  	// exceptions.
    61  	switch parent.Op() {
    62  	case opt.ExplainOp:
    63  		childProps.Presentation = parent.(*memo.ExplainExpr).Props.Presentation
    64  	case opt.AlterTableSplitOp:
    65  		childProps.Presentation = parent.(*memo.AlterTableSplitExpr).Props.Presentation
    66  	case opt.AlterTableUnsplitOp:
    67  		childProps.Presentation = parent.(*memo.AlterTableUnsplitExpr).Props.Presentation
    68  	case opt.AlterTableRelocateOp:
    69  		childProps.Presentation = parent.(*memo.AlterTableRelocateExpr).Props.Presentation
    70  	case opt.ControlJobsOp:
    71  		childProps.Presentation = parent.(*memo.ControlJobsExpr).Props.Presentation
    72  	case opt.CancelQueriesOp:
    73  		childProps.Presentation = parent.(*memo.CancelQueriesExpr).Props.Presentation
    74  	case opt.CancelSessionsOp:
    75  		childProps.Presentation = parent.(*memo.CancelSessionsExpr).Props.Presentation
    76  	case opt.ExportOp:
    77  		childProps.Presentation = parent.(*memo.ExportExpr).Props.Presentation
    78  	}
    79  
    80  	childProps.Ordering = ordering.BuildChildRequired(parent, &parentProps.Ordering, nth)
    81  
    82  	switch parent.Op() {
    83  	case opt.LimitOp:
    84  		if constLimit, ok := parent.(*memo.LimitExpr).Limit.(*memo.ConstExpr); ok {
    85  			childProps.LimitHint = float64(*constLimit.Value.(*tree.DInt))
    86  			if childProps.LimitHint <= 0 {
    87  				childProps.LimitHint = 1
    88  			}
    89  		}
    90  	case opt.OffsetOp:
    91  		if parentProps.LimitHint == 0 {
    92  			break
    93  		}
    94  		if constOffset, ok := parent.(*memo.OffsetExpr).Offset.(*memo.ConstExpr); ok {
    95  			childProps.LimitHint = parentProps.LimitHint + float64(*constOffset.Value.(*tree.DInt))
    96  			if childProps.LimitHint <= 0 {
    97  				childProps.LimitHint = 1
    98  			}
    99  		}
   100  
   101  	case opt.IndexJoinOp:
   102  		// For an index join, every input row results in exactly one output row.
   103  		childProps.LimitHint = parentProps.LimitHint
   104  
   105  	case opt.ExceptOp, opt.ExceptAllOp, opt.IntersectOp, opt.IntersectAllOp,
   106  		opt.UnionOp, opt.UnionAllOp:
   107  		// TODO(celine): Set operation limits need further thought; for example,
   108  		// the right child of an ExceptOp should not be limited.
   109  		childProps.LimitHint = parentProps.LimitHint
   110  
   111  	case opt.DistinctOnOp:
   112  		distinctCount := parent.(memo.RelExpr).Relational().Stats.RowCount
   113  		if parentProps.LimitHint > 0 {
   114  			childProps.LimitHint = distinctOnLimitHint(distinctCount, parentProps.LimitHint)
   115  		}
   116  
   117  	case opt.SelectOp, opt.LookupJoinOp:
   118  		// These operations are assumed to produce a constant number of output rows
   119  		// for each input row, independent of already-processed rows.
   120  		outputRows := parent.(memo.RelExpr).Relational().Stats.RowCount
   121  		if outputRows == 0 || outputRows < parentProps.LimitHint {
   122  			break
   123  		}
   124  		if input, ok := parent.Child(nth).(memo.RelExpr); ok {
   125  			inputRows := input.Relational().Stats.RowCount
   126  			switch parent.Op() {
   127  			case opt.SelectOp:
   128  				// outputRows / inputRows is roughly the number of output rows produced
   129  				// for each input row. Reduce the number of required input rows so that
   130  				// the expected number of output rows is equal to the parent limit hint.
   131  				childProps.LimitHint = parentProps.LimitHint * inputRows / outputRows
   132  			case opt.LookupJoinOp:
   133  				childProps.LimitHint = lookupJoinInputLimitHint(inputRows, outputRows, parentProps.LimitHint)
   134  			}
   135  		}
   136  
   137  	case opt.OrdinalityOp, opt.ProjectOp, opt.ProjectSetOp:
   138  		childProps.LimitHint = parentProps.LimitHint
   139  	}
   140  
   141  	if childProps.LimitHint < 0 {
   142  		panic(errors.AssertionFailedf("negative limit hint"))
   143  	}
   144  
   145  	// If properties haven't changed, no need to re-intern them.
   146  	if childProps.Equals(parentProps) {
   147  		return parentProps
   148  	}
   149  
   150  	return mem.InternPhysicalProps(&childProps)
   151  }
   152  
   153  // distinctOnLimitHint returns a limit hint for the distinct operation. Given a
   154  // table with distinctCount distinct rows, distinctOnLimitHint will return an
   155  // estimated number of rows to scan that in most cases will yield at least
   156  // neededRows distinct rows while still substantially reducing the number of
   157  // unnecessarily scanned rows.
   158  //
   159  // Assume that when examining a row, each of the distinctCount possible values
   160  // has an equal probability of appearing. The expected number of rows that must
   161  // be examined to collect neededRows distinct rows is
   162  //
   163  // E[examined rows] = distinctCount * (H_{distinctCount} - H_{distinctCount-neededRows})
   164  //
   165  // where distinctCount > neededRows and H_{i} is the ith harmonic number. This
   166  // is a variation on the coupon collector's problem:
   167  // https://en.wikipedia.org/wiki/Coupon_collector%27s_problem
   168  //
   169  // Since values are not uniformly distributed in practice, the limit hint is
   170  // calculated by multiplying E[examined rows] by an experimentally-chosen factor
   171  // to provide a small overestimate of the actual number of rows needed in most
   172  // cases.
   173  //
   174  // This method is least accurate when attempting to return all or nearly all the
   175  // distinct values in the table, since the actual distribution of values becomes
   176  // the primary factor in how long it takes to "collect" the least-likely values.
   177  // As a result, cases where this limit hint may be poor (too low or more than
   178  // twice as high as needed) tend to occur when distinctCount is very close to
   179  // neededRows.
   180  func distinctOnLimitHint(distinctCount, neededRows float64) float64 {
   181  	// The harmonic function below is not intended for values under 1 (for one,
   182  	// it's not monotonic until 0.5); make sure we never return negative results.
   183  	if neededRows >= distinctCount-1.0 {
   184  		return 0
   185  	}
   186  
   187  	// Return an approximation of the nth harmonic number.
   188  	H := func(n float64) float64 {
   189  		// Euler–Mascheroni constant; this is included for clarity but is canceled
   190  		// out in our formula below.
   191  		const gamma = 0.5772156649
   192  		return math.Log(n) + gamma + 1/(2*n)
   193  	}
   194  
   195  	// Coupon collector's estimate, for a uniformly-distributed table.
   196  	uniformPrediction := distinctCount * (H(distinctCount) - H(distinctCount-neededRows))
   197  
   198  	// This multiplier was chosen based on simulating the distinct operation on
   199  	// hundreds of thousands of nonuniformly distributed tables with values of
   200  	// neededRows and distinctCount ranging between 1 and 1000.
   201  	multiplier := 0.15*neededRows/(distinctCount-neededRows) + 1.2
   202  
   203  	// In 91.6% of trials, this scaled estimate was between a 0% and 30%
   204  	// overestimate, and in 97.5% it was between a 0% and 100% overestimate.
   205  	//
   206  	// In 1.8% of tests, the prediction was for an insufficient number of rows, and
   207  	// in 0.7% of tests, the predicted number of rows was more than twice the actual
   208  	// number required.
   209  	return uniformPrediction * multiplier
   210  }
   211  
   212  // BuildChildPhysicalPropsScalar is like BuildChildPhysicalProps, but for
   213  // when the parent is a scalar expression.
   214  func BuildChildPhysicalPropsScalar(mem *memo.Memo, parent opt.Expr, nth int) *physical.Required {
   215  	var childProps physical.Required
   216  	switch parent.Op() {
   217  	case opt.ArrayFlattenOp:
   218  		if nth == 0 {
   219  			af := parent.(*memo.ArrayFlattenExpr)
   220  			childProps.Ordering.FromOrdering(af.Ordering)
   221  			// ArrayFlatten might have extra ordering columns. Use the Presentation property
   222  			// to get rid of them.
   223  			childProps.Presentation = physical.Presentation{
   224  				opt.AliasedColumn{
   225  					// Keep the existing label for the column.
   226  					Alias: mem.Metadata().ColumnMeta(af.RequestedCol).Alias,
   227  					ID:    af.RequestedCol,
   228  				},
   229  			}
   230  		}
   231  	default:
   232  		return physical.MinRequired
   233  	}
   234  	return mem.InternPhysicalProps(&childProps)
   235  }