github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/opt/xform/physical_props.go (about) 1 // Copyright 2018 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package xform 12 13 import ( 14 "math" 15 16 "github.com/cockroachdb/cockroach/pkg/sql/opt" 17 "github.com/cockroachdb/cockroach/pkg/sql/opt/memo" 18 "github.com/cockroachdb/cockroach/pkg/sql/opt/ordering" 19 "github.com/cockroachdb/cockroach/pkg/sql/opt/props/physical" 20 "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" 21 "github.com/cockroachdb/errors" 22 ) 23 24 // CanProvidePhysicalProps returns true if the given expression can provide the 25 // required physical properties. The optimizer uses this to determine whether an 26 // expression provides a required physical property. If it does not, then the 27 // optimizer inserts an enforcer operator that is able to provide it. 28 // 29 // Some operators, like Select and Project, may not directly provide a required 30 // physical property, but do "pass through" the requirement to their input. 31 // Operators that do this should return true from the appropriate canProvide 32 // method and then pass through that property in the buildChildPhysicalProps 33 // method. 34 func CanProvidePhysicalProps(e memo.RelExpr, required *physical.Required) bool { 35 // All operators can provide the Presentation and LimitHint properties, so no 36 // need to check for that. 37 return e.Op() == opt.SortOp || ordering.CanProvide(e, &required.Ordering) 38 } 39 40 // BuildChildPhysicalProps returns the set of physical properties required of 41 // the nth child, based upon the properties required of the parent. For example, 42 // the Project operator passes through any ordering requirement to its child, 43 // but provides any presentation requirement. 44 // 45 // The childProps argument is allocated once by the caller and can be reused 46 // repeatedly as physical properties are derived for each child. On each call, 47 // buildChildPhysicalProps updates the childProps argument. 48 func BuildChildPhysicalProps( 49 mem *memo.Memo, parent memo.RelExpr, nth int, parentProps *physical.Required, 50 ) *physical.Required { 51 var childProps physical.Required 52 53 // ScalarExprs don't support required physical properties; don't build 54 // physical properties for them. 55 if _, ok := parent.Child(nth).(opt.ScalarExpr); ok { 56 return mem.InternPhysicalProps(&childProps) 57 } 58 59 // Most operations don't require a presentation of their input; these are the 60 // exceptions. 61 switch parent.Op() { 62 case opt.ExplainOp: 63 childProps.Presentation = parent.(*memo.ExplainExpr).Props.Presentation 64 case opt.AlterTableSplitOp: 65 childProps.Presentation = parent.(*memo.AlterTableSplitExpr).Props.Presentation 66 case opt.AlterTableUnsplitOp: 67 childProps.Presentation = parent.(*memo.AlterTableUnsplitExpr).Props.Presentation 68 case opt.AlterTableRelocateOp: 69 childProps.Presentation = parent.(*memo.AlterTableRelocateExpr).Props.Presentation 70 case opt.ControlJobsOp: 71 childProps.Presentation = parent.(*memo.ControlJobsExpr).Props.Presentation 72 case opt.CancelQueriesOp: 73 childProps.Presentation = parent.(*memo.CancelQueriesExpr).Props.Presentation 74 case opt.CancelSessionsOp: 75 childProps.Presentation = parent.(*memo.CancelSessionsExpr).Props.Presentation 76 case opt.ExportOp: 77 childProps.Presentation = parent.(*memo.ExportExpr).Props.Presentation 78 } 79 80 childProps.Ordering = ordering.BuildChildRequired(parent, &parentProps.Ordering, nth) 81 82 switch parent.Op() { 83 case opt.LimitOp: 84 if constLimit, ok := parent.(*memo.LimitExpr).Limit.(*memo.ConstExpr); ok { 85 childProps.LimitHint = float64(*constLimit.Value.(*tree.DInt)) 86 if childProps.LimitHint <= 0 { 87 childProps.LimitHint = 1 88 } 89 } 90 case opt.OffsetOp: 91 if parentProps.LimitHint == 0 { 92 break 93 } 94 if constOffset, ok := parent.(*memo.OffsetExpr).Offset.(*memo.ConstExpr); ok { 95 childProps.LimitHint = parentProps.LimitHint + float64(*constOffset.Value.(*tree.DInt)) 96 if childProps.LimitHint <= 0 { 97 childProps.LimitHint = 1 98 } 99 } 100 101 case opt.IndexJoinOp: 102 // For an index join, every input row results in exactly one output row. 103 childProps.LimitHint = parentProps.LimitHint 104 105 case opt.ExceptOp, opt.ExceptAllOp, opt.IntersectOp, opt.IntersectAllOp, 106 opt.UnionOp, opt.UnionAllOp: 107 // TODO(celine): Set operation limits need further thought; for example, 108 // the right child of an ExceptOp should not be limited. 109 childProps.LimitHint = parentProps.LimitHint 110 111 case opt.DistinctOnOp: 112 distinctCount := parent.(memo.RelExpr).Relational().Stats.RowCount 113 if parentProps.LimitHint > 0 { 114 childProps.LimitHint = distinctOnLimitHint(distinctCount, parentProps.LimitHint) 115 } 116 117 case opt.SelectOp, opt.LookupJoinOp: 118 // These operations are assumed to produce a constant number of output rows 119 // for each input row, independent of already-processed rows. 120 outputRows := parent.(memo.RelExpr).Relational().Stats.RowCount 121 if outputRows == 0 || outputRows < parentProps.LimitHint { 122 break 123 } 124 if input, ok := parent.Child(nth).(memo.RelExpr); ok { 125 inputRows := input.Relational().Stats.RowCount 126 switch parent.Op() { 127 case opt.SelectOp: 128 // outputRows / inputRows is roughly the number of output rows produced 129 // for each input row. Reduce the number of required input rows so that 130 // the expected number of output rows is equal to the parent limit hint. 131 childProps.LimitHint = parentProps.LimitHint * inputRows / outputRows 132 case opt.LookupJoinOp: 133 childProps.LimitHint = lookupJoinInputLimitHint(inputRows, outputRows, parentProps.LimitHint) 134 } 135 } 136 137 case opt.OrdinalityOp, opt.ProjectOp, opt.ProjectSetOp: 138 childProps.LimitHint = parentProps.LimitHint 139 } 140 141 if childProps.LimitHint < 0 { 142 panic(errors.AssertionFailedf("negative limit hint")) 143 } 144 145 // If properties haven't changed, no need to re-intern them. 146 if childProps.Equals(parentProps) { 147 return parentProps 148 } 149 150 return mem.InternPhysicalProps(&childProps) 151 } 152 153 // distinctOnLimitHint returns a limit hint for the distinct operation. Given a 154 // table with distinctCount distinct rows, distinctOnLimitHint will return an 155 // estimated number of rows to scan that in most cases will yield at least 156 // neededRows distinct rows while still substantially reducing the number of 157 // unnecessarily scanned rows. 158 // 159 // Assume that when examining a row, each of the distinctCount possible values 160 // has an equal probability of appearing. The expected number of rows that must 161 // be examined to collect neededRows distinct rows is 162 // 163 // E[examined rows] = distinctCount * (H_{distinctCount} - H_{distinctCount-neededRows}) 164 // 165 // where distinctCount > neededRows and H_{i} is the ith harmonic number. This 166 // is a variation on the coupon collector's problem: 167 // https://en.wikipedia.org/wiki/Coupon_collector%27s_problem 168 // 169 // Since values are not uniformly distributed in practice, the limit hint is 170 // calculated by multiplying E[examined rows] by an experimentally-chosen factor 171 // to provide a small overestimate of the actual number of rows needed in most 172 // cases. 173 // 174 // This method is least accurate when attempting to return all or nearly all the 175 // distinct values in the table, since the actual distribution of values becomes 176 // the primary factor in how long it takes to "collect" the least-likely values. 177 // As a result, cases where this limit hint may be poor (too low or more than 178 // twice as high as needed) tend to occur when distinctCount is very close to 179 // neededRows. 180 func distinctOnLimitHint(distinctCount, neededRows float64) float64 { 181 // The harmonic function below is not intended for values under 1 (for one, 182 // it's not monotonic until 0.5); make sure we never return negative results. 183 if neededRows >= distinctCount-1.0 { 184 return 0 185 } 186 187 // Return an approximation of the nth harmonic number. 188 H := func(n float64) float64 { 189 // Euler–Mascheroni constant; this is included for clarity but is canceled 190 // out in our formula below. 191 const gamma = 0.5772156649 192 return math.Log(n) + gamma + 1/(2*n) 193 } 194 195 // Coupon collector's estimate, for a uniformly-distributed table. 196 uniformPrediction := distinctCount * (H(distinctCount) - H(distinctCount-neededRows)) 197 198 // This multiplier was chosen based on simulating the distinct operation on 199 // hundreds of thousands of nonuniformly distributed tables with values of 200 // neededRows and distinctCount ranging between 1 and 1000. 201 multiplier := 0.15*neededRows/(distinctCount-neededRows) + 1.2 202 203 // In 91.6% of trials, this scaled estimate was between a 0% and 30% 204 // overestimate, and in 97.5% it was between a 0% and 100% overestimate. 205 // 206 // In 1.8% of tests, the prediction was for an insufficient number of rows, and 207 // in 0.7% of tests, the predicted number of rows was more than twice the actual 208 // number required. 209 return uniformPrediction * multiplier 210 } 211 212 // BuildChildPhysicalPropsScalar is like BuildChildPhysicalProps, but for 213 // when the parent is a scalar expression. 214 func BuildChildPhysicalPropsScalar(mem *memo.Memo, parent opt.Expr, nth int) *physical.Required { 215 var childProps physical.Required 216 switch parent.Op() { 217 case opt.ArrayFlattenOp: 218 if nth == 0 { 219 af := parent.(*memo.ArrayFlattenExpr) 220 childProps.Ordering.FromOrdering(af.Ordering) 221 // ArrayFlatten might have extra ordering columns. Use the Presentation property 222 // to get rid of them. 223 childProps.Presentation = physical.Presentation{ 224 opt.AliasedColumn{ 225 // Keep the existing label for the column. 226 Alias: mem.Metadata().ColumnMeta(af.RequestedCol).Alias, 227 ID: af.RequestedCol, 228 }, 229 } 230 } 231 default: 232 return physical.MinRequired 233 } 234 return mem.InternPhysicalProps(&childProps) 235 }