github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/opt/props/logical.go (about) 1 // Copyright 2018 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package props 12 13 import ( 14 "github.com/cockroachdb/cockroach/pkg/sql/opt" 15 "github.com/cockroachdb/cockroach/pkg/sql/opt/constraint" 16 ) 17 18 // AvailableRuleProps is a bit set that indicates when lazily-populated Rule 19 // properties are initialized and ready for use. 20 type AvailableRuleProps int 21 22 const ( 23 // PruneCols is set when the Relational.Rule.PruneCols field is populated. 24 PruneCols AvailableRuleProps = 1 << iota 25 26 // RejectNullCols is set when the Relational.Rule.RejectNullCols field is 27 // populated. 28 RejectNullCols 29 30 // InterestingOrderings is set when the Relational.Rule.InterestingOrderings 31 // field is populated. 32 InterestingOrderings 33 34 // HasHoistableSubquery is set when the Scalar.Rule.HasHoistableSubquery 35 // is populated. 36 HasHoistableSubquery 37 38 // JoinSize is set when the Relational.Rule.JoinSize field is populated. 39 JoinSize 40 41 // MultiplicityProps is set when the Relational.Rule.MultiplicityProps 42 // field is populated. 43 MultiplicityProps 44 45 // WithUses is set when the Shared.Rule.WithUses field is populated. 46 WithUses 47 ) 48 49 // Shared are properties that are shared by both relational and scalar 50 // expressions. 51 type Shared struct { 52 // Populated is set to true once the properties have been built for the 53 // operator. 54 Populated bool 55 56 // OuterCols is the set of columns that are referenced by variables within 57 // this sub-expression, but are not bound within the scope of the expression. 58 // For example: 59 // 60 // SELECT * 61 // FROM a 62 // WHERE EXISTS(SELECT * FROM b WHERE b.x = a.x AND b.y = 5) 63 // 64 // For the EXISTS expression, a.x is an outer column, meaning that it is 65 // defined "outside" the EXISTS expression (hence the name "outer"). The 66 // SELECT expression binds the b.x and b.y references, so they are not part 67 // of the outer column set. The outer SELECT binds the a.x column, and so 68 // its outer column set is empty. 69 // 70 // Note that what constitutes an "outer column" is dependent on an 71 // expression's location in the query. For example, while the b.x and b.y 72 // columns are not outer columns on the EXISTS expression, they *are* outer 73 // columns on the inner WHERE condition. 74 OuterCols opt.ColSet 75 76 // HasSubquery is true if the subtree rooted at this node contains a subquery. 77 // The subquery can be a Subquery, Exists, Any, or ArrayFlatten expression. 78 // Subqueries are the only place where a relational node can be nested within a 79 // scalar expression. 80 HasSubquery bool 81 82 // HasCorrelatedSubquery is true if the scalar expression tree contains a 83 // subquery having one or more outer columns. The subquery can be a Subquery, 84 // Exists, or Any operator. These operators usually need to be hoisted out of 85 // scalar expression trees and turned into top-level apply joins. This 86 // property makes detection fast and easy so that the hoister doesn't waste 87 // time searching subtrees that don't contain subqueries. 88 HasCorrelatedSubquery bool 89 90 // VolatilitySet contains the set of volatilities contained in the expression. 91 VolatilitySet VolatilitySet 92 93 // CanHaveSideEffects is true if the expression modifies state outside its 94 // own scope, or if depends upon state that may change across evaluations. An 95 // expression can have side effects if it can do any of the following: 96 // 97 // 1. Trigger a run-time error 98 // 10 / col -- division by zero error possible 99 // crdb_internal.force_error('', '') -- triggers run-time error 100 // 101 // 2. Modify outside session or database state 102 // nextval(seq) -- modifies database sequence value 103 // SELECT * FROM [INSERT ...] -- inserts rows into database 104 // 105 // 3. Return different results when repeatedly called with same input 106 // ORDER BY random() -- random can return different values 107 // ts < clock_timestamp() -- clock_timestamp can return different vals 108 // 109 // The optimizer makes *only* the following side-effect related guarantees: 110 // 111 // 1. CASE/IF branches are only evaluated if the branch condition is true. 112 // Therefore, the following is guaranteed to never raise a divide by 113 // zero error, regardless of how cleverly the optimizer rewrites the 114 // expression: 115 // 116 // CASE WHEN divisor<>0 THEN dividend / divisor ELSE NULL END 117 // 118 // While this example is trivial, a more complex example might have 119 // correlated subqueries that cannot be hoisted outside the CASE 120 // expression in the usual way, since that would trigger premature 121 // evaluation. 122 // 123 // 2. Expressions with side effects are never treated as constant 124 // expressions, even though they do not depend on other columns in the 125 // query: 126 // 127 // SELECT * FROM xy ORDER BY random() 128 // 129 // If the random() expression were treated as a constant, then the ORDER 130 // BY could be dropped by the optimizer, since ordering by a constant is 131 // a no-op. Instead, the optimizer treats it like it would an expression 132 // that depends upon a column. 133 // 134 // 3. A common table expression (CTE) with side effects will only be 135 // evaluated one time. This will typically prevent inlining of the CTE 136 // into the query body. For example: 137 // 138 // WITH a AS (INSERT ... RETURNING ...) SELECT * FROM a, a 139 // 140 // Although the "a" CTE is referenced twice, it must be evaluated only 141 // one time (and its results cached to satisfy the second reference). 142 // 143 // As long as the optimizer provides these guarantees, it is free to rewrite, 144 // reorder, duplicate, and eliminate as if no side effects were present. As an 145 // example, the optimizer is free to eliminate the unused "nextval" column in 146 // this query: 147 // 148 // SELECT x FROM (SELECT nextval(seq), x FROM xy) 149 // => 150 // SELECT x FROM xy 151 // 152 // It's also allowed to duplicate side-effecting expressions during predicate 153 // pushdown: 154 // 155 // SELECT * FROM xy INNER JOIN xz ON xy.x=xz.x WHERE xy.x=random() 156 // => 157 // SELECT * 158 // FROM (SELECT * FROM xy WHERE xy.x=random()) 159 // INNER JOIN (SELECT * FROM xz WHERE xz.x=random()) 160 // ON xy.x=xz.x 161 // 162 CanHaveSideEffects bool 163 164 // CanMutate is true if the subtree rooted at this expression contains at 165 // least one operator that modifies schema (like CreateTable) or writes or 166 // deletes rows (like Insert). 167 CanMutate bool 168 169 // HasPlaceholder is true if the subtree rooted at this expression contains 170 // at least one Placeholder operator. 171 HasPlaceholder bool 172 173 // Rule props are lazily calculated and typically only apply to a single 174 // rule. See the comment above Relational.Rule for more details. 175 Rule struct { 176 // WithUses tracks information about the WithScans inside the given 177 // expression which reference WithIDs outside of that expression. 178 WithUses WithUsesMap 179 } 180 } 181 182 // WithUsesMap stores information about each WithScan referencing an outside 183 // WithID, grouped by each WithID. 184 type WithUsesMap map[opt.WithID]WithUseInfo 185 186 // WithUseInfo contains information about the usage of a specific WithID. 187 type WithUseInfo struct { 188 // Count is the number of WithScan operators which reference this WithID. 189 Count int 190 191 // UsedCols is the union of columns used by all WithScan operators which 192 // reference this WithID. 193 UsedCols opt.ColSet 194 } 195 196 // Relational properties describe the content and characteristics of relational 197 // data returned by all expression variants within a memo group. While each 198 // expression in the group may return rows or columns in a different order, or 199 // compute the result using different algorithms, the same set of data is 200 // returned and can then be transformed into whatever layout or presentation 201 // format that is desired, according to the required physical properties. 202 type Relational struct { 203 Shared 204 205 // OutputCols is the set of columns that can be projected by the expression. 206 // Ordering, naming, and duplication of columns is not representable by this 207 // property; those are physical properties. 208 OutputCols opt.ColSet 209 210 // NotNullCols is the subset of output columns which cannot be NULL. The 211 // nullability of columns flows from the inputs and can also be derived from 212 // filters that reject nulls. 213 NotNullCols opt.ColSet 214 215 // Cardinality is the number of rows that can be returned from this relational 216 // expression. The number of rows will always be between the inclusive Min and 217 // Max bounds. If Max=math.MaxUint32, then there is no limit to the number of 218 // rows returned by the expression. 219 Cardinality Cardinality 220 221 // FuncDepSet is a set of functional dependencies (FDs) that encode useful 222 // relationships between columns in a base or derived relation. Given two sets 223 // of columns A and B, a functional dependency A-->B holds if A uniquely 224 // determines B. In other words, if two different rows have equal values for 225 // columns in A, then those two rows will also have equal values for columns 226 // in B. For example: 227 // 228 // a1 a2 b1 229 // -------- 230 // 1 2 5 231 // 1 2 5 232 // 233 // FDs assist the optimizer in proving useful properties about query results. 234 // This information powers many optimizations, including eliminating 235 // unnecessary DISTINCT operators, simplifying ORDER BY columns, removing 236 // Max1Row operators, and mapping semi-joins to inner-joins. 237 // 238 // The methods that are most useful for optimizations are: 239 // Key: extract a candidate key for the relation 240 // ColsAreStrictKey: determine if a set of columns uniquely identify rows 241 // ReduceCols: discard redundant columns to create a candidate key 242 // 243 // For more details, see the header comment for FuncDepSet. 244 FuncDeps FuncDepSet 245 246 // Stats is the set of statistics that apply to this relational expression. 247 // See statistics.go and memo/statistics_builder.go for more details. 248 Stats Statistics 249 250 // Rule encapsulates the set of properties that are maintained to assist 251 // with specific sets of transformation rules. They are not intended to be 252 // general purpose in nature. Typically, they're used by rules which need to 253 // decide whether to push operators down into the tree. These properties 254 // "bubble up" information about the subtree which can aid in that decision. 255 // 256 // Whereas the other logical relational properties are filled in by the memo 257 // package upon creation of a new memo group, the rules properties are filled 258 // in by one of the transformation packages, since deriving rule properties 259 // is so closely tied with maintenance of the rules that depend upon them. 260 // For example, the PruneCols set is connected to the PruneCols normalization 261 // rules. The decision about which columns to add to PruneCols depends upon 262 // what works best for those rules. Neither the rules nor their properties 263 // can be considered in isolation, without considering the other. 264 Rule struct { 265 // Available contains bits that indicate whether lazily-populated Rule 266 // properties have been initialized. For example, if the UnfilteredCols 267 // bit is set, then the Rule.UnfilteredCols field has been initialized 268 // and is ready for use. 269 Available AvailableRuleProps 270 271 // PruneCols is the subset of output columns that can potentially be 272 // eliminated by one of the PruneCols normalization rules. Those rules 273 // operate by pushing a Project operator down the tree that discards 274 // unused columns. For example: 275 // 276 // SELECT y FROM xyz WHERE x=1 ORDER BY y LIMIT 1 277 // 278 // The z column is never referenced, either by the filter or by the 279 // limit, and would be part of the PruneCols set for the Limit operator. 280 // The final Project operator could then push down a pruning Project 281 // operator that eliminated the z column from its subtree. 282 // 283 // PruneCols is built bottom-up. It typically starts out containing the 284 // complete set of output columns in a leaf expression, but quickly 285 // empties out at higher levels of the expression tree as the columns 286 // are referenced. Drawing from the example above: 287 // 288 // Limit PruneCols : [z] 289 // Select PruneCols: [y, z] 290 // Scan PruneCols : [x, y, z] 291 // 292 // Only a small number of relational operators are capable of pruning 293 // columns (e.g. Scan, Project). A pruning Project operator pushed down 294 // the tree must journey downwards until it finds a pruning-capable 295 // operator. If a column is part of PruneCols, then it is guaranteed that 296 // such an operator exists at the end of the journey. Operators that are 297 // not capable of filtering columns (like Explain) will not add any of 298 // their columns to this set. 299 // 300 // PruneCols is lazily populated by rules in prune_cols.opt. It is 301 // only valid once the Rule.Available.PruneCols bit has been set. 302 PruneCols opt.ColSet 303 304 // RejectNullCols is the subset of nullable output columns that can 305 // potentially be made not-null by one of the RejectNull normalization 306 // rules. Those rules work in concert with the predicate pushdown rules 307 // to synthesize a "col IS NOT NULL" filter and push it down the tree. 308 // See the header comments for the reject_nulls.opt file for more 309 // information and an example. 310 // 311 // RejectNullCols is built bottom-up by rulePropsBuilder, and only contains 312 // nullable outer join columns that can be simplified. The columns can be 313 // propagated up through multiple operators, giving higher levels of the 314 // tree a window into the structure of the tree several layers down. In 315 // particular, the null rejection rules use this property to determine when 316 // it's advantageous to synthesize a new "IS NOT NULL" filter. Without this 317 // information, the rules can clutter the tree with extraneous and 318 // marginally useful null filters. 319 // 320 // RejectNullCols is lazily populated by rules in reject_nulls.opt. It is 321 // only valid once the Rule.Available.RejectNullCols bit has been set. 322 RejectNullCols opt.ColSet 323 324 // InterestingOrderings is a list of orderings that potentially could be 325 // provided by the operator without sorting. Interesting orderings normally 326 // come from scans (index orders) and are bubbled up through some operators. 327 // 328 // Note that all prefixes of an interesting order are "interesting"; the 329 // list doesn't need to contain orderings that are prefixes of some other 330 // ordering in the list. 331 // 332 // InterestingOrderings is lazily populated by interesting_orderings.go. 333 // It is only valid once the Rule.Available.InterestingOrderings bit has 334 // been set. 335 InterestingOrderings opt.OrderingSet 336 337 // JoinSize is the number of relations being *inner* joined underneath 338 // this node. It is used to only reorder joins via AssociateJoin up to 339 // a certain limit. 340 JoinSize int 341 342 // MultiplicityProps is a struct that describes how rows from the input of 343 // a join are affected by the join. Rows from the left or right input are 344 // described as being duplicated and/or filtered. 345 // MultiplicityProps also contains a ColSet that contains columns from base 346 // tables that are guaranteed not to have been filtered. This ColSet is used 347 // in non-join operators as well. 348 // 349 // MultiplicityProps is lazily populated by multiplicity_builder.go. It is 350 // only valid once the Rule.Available.MultiplicityProps bit has been set. 351 MultiplicityProps JoinMultiplicity 352 } 353 } 354 355 // Scalar properties are logical properties that are computed for scalar 356 // expressions that return primitive-valued types. Scalar properties are 357 // lazily populated on request. 358 type Scalar struct { 359 Shared 360 361 // Constraints is the set of constraints deduced from a boolean expression. 362 // For the expression to be true, all constraints in the set must be 363 // satisfied. 364 Constraints *constraint.Set 365 366 // TightConstraints is true if the expression is exactly equivalent to the 367 // constraints. If it is false, the constraints are weaker than the 368 // expression. 369 TightConstraints bool 370 371 // FuncDeps is a set of functional dependencies (FDs) inferred from a 372 // boolean expression. This field is only populated for Filters expressions. 373 // 374 // - Constant column FDs such as ()-->(1,2) from conjuncts such as 375 // x = 5 AND y = 10. 376 // - Equivalent column FDs such as (1)==(2), (2)==(1) from conjuncts such 377 // as x = y. 378 // 379 // It is useful to calculate FDs on Filters expressions, because it allows 380 // additional filters to be inferred for push-down. For example, consider 381 // the query: 382 // 383 // SELECT * FROM a, b WHERE a.x = b.x AND a.x > 5; 384 // 385 // By adding the equivalency FD for a.x = b.x, we can infer an additional 386 // filter, b.x > 5. This allows us to rewrite the query as: 387 // 388 // SELECT * FROM (SELECT * FROM a WHERE a.x > 5) AS a, 389 // (SELECT * FROM b WHERE b.x > 5) AS b WHERE a.x = b.x; 390 // 391 // For more details, see the header comment for FuncDepSet. 392 FuncDeps FuncDepSet 393 394 // Rule encapsulates the set of properties that are maintained to assist 395 // with specific sets of transformation rules. See the Relational.Rule 396 // comment for more details. 397 Rule struct { 398 // Available contains bits that indicate whether lazily-populated Rule 399 // properties have been initialized. For example, if the 400 // HasHoistableSubquery bit is set, then the Rule.HasHoistableSubquery 401 // field has been initialized and is ready for use. 402 Available AvailableRuleProps 403 404 // HasHoistableSubquery is true if the scalar expression tree contains a 405 // subquery having one or more outer columns, and if the subquery needs 406 // to be hoisted up into its parent query as part of query decorrelation. 407 // The subquery can be a Subquery, Exists, or Any operator. These operators 408 // need to be hoisted out of scalar expression trees and turned into top- 409 // level apply joins. This property makes detection fast and easy so that 410 // the hoister doesn't waste time searching subtrees that don't contain 411 // subqueries. 412 // 413 // HasHoistableSubquery is lazily populated by rules in decorrelate.opt. 414 // It is only valid once the Rule.Available.HasHoistableSubquery bit has 415 // been set. 416 HasHoistableSubquery bool 417 } 418 } 419 420 // IsAvailable returns true if the specified rule property has been populated 421 // on this relational properties instance. 422 func (r *Relational) IsAvailable(p AvailableRuleProps) bool { 423 return (r.Rule.Available & p) != 0 424 } 425 426 // SetAvailable sets the available bits for the given properties, in order to 427 // mark them as populated on this relational properties instance. 428 func (r *Relational) SetAvailable(p AvailableRuleProps) { 429 r.Rule.Available |= p 430 } 431 432 // IsAvailable returns true if the specified rule property has been populated 433 // on this scalar properties instance. 434 func (s *Scalar) IsAvailable(p AvailableRuleProps) bool { 435 return (s.Rule.Available & p) != 0 436 } 437 438 // SetAvailable sets the available bits for the given properties, in order to 439 // mark them as populated on this scalar properties instance. 440 func (s *Scalar) SetAvailable(p AvailableRuleProps) { 441 s.Rule.Available |= p 442 }