github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/opt/norm/rules/select.opt (about) 1 # ============================================================================= 2 # select.opt contains normalization rules for the Select operator. 3 # ============================================================================= 4 5 # SimplifySelectFilters simplifies the Filters operator in several possible 6 # ways: 7 # - Removes True operands 8 # - Replaces the Filters operator with False if any operand is False or Null 9 # - Flattens nested And operands by merging their conditions into parent 10 # - Simplifies Or operands where one side is a Null to the other side 11 # 12 # Note that the Null handling behavior is different than the SimplifyAnd rules, 13 # because Filters only appears as a Select or Join filter condition, both of 14 # which treat a Null filter conjunct exactly as if it were False. 15 [SimplifySelectFilters, Normalize, HighPriority] 16 (Select 17 $input:* 18 $filters:[ 19 ... 20 $item:(FiltersItem 21 (And | True | False | Null | Or) 22 ) & 23 ^(IsUnsimplifiableOr $item) 24 ... 25 ] & 26 ^(IsFilterFalse $filters) 27 ) 28 => 29 (Select $input (SimplifyFilters $filters)) 30 31 # ConsolidateSelectFilters consolidates filters that constrain a single 32 # variable. For example, filters x >= 5 and x <= 10 would be combined into a 33 # single Range operation. 34 # 35 # The benefit of consolidating these filters is it allows a single constraint 36 # to be generated for the variable instead of multiple. In the example above, 37 # we can generate the single constraint [/5 - /10] instead of the two 38 # constraints [/5 - ] and [ - /10]. The single constraint allows us to better 39 # estimate the selectivity of the predicate when calculating statistics for 40 # the Select expression. 41 # 42 # This rule is low priority so other rules in this file such as 43 # RemoveNotNullCondition can run first. 44 [ConsolidateSelectFilters, Normalize, LowPriority] 45 (Select $input:* $filters:* & (CanConsolidateFilters $filters)) 46 => 47 (Select $input (ConsolidateFilters $filters)) 48 49 # DetectSelectContradiction replaces a Select with an empty Values if it detects 50 # a contradiction in the filter. 51 [DetectSelectContradiction, Normalize] 52 (Select 53 $input:* 54 [ ... $item:(FiltersItem) & (IsContradiction $item) ... ] 55 ) 56 => 57 (ConstructEmptyValues (OutputCols $input)) 58 59 # EliminateSelect discards an unnecessary Select operator in the case where its 60 # filter is always true. Keep this near the top of the file so that it tends to 61 # be checked early. 62 [EliminateSelect, Normalize] 63 (Select $input:* []) 64 => 65 $input 66 67 # MergeSelects combines two nested Select operators into a single Select that 68 # ANDs the filter conditions of the two Selects. 69 [MergeSelects, Normalize] 70 (Select (Select $input:* $innerFilters:*) $filters:*) 71 => 72 (Select $input (ConcatFilters $innerFilters $filters)) 73 74 # PushSelectIntoProject pushes the Select operator into its Project input. This 75 # is typically preferable because it minimizes the number of rows which Project 76 # needs to process. This is especially important if Project is adding expensive 77 # computed columns. 78 [PushSelectIntoProject, Normalize] 79 (Select 80 (Project $input:* $projections:* $passthrough:*) 81 $filters:[ 82 ... 83 $item:* & 84 (IsBoundBy $item $inputCols:(OutputCols $input)) 85 ... 86 ] 87 ) 88 => 89 (Select 90 (Project 91 (Select 92 $input 93 (ExtractBoundConditions $filters $inputCols) 94 ) 95 $projections 96 $passthrough 97 ) 98 (ExtractUnboundConditions $filters $inputCols) 99 ) 100 101 # MergeSelectInnerJoin merges a Select operator with an InnerJoin input by 102 # AND'ing the filter conditions of each and creating a new InnerJoin with that 103 # On condition. This is only safe to do with InnerJoin in the general case 104 # where the conditions could filter either left or right rows. The special case 105 # where a condition filters only one or the other is already taken care of by 106 # the PushSelectIntoJoin rules. 107 # NOTE: Keep this rule ordered before the PushSelectIntoJoin rules to avoid 108 # missing out on the potential for new filter inference based on 109 # equivalent columns. 110 [MergeSelectInnerJoin, Normalize] 111 (Select 112 $input:(InnerJoin | InnerJoinApply 113 $left:* 114 $right:* 115 $on:* 116 $private:* 117 ) 118 $filters:* 119 ) 120 => 121 ((OpName $input) 122 $left 123 $right 124 (ConcatFilters $on $filters) 125 $private 126 ) 127 128 # PushSelectCondLeftIntoJoinLeftAndRight applies to the case when a condition 129 # bound by the left side of a join can be mapped to the right side using 130 # equality columns from the ON condition of the join. It pushes the original 131 # filter to the left side, and the mapped filter to the right side. 132 # For example, consider this query: 133 # 134 # SELECT * FROM l LEFT JOIN r ON l.x = r.x WHERE l.x = 5; 135 # 136 # This can safely be converted to: 137 # 138 # SELECT * FROM (SELECT * FROM l WHERE l.x = 5) 139 # LEFT JOIN (SELECT * FROM r WHERE r.x = 5) ON l.x = r.x; 140 # 141 # It's not normally correct to push filters from the SELECT clause to 142 # the right side of a LEFT JOIN, since those rows might still show up 143 # in the output as NULL-extended rows from the left side. In this case, 144 # however, for any rows removed from the right side, the matching rows are 145 # also removed from the left side (and thus removed from the output). 146 # To ensure that this is the case, it's important that the filter only refers 147 # to columns on the left side that have corresponding equivalent columns on 148 # the right side. 149 [PushSelectCondLeftIntoJoinLeftAndRight, Normalize] 150 (Select 151 $input:(LeftJoin | LeftJoinApply | SemiJoin | SemiJoinApply 152 | AntiJoin | AntiJoinApply 153 $left:* 154 $right:* 155 $on:* 156 $private:* 157 ) 158 $filters:[ 159 ... 160 $item:(FiltersItem $condition:*) & 161 (IsBoundBy $item (OutputCols $left)) & 162 (CanMapJoinOpFilter 163 $item 164 $rightCols:(OutputCols $right) 165 $equivFD:(GetEquivFD $on $left $right) 166 ) 167 ... 168 ] 169 ) 170 => 171 (Select 172 ((OpName $input) 173 (Select $left [ (FiltersItem $condition) ]) 174 (Select 175 $right 176 [ 177 (FiltersItem 178 (MapJoinOpFilter $item $rightCols $equivFD) 179 ) 180 ] 181 ) 182 $on 183 $private 184 ) 185 (RemoveFiltersItem $filters $item) 186 ) 187 188 # PushSelectIntoJoinLeft pushes Select filter conditions into the left side of 189 # an input Join. This is possible in the case of InnerJoin, LeftJoin, SemiJoin, 190 # and AntiJoin, as long as the condition has no dependencies on the right side 191 # of the join. Right and Full joins are not eligible, since attempting to filter 192 # left rows would just result in NULL left rows instead. 193 # 194 # -- No row is returned for a.x=1, a.y=2, b.x=1, since the WHERE excludes it. 195 # SELECT * FROM a RIGHT JOIN b ON a.x=b.x WHERE a.y < 0 196 # 197 # -- But if the filter is incorrectly pushed down in RIGHT/FULL JOIN case, 198 # -- then a row containing null values on the left side is returned. 199 # SELECT * FROM (SELECT * FROM a WHERE a.y < 0) a RIGHT JOIN b ON a.x=b.x 200 # 201 # Citations: [1] 202 [PushSelectIntoJoinLeft, Normalize] 203 (Select 204 $input:(LeftJoin | LeftJoinApply | SemiJoin | SemiJoinApply 205 | AntiJoin | AntiJoinApply 206 $left:* 207 $right:* 208 $on:* 209 $private:* 210 ) 211 $filters:[ 212 ... 213 $item:* & (IsBoundBy $item $leftCols:(OutputCols $left)) 214 ... 215 ] 216 ) 217 => 218 (Select 219 ((OpName $input) 220 (Select 221 $left 222 (ExtractBoundConditions $filters $leftCols) 223 ) 224 $right 225 $on 226 $private 227 ) 228 (ExtractUnboundConditions $filters $leftCols) 229 ) 230 231 # PushSelectIntoGroupBy pushes a Select condition below a GroupBy in the case 232 # where it only references grouping columns or ConstAgg columns. 233 # 234 # This rule doesn't work on ScalarGroupBy which exhibits different behavior if 235 # the input is empty: 236 # SELECT MAX(y) FROM a 237 # 238 # If "a" is empty, this returns a single row containing a null value. This is 239 # different behavior than a GroupBy with grouping columns, which would return 240 # the empty set for a similar query: 241 # SELECT MAX(y) FROM a GROUP BY x 242 # 243 # Citations: [2] 244 # 245 # Note: Do not add EnsureDistinctOn to the match pattern. Pushing the select 246 # filters through the EnsureDistinctOn can prevent it from detecting duplicate 247 # rows and therefore change error behavior. 248 [PushSelectIntoGroupBy, Normalize] 249 (Select 250 $input:(GroupBy | DistinctOn 251 $groupingInput:* 252 $aggregations:* 253 $groupingPrivate:* 254 ) 255 $filters:[ 256 ... 257 $item:* & 258 (IsBoundBy 259 $item 260 $passthrough:(GroupingAndConstCols 261 $groupingPrivate 262 $aggregations 263 ) 264 ) 265 ... 266 ] 267 ) 268 => 269 (Select 270 ((OpName $input) 271 (Select 272 $groupingInput 273 (ExtractBoundConditions $filters $passthrough) 274 ) 275 $aggregations 276 $groupingPrivate 277 ) 278 (ExtractUnboundConditions $filters $passthrough) 279 ) 280 281 # RemoveNotNullCondition removes a filter with an IS NOT NULL condition 282 # when the given column has a NOT NULL constraint. 283 [RemoveNotNullCondition, Normalize] 284 (Select 285 $input:* 286 $filters:[ 287 ... 288 $item:(FiltersItem 289 (IsNot 290 (Variable $col:* & (IsColNotNull $col $input)) 291 (Null) 292 ) 293 ) 294 ... 295 ] 296 ) 297 => 298 (Select $input (RemoveFiltersItem $filters $item)) 299 300 # PushSelectIntoProjectSet pushes filters into a ProjectSet. In particular, 301 # the filters that are bound to the input columns of the ProjectSet are 302 # pushed down into it, in hopes of being pushed down further into joins 303 # and scans underneath the ProjectSet. 304 [PushSelectIntoProjectSet, Normalize] 305 (Select 306 (ProjectSet $input:* $zip:*) 307 $filters:[ 308 ... 309 $item:* & 310 (IsBoundBy $item $inputCols:(OutputCols $input)) 311 ... 312 ] 313 ) 314 => 315 (Select 316 (ProjectSet 317 (Select 318 $input 319 (ExtractBoundConditions $filters $inputCols) 320 ) 321 $zip 322 ) 323 (ExtractUnboundConditions $filters $inputCols) 324 ) 325 326 # PushFilterIntoSetOp pushes filters down to both the left and right sides 327 # of all set operators. For example, consider this query: 328 # 329 # SELECT * FROM (SELECT x FROM a UNION ALL SELECT y FROM b) WHERE x < 5 330 # 331 # In this case, we can map x < 5 to both sides based on the knowledge that 332 # in the union the x out col corresponds to the x column in the (a) table and 333 # the y column in the (b) table. Therefore our mapping becomes x < 5 in the 334 # left side, and y < 5 in the right side. Given this mapping, we can now 335 # safely push the filter down to both sides as follows: 336 # 337 # SELECT * FROM (SELECT x FROM a UNION ALL SELECT y FROM b) WHERE x < 5 338 # => 339 # (SELECT x FROM a WHERE x < 5) UNION ALL (SELECT y FROM b WHERE y < 5) 340 # 341 # Pushing (all) the filters down for each of the set operators (Union, Union 342 # All, Except, Except All, Intersect, Intersect All) is logically equivalent 343 # to filtering after applying the set operator. Here's some justification for 344 # this claim: 345 # 346 # Notice that each of the set operators this rule applies to, only works on 347 # union compatible relations. The resulting column set after applying the set 348 # operator, is also necessarily a subset of the column set of the (any) 349 # relations it was composed of. And so, any filter applied must have a 350 # corresponding column in each of the base relations. 351 # 352 # This works in the case of Union because a row passing the filter is 353 # independent of the other rows in its relation. It works in the case of 354 # Intersect because if a row `a` was filtered from the intersection, it will 355 # also be filtered from the LHS and RHS. It works in the case of Except because 356 # if a row is filtered from the RHS, preventing its removal from the LHS, it 357 # will also have been filtered from the LHS. 358 # 359 # Visualization of the rule: 360 # Let A and B be sets. Let the filter be represented by eliminating some set C. 361 # 362 # Union (All): (A Union B) \ C => (A \ C) Union (B \ C) 363 # Intersection (All): (A Intersect B) \ C => (A \ C) Intersect (B \ C) 364 # Except (All): (A Except B) \ C => (A \ C) Except (B \ C) 365 # 366 # We don't push a filter down if it references outer columns because doing so 367 # prevents decorrelation. 368 [PushFilterIntoSetOp, Normalize] 369 (Select 370 $input:(Set $left:* $right:* $colmap:*) 371 $filter:[ 372 ... 373 $item:* & 374 (CanMapOnSetOp $item) & 375 (IsBoundBy $item $inputCols:(OutputCols $input)) 376 ... 377 ] 378 ) 379 => 380 (Select 381 ((OpName $input) 382 (Select 383 $left 384 [ (FiltersItem (MapSetOpFilterLeft $item $colmap)) ] 385 ) 386 (Select 387 $right 388 [ (FiltersItem (MapSetOpFilterRight $item $colmap)) ] 389 ) 390 $colmap 391 ) 392 (RemoveFiltersItem $filter $item) 393 )