github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/opt/memo/multiplicity_builder.go (about) 1 // Copyright 2020 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package memo 12 13 import ( 14 "github.com/cockroachdb/cockroach/pkg/sql/opt" 15 "github.com/cockroachdb/cockroach/pkg/sql/opt/cat" 16 "github.com/cockroachdb/cockroach/pkg/sql/opt/props" 17 "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" 18 "github.com/cockroachdb/errors" 19 ) 20 21 // DeriveJoinMultiplicity returns a JoinMultiplicity struct that describes how a 22 // join operator will affect the rows of its left and right inputs (e.g. 23 // duplicated and/or filtered). When the function is called on an operator other 24 // than an InnerJoin, a LeftJoin, or a FullJoin, it simply populates the 25 // UnfilteredCols field of the JoinMultiplicity for that operator and leaves the 26 // join fields unchanged. 27 // 28 // DeriveJoinMultiplicity recursively derives the UnfilteredCols field and 29 // populates the props.Relational.Rule.MultiplicityProps field as it goes to 30 // make future calls faster. 31 func DeriveJoinMultiplicity(in RelExpr) props.JoinMultiplicity { 32 // If the MultiplicityProps property has already been derived, return it 33 // immediately. 34 relational := in.Relational() 35 if relational.IsAvailable(props.MultiplicityProps) { 36 return relational.Rule.MultiplicityProps 37 } 38 relational.Rule.Available |= props.MultiplicityProps 39 var multiplicity props.JoinMultiplicity 40 41 // Derive MultiplicityProps now. 42 switch t := in.(type) { 43 case *ScanExpr: 44 // All un-limited, unconstrained output columns are unfiltered columns. 45 if t.HardLimit == 0 && t.Constraint == nil { 46 multiplicity.UnfilteredCols = relational.OutputCols 47 } 48 49 case *ProjectExpr: 50 // Project never filters rows, so it passes through unfiltered columns. 51 unfilteredCols := DeriveJoinMultiplicity(t.Input).UnfilteredCols 52 multiplicity.UnfilteredCols = unfilteredCols.Intersection(relational.OutputCols) 53 54 case *InnerJoinExpr, *LeftJoinExpr, *FullJoinExpr: 55 left := t.Child(0).(RelExpr) 56 right := t.Child(1).(RelExpr) 57 filters := *t.Child(2).(*FiltersExpr) 58 multiplicity = GetJoinMultiplicityFromInputs(t.Op(), left, right, filters) 59 60 // Use the JoinMultiplicity to determine whether unfiltered columns can be 61 // passed through. 62 if multiplicity.JoinPreservesLeftRows() { 63 multiplicity.UnfilteredCols.UnionWith(DeriveJoinMultiplicity(left).UnfilteredCols) 64 } 65 if multiplicity.JoinPreservesRightRows() { 66 multiplicity.UnfilteredCols.UnionWith(DeriveJoinMultiplicity(right).UnfilteredCols) 67 } 68 69 default: 70 // An empty JoinMultiplicity is returned. 71 } 72 relational.Rule.MultiplicityProps = multiplicity 73 return relational.Rule.MultiplicityProps 74 } 75 76 // GetJoinMultiplicityFromInputs returns a JoinMultiplicity that describes how a 77 // join of the given type with the given inputs and filters will affect the rows 78 // of its inputs. When possible, DeriveJoinMultiplicity should be called instead 79 // because GetJoinMultiplicityFromInputs cannot take advantage of a previously 80 // calculated JoinMultiplicity. 81 func GetJoinMultiplicityFromInputs( 82 joinOp opt.Operator, left, right RelExpr, filters FiltersExpr, 83 ) props.JoinMultiplicity { 84 85 switch joinOp { 86 case opt.InnerJoinOp, opt.LeftJoinOp, opt.FullJoinOp: 87 88 default: 89 panic(errors.AssertionFailedf("invalid operator: %v", joinOp)) 90 } 91 92 isLeftOuter := joinOp == opt.LeftJoinOp || joinOp == opt.FullJoinOp 93 isRightOuter := joinOp == opt.FullJoinOp 94 95 leftMultiplicity := getJoinLeftMultiplicityVal(left, right, filters, isLeftOuter) 96 rightMultiplicity := getJoinLeftMultiplicityVal(right, left, filters, isRightOuter) 97 98 return props.JoinMultiplicity{ 99 LeftMultiplicity: leftMultiplicity, 100 RightMultiplicity: rightMultiplicity, 101 } 102 } 103 104 // getJoinLeftMultiplicityVal returns a MultiplicityValue that describes whether 105 // a join with the given properties would duplicate or filter the rows of its 106 // left input. 107 // 108 // The duplicated and filtered flags will be set unless it can be statically 109 // proven that no rows will be duplicated or filtered respectively. 110 func getJoinLeftMultiplicityVal( 111 left, right RelExpr, filters FiltersExpr, isLeftOuter bool, 112 ) props.MultiplicityValue { 113 multiplicity := props.MultiplicityIndeterminateVal 114 if filtersMatchLeftRowsAtMostOnce(left, right, filters) { 115 multiplicity |= props.MultiplicityNotDuplicatedVal 116 } 117 if isLeftOuter || filtersMatchAllLeftRows(left, right, filters) { 118 multiplicity |= props.MultiplicityPreservedVal 119 } 120 return multiplicity 121 } 122 123 // filtersMatchLeftRowsAtMostOnce returns true if a join expression with the 124 // given ON filters is guaranteed to match every left row at most once. This is 125 // the case when either of the following conditions is satisfied: 126 // 127 // 1. The join is a cross join and the right input has zero or one rows. 128 // 129 // 2. The equivalence closure of the left columns over the filter functional 130 // dependencies forms a lax key over the right columns. 131 // 132 // Why is condition #2 sufficient to ensure that no left rows are matched more 133 // than once? 134 // * It implies that left columns are being equated with a lax key from the 135 // right input. 136 // * A lax key means that the right rows being equated are unique apart from 137 // nulls. 138 // * Equalities are null-rejecting and the right rows are otherwise unique, so 139 // no left row can be equal to more than one right row on the filters. 140 // * Therefore, no left row will be matched more than once. 141 // 142 // As an example: 143 // 144 // CREATE TABLE x_tab (x INT); 145 // CREATE TABLE a_tab (a INT UNIQUE); 146 // 147 // x a 148 // ---- ---- 149 // NULL NULL 150 // 1 1 151 // 1 2 152 // 2 3 153 // 154 // SELECT * FROM x_tab INNER JOIN a_tab ON x = a; 155 // => 156 // x a 157 // --- 158 // 1 1 159 // 1 1 160 // 2 2 161 // 162 // In this example, no rows from x are duplicated, while the '1' row from a is 163 // duplicated. 164 func filtersMatchLeftRowsAtMostOnce(left, right RelExpr, filters FiltersExpr) bool { 165 // Condition #1. 166 if len(filters) == 0 && right.Relational().Cardinality.IsZeroOrOne() { 167 return true 168 } 169 170 // Condition #2. 171 filtersFDs := getFiltersFDs(filters) 172 closure := filtersFDs.ComputeEquivClosure(left.Relational().OutputCols) 173 return right.Relational().FuncDeps.ColsAreLaxKey(closure) 174 } 175 176 // filtersMatchAllLeftRows returns true when each row in the given join's left 177 // input can be guaranteed to match at least one row from the right input, 178 // according to the join filters. This is true when the following conditions are 179 // satisfied: 180 // 181 // 1. If this is a cross join (there are no filters), then either: 182 // a. The minimum cardinality of the right input is greater than zero. There 183 // must be at least one right row for the left rows to be preserved. 184 // b. There is a not-null foreign key column in the left input that references 185 // an unfiltered column from the right input. 186 // 187 // 2. If this is not a cross join, every filter falls under one of these two 188 // cases: 189 // a. The self-join case: an equality between ColumnIDs that come from the 190 // same column on the same base table. 191 // b. The foreign-key case: an equality between a foreign key column on the 192 // left and the column it references from the right. 193 // 194 // In both the self-join and the foreign key cases, the left columns must be 195 // not-null, and the right columns must be unfiltered. 196 // 197 // Why do the left columns have to be not-null and the right columns 198 // unfiltered? 199 // * In both the self-join and the foreign-key cases, a non-null value in 200 // the left column guarantees a corresponding value in the right column. As 201 // long as no nulls have been added to the left column and no values have 202 // been removed from the right, this property will be valid. 203 // 204 // Note: in the foreign key case, if the key's match method is match simple, all 205 // columns in the foreign key must be not-null in order to guarantee that all 206 // rows will have a match in the referenced table. 207 func filtersMatchAllLeftRows(left, right RelExpr, filters FiltersExpr) bool { 208 md := left.Memo().Metadata() 209 210 // Cross join case. 211 if len(filters) == 0 { 212 if !right.Relational().Cardinality.CanBeZero() { 213 // Case 1a: this is a cross join and there's at least one row in the right 214 // input, so every left row is guaranteed to match at least once. 215 return true 216 } 217 // Case 1b: if there is at least one not-null foreign key column referencing 218 // the unfiltered right columns, return true. Otherwise, false. 219 return makeForeignKeyMap( 220 md, left.Relational().NotNullCols, DeriveJoinMultiplicity(right).UnfilteredCols) != nil 221 } 222 223 leftColIDs := left.Relational().NotNullCols 224 rightColIDs := DeriveJoinMultiplicity(right).UnfilteredCols 225 if rightColIDs.Empty() { 226 // Right input has no unfiltered columns. 227 return false 228 } 229 230 var fkColMap map[opt.ColumnID]opt.ColumnID 231 232 for i := range filters { 233 eq, _ := filters[i].Condition.(*EqExpr) 234 if eq == nil { 235 // Conjunct is not an equality comparison. 236 return false 237 } 238 239 leftVar, _ := eq.Left.(*VariableExpr) 240 rightVar, _ := eq.Right.(*VariableExpr) 241 if leftVar == nil || rightVar == nil { 242 // Conjunct does not directly compare two columns. 243 return false 244 } 245 246 leftColID := leftVar.Col 247 rightColID := rightVar.Col 248 249 // Normalize leftColID to come from leftColIDs. 250 if !leftColIDs.Contains(leftColID) { 251 leftColID, rightColID = rightColID, leftColID 252 } 253 if !leftColIDs.Contains(leftColID) || !rightColIDs.Contains(rightColID) { 254 // Columns don't come from both sides of join, left column is nullable or 255 // right column is filtered. 256 return false 257 } 258 259 leftTab := md.ColumnMeta(leftColID).Table 260 rightTab := md.ColumnMeta(rightColID).Table 261 if leftTab == 0 || rightTab == 0 { 262 // Columns don't come from base tables. 263 return false 264 } 265 266 if md.TableMeta(leftTab).Table == md.TableMeta(rightTab).Table { 267 // Case 2a: check self-join case. 268 leftColOrd := leftTab.ColumnOrdinal(leftColID) 269 rightColOrd := rightTab.ColumnOrdinal(rightColID) 270 if leftColOrd != rightColOrd { 271 // Left and right column ordinals do not match. 272 return false 273 } 274 } else { 275 // Case 2b: check foreign-key case. 276 if fkColMap == nil { 277 // Lazily construct a map from all not-null foreign key columns on the 278 // left to all unfiltered referenced columns on the right. 279 fkColMap = makeForeignKeyMap(md, leftColIDs, rightColIDs) 280 if fkColMap == nil { 281 // No valid foreign key relations were found. 282 return false 283 } 284 } 285 if refCol, ok := fkColMap[leftColID]; !ok || refCol != rightColID { 286 // There is no valid foreign key relation from leftColID to 287 // rightColID. 288 return false 289 } 290 } 291 } 292 293 return true 294 } 295 296 // makeForeignKeyMap returns a map from left foreign key columns to right 297 // referenced columns. The given left columns should not be nullable and the 298 // right columns should be guaranteed to be unfiltered, or the foreign key 299 // relation may not hold. If the key's match method isn't match full, all 300 // foreign key columns must be not-null, or the key relation is not guaranteed 301 // to have a match for each row. If no valid foreign key relations are found, 302 // fkColMap is nil. 303 func makeForeignKeyMap( 304 md *opt.Metadata, leftNotNullCols, rightUnfilteredCols opt.ColSet, 305 ) map[opt.ColumnID]opt.ColumnID { 306 var tableIDMap map[cat.StableID]opt.TableID 307 var fkColMap map[opt.ColumnID]opt.ColumnID 308 var lastSeen opt.TableID 309 310 // Walk through the left columns and add foreign key and referenced columns to 311 // the output mapping if they come from the leftNotNullCols and 312 // rightUnfilteredCols ColSets respectively. 313 for col, ok := leftNotNullCols.Next(0); ok; col, ok = leftNotNullCols.Next(col + 1) { 314 fkTableID := md.ColumnMeta(col).Table 315 if fkTableID < 1 { 316 // The column does not come from a base table. 317 continue 318 } 319 if fkTableID == lastSeen { 320 // We have already encountered this TableID. (This works because ColumnIDs 321 // with the same TableID are clustered together). 322 continue 323 } 324 lastSeen = fkTableID 325 fkTableMeta := md.TableMeta(fkTableID) 326 if fkTableMeta.IgnoreForeignKeys { 327 // We are not allowed to use any of this table's foreign keys. 328 continue 329 } 330 fkTable := fkTableMeta.Table 331 for i, cnt := 0, fkTable.OutboundForeignKeyCount(); i < cnt; i++ { 332 fk := fkTable.OutboundForeignKey(i) 333 if !fk.Validated() { 334 // The data is not guaranteed to follow the foreign key constraint. 335 continue 336 } 337 if tableIDMap == nil { 338 // Lazily initialize tableIDMap. 339 tableIDMap = makeStableTableIDMap(md, rightUnfilteredCols) 340 if len(tableIDMap) == 0 { 341 // No valid tables were found from the right side. 342 break 343 } 344 } 345 refTableID, ok := tableIDMap[fk.ReferencedTableID()] 346 if !ok { 347 // There is no valid right table corresponding to the referenced table. 348 continue 349 } 350 var leftCols, rightCols []opt.ColumnID 351 fkValid := true 352 for j, numCols := 0, fk.ColumnCount(); j < numCols; j++ { 353 leftOrd := fk.OriginColumnOrdinal(fkTable, j) 354 rightOrd := fk.ReferencedColumnOrdinal(md.Table(refTableID), j) 355 leftCol := fkTableID.ColumnID(leftOrd) 356 rightCol := refTableID.ColumnID(rightOrd) 357 if !leftNotNullCols.Contains(leftCol) { 358 // Not all FK columns are part of the equality conditions. There are two 359 // cases: 360 // 1. MATCH SIMPLE/PARTIAL: if this column is nullable, rows from this 361 // foreign key are not guaranteed to match. 362 // 2. MATCH FULL: FK rows are still guaranteed to match because the 363 // non-present columns can only be NULL if all FK columns are NULL. 364 if fk.MatchMethod() != tree.MatchFull { 365 fkValid = false 366 break 367 } 368 continue 369 } 370 if !rightUnfilteredCols.Contains(rightCol) { 371 continue 372 } 373 leftCols = append(leftCols, leftCol) 374 rightCols = append(rightCols, rightCol) 375 } 376 if !fkValid { 377 // The foreign key relations should only be added to the mapping if the 378 // foreign key is guaranteed a match for every row. 379 continue 380 } 381 for i := range leftCols { 382 // Add any valid foreign key relations to the mapping. 383 if fkColMap == nil { 384 // Lazily initialize fkColMap 385 fkColMap = map[opt.ColumnID]opt.ColumnID{} 386 } 387 fkColMap[leftCols[i]] = rightCols[i] 388 } 389 } 390 } 391 return fkColMap 392 } 393 394 // makeStableTableIDMap creates a mapping from the StableIDs of the base tables 395 // to the meta TableIDs for the given columns. 396 func makeStableTableIDMap(md *opt.Metadata, cols opt.ColSet) map[cat.StableID]opt.TableID { 397 idMap := map[cat.StableID]opt.TableID{} 398 for col, ok := cols.Next(0); ok; col, ok = cols.Next(col + 1) { 399 metaTableID := md.ColumnMeta(col).Table 400 if metaTableID == 0 { 401 continue 402 } 403 stableTableID := md.Table(metaTableID).ID() 404 if prevID, ok := idMap[stableTableID]; ok && prevID != metaTableID { 405 // Avoid dealing with cases where multiple meta tables reference the same 406 // base table so that only one TableID has to be stored. 407 return map[cat.StableID]opt.TableID{} 408 } 409 idMap[stableTableID] = metaTableID 410 } 411 return idMap 412 } 413 414 // getFiltersFDs returns a FuncDepSet with the FDs from the FiltersItems in 415 // the given FiltersExpr. 416 func getFiltersFDs(filters FiltersExpr) props.FuncDepSet { 417 if len(filters) == 1 { 418 return filters[0].ScalarProps().FuncDeps 419 } 420 421 filtersFDs := props.FuncDepSet{} 422 for i := range filters { 423 filtersFDs.AddFrom(&filters[i].ScalarProps().FuncDeps) 424 } 425 return filtersFDs 426 }