github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/opt/memo/multiplicity_builder.go (about)

     1  // Copyright 2020 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package memo
    12  
    13  import (
    14  	"github.com/cockroachdb/cockroach/pkg/sql/opt"
    15  	"github.com/cockroachdb/cockroach/pkg/sql/opt/cat"
    16  	"github.com/cockroachdb/cockroach/pkg/sql/opt/props"
    17  	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
    18  	"github.com/cockroachdb/errors"
    19  )
    20  
    21  // DeriveJoinMultiplicity returns a JoinMultiplicity struct that describes how a
    22  // join operator will affect the rows of its left and right inputs (e.g.
    23  // duplicated and/or filtered). When the function is called on an operator other
    24  // than an InnerJoin, a LeftJoin, or a FullJoin, it simply populates the
    25  // UnfilteredCols field of the JoinMultiplicity for that operator and leaves the
    26  // join fields unchanged.
    27  //
    28  // DeriveJoinMultiplicity recursively derives the UnfilteredCols field and
    29  // populates the props.Relational.Rule.MultiplicityProps field as it goes to
    30  // make future calls faster.
    31  func DeriveJoinMultiplicity(in RelExpr) props.JoinMultiplicity {
    32  	// If the MultiplicityProps property has already been derived, return it
    33  	// immediately.
    34  	relational := in.Relational()
    35  	if relational.IsAvailable(props.MultiplicityProps) {
    36  		return relational.Rule.MultiplicityProps
    37  	}
    38  	relational.Rule.Available |= props.MultiplicityProps
    39  	var multiplicity props.JoinMultiplicity
    40  
    41  	// Derive MultiplicityProps now.
    42  	switch t := in.(type) {
    43  	case *ScanExpr:
    44  		// All un-limited, unconstrained output columns are unfiltered columns.
    45  		if t.HardLimit == 0 && t.Constraint == nil {
    46  			multiplicity.UnfilteredCols = relational.OutputCols
    47  		}
    48  
    49  	case *ProjectExpr:
    50  		// Project never filters rows, so it passes through unfiltered columns.
    51  		unfilteredCols := DeriveJoinMultiplicity(t.Input).UnfilteredCols
    52  		multiplicity.UnfilteredCols = unfilteredCols.Intersection(relational.OutputCols)
    53  
    54  	case *InnerJoinExpr, *LeftJoinExpr, *FullJoinExpr:
    55  		left := t.Child(0).(RelExpr)
    56  		right := t.Child(1).(RelExpr)
    57  		filters := *t.Child(2).(*FiltersExpr)
    58  		multiplicity = GetJoinMultiplicityFromInputs(t.Op(), left, right, filters)
    59  
    60  		// Use the JoinMultiplicity to determine whether unfiltered columns can be
    61  		// passed through.
    62  		if multiplicity.JoinPreservesLeftRows() {
    63  			multiplicity.UnfilteredCols.UnionWith(DeriveJoinMultiplicity(left).UnfilteredCols)
    64  		}
    65  		if multiplicity.JoinPreservesRightRows() {
    66  			multiplicity.UnfilteredCols.UnionWith(DeriveJoinMultiplicity(right).UnfilteredCols)
    67  		}
    68  
    69  	default:
    70  		// An empty JoinMultiplicity is returned.
    71  	}
    72  	relational.Rule.MultiplicityProps = multiplicity
    73  	return relational.Rule.MultiplicityProps
    74  }
    75  
    76  // GetJoinMultiplicityFromInputs returns a JoinMultiplicity that describes how a
    77  // join of the given type with the given inputs and filters will affect the rows
    78  // of its inputs. When possible, DeriveJoinMultiplicity should be called instead
    79  // because GetJoinMultiplicityFromInputs cannot take advantage of a previously
    80  // calculated JoinMultiplicity.
    81  func GetJoinMultiplicityFromInputs(
    82  	joinOp opt.Operator, left, right RelExpr, filters FiltersExpr,
    83  ) props.JoinMultiplicity {
    84  
    85  	switch joinOp {
    86  	case opt.InnerJoinOp, opt.LeftJoinOp, opt.FullJoinOp:
    87  
    88  	default:
    89  		panic(errors.AssertionFailedf("invalid operator: %v", joinOp))
    90  	}
    91  
    92  	isLeftOuter := joinOp == opt.LeftJoinOp || joinOp == opt.FullJoinOp
    93  	isRightOuter := joinOp == opt.FullJoinOp
    94  
    95  	leftMultiplicity := getJoinLeftMultiplicityVal(left, right, filters, isLeftOuter)
    96  	rightMultiplicity := getJoinLeftMultiplicityVal(right, left, filters, isRightOuter)
    97  
    98  	return props.JoinMultiplicity{
    99  		LeftMultiplicity:  leftMultiplicity,
   100  		RightMultiplicity: rightMultiplicity,
   101  	}
   102  }
   103  
   104  // getJoinLeftMultiplicityVal returns a MultiplicityValue that describes whether
   105  // a join with the given properties would duplicate or filter the rows of its
   106  // left input.
   107  //
   108  // The duplicated and filtered flags will be set unless it can be statically
   109  // proven that no rows will be duplicated or filtered respectively.
   110  func getJoinLeftMultiplicityVal(
   111  	left, right RelExpr, filters FiltersExpr, isLeftOuter bool,
   112  ) props.MultiplicityValue {
   113  	multiplicity := props.MultiplicityIndeterminateVal
   114  	if filtersMatchLeftRowsAtMostOnce(left, right, filters) {
   115  		multiplicity |= props.MultiplicityNotDuplicatedVal
   116  	}
   117  	if isLeftOuter || filtersMatchAllLeftRows(left, right, filters) {
   118  		multiplicity |= props.MultiplicityPreservedVal
   119  	}
   120  	return multiplicity
   121  }
   122  
   123  // filtersMatchLeftRowsAtMostOnce returns true if a join expression with the
   124  // given ON filters is guaranteed to match every left row at most once. This is
   125  // the case when either of the following conditions is satisfied:
   126  //
   127  //  1. The join is a cross join and the right input has zero or one rows.
   128  //
   129  //  2. The equivalence closure of the left columns over the filter functional
   130  //     dependencies forms a lax key over the right columns.
   131  //
   132  // Why is condition #2 sufficient to ensure that no left rows are matched more
   133  // than once?
   134  // * It implies that left columns are being equated with a lax key from the
   135  //   right input.
   136  // * A lax key means that the right rows being equated are unique apart from
   137  //   nulls.
   138  // * Equalities are null-rejecting and the right rows are otherwise unique, so
   139  //   no left row can be equal to more than one right row on the filters.
   140  // * Therefore, no left row will be matched more than once.
   141  //
   142  // As an example:
   143  //
   144  //   CREATE TABLE x_tab (x INT);
   145  //   CREATE TABLE a_tab (a INT UNIQUE);
   146  //
   147  //   x     a
   148  //   ----  ----
   149  //   NULL  NULL
   150  //   1     1
   151  //   1     2
   152  //   2     3
   153  //
   154  //   SELECT * FROM x_tab INNER JOIN a_tab ON x = a;
   155  //   =>
   156  //   x a
   157  //   ---
   158  //   1 1
   159  //   1 1
   160  //   2 2
   161  //
   162  // In this example, no rows from x are duplicated, while the '1' row from a is
   163  // duplicated.
   164  func filtersMatchLeftRowsAtMostOnce(left, right RelExpr, filters FiltersExpr) bool {
   165  	// Condition #1.
   166  	if len(filters) == 0 && right.Relational().Cardinality.IsZeroOrOne() {
   167  		return true
   168  	}
   169  
   170  	// Condition #2.
   171  	filtersFDs := getFiltersFDs(filters)
   172  	closure := filtersFDs.ComputeEquivClosure(left.Relational().OutputCols)
   173  	return right.Relational().FuncDeps.ColsAreLaxKey(closure)
   174  }
   175  
   176  // filtersMatchAllLeftRows returns true when each row in the given join's left
   177  // input can be guaranteed to match at least one row from the right input,
   178  // according to the join filters. This is true when the following conditions are
   179  // satisfied:
   180  //
   181  // 1. If this is a cross join (there are no filters), then either:
   182  //   a. The minimum cardinality of the right input is greater than zero. There
   183  //      must be at least one right row for the left rows to be preserved.
   184  //   b. There is a not-null foreign key column in the left input that references
   185  //      an unfiltered column from the right input.
   186  //
   187  // 2. If this is not a cross join, every filter falls under one of these two
   188  //    cases:
   189  //   a. The self-join case: an equality between ColumnIDs that come from the
   190  //      same column on the same base table.
   191  //   b. The foreign-key case: an equality between a foreign key column on the
   192  //      left and the column it references from the right.
   193  //
   194  // In both the self-join and the foreign key cases, the left columns must be
   195  // not-null, and the right columns must be unfiltered.
   196  //
   197  //  Why do the left columns have to be not-null and the right columns
   198  //  unfiltered?
   199  //  * In both the self-join and the foreign-key cases, a non-null value in
   200  //    the left column guarantees a corresponding value in the right column. As
   201  //    long as no nulls have been added to the left column and no values have
   202  //    been removed from the right, this property will be valid.
   203  //
   204  // Note: in the foreign key case, if the key's match method is match simple, all
   205  // columns in the foreign key must be not-null in order to guarantee that all
   206  // rows will have a match in the referenced table.
   207  func filtersMatchAllLeftRows(left, right RelExpr, filters FiltersExpr) bool {
   208  	md := left.Memo().Metadata()
   209  
   210  	// Cross join case.
   211  	if len(filters) == 0 {
   212  		if !right.Relational().Cardinality.CanBeZero() {
   213  			// Case 1a: this is a cross join and there's at least one row in the right
   214  			// input, so every left row is guaranteed to match at least once.
   215  			return true
   216  		}
   217  		// Case 1b: if there is at least one not-null foreign key column referencing
   218  		// the unfiltered right columns, return true. Otherwise, false.
   219  		return makeForeignKeyMap(
   220  			md, left.Relational().NotNullCols, DeriveJoinMultiplicity(right).UnfilteredCols) != nil
   221  	}
   222  
   223  	leftColIDs := left.Relational().NotNullCols
   224  	rightColIDs := DeriveJoinMultiplicity(right).UnfilteredCols
   225  	if rightColIDs.Empty() {
   226  		// Right input has no unfiltered columns.
   227  		return false
   228  	}
   229  
   230  	var fkColMap map[opt.ColumnID]opt.ColumnID
   231  
   232  	for i := range filters {
   233  		eq, _ := filters[i].Condition.(*EqExpr)
   234  		if eq == nil {
   235  			// Conjunct is not an equality comparison.
   236  			return false
   237  		}
   238  
   239  		leftVar, _ := eq.Left.(*VariableExpr)
   240  		rightVar, _ := eq.Right.(*VariableExpr)
   241  		if leftVar == nil || rightVar == nil {
   242  			// Conjunct does not directly compare two columns.
   243  			return false
   244  		}
   245  
   246  		leftColID := leftVar.Col
   247  		rightColID := rightVar.Col
   248  
   249  		// Normalize leftColID to come from leftColIDs.
   250  		if !leftColIDs.Contains(leftColID) {
   251  			leftColID, rightColID = rightColID, leftColID
   252  		}
   253  		if !leftColIDs.Contains(leftColID) || !rightColIDs.Contains(rightColID) {
   254  			// Columns don't come from both sides of join, left column is nullable or
   255  			// right column is filtered.
   256  			return false
   257  		}
   258  
   259  		leftTab := md.ColumnMeta(leftColID).Table
   260  		rightTab := md.ColumnMeta(rightColID).Table
   261  		if leftTab == 0 || rightTab == 0 {
   262  			// Columns don't come from base tables.
   263  			return false
   264  		}
   265  
   266  		if md.TableMeta(leftTab).Table == md.TableMeta(rightTab).Table {
   267  			// Case 2a: check self-join case.
   268  			leftColOrd := leftTab.ColumnOrdinal(leftColID)
   269  			rightColOrd := rightTab.ColumnOrdinal(rightColID)
   270  			if leftColOrd != rightColOrd {
   271  				// Left and right column ordinals do not match.
   272  				return false
   273  			}
   274  		} else {
   275  			// Case 2b: check foreign-key case.
   276  			if fkColMap == nil {
   277  				// Lazily construct a map from all not-null foreign key columns on the
   278  				// left to all unfiltered referenced columns on the right.
   279  				fkColMap = makeForeignKeyMap(md, leftColIDs, rightColIDs)
   280  				if fkColMap == nil {
   281  					// No valid foreign key relations were found.
   282  					return false
   283  				}
   284  			}
   285  			if refCol, ok := fkColMap[leftColID]; !ok || refCol != rightColID {
   286  				// There is no valid foreign key relation from leftColID to
   287  				// rightColID.
   288  				return false
   289  			}
   290  		}
   291  	}
   292  
   293  	return true
   294  }
   295  
   296  // makeForeignKeyMap returns a map from left foreign key columns to right
   297  // referenced columns. The given left columns should not be nullable and the
   298  // right columns should be guaranteed to be unfiltered, or the foreign key
   299  // relation may not hold. If the key's match method isn't match full, all
   300  // foreign key columns must be not-null, or the key relation is not guaranteed
   301  // to have a match for each row. If no valid foreign key relations are found,
   302  // fkColMap is nil.
   303  func makeForeignKeyMap(
   304  	md *opt.Metadata, leftNotNullCols, rightUnfilteredCols opt.ColSet,
   305  ) map[opt.ColumnID]opt.ColumnID {
   306  	var tableIDMap map[cat.StableID]opt.TableID
   307  	var fkColMap map[opt.ColumnID]opt.ColumnID
   308  	var lastSeen opt.TableID
   309  
   310  	// Walk through the left columns and add foreign key and referenced columns to
   311  	// the output mapping if they come from the leftNotNullCols and
   312  	// rightUnfilteredCols ColSets respectively.
   313  	for col, ok := leftNotNullCols.Next(0); ok; col, ok = leftNotNullCols.Next(col + 1) {
   314  		fkTableID := md.ColumnMeta(col).Table
   315  		if fkTableID < 1 {
   316  			// The column does not come from a base table.
   317  			continue
   318  		}
   319  		if fkTableID == lastSeen {
   320  			// We have already encountered this TableID. (This works because ColumnIDs
   321  			// with the same TableID are clustered together).
   322  			continue
   323  		}
   324  		lastSeen = fkTableID
   325  		fkTableMeta := md.TableMeta(fkTableID)
   326  		if fkTableMeta.IgnoreForeignKeys {
   327  			// We are not allowed to use any of this table's foreign keys.
   328  			continue
   329  		}
   330  		fkTable := fkTableMeta.Table
   331  		for i, cnt := 0, fkTable.OutboundForeignKeyCount(); i < cnt; i++ {
   332  			fk := fkTable.OutboundForeignKey(i)
   333  			if !fk.Validated() {
   334  				// The data is not guaranteed to follow the foreign key constraint.
   335  				continue
   336  			}
   337  			if tableIDMap == nil {
   338  				// Lazily initialize tableIDMap.
   339  				tableIDMap = makeStableTableIDMap(md, rightUnfilteredCols)
   340  				if len(tableIDMap) == 0 {
   341  					// No valid tables were found from the right side.
   342  					break
   343  				}
   344  			}
   345  			refTableID, ok := tableIDMap[fk.ReferencedTableID()]
   346  			if !ok {
   347  				// There is no valid right table corresponding to the referenced table.
   348  				continue
   349  			}
   350  			var leftCols, rightCols []opt.ColumnID
   351  			fkValid := true
   352  			for j, numCols := 0, fk.ColumnCount(); j < numCols; j++ {
   353  				leftOrd := fk.OriginColumnOrdinal(fkTable, j)
   354  				rightOrd := fk.ReferencedColumnOrdinal(md.Table(refTableID), j)
   355  				leftCol := fkTableID.ColumnID(leftOrd)
   356  				rightCol := refTableID.ColumnID(rightOrd)
   357  				if !leftNotNullCols.Contains(leftCol) {
   358  					// Not all FK columns are part of the equality conditions. There are two
   359  					// cases:
   360  					// 1. MATCH SIMPLE/PARTIAL: if this column is nullable, rows from this
   361  					//    foreign key are not guaranteed to match.
   362  					// 2. MATCH FULL: FK rows are still guaranteed to match because the
   363  					//    non-present columns can only be NULL if all FK columns are NULL.
   364  					if fk.MatchMethod() != tree.MatchFull {
   365  						fkValid = false
   366  						break
   367  					}
   368  					continue
   369  				}
   370  				if !rightUnfilteredCols.Contains(rightCol) {
   371  					continue
   372  				}
   373  				leftCols = append(leftCols, leftCol)
   374  				rightCols = append(rightCols, rightCol)
   375  			}
   376  			if !fkValid {
   377  				// The foreign key relations should only be added to the mapping if the
   378  				// foreign key is guaranteed a match for every row.
   379  				continue
   380  			}
   381  			for i := range leftCols {
   382  				// Add any valid foreign key relations to the mapping.
   383  				if fkColMap == nil {
   384  					// Lazily initialize fkColMap
   385  					fkColMap = map[opt.ColumnID]opt.ColumnID{}
   386  				}
   387  				fkColMap[leftCols[i]] = rightCols[i]
   388  			}
   389  		}
   390  	}
   391  	return fkColMap
   392  }
   393  
   394  // makeStableTableIDMap creates a mapping from the StableIDs of the base tables
   395  // to the meta TableIDs for the given columns.
   396  func makeStableTableIDMap(md *opt.Metadata, cols opt.ColSet) map[cat.StableID]opt.TableID {
   397  	idMap := map[cat.StableID]opt.TableID{}
   398  	for col, ok := cols.Next(0); ok; col, ok = cols.Next(col + 1) {
   399  		metaTableID := md.ColumnMeta(col).Table
   400  		if metaTableID == 0 {
   401  			continue
   402  		}
   403  		stableTableID := md.Table(metaTableID).ID()
   404  		if prevID, ok := idMap[stableTableID]; ok && prevID != metaTableID {
   405  			// Avoid dealing with cases where multiple meta tables reference the same
   406  			// base table so that only one TableID has to be stored.
   407  			return map[cat.StableID]opt.TableID{}
   408  		}
   409  		idMap[stableTableID] = metaTableID
   410  	}
   411  	return idMap
   412  }
   413  
   414  // getFiltersFDs returns a FuncDepSet with the FDs from the FiltersItems in
   415  // the given FiltersExpr.
   416  func getFiltersFDs(filters FiltersExpr) props.FuncDepSet {
   417  	if len(filters) == 1 {
   418  		return filters[0].ScalarProps().FuncDeps
   419  	}
   420  
   421  	filtersFDs := props.FuncDepSet{}
   422  	for i := range filters {
   423  		filtersFDs.AddFrom(&filters[i].ScalarProps().FuncDeps)
   424  	}
   425  	return filtersFDs
   426  }