github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/opt/optbuilder/join.go (about)

     1  // Copyright 2018 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package optbuilder
    12  
    13  import (
    14  	"github.com/cockroachdb/cockroach/pkg/server/telemetry"
    15  	"github.com/cockroachdb/cockroach/pkg/sql/opt"
    16  	"github.com/cockroachdb/cockroach/pkg/sql/opt/memo"
    17  	"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode"
    18  	"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror"
    19  	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
    20  	"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
    21  	"github.com/cockroachdb/cockroach/pkg/sql/sqltelemetry"
    22  	"github.com/cockroachdb/cockroach/pkg/sql/types"
    23  	"github.com/cockroachdb/cockroach/pkg/util"
    24  	"github.com/cockroachdb/errors"
    25  )
    26  
    27  // buildJoin builds a set of memo groups that represent the given join table
    28  // expression.
    29  //
    30  // See Builder.buildStmt for a description of the remaining input and
    31  // return values.
    32  func (b *Builder) buildJoin(
    33  	join *tree.JoinTableExpr, locking lockingSpec, inScope *scope,
    34  ) (outScope *scope) {
    35  	leftScope := b.buildDataSource(join.Left, nil /* indexFlags */, locking, inScope)
    36  
    37  	isLateral := false
    38  	inScopeRight := inScope
    39  	// If this is a lateral join, use leftScope as inScope for the right side.
    40  	// The right side scope of a LATERAL join includes the columns produced by
    41  	// the left side.
    42  	if t, ok := join.Right.(*tree.AliasedTableExpr); ok && t.Lateral {
    43  		telemetry.Inc(sqltelemetry.LateralJoinUseCounter)
    44  		isLateral = true
    45  		inScopeRight = leftScope
    46  		inScopeRight.context = exprKindLateralJoin
    47  	}
    48  
    49  	rightScope := b.buildDataSource(join.Right, nil /* indexFlags */, locking, inScopeRight)
    50  
    51  	// Check that the same table name is not used on both sides.
    52  	b.validateJoinTableNames(leftScope, rightScope)
    53  
    54  	joinType := sqlbase.JoinTypeFromAstString(join.JoinType)
    55  	var flags memo.JoinFlags
    56  	switch join.Hint {
    57  	case "":
    58  	case tree.AstHash:
    59  		telemetry.Inc(sqltelemetry.HashJoinHintUseCounter)
    60  		flags = memo.AllowHashJoinStoreRight
    61  
    62  	case tree.AstLookup:
    63  		telemetry.Inc(sqltelemetry.LookupJoinHintUseCounter)
    64  		flags = memo.AllowLookupJoinIntoRight
    65  		if joinType != sqlbase.InnerJoin && joinType != sqlbase.LeftOuterJoin {
    66  			panic(pgerror.Newf(pgcode.Syntax,
    67  				"%s can only be used with INNER or LEFT joins", tree.AstLookup,
    68  			))
    69  		}
    70  
    71  	case tree.AstMerge:
    72  		telemetry.Inc(sqltelemetry.MergeJoinHintUseCounter)
    73  		flags = memo.AllowMergeJoin
    74  
    75  	default:
    76  		panic(pgerror.Newf(
    77  			pgcode.FeatureNotSupported, "join hint %s not supported", join.Hint,
    78  		))
    79  	}
    80  
    81  	switch cond := join.Cond.(type) {
    82  	case tree.NaturalJoinCond, *tree.UsingJoinCond:
    83  		outScope = inScope.push()
    84  
    85  		var jb usingJoinBuilder
    86  		jb.init(b, joinType, flags, leftScope, rightScope, outScope)
    87  
    88  		switch t := cond.(type) {
    89  		case tree.NaturalJoinCond:
    90  			jb.buildNaturalJoin(t)
    91  		case *tree.UsingJoinCond:
    92  			jb.buildUsingJoin(t)
    93  		}
    94  		return outScope
    95  
    96  	case *tree.OnJoinCond, nil:
    97  		// Append columns added by the children, as they are visible to the filter.
    98  		outScope = inScope.push()
    99  		outScope.appendColumnsFromScope(leftScope)
   100  		outScope.appendColumnsFromScope(rightScope)
   101  
   102  		var filters memo.FiltersExpr
   103  		if on, ok := cond.(*tree.OnJoinCond); ok {
   104  			// Do not allow special functions in the ON clause.
   105  			b.semaCtx.Properties.Require(
   106  				exprKindOn.String(), tree.RejectGenerators|tree.RejectWindowApplications,
   107  			)
   108  			outScope.context = exprKindOn
   109  			filter := b.buildScalar(
   110  				outScope.resolveAndRequireType(on.Expr, types.Bool), outScope, nil, nil, nil,
   111  			)
   112  			filters = memo.FiltersExpr{b.factory.ConstructFiltersItem(filter)}
   113  		} else {
   114  			filters = memo.TrueFilter
   115  		}
   116  
   117  		left := leftScope.expr.(memo.RelExpr)
   118  		right := rightScope.expr.(memo.RelExpr)
   119  		outScope.expr = b.constructJoin(
   120  			joinType, left, right, filters, &memo.JoinPrivate{Flags: flags}, isLateral,
   121  		)
   122  		return outScope
   123  
   124  	default:
   125  		panic(errors.AssertionFailedf("unsupported join condition %#v", cond))
   126  	}
   127  }
   128  
   129  // validateJoinTableNames checks that table names are not repeated between the
   130  // left and right sides of a join. leftTables contains a pre-built map of the
   131  // tables from the left side of the join, and rightScope contains the
   132  // scopeColumns (and corresponding table names) from the right side of the
   133  // join.
   134  func (b *Builder) validateJoinTableNames(leftScope, rightScope *scope) {
   135  	// Try to derive smaller subset of columns which need to be validated.
   136  	leftOrds := b.findJoinColsToValidate(leftScope)
   137  	rightOrds := b.findJoinColsToValidate(rightScope)
   138  
   139  	// Look for table name in left scope that exists in right scope.
   140  	for left, ok := leftOrds.Next(0); ok; left, ok = leftOrds.Next(left + 1) {
   141  		leftName := &leftScope.cols[left].table
   142  
   143  		for right, ok := rightOrds.Next(0); ok; right, ok = rightOrds.Next(right + 1) {
   144  			rightName := &rightScope.cols[right].table
   145  
   146  			// Must match all name parts.
   147  			if leftName.ObjectName != rightName.ObjectName ||
   148  				leftName.SchemaName != rightName.SchemaName ||
   149  				leftName.CatalogName != rightName.CatalogName {
   150  				continue
   151  			}
   152  
   153  			panic(pgerror.Newf(
   154  				pgcode.DuplicateAlias,
   155  				"source name %q specified more than once (missing AS clause)",
   156  				tree.ErrString(&leftName.ObjectName),
   157  			))
   158  		}
   159  	}
   160  }
   161  
   162  // findJoinColsToValidate creates a FastIntSet containing the ordinal of each
   163  // column that has a different table name than the previous column. This is a
   164  // fast way of reducing the set of columns that need to checked for duplicate
   165  // names by validateJoinTableNames.
   166  func (b *Builder) findJoinColsToValidate(scope *scope) util.FastIntSet {
   167  	var ords util.FastIntSet
   168  	for i := range scope.cols {
   169  		// Allow joins of sources that define columns with no
   170  		// associated table name. At worst, the USING/NATURAL
   171  		// detection code or expression analysis for ON will detect an
   172  		// ambiguity later.
   173  		if scope.cols[i].table.ObjectName == "" {
   174  			continue
   175  		}
   176  
   177  		if i == 0 || scope.cols[i].table != scope.cols[i-1].table {
   178  			ords.Add(i)
   179  		}
   180  	}
   181  	return ords
   182  }
   183  
   184  var invalidLateralJoin = pgerror.New(pgcode.Syntax, "The combining JOIN type must be INNER or LEFT for a LATERAL reference")
   185  
   186  func (b *Builder) constructJoin(
   187  	joinType sqlbase.JoinType,
   188  	left, right memo.RelExpr,
   189  	on memo.FiltersExpr,
   190  	private *memo.JoinPrivate,
   191  	isLateral bool,
   192  ) memo.RelExpr {
   193  	switch joinType {
   194  	case sqlbase.InnerJoin:
   195  		if isLateral {
   196  			return b.factory.ConstructInnerJoinApply(left, right, on, private)
   197  		}
   198  		return b.factory.ConstructInnerJoin(left, right, on, private)
   199  	case sqlbase.LeftOuterJoin:
   200  		if isLateral {
   201  			return b.factory.ConstructLeftJoinApply(left, right, on, private)
   202  		}
   203  		return b.factory.ConstructLeftJoin(left, right, on, private)
   204  	case sqlbase.RightOuterJoin:
   205  		if isLateral {
   206  			panic(invalidLateralJoin)
   207  		}
   208  		return b.factory.ConstructRightJoin(left, right, on, private)
   209  	case sqlbase.FullOuterJoin:
   210  		if isLateral {
   211  			panic(invalidLateralJoin)
   212  		}
   213  		return b.factory.ConstructFullJoin(left, right, on, private)
   214  	default:
   215  		panic(pgerror.Newf(pgcode.FeatureNotSupported,
   216  			"unsupported JOIN type %d", joinType))
   217  	}
   218  }
   219  
   220  // usingJoinBuilder helps to build a USING join or natural join. It finds the
   221  // columns in the left and right relations that match the columns provided in
   222  // the names parameter (or names common to both sides in case of natural join),
   223  // and creates equality predicate(s) with those columns. It also ensures that
   224  // there is a single output column for each match name (other columns with the
   225  // same name are hidden).
   226  //
   227  // -- Merged columns --
   228  //
   229  // With NATURAL JOIN or JOIN USING (a,b,c,...), SQL allows us to refer to the
   230  // columns a,b,c directly; these columns have the following semantics:
   231  //   a = IFNULL(left.a, right.a)
   232  //   b = IFNULL(left.b, right.b)
   233  //   c = IFNULL(left.c, right.c)
   234  //   ...
   235  //
   236  // Furthermore, a star has to resolve the columns in the following order:
   237  // merged columns, non-equality columns from the left table, non-equality
   238  // columns from the right table. To perform this rearrangement, we use a
   239  // projection on top of the join. Note that the original columns must
   240  // still be accessible via left.a, right.a (they will just be hidden).
   241  //
   242  // For inner or left outer joins, a is always the same as left.a.
   243  //
   244  // For right outer joins, a is always equal to right.a; but for some types
   245  // (like collated strings), this doesn't mean it is the same as right.a. In
   246  // this case we must still use the IFNULL construct.
   247  //
   248  // Example:
   249  //
   250  //  left has columns (a,b,x)
   251  //  right has columns (a,b,y)
   252  //
   253  //  - SELECT * FROM left JOIN right USING(a,b)
   254  //
   255  //  join has columns:
   256  //    1: left.a
   257  //    2: left.b
   258  //    3: left.x
   259  //    4: right.a
   260  //    5: right.b
   261  //    6: right.y
   262  //
   263  //  projection has columns and corresponding variable expressions:
   264  //    1: a aka left.a        @1
   265  //    2: b aka left.b        @2
   266  //    3: left.x              @3
   267  //    4: right.a (hidden)    @4
   268  //    5: right.b (hidden)    @5
   269  //    6: right.y             @6
   270  //
   271  // If the join was a FULL OUTER JOIN, the columns would be:
   272  //    1: a                   IFNULL(@1,@4)
   273  //    2: b                   IFNULL(@2,@5)
   274  //    3: left.a (hidden)     @1
   275  //    4: left.b (hidden)     @2
   276  //    5: left.x              @3
   277  //    6: right.a (hidden)    @4
   278  //    7: right.b (hidden)    @5
   279  //    8: right.y             @6
   280  //
   281  type usingJoinBuilder struct {
   282  	b          *Builder
   283  	joinType   sqlbase.JoinType
   284  	joinFlags  memo.JoinFlags
   285  	filters    memo.FiltersExpr
   286  	leftScope  *scope
   287  	rightScope *scope
   288  	outScope   *scope
   289  
   290  	// hideCols contains the join columns which are hidden in the result
   291  	// expression. Note that we cannot simply store the column ids since the
   292  	// same column may be used multiple times with different aliases.
   293  	hideCols map[*scopeColumn]struct{}
   294  
   295  	// showCols contains the join columns which are not hidden in the result
   296  	// expression. Note that we cannot simply store the column ids since the
   297  	// same column may be used multiple times with different aliases.
   298  	showCols map[*scopeColumn]struct{}
   299  
   300  	// ifNullCols contains the ids of each synthesized column which performs the
   301  	// IFNULL check for a pair of join columns.
   302  	ifNullCols opt.ColSet
   303  }
   304  
   305  func (jb *usingJoinBuilder) init(
   306  	b *Builder,
   307  	joinType sqlbase.JoinType,
   308  	flags memo.JoinFlags,
   309  	leftScope, rightScope, outScope *scope,
   310  ) {
   311  	jb.b = b
   312  	jb.joinType = joinType
   313  	jb.joinFlags = flags
   314  	jb.leftScope = leftScope
   315  	jb.rightScope = rightScope
   316  	jb.outScope = outScope
   317  	jb.hideCols = make(map[*scopeColumn]struct{})
   318  	jb.showCols = make(map[*scopeColumn]struct{})
   319  }
   320  
   321  // buildUsingJoin constructs a Join operator with join columns matching the
   322  // the names in the given join condition.
   323  func (jb *usingJoinBuilder) buildUsingJoin(using *tree.UsingJoinCond) {
   324  	var seenCols opt.ColSet
   325  	for _, name := range using.Cols {
   326  		// Find left and right USING columns in the scopes.
   327  		leftCol := jb.findUsingColumn(jb.leftScope.cols, name, "left table")
   328  		if leftCol == nil {
   329  			jb.raiseUndefinedColError(name, "left")
   330  		}
   331  		if seenCols.Contains(leftCol.id) {
   332  			// Same name exists more than once in USING column name list.
   333  			panic(pgerror.Newf(pgcode.DuplicateColumn,
   334  				"column name %q appears more than once in USING clause", tree.ErrString(&name)))
   335  		}
   336  		seenCols.Add(leftCol.id)
   337  
   338  		rightCol := jb.findUsingColumn(jb.rightScope.cols, name, "right table")
   339  		if rightCol == nil {
   340  			jb.raiseUndefinedColError(name, "right")
   341  		}
   342  
   343  		jb.addEqualityCondition(leftCol, rightCol)
   344  	}
   345  
   346  	jb.finishBuild()
   347  }
   348  
   349  // buildNaturalJoin constructs a Join operator with join columns derived from
   350  // matching names in the left and right inputs.
   351  func (jb *usingJoinBuilder) buildNaturalJoin(natural tree.NaturalJoinCond) {
   352  	// Only add equality conditions for non-hidden columns with matching name in
   353  	// both the left and right inputs.
   354  	var seenCols opt.ColSet
   355  	for i := range jb.leftScope.cols {
   356  		leftCol := &jb.leftScope.cols[i]
   357  		if leftCol.hidden {
   358  			continue
   359  		}
   360  		if seenCols.Contains(leftCol.id) {
   361  			// Don't raise an error if the id matches but it has a different name.
   362  			for j := 0; j < i; j++ {
   363  				col := &jb.leftScope.cols[j]
   364  				if col.id == leftCol.id && col.name == leftCol.name {
   365  					jb.raiseDuplicateColError(leftCol.name, "left table")
   366  				}
   367  			}
   368  		}
   369  		seenCols.Add(leftCol.id)
   370  
   371  		rightCol := jb.findUsingColumn(jb.rightScope.cols, leftCol.name, "right table")
   372  		if rightCol != nil {
   373  			jb.addEqualityCondition(leftCol, rightCol)
   374  		}
   375  	}
   376  
   377  	jb.finishBuild()
   378  }
   379  
   380  // finishBuild adds any non-join columns to the output scope and then constructs
   381  // the Join operator. If at least one "if null" column exists, the join must be
   382  // wrapped in a Project operator that performs the required IFNULL checks.
   383  func (jb *usingJoinBuilder) finishBuild() {
   384  	jb.addRemainingCols(jb.leftScope.cols)
   385  	jb.addRemainingCols(jb.rightScope.cols)
   386  
   387  	jb.outScope.expr = jb.b.constructJoin(
   388  		jb.joinType,
   389  		jb.leftScope.expr.(memo.RelExpr),
   390  		jb.rightScope.expr.(memo.RelExpr),
   391  		jb.filters,
   392  		&memo.JoinPrivate{Flags: jb.joinFlags},
   393  		false, /* isLateral */
   394  	)
   395  
   396  	if !jb.ifNullCols.Empty() {
   397  		// Wrap in a projection to include the merged columns and ensure that all
   398  		// remaining columns are passed through unchanged.
   399  		for i := range jb.outScope.cols {
   400  			col := &jb.outScope.cols[i]
   401  			if !jb.ifNullCols.Contains(col.id) {
   402  				// Mark column as passthrough.
   403  				col.scalar = nil
   404  			}
   405  		}
   406  
   407  		jb.outScope.expr = jb.b.constructProject(jb.outScope.expr.(memo.RelExpr), jb.outScope.cols)
   408  	}
   409  }
   410  
   411  // addRemainingCols iterates through each of the columns in cols and performs
   412  // one of the following actions:
   413  // (1) If the column is part of the hideCols set, then it is a join column that
   414  //     needs to be added to output scope, with the hidden attribute set to true.
   415  // (2) If the column is part of the showCols set, then it is a join column that
   416  //     has already been added to the output scope by addEqualityCondition, so
   417  //     skip it now.
   418  // (3) All other columns are added to the scope without modification.
   419  func (jb *usingJoinBuilder) addRemainingCols(cols []scopeColumn) {
   420  	for i := range cols {
   421  		col := &cols[i]
   422  		if _, ok := jb.hideCols[col]; ok {
   423  			jb.outScope.cols = append(jb.outScope.cols, *col)
   424  			jb.outScope.cols[len(jb.outScope.cols)-1].hidden = true
   425  		} else if _, ok := jb.showCols[col]; !ok {
   426  			jb.outScope.cols = append(jb.outScope.cols, *col)
   427  		}
   428  	}
   429  }
   430  
   431  // findUsingColumn finds the column in cols that has the given name. If no such
   432  // column exists, findUsingColumn returns nil. If multiple columns with the name
   433  // exist, then findUsingColumn raises an error. The context is used for error
   434  // reporting.
   435  func (jb *usingJoinBuilder) findUsingColumn(
   436  	cols []scopeColumn, name tree.Name, context string,
   437  ) *scopeColumn {
   438  	var foundCol *scopeColumn
   439  	for i := range cols {
   440  		col := &cols[i]
   441  		if !col.hidden && col.name == name {
   442  			if foundCol != nil {
   443  				jb.raiseDuplicateColError(name, context)
   444  			}
   445  			foundCol = col
   446  		}
   447  	}
   448  	return foundCol
   449  }
   450  
   451  // addEqualityCondition constructs a new Eq expression comparing the given left
   452  // and right columns. In addition, it adds a new column to the output scope that
   453  // represents the "merged" value of the left and right columns. This could be
   454  // either the left or right column value, or, in the case of a FULL JOIN, an
   455  // IFNULL(left, right) expression.
   456  func (jb *usingJoinBuilder) addEqualityCondition(leftCol, rightCol *scopeColumn) {
   457  	// First, check if the comparison would even be valid.
   458  	if !leftCol.typ.Equivalent(rightCol.typ) {
   459  		if _, found := tree.FindEqualComparisonFunction(leftCol.typ, rightCol.typ); !found {
   460  			panic(pgerror.Newf(pgcode.DatatypeMismatch,
   461  				"JOIN/USING types %s for left and %s for right cannot be matched for column %q",
   462  				leftCol.typ, rightCol.typ, tree.ErrString(&leftCol.name)))
   463  		}
   464  	}
   465  
   466  	// Construct the predicate.
   467  	leftVar := jb.b.factory.ConstructVariable(leftCol.id)
   468  	rightVar := jb.b.factory.ConstructVariable(rightCol.id)
   469  	eq := jb.b.factory.ConstructEq(leftVar, rightVar)
   470  	jb.filters = append(jb.filters, jb.b.factory.ConstructFiltersItem(eq))
   471  
   472  	// Add the merged column to the scope, constructing a new column if needed.
   473  	if jb.joinType == sqlbase.InnerJoin || jb.joinType == sqlbase.LeftOuterJoin {
   474  		// The merged column is the same as the corresponding column from the
   475  		// left side.
   476  		jb.outScope.cols = append(jb.outScope.cols, *leftCol)
   477  		jb.showCols[leftCol] = struct{}{}
   478  		jb.hideCols[rightCol] = struct{}{}
   479  	} else if jb.joinType == sqlbase.RightOuterJoin &&
   480  		!sqlbase.HasCompositeKeyEncoding(leftCol.typ) {
   481  		// The merged column is the same as the corresponding column from the
   482  		// right side.
   483  		jb.outScope.cols = append(jb.outScope.cols, *rightCol)
   484  		jb.showCols[rightCol] = struct{}{}
   485  		jb.hideCols[leftCol] = struct{}{}
   486  	} else {
   487  		// Construct a new merged column to represent IFNULL(left, right).
   488  		var typ *types.T
   489  		if leftCol.typ.Family() != types.UnknownFamily {
   490  			typ = leftCol.typ
   491  		} else {
   492  			typ = rightCol.typ
   493  		}
   494  		texpr := tree.NewTypedCoalesceExpr(tree.TypedExprs{leftCol, rightCol}, typ)
   495  		merged := jb.b.factory.ConstructCoalesce(memo.ScalarListExpr{leftVar, rightVar})
   496  		col := jb.b.synthesizeColumn(jb.outScope, string(leftCol.name), typ, texpr, merged)
   497  		jb.ifNullCols.Add(col.id)
   498  		jb.hideCols[leftCol] = struct{}{}
   499  		jb.hideCols[rightCol] = struct{}{}
   500  	}
   501  }
   502  
   503  func (jb *usingJoinBuilder) raiseDuplicateColError(name tree.Name, context string) {
   504  	panic(pgerror.Newf(pgcode.DuplicateColumn,
   505  		"common column name %q appears more than once in %s", tree.ErrString(&name), context))
   506  }
   507  
   508  func (jb *usingJoinBuilder) raiseUndefinedColError(name tree.Name, context string) {
   509  	panic(pgerror.Newf(pgcode.UndefinedColumn,
   510  		"column \"%s\" specified in USING clause does not exist in %s table", name, context))
   511  }