github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/union.go (about)

     1  // Copyright 2016 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package sql
    12  
    13  import (
    14  	"context"
    15  
    16  	"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode"
    17  	"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror"
    18  	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
    19  	"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
    20  	"github.com/cockroachdb/cockroach/pkg/sql/types"
    21  	"github.com/cockroachdb/errors"
    22  )
    23  
    24  // unionNode is a planNode whose rows are the result of one of three set
    25  // operations (UNION, INTERSECT, or EXCEPT) on left and right. There are two
    26  // variations of each set operation: distinct, which always returns unique
    27  // results, and all, which does no uniqueing.
    28  //
    29  // Ordering of rows is expected to be handled externally to unionNode.
    30  // TODO(dan): In the long run, this is insufficient. If we know both left and
    31  // right are ordered the same way, we can do the set logic without the map
    32  // state. Additionally, if the unionNode has an ordering then we can hint it
    33  // down to left and right and force the condition for this first optimization.
    34  //
    35  // All six of the operations can be completed without cacheing rows by
    36  // iterating one side then the other and keeping counts of unique rows
    37  // in a map. The logic is common for all six. However, because EXCEPT
    38  // needs to iterate the right side first, the common code always reads
    39  // the right operand first. Meanwhile, we invert the operands for the
    40  // non-EXCEPT cases in order to preserve the appearance of the
    41  // original specified order.
    42  //
    43  // The emit logic for each op is represented by implementors of the
    44  // unionNodeEmit interface. The emitRight method is called for each row output
    45  // by the right side and passed a hashable representation of the row. If it
    46  // returns true, the row is emitted. After all right rows are examined, then
    47  // each left row is passed to emitLeft in the same way.
    48  //
    49  // An example: intersectNodeEmitAll
    50  // VALUES (1), (1), (1), (2), (2) INTERSECT ALL VALUES (1), (3), (1)
    51  // ----
    52  // 1
    53  // 1
    54  // There are three 1s on the left and two 1s on the right, so we emit 1, 1.
    55  // Nothing else is in both.
    56  //  emitRight: For each row, increment the map entry.
    57  //  emitLeft: For each row, if the row is not present in the map, it was not in
    58  //    both, don't emit. Otherwise, if the count for the row was > 0, emit and
    59  //    decrement the entry. Otherwise, the row was on the right, but we've
    60  //    already emitted as many as were on the right, don't emit.
    61  type unionNode struct {
    62  	// right and left are the data source operands.
    63  	// right is read first, to populate the `emit` field.
    64  	right, left planNode
    65  
    66  	// columns contains the metadata for the results of this node.
    67  	columns sqlbase.ResultColumns
    68  	// inverted, when true, indicates that the right plan corresponds to
    69  	// the left operand in the input SQL syntax, and vice-versa.
    70  	inverted bool
    71  	// emitAll is a performance optimization for UNION ALL. When set
    72  	// the union logic avoids the `emit` logic entirely.
    73  	emitAll bool
    74  
    75  	// unionType is the type of operation (UNION, INTERSECT, EXCEPT)
    76  	unionType tree.UnionType
    77  	// all indicates if the operation is the ALL or DISTINCT version
    78  	all bool
    79  }
    80  
    81  func (p *planner) newUnionNode(
    82  	typ tree.UnionType, all bool, left, right planNode,
    83  ) (planNode, error) {
    84  	emitAll := false
    85  	switch typ {
    86  	case tree.UnionOp:
    87  		if all {
    88  			emitAll = true
    89  		}
    90  	case tree.IntersectOp:
    91  	case tree.ExceptOp:
    92  	default:
    93  		return nil, errors.Errorf("%v is not supported", typ)
    94  	}
    95  
    96  	leftColumns := planColumns(left)
    97  	rightColumns := planColumns(right)
    98  	if len(leftColumns) != len(rightColumns) {
    99  		return nil, pgerror.Newf(
   100  			pgcode.Syntax,
   101  			"each %v query must have the same number of columns: %d vs %d",
   102  			typ, len(leftColumns), len(rightColumns),
   103  		)
   104  	}
   105  	unionColumns := append(sqlbase.ResultColumns(nil), leftColumns...)
   106  	for i := 0; i < len(unionColumns); i++ {
   107  		l := leftColumns[i]
   108  		r := rightColumns[i]
   109  		// TODO(dan): This currently checks whether the types are exactly the same,
   110  		// but Postgres is more lenient:
   111  		// http://www.postgresql.org/docs/9.5/static/typeconv-union-case.html.
   112  		if !(l.Typ.Equivalent(r.Typ) || l.Typ.Family() == types.UnknownFamily || r.Typ.Family() == types.UnknownFamily) {
   113  			return nil, pgerror.Newf(pgcode.DatatypeMismatch,
   114  				"%v types %s and %s cannot be matched", typ, l.Typ, r.Typ)
   115  		}
   116  		if l.Typ.Family() == types.UnknownFamily {
   117  			unionColumns[i].Typ = r.Typ
   118  		}
   119  	}
   120  
   121  	inverted := false
   122  	if typ != tree.ExceptOp {
   123  		// The logic below reads the rows from the right operand first,
   124  		// because for EXCEPT in particular this is what we need to match.
   125  		// However for the other operators (UNION, INTERSECT) it is
   126  		// actually confusing to see the right values come up first in the
   127  		// results. So invert this here, to reduce surprise by users.
   128  		left, right = right, left
   129  		inverted = true
   130  	}
   131  
   132  	node := &unionNode{
   133  		right:     right,
   134  		left:      left,
   135  		columns:   unionColumns,
   136  		inverted:  inverted,
   137  		emitAll:   emitAll,
   138  		unionType: typ,
   139  		all:       all,
   140  	}
   141  	return node, nil
   142  }
   143  
   144  func (n *unionNode) startExec(params runParams) error {
   145  	panic("unionNode cannot be run in local mode")
   146  }
   147  
   148  func (n *unionNode) Next(params runParams) (bool, error) {
   149  	panic("unionNode cannot be run in local mode")
   150  }
   151  
   152  func (n *unionNode) Values() tree.Datums {
   153  	panic("unionNode cannot be run in local mode")
   154  }
   155  
   156  func (n *unionNode) Close(ctx context.Context) {
   157  	n.right.Close(ctx)
   158  	n.left.Close(ctx)
   159  }