github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/union.go (about) 1 // Copyright 2016 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package sql 12 13 import ( 14 "context" 15 16 "github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode" 17 "github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror" 18 "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" 19 "github.com/cockroachdb/cockroach/pkg/sql/sqlbase" 20 "github.com/cockroachdb/cockroach/pkg/sql/types" 21 "github.com/cockroachdb/errors" 22 ) 23 24 // unionNode is a planNode whose rows are the result of one of three set 25 // operations (UNION, INTERSECT, or EXCEPT) on left and right. There are two 26 // variations of each set operation: distinct, which always returns unique 27 // results, and all, which does no uniqueing. 28 // 29 // Ordering of rows is expected to be handled externally to unionNode. 30 // TODO(dan): In the long run, this is insufficient. If we know both left and 31 // right are ordered the same way, we can do the set logic without the map 32 // state. Additionally, if the unionNode has an ordering then we can hint it 33 // down to left and right and force the condition for this first optimization. 34 // 35 // All six of the operations can be completed without cacheing rows by 36 // iterating one side then the other and keeping counts of unique rows 37 // in a map. The logic is common for all six. However, because EXCEPT 38 // needs to iterate the right side first, the common code always reads 39 // the right operand first. Meanwhile, we invert the operands for the 40 // non-EXCEPT cases in order to preserve the appearance of the 41 // original specified order. 42 // 43 // The emit logic for each op is represented by implementors of the 44 // unionNodeEmit interface. The emitRight method is called for each row output 45 // by the right side and passed a hashable representation of the row. If it 46 // returns true, the row is emitted. After all right rows are examined, then 47 // each left row is passed to emitLeft in the same way. 48 // 49 // An example: intersectNodeEmitAll 50 // VALUES (1), (1), (1), (2), (2) INTERSECT ALL VALUES (1), (3), (1) 51 // ---- 52 // 1 53 // 1 54 // There are three 1s on the left and two 1s on the right, so we emit 1, 1. 55 // Nothing else is in both. 56 // emitRight: For each row, increment the map entry. 57 // emitLeft: For each row, if the row is not present in the map, it was not in 58 // both, don't emit. Otherwise, if the count for the row was > 0, emit and 59 // decrement the entry. Otherwise, the row was on the right, but we've 60 // already emitted as many as were on the right, don't emit. 61 type unionNode struct { 62 // right and left are the data source operands. 63 // right is read first, to populate the `emit` field. 64 right, left planNode 65 66 // columns contains the metadata for the results of this node. 67 columns sqlbase.ResultColumns 68 // inverted, when true, indicates that the right plan corresponds to 69 // the left operand in the input SQL syntax, and vice-versa. 70 inverted bool 71 // emitAll is a performance optimization for UNION ALL. When set 72 // the union logic avoids the `emit` logic entirely. 73 emitAll bool 74 75 // unionType is the type of operation (UNION, INTERSECT, EXCEPT) 76 unionType tree.UnionType 77 // all indicates if the operation is the ALL or DISTINCT version 78 all bool 79 } 80 81 func (p *planner) newUnionNode( 82 typ tree.UnionType, all bool, left, right planNode, 83 ) (planNode, error) { 84 emitAll := false 85 switch typ { 86 case tree.UnionOp: 87 if all { 88 emitAll = true 89 } 90 case tree.IntersectOp: 91 case tree.ExceptOp: 92 default: 93 return nil, errors.Errorf("%v is not supported", typ) 94 } 95 96 leftColumns := planColumns(left) 97 rightColumns := planColumns(right) 98 if len(leftColumns) != len(rightColumns) { 99 return nil, pgerror.Newf( 100 pgcode.Syntax, 101 "each %v query must have the same number of columns: %d vs %d", 102 typ, len(leftColumns), len(rightColumns), 103 ) 104 } 105 unionColumns := append(sqlbase.ResultColumns(nil), leftColumns...) 106 for i := 0; i < len(unionColumns); i++ { 107 l := leftColumns[i] 108 r := rightColumns[i] 109 // TODO(dan): This currently checks whether the types are exactly the same, 110 // but Postgres is more lenient: 111 // http://www.postgresql.org/docs/9.5/static/typeconv-union-case.html. 112 if !(l.Typ.Equivalent(r.Typ) || l.Typ.Family() == types.UnknownFamily || r.Typ.Family() == types.UnknownFamily) { 113 return nil, pgerror.Newf(pgcode.DatatypeMismatch, 114 "%v types %s and %s cannot be matched", typ, l.Typ, r.Typ) 115 } 116 if l.Typ.Family() == types.UnknownFamily { 117 unionColumns[i].Typ = r.Typ 118 } 119 } 120 121 inverted := false 122 if typ != tree.ExceptOp { 123 // The logic below reads the rows from the right operand first, 124 // because for EXCEPT in particular this is what we need to match. 125 // However for the other operators (UNION, INTERSECT) it is 126 // actually confusing to see the right values come up first in the 127 // results. So invert this here, to reduce surprise by users. 128 left, right = right, left 129 inverted = true 130 } 131 132 node := &unionNode{ 133 right: right, 134 left: left, 135 columns: unionColumns, 136 inverted: inverted, 137 emitAll: emitAll, 138 unionType: typ, 139 all: all, 140 } 141 return node, nil 142 } 143 144 func (n *unionNode) startExec(params runParams) error { 145 panic("unionNode cannot be run in local mode") 146 } 147 148 func (n *unionNode) Next(params runParams) (bool, error) { 149 panic("unionNode cannot be run in local mode") 150 } 151 152 func (n *unionNode) Values() tree.Datums { 153 panic("unionNode cannot be run in local mode") 154 } 155 156 func (n *unionNode) Close(ctx context.Context) { 157 n.right.Close(ctx) 158 n.left.Close(ctx) 159 }