github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/rowexec/stream_merger.go (about)

     1  // Copyright 2016 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package rowexec
    12  
    13  import (
    14  	"context"
    15  
    16  	"github.com/cockroachdb/cockroach/pkg/sql/execinfra"
    17  	"github.com/cockroachdb/cockroach/pkg/sql/execinfrapb"
    18  	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
    19  	"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
    20  	"github.com/cockroachdb/cockroach/pkg/sql/types"
    21  	"github.com/cockroachdb/cockroach/pkg/util/encoding"
    22  	"github.com/cockroachdb/cockroach/pkg/util/mon"
    23  	"github.com/cockroachdb/errors"
    24  )
    25  
    26  // We define a group to be a set of rows from a given source with the same
    27  // group key, in this case the set of ordered columns. streamMerger emits
    28  // batches of rows that are the cross-product of matching groups from each
    29  // stream.
    30  type streamMerger struct {
    31  	left       streamGroupAccumulator
    32  	right      streamGroupAccumulator
    33  	leftGroup  []sqlbase.EncDatumRow
    34  	rightGroup []sqlbase.EncDatumRow
    35  	// nulLEquality indicates when NULL = NULL is truth-y. This is helpful
    36  	// when we want NULL to be meaningful during equality, for example
    37  	// during SCRUB secondary index checks.
    38  	nullEquality bool
    39  	datumAlloc   sqlbase.DatumAlloc
    40  }
    41  
    42  func (sm *streamMerger) start(ctx context.Context) {
    43  	sm.left.start(ctx)
    44  	sm.right.start(ctx)
    45  }
    46  
    47  // NextBatch returns a set of rows from the left stream and a set of rows from
    48  // the right stream, all matching on the equality columns. One of the sets can
    49  // be empty.
    50  func (sm *streamMerger) NextBatch(
    51  	ctx context.Context, evalCtx *tree.EvalContext,
    52  ) ([]sqlbase.EncDatumRow, []sqlbase.EncDatumRow, *execinfrapb.ProducerMetadata) {
    53  	if sm.leftGroup == nil {
    54  		var meta *execinfrapb.ProducerMetadata
    55  		sm.leftGroup, meta = sm.left.nextGroup(ctx, evalCtx)
    56  		if meta != nil {
    57  			return nil, nil, meta
    58  		}
    59  	}
    60  	if sm.rightGroup == nil {
    61  		var meta *execinfrapb.ProducerMetadata
    62  		sm.rightGroup, meta = sm.right.nextGroup(ctx, evalCtx)
    63  		if meta != nil {
    64  			return nil, nil, meta
    65  		}
    66  	}
    67  	if sm.leftGroup == nil && sm.rightGroup == nil {
    68  		return nil, nil, nil
    69  	}
    70  
    71  	var lrow, rrow sqlbase.EncDatumRow
    72  	if len(sm.leftGroup) > 0 {
    73  		lrow = sm.leftGroup[0]
    74  	}
    75  	if len(sm.rightGroup) > 0 {
    76  		rrow = sm.rightGroup[0]
    77  	}
    78  
    79  	cmp, err := CompareEncDatumRowForMerge(
    80  		sm.left.types, lrow, rrow, sm.left.ordering, sm.right.ordering,
    81  		sm.nullEquality, &sm.datumAlloc, evalCtx,
    82  	)
    83  	if err != nil {
    84  		return nil, nil, &execinfrapb.ProducerMetadata{Err: err}
    85  	}
    86  	var leftGroup, rightGroup []sqlbase.EncDatumRow
    87  	if cmp <= 0 {
    88  		leftGroup = sm.leftGroup
    89  		sm.leftGroup = nil
    90  	}
    91  	if cmp >= 0 {
    92  		rightGroup = sm.rightGroup
    93  		sm.rightGroup = nil
    94  	}
    95  	return leftGroup, rightGroup, nil
    96  }
    97  
    98  // CompareEncDatumRowForMerge EncDatumRow compares two EncDatumRows for merging.
    99  // When merging two streams and preserving the order (as in a MergeSort or
   100  // a MergeJoin) compare the head of the streams, emitting the one that sorts
   101  // first. It allows for the EncDatumRow to be nil if one of the streams is
   102  // exhausted (and hence nil). CompareEncDatumRowForMerge returns 0 when both
   103  // rows are nil, and a nil row is considered greater than any non-nil row.
   104  // CompareEncDatumRowForMerge assumes that the two rows have the same columns
   105  // in the same orders, but can handle different ordering directions. It takes
   106  // a DatumAlloc which is used for decoding if any underlying EncDatum is not
   107  // yet decoded.
   108  func CompareEncDatumRowForMerge(
   109  	lhsTypes []*types.T,
   110  	lhs, rhs sqlbase.EncDatumRow,
   111  	leftOrdering, rightOrdering sqlbase.ColumnOrdering,
   112  	nullEquality bool,
   113  	da *sqlbase.DatumAlloc,
   114  	evalCtx *tree.EvalContext,
   115  ) (int, error) {
   116  	if lhs == nil && rhs == nil {
   117  		return 0, nil
   118  	}
   119  	if lhs == nil {
   120  		return 1, nil
   121  	}
   122  	if rhs == nil {
   123  		return -1, nil
   124  	}
   125  	if len(leftOrdering) != len(rightOrdering) {
   126  		return 0, errors.Errorf(
   127  			"cannot compare two EncDatumRow types that have different length ColumnOrderings",
   128  		)
   129  	}
   130  
   131  	for i, ord := range leftOrdering {
   132  		lIdx := ord.ColIdx
   133  		rIdx := rightOrdering[i].ColIdx
   134  		// If both datums are NULL, we need to follow SQL semantics where
   135  		// they are not equal. This differs from our datum semantics where
   136  		// they are equal. In the case where we want to consider NULLs to be
   137  		// equal, we continue and skip to the next datums in the row.
   138  		if lhs[lIdx].IsNull() && rhs[rIdx].IsNull() {
   139  			if !nullEquality {
   140  				// We can return either -1 or 1, it does not change the behavior.
   141  				return -1, nil
   142  			}
   143  			continue
   144  		}
   145  		cmp, err := lhs[lIdx].Compare(lhsTypes[lIdx], da, evalCtx, &rhs[rIdx])
   146  		if err != nil {
   147  			return 0, err
   148  		}
   149  		if cmp != 0 {
   150  			if leftOrdering[i].Direction == encoding.Descending {
   151  				cmp = -cmp
   152  			}
   153  			return cmp, nil
   154  		}
   155  	}
   156  	return 0, nil
   157  }
   158  
   159  func (sm *streamMerger) close(ctx context.Context) {
   160  	sm.left.close(ctx)
   161  	sm.right.close(ctx)
   162  }
   163  
   164  // makeStreamMerger creates a streamMerger, joining rows from leftSource with
   165  // rows from rightSource.
   166  //
   167  // All metadata from the sources is forwarded to metadataSink.
   168  func makeStreamMerger(
   169  	leftSource execinfra.RowSource,
   170  	leftOrdering sqlbase.ColumnOrdering,
   171  	rightSource execinfra.RowSource,
   172  	rightOrdering sqlbase.ColumnOrdering,
   173  	nullEquality bool,
   174  	memMonitor *mon.BytesMonitor,
   175  ) (streamMerger, error) {
   176  	if len(leftOrdering) != len(rightOrdering) {
   177  		return streamMerger{}, errors.Errorf(
   178  			"ordering lengths don't match: %d and %d", len(leftOrdering), len(rightOrdering))
   179  	}
   180  	for i, ord := range leftOrdering {
   181  		if ord.Direction != rightOrdering[i].Direction {
   182  			return streamMerger{}, errors.New("Ordering mismatch")
   183  		}
   184  	}
   185  
   186  	return streamMerger{
   187  		left:         makeStreamGroupAccumulator(leftSource, leftOrdering, memMonitor),
   188  		right:        makeStreamGroupAccumulator(rightSource, rightOrdering, memMonitor),
   189  		nullEquality: nullEquality,
   190  	}, nil
   191  }