github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/rowexec/joinreader_strategies.go (about)

     1  // Copyright 2020 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package rowexec
    12  
    13  import (
    14  	"context"
    15  
    16  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    17  	"github.com/cockroachdb/cockroach/pkg/sql/rowcontainer"
    18  	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
    19  	"github.com/cockroachdb/cockroach/pkg/sql/span"
    20  	"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
    21  	"github.com/cockroachdb/cockroach/pkg/util/log"
    22  	"github.com/cockroachdb/errors"
    23  )
    24  
// defaultSpanGenerator turns input rows into lookup spans and remembers which
// input rows produced each span key, so that looked up rows can later be
// joined back to the input rows that requested them.
type defaultSpanGenerator struct {
	// spanBuilder builds a span from the lookup column values of a row and
	// optionally splits it into family-specific spans. numKeyCols is the
	// maximum number of lookup columns generateSpan accepts, and lookupCols
	// holds the positions, within an input row, of the columns that make up the
	// lookup key.
	spanBuilder *span.Builder
	numKeyCols  int
	lookupCols  []uint32

	// indexKeyRow is scratch space reused by generateSpan to hold the lookup
	// column values of the row being processed. keyToInputRowIndices maps the
	// start key of a generated span (converted to a string) to the indices of
	// the input rows that produced it; it is cleared at the start of every
	// generateSpans call.
	indexKeyRow          sqlbase.EncDatumRow
	keyToInputRowIndices map[string][]int

	// scratchSpans is reused across generateSpans calls and backs the spans
	// that generateSpans returns.
	scratchSpans roachpb.Spans
}
    35  
    36  // Generate spans for a given row.
    37  // If lookup columns are specified will use those to collect the relevant
    38  // columns. Otherwise the first rows are assumed to correspond with the index.
    39  // It additionally returns whether the row contains null, which is needed to
    40  // decide whether or not to split the generated span into separate family
    41  // specific spans.
    42  func (g *defaultSpanGenerator) generateSpan(
    43  	row sqlbase.EncDatumRow,
    44  ) (_ roachpb.Span, containsNull bool, _ error) {
    45  	numLookupCols := len(g.lookupCols)
    46  	if numLookupCols > g.numKeyCols {
    47  		return roachpb.Span{}, false, errors.Errorf(
    48  			"%d lookup columns specified, expecting at most %d", numLookupCols, g.numKeyCols)
    49  	}
    50  
    51  	g.indexKeyRow = g.indexKeyRow[:0]
    52  	for _, id := range g.lookupCols {
    53  		g.indexKeyRow = append(g.indexKeyRow, row[id])
    54  	}
    55  	return g.spanBuilder.SpanFromEncDatums(g.indexKeyRow, numLookupCols)
    56  }
    57  
    58  func (g *defaultSpanGenerator) hasNullLookupColumn(row sqlbase.EncDatumRow) bool {
    59  	for _, colIdx := range g.lookupCols {
    60  		if row[colIdx].IsNull() {
    61  			return true
    62  		}
    63  	}
    64  	return false
    65  }
    66  
    67  func (g *defaultSpanGenerator) generateSpans(rows []sqlbase.EncDatumRow) (roachpb.Spans, error) {
    68  	// This loop gets optimized to a runtime.mapclear call.
    69  	for k := range g.keyToInputRowIndices {
    70  		delete(g.keyToInputRowIndices, k)
    71  	}
    72  	// We maintain a map from index key to the corresponding input rows so we can
    73  	// join the index results to the inputs.
    74  	g.scratchSpans = g.scratchSpans[:0]
    75  	for i, inputRow := range rows {
    76  		if g.hasNullLookupColumn(inputRow) {
    77  			continue
    78  		}
    79  		generatedSpan, containsNull, err := g.generateSpan(inputRow)
    80  		if err != nil {
    81  			return nil, err
    82  		}
    83  		inputRowIndices := g.keyToInputRowIndices[string(generatedSpan.Key)]
    84  		if inputRowIndices == nil {
    85  			g.scratchSpans = g.spanBuilder.MaybeSplitSpanIntoSeparateFamilies(
    86  				g.scratchSpans, generatedSpan, len(g.lookupCols), containsNull)
    87  		}
    88  		g.keyToInputRowIndices[string(generatedSpan.Key)] = append(inputRowIndices, i)
    89  	}
    90  	return g.scratchSpans, nil
    91  }
    92  
// joinReaderStrategy abstracts how the joinReader turns a batch of buffered
// input rows into lookup spans, consumes the rows that come back from the
// lookup, and produces the rows to emit.
type joinReaderStrategy interface {
	// getLookupRowsBatchSizeHint returns the size in bytes of the batch of lookup
	// rows.
	getLookupRowsBatchSizeHint() int64
	// processLookupRows consumes the rows the joinReader has buffered and should
	// return the lookup spans.
	processLookupRows(rows []sqlbase.EncDatumRow) (roachpb.Spans, error)
	// processLookedUpRow processes a looked up row; key identifies the lookup
	// span the row was found under. A joinReaderState is returned to indicate
	// the next state to transition to. If this next state is
	// jrPerformingLookup, processLookedUpRow will be called again if the looked
	// up rows have not been exhausted. A transition to jrStateUnknown is
	// unsupported, but if an error is returned, the joinReader will transition
	// to draining.
	processLookedUpRow(ctx context.Context, row sqlbase.EncDatumRow, key roachpb.Key) (joinReaderState, error)
	// prepareToEmit informs the strategy implementation that all looked up rows
	// have been read, and that it should prepare for calls to nextRowToEmit.
	prepareToEmit(ctx context.Context)
	// nextRowToEmit gets the next row to emit from the strategy. An accompanying
	// joinReaderState is also returned, indicating a state to transition to after
	// emitting this row. A transition to jrStateUnknown is unsupported, but if an
	// error is returned, the joinReader will transition to draining.
	nextRowToEmit(ctx context.Context) (sqlbase.EncDatumRow, joinReaderState, error)
	// spilled returns whether the strategy spilled to disk.
	spilled() bool
	// close releases any resources associated with the joinReaderStrategy.
	close(ctx context.Context)
}
   120  
// joinReaderNoOrderingStrategy is a joinReaderStrategy that doesn't maintain
// the input ordering. This is more performant than joinReaderOrderingStrategy.
type joinReaderNoOrderingStrategy struct {
	*joinerBase
	defaultSpanGenerator
	// isPartialJoin, when set, makes processLookedUpRow consider only input
	// rows that have not been matched yet. inputRows is the current batch of
	// input rows, as handed to processLookupRows.
	isPartialJoin bool
	inputRows     []sqlbase.EncDatumRow
	// matched[i] specifies whether inputRows[i] had a match. It is truncated to
	// length zero once the unmatched row indices have been collected from it.
	matched []bool

	// scratchMatchingInputRowIndices is reused by processLookedUpRow to hold the
	// filtered list of not-yet-matched input row indices for partial joins.
	scratchMatchingInputRowIndices []int

	emitState struct {
		// processingLookupRow is an explicit boolean that specifies whether the
		// strategy is currently processing a match. This is set to true in
		// processLookedUpRow and causes nextRowToEmit to process the data in
		// emitState. If set to false, the strategy determines in nextRowToEmit
		// that no more looked up rows need processing, so unmatched input rows need
		// to be emitted. unmatchedInputRowIndicesCursor is the position of the next
		// entry of unmatchedInputRowIndices to emit.
		processingLookupRow            bool
		unmatchedInputRowIndicesCursor int
		// unmatchedInputRowIndices is used only when emitting unmatched rows after
		// processing lookup results. It is populated once when first emitting
		// unmatched rows. matchingInputRowIndices holds the indices of the input
		// rows to pair with lookedUpRow, and matchingInputRowIndicesCursor is the
		// position of the next one to process; all three are set by
		// processLookedUpRow.
		unmatchedInputRowIndices      []int
		matchingInputRowIndicesCursor int
		matchingInputRowIndices       []int
		lookedUpRow                   sqlbase.EncDatumRow
	}
}
   151  
   152  // getLookupRowsBatchSizeHint returns the batch size for the join reader no
   153  // ordering strategy. This number was chosen by running TPCH queries 7, 9, 10,
   154  // and 11 with varying batch sizes and choosing the smallest batch size that
   155  // offered a significant performance improvement. Larger batch sizes offered
   156  // small to no marginal improvements.
   157  func (s *joinReaderNoOrderingStrategy) getLookupRowsBatchSizeHint() int64 {
   158  	return 2 << 20 /* 2 MiB */
   159  }
   160  
   161  func (s *joinReaderNoOrderingStrategy) processLookupRows(
   162  	rows []sqlbase.EncDatumRow,
   163  ) (roachpb.Spans, error) {
   164  	s.inputRows = rows
   165  	if cap(s.matched) < len(s.inputRows) {
   166  		s.matched = make([]bool, len(s.inputRows))
   167  	} else {
   168  		s.matched = s.matched[:len(s.inputRows)]
   169  		for i := range s.matched {
   170  			s.matched[i] = false
   171  		}
   172  	}
   173  	return s.generateSpans(s.inputRows)
   174  }
   175  
   176  func (s *joinReaderNoOrderingStrategy) processLookedUpRow(
   177  	_ context.Context, row sqlbase.EncDatumRow, key roachpb.Key,
   178  ) (joinReaderState, error) {
   179  	matchingInputRowIndices := s.keyToInputRowIndices[string(key)]
   180  	if s.isPartialJoin {
   181  		// In the case of partial joins, only process input rows that have not been
   182  		// matched yet. Make a copy of the matching input row indices to avoid
   183  		// overwriting the caller's slice.
   184  		s.scratchMatchingInputRowIndices = s.scratchMatchingInputRowIndices[:0]
   185  		for _, inputRowIdx := range matchingInputRowIndices {
   186  			if !s.matched[inputRowIdx] {
   187  				s.scratchMatchingInputRowIndices = append(s.scratchMatchingInputRowIndices, inputRowIdx)
   188  			}
   189  		}
   190  		matchingInputRowIndices = s.scratchMatchingInputRowIndices
   191  	}
   192  	s.emitState.processingLookupRow = true
   193  	s.emitState.lookedUpRow = row
   194  	s.emitState.matchingInputRowIndices = matchingInputRowIndices
   195  	s.emitState.matchingInputRowIndicesCursor = 0
   196  	return jrEmittingRows, nil
   197  }
   198  
   199  func (s *joinReaderNoOrderingStrategy) prepareToEmit(ctx context.Context) {}
   200  
   201  func (s *joinReaderNoOrderingStrategy) nextRowToEmit(
   202  	_ context.Context,
   203  ) (sqlbase.EncDatumRow, joinReaderState, error) {
   204  	if !s.emitState.processingLookupRow {
   205  		// processLookedUpRow was not called before nextRowToEmit, which means that
   206  		// the next unmatched row needs to be processed.
   207  		if !shouldEmitUnmatchedRow(leftSide, s.joinType) {
   208  			// The joinType does not require the joiner to emit unmatched rows. Move
   209  			// on to the next batch of lookup rows.
   210  			return nil, jrReadingInput, nil
   211  		}
   212  
   213  		if len(s.matched) != 0 {
   214  			s.emitState.unmatchedInputRowIndices = s.emitState.unmatchedInputRowIndices[:0]
   215  			for inputRowIdx, m := range s.matched {
   216  				if !m {
   217  					s.emitState.unmatchedInputRowIndices = append(s.emitState.unmatchedInputRowIndices, inputRowIdx)
   218  				}
   219  			}
   220  			s.matched = s.matched[:0]
   221  			s.emitState.unmatchedInputRowIndicesCursor = 0
   222  		}
   223  
   224  		if s.emitState.unmatchedInputRowIndicesCursor >= len(s.emitState.unmatchedInputRowIndices) {
   225  			// All unmatched rows have been emitted.
   226  			return nil, jrReadingInput, nil
   227  		}
   228  		inputRow := s.inputRows[s.emitState.unmatchedInputRowIndices[s.emitState.unmatchedInputRowIndicesCursor]]
   229  		s.emitState.unmatchedInputRowIndicesCursor++
   230  		if !s.joinType.ShouldIncludeRightColsInOutput() {
   231  			return inputRow, jrEmittingRows, nil
   232  		}
   233  		return s.renderUnmatchedRow(inputRow, leftSide), jrEmittingRows, nil
   234  	}
   235  
   236  	for s.emitState.matchingInputRowIndicesCursor < len(s.emitState.matchingInputRowIndices) {
   237  		inputRowIdx := s.emitState.matchingInputRowIndices[s.emitState.matchingInputRowIndicesCursor]
   238  		s.emitState.matchingInputRowIndicesCursor++
   239  		inputRow := s.inputRows[inputRowIdx]
   240  
   241  		// Render the output row, this also evaluates the ON condition.
   242  		outputRow, err := s.render(inputRow, s.emitState.lookedUpRow)
   243  		if err != nil {
   244  			return nil, jrStateUnknown, err
   245  		}
   246  		if outputRow == nil {
   247  			// This row failed the ON condition, so it remains unmatched.
   248  			continue
   249  		}
   250  
   251  		s.matched[inputRowIdx] = true
   252  		if !s.joinType.ShouldIncludeRightColsInOutput() {
   253  			if s.joinType == sqlbase.LeftAntiJoin {
   254  				// Skip emitting row.
   255  				continue
   256  			}
   257  			return inputRow, jrEmittingRows, nil
   258  		}
   259  		return outputRow, jrEmittingRows, nil
   260  	}
   261  
   262  	// Processed all matches for a given lookup row, move to the next lookup row.
   263  	// Set processingLookupRow to false explicitly so if the joinReader re-enters
   264  	// nextRowToEmit, the strategy knows that no more lookup rows were processed
   265  	// and should proceed to emit unmatched rows.
   266  	s.emitState.processingLookupRow = false
   267  	return nil, jrPerformingLookup, nil
   268  }
   269  
   270  func (s *joinReaderNoOrderingStrategy) spilled() bool { return false }
   271  
   272  func (s *joinReaderNoOrderingStrategy) close(_ context.Context) {}
   273  
   274  // partialJoinSentinel is used as the inputRowIdxToLookedUpRowIndices value for
   275  // semi- and anti-joins, where we only need to know about the existence of a
   276  // match.
   277  var partialJoinSentinel = []int{-1}
   278  
// joinReaderOrderingStrategy is a joinReaderStrategy that maintains the input
// ordering. This is more expensive than joinReaderNoOrderingStrategy.
type joinReaderOrderingStrategy struct {
	*joinerBase
	defaultSpanGenerator
	// isPartialJoin selects the semi/anti join handling described on
	// inputRowIdxToLookedUpRowIndices below.
	isPartialJoin bool

	// inputRows is the current batch of input rows, as handed to
	// processLookupRows.
	inputRows []sqlbase.EncDatumRow

	// inputRowIdxToLookedUpRowIndices is a multimap from input row indices to
	// corresponding looked up row indices. It's populated in the
	// jrPerformingLookup state. For non partial joins (everything but semi/anti
	// join), the looked up rows are the rows that came back from the lookup
	// span for each input row, without checking for matches with respect to the
	// on-condition. For semi/anti join, we store at most one sentinel value,
	// indicating a matching lookup if it's present, since the right side of a
	// semi/anti join is not used.
	inputRowIdxToLookedUpRowIndices [][]int

	// lookedUpRowIdx is the index recorded for the looked up row currently
	// being processed; it is incremented after each looked up row and reset to
	// zero once a batch has been fully emitted. lookedUpRows buffers the looked
	// up rows of non partial joins.
	lookedUpRowIdx int
	lookedUpRows   *rowcontainer.DiskBackedNumberedRowContainer

	// emitCursor contains information about where the next row to emit is within
	// inputRowIdxToLookedUpRowIndices.
	emitCursor struct {
		// inputRowIdx contains the index into inputRowIdxToLookedUpRowIndices that
		// we're about to emit.
		inputRowIdx int
		// outputRowIdx contains the index into the inputRowIdx'th row of
		// inputRowIdxToLookedUpRowIndices that we're about to emit.
		outputRowIdx int
		// seenMatch is true if there was a match at the current inputRowIdx. A
		// match means that there's no need to output an outer or anti join row.
		seenMatch bool
	}
}
   315  
   316  func (s *joinReaderOrderingStrategy) getLookupRowsBatchSizeHint() int64 {
   317  	// TODO(asubiotto): Eventually we might want to adjust this batch size
   318  	//  dynamically based on whether the result row container spilled or not.
   319  	return 10 << 10 /* 10 KiB */
   320  }
   321  
   322  func (s *joinReaderOrderingStrategy) processLookupRows(
   323  	rows []sqlbase.EncDatumRow,
   324  ) (roachpb.Spans, error) {
   325  	// Maintain a map from input row index to the corresponding output rows. This
   326  	// will allow us to preserve the order of the input in the face of multiple
   327  	// input rows having the same lookup keyspan, or if we're doing an outer join
   328  	// and we need to emit unmatched rows.
   329  	if cap(s.inputRowIdxToLookedUpRowIndices) >= len(rows) {
   330  		s.inputRowIdxToLookedUpRowIndices = s.inputRowIdxToLookedUpRowIndices[:len(rows)]
   331  		for i := range s.inputRowIdxToLookedUpRowIndices {
   332  			s.inputRowIdxToLookedUpRowIndices[i] = s.inputRowIdxToLookedUpRowIndices[i][:0]
   333  		}
   334  	} else {
   335  		s.inputRowIdxToLookedUpRowIndices = make([][]int, len(rows))
   336  	}
   337  
   338  	s.inputRows = rows
   339  	return s.generateSpans(s.inputRows)
   340  }
   341  
   342  func (s *joinReaderOrderingStrategy) processLookedUpRow(
   343  	ctx context.Context, row sqlbase.EncDatumRow, key roachpb.Key,
   344  ) (joinReaderState, error) {
   345  	matchingInputRowIndices := s.keyToInputRowIndices[string(key)]
   346  	if !s.isPartialJoin {
   347  		// Replace missing values with nulls to appease the row container.
   348  		for i := range row {
   349  			if row[i].IsUnset() {
   350  				row[i].Datum = tree.DNull
   351  			}
   352  		}
   353  		if _, err := s.lookedUpRows.AddRow(ctx, row); err != nil {
   354  			return jrStateUnknown, err
   355  		}
   356  	}
   357  
   358  	// Update our map from input rows to looked up rows.
   359  	for _, inputRowIdx := range matchingInputRowIndices {
   360  		if !s.isPartialJoin {
   361  			s.inputRowIdxToLookedUpRowIndices[inputRowIdx] = append(
   362  				s.inputRowIdxToLookedUpRowIndices[inputRowIdx], s.lookedUpRowIdx)
   363  			continue
   364  		}
   365  
   366  		// During a SemiJoin or AntiJoin, we only output if we've seen no match
   367  		// for this input row yet. Additionally, since we don't have to render
   368  		// anything to output a Semi or Anti join match, we can evaluate our
   369  		// on condition now and only buffer if we pass it.
   370  		if len(s.inputRowIdxToLookedUpRowIndices[inputRowIdx]) == 0 {
   371  			renderedRow, err := s.render(s.inputRows[inputRowIdx], row)
   372  			if err != nil {
   373  				return jrStateUnknown, err
   374  			}
   375  			if renderedRow == nil {
   376  				// We failed our on-condition - don't buffer anything.
   377  				continue
   378  			}
   379  			s.inputRowIdxToLookedUpRowIndices[inputRowIdx] = partialJoinSentinel
   380  		}
   381  	}
   382  	s.lookedUpRowIdx++
   383  
   384  	return jrPerformingLookup, nil
   385  }
   386  
   387  func (s *joinReaderOrderingStrategy) prepareToEmit(ctx context.Context) {
   388  	if !s.isPartialJoin {
   389  		s.lookedUpRows.SetupForRead(ctx, s.inputRowIdxToLookedUpRowIndices)
   390  	}
   391  }
   392  
   393  func (s *joinReaderOrderingStrategy) nextRowToEmit(
   394  	ctx context.Context,
   395  ) (sqlbase.EncDatumRow, joinReaderState, error) {
   396  	if s.emitCursor.inputRowIdx >= len(s.inputRowIdxToLookedUpRowIndices) {
   397  		log.VEventf(ctx, 1, "done emitting rows")
   398  		// Ready for another input batch. Reset state.
   399  		s.emitCursor.outputRowIdx = 0
   400  		s.emitCursor.inputRowIdx = 0
   401  		s.emitCursor.seenMatch = false
   402  		if err := s.lookedUpRows.UnsafeReset(ctx); err != nil {
   403  			return nil, jrStateUnknown, err
   404  		}
   405  		s.lookedUpRowIdx = 0
   406  		return nil, jrReadingInput, nil
   407  	}
   408  
   409  	inputRow := s.inputRows[s.emitCursor.inputRowIdx]
   410  	lookedUpRows := s.inputRowIdxToLookedUpRowIndices[s.emitCursor.inputRowIdx]
   411  	if s.emitCursor.outputRowIdx >= len(lookedUpRows) {
   412  		// We have no more rows for the current input row. Emit an outer or anti
   413  		// row if we didn't see a match, and bump to the next input row.
   414  		s.emitCursor.inputRowIdx++
   415  		s.emitCursor.outputRowIdx = 0
   416  		seenMatch := s.emitCursor.seenMatch
   417  		s.emitCursor.seenMatch = false
   418  		if !seenMatch {
   419  			switch s.joinType {
   420  			case sqlbase.LeftOuterJoin:
   421  				// An outer-join non-match means we emit the input row with NULLs for
   422  				// the right side (if it passes the ON-condition).
   423  				if renderedRow := s.renderUnmatchedRow(inputRow, leftSide); renderedRow != nil {
   424  					return renderedRow, jrEmittingRows, nil
   425  				}
   426  			case sqlbase.LeftAntiJoin:
   427  				// An anti-join non-match means we emit the input row.
   428  				return inputRow, jrEmittingRows, nil
   429  			}
   430  		}
   431  		return nil, jrEmittingRows, nil
   432  	}
   433  
   434  	lookedUpRowIdx := lookedUpRows[s.emitCursor.outputRowIdx]
   435  	s.emitCursor.outputRowIdx++
   436  	switch s.joinType {
   437  	case sqlbase.LeftSemiJoin:
   438  		// A semi-join match means we emit our input row.
   439  		s.emitCursor.seenMatch = true
   440  		return inputRow, jrEmittingRows, nil
   441  	case sqlbase.LeftAntiJoin:
   442  		// An anti-join match means we emit nothing.
   443  		s.emitCursor.seenMatch = true
   444  		return nil, jrEmittingRows, nil
   445  	}
   446  
   447  	lookedUpRow, err := s.lookedUpRows.GetRow(s.Ctx, lookedUpRowIdx, false /* skip */)
   448  	if err != nil {
   449  		return nil, jrStateUnknown, err
   450  	}
   451  	outputRow, err := s.render(inputRow, lookedUpRow)
   452  	if err != nil {
   453  		return nil, jrStateUnknown, err
   454  	}
   455  	if outputRow != nil {
   456  		s.emitCursor.seenMatch = true
   457  	}
   458  	return outputRow, jrEmittingRows, nil
   459  }
   460  
   461  func (s *joinReaderOrderingStrategy) spilled() bool {
   462  	return s.lookedUpRows.Spilled()
   463  }
   464  
   465  func (s *joinReaderOrderingStrategy) close(ctx context.Context) {
   466  	if s.lookedUpRows != nil {
   467  		s.lookedUpRows.Close(ctx)
   468  	}
   469  }