github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/rowexec/indexjoiner.go (about)

     1  // Copyright 2018 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package rowexec
    12  
    13  import (
    14  	"context"
    15  	"fmt"
    16  
    17  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    18  	"github.com/cockroachdb/cockroach/pkg/sql/execinfra"
    19  	"github.com/cockroachdb/cockroach/pkg/sql/execinfrapb"
    20  	"github.com/cockroachdb/cockroach/pkg/sql/row"
    21  	"github.com/cockroachdb/cockroach/pkg/sql/scrub"
    22  	"github.com/cockroachdb/cockroach/pkg/sql/span"
    23  	"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
    24  	"github.com/cockroachdb/cockroach/pkg/util/tracing"
    25  	"github.com/cockroachdb/errors"
    26  	"github.com/opentracing/opentracing-go"
    27  )
    28  
// indexJoinerBatchSize is the default value of indexJoiner.batchSize: the
// number of lookup spans accumulated from the input before a scan of the
// primary index is started.
const indexJoinerBatchSize = 10000
    30  
// indexJoiner performs a join between a secondary index, the `input`, and the
// primary index of the same table, `desc`, to retrieve columns which are not
// stored in the secondary index.
//
// Input rows are converted into primary-index spans, gathered into batches,
// and each batch is scanned through `fetcher`.
type indexJoiner struct {
	execinfra.ProcessorBase

	// input supplies the rows from which primary-index lookup spans are built.
	input execinfra.RowSource
	// desc is the descriptor of the table being joined against; it is copied
	// from the processor spec.
	desc  sqlbase.TableDescriptor

	// fetcher wraps the row.Fetcher used to perform lookups. This enables the
	// indexJoiner to wrap the fetcher with a stat collector when necessary.
	fetcher rowFetcher
	// fetcherReady indicates that we have started an index scan and there are
	// potentially more rows to retrieve.
	fetcherReady bool
	// Batch size for fetches. Not a constant so we can lower for testing.
	batchSize int

	// spans is the batch of spans we will next retrieve from the index. It is
	// truncated to length zero (keeping its capacity) once a scan over it has
	// been started.
	spans roachpb.Spans

	// alloc is the datum allocator handed to the row fetcher when it is
	// initialized.
	alloc sqlbase.DatumAlloc

	// spanBuilder turns the key columns of an input row into primary-index
	// spans.
	spanBuilder *span.Builder
}
    56  
    57  var _ execinfra.Processor = &indexJoiner{}
    58  var _ execinfra.RowSource = &indexJoiner{}
    59  var _ execinfrapb.MetadataSource = &indexJoiner{}
    60  var _ execinfra.OpNode = &indexJoiner{}
    61  
// indexJoinerProcName is the processor name passed to StartInternal when the
// indexJoiner is started.
const indexJoinerProcName = "index joiner"
    63  
    64  // newIndexJoiner returns a new indexJoiner.
    65  func newIndexJoiner(
    66  	flowCtx *execinfra.FlowCtx,
    67  	processorID int32,
    68  	spec *execinfrapb.JoinReaderSpec,
    69  	input execinfra.RowSource,
    70  	post *execinfrapb.PostProcessSpec,
    71  	output execinfra.RowReceiver,
    72  ) (execinfra.RowSourcedProcessor, error) {
    73  	if spec.IndexIdx != 0 {
    74  		return nil, errors.Errorf("index join must be against primary index")
    75  	}
    76  	ij := &indexJoiner{
    77  		input:     input,
    78  		desc:      spec.Table,
    79  		batchSize: indexJoinerBatchSize,
    80  	}
    81  	needMutations := spec.Visibility == execinfra.ScanVisibilityPublicAndNotPublic
    82  	if err := ij.Init(
    83  		ij,
    84  		post,
    85  		ij.desc.ColumnTypesWithMutations(needMutations),
    86  		flowCtx,
    87  		processorID,
    88  		output,
    89  		nil, /* memMonitor */
    90  		execinfra.ProcStateOpts{
    91  			InputsToDrain: []execinfra.RowSource{ij.input},
    92  			TrailingMetaCallback: func(ctx context.Context) []execinfrapb.ProducerMetadata {
    93  				ij.InternalClose()
    94  				return ij.generateMeta(ctx)
    95  			},
    96  		},
    97  	); err != nil {
    98  		return nil, err
    99  	}
   100  	var fetcher row.Fetcher
   101  	if _, _, err := initRowFetcher(
   102  		flowCtx,
   103  		&fetcher,
   104  		&ij.desc,
   105  		0, /* primary index */
   106  		ij.desc.ColumnIdxMapWithMutations(needMutations),
   107  		false, /* reverse */
   108  		ij.Out.NeededColumns(),
   109  		false, /* isCheck */
   110  		&ij.alloc,
   111  		spec.Visibility,
   112  		spec.LockingStrength,
   113  	); err != nil {
   114  		return nil, err
   115  	}
   116  
   117  	if sp := opentracing.SpanFromContext(flowCtx.EvalCtx.Ctx()); sp != nil && tracing.IsRecording(sp) {
   118  		// Enable stats collection.
   119  		ij.input = newInputStatCollector(ij.input)
   120  		ij.fetcher = newRowFetcherStatCollector(&fetcher)
   121  		ij.FinishTrace = ij.outputStatsToTrace
   122  	} else {
   123  		ij.fetcher = &fetcher
   124  	}
   125  
   126  	ij.spanBuilder = span.MakeBuilder(flowCtx.Codec(), &spec.Table, &spec.Table.PrimaryIndex)
   127  	ij.spanBuilder.SetNeededColumns(ij.Out.NeededColumns())
   128  
   129  	return ij, nil
   130  }
   131  
// SetBatchSize sets the desired batch size, i.e. the number of lookup spans
// gathered from the input before a primary-index scan is started. It should
// only be used in tests.
func (ij *indexJoiner) SetBatchSize(batchSize int) {
	ij.batchSize = batchSize
}
   136  
   137  // Start is part of the RowSource interface.
   138  func (ij *indexJoiner) Start(ctx context.Context) context.Context {
   139  	ij.input.Start(ctx)
   140  	return ij.StartInternal(ctx, indexJoinerProcName)
   141  }
   142  
   143  // Next is part of the RowSource interface.
   144  func (ij *indexJoiner) Next() (sqlbase.EncDatumRow, *execinfrapb.ProducerMetadata) {
   145  	for ij.State == execinfra.StateRunning {
   146  		if !ij.fetcherReady {
   147  			// Retrieve a batch of rows from the input.
   148  			for len(ij.spans) < ij.batchSize {
   149  				row, meta := ij.input.Next()
   150  				if meta != nil {
   151  					if meta.Err != nil {
   152  						ij.MoveToDraining(nil /* err */)
   153  					}
   154  					return nil, meta
   155  				}
   156  				if row == nil {
   157  					break
   158  				}
   159  				spans, err := ij.generateSpans(row)
   160  				if err != nil {
   161  					ij.MoveToDraining(err)
   162  					return nil, ij.DrainHelper()
   163  				}
   164  				ij.spans = append(ij.spans, spans...)
   165  			}
   166  			if len(ij.spans) == 0 {
   167  				// All done.
   168  				ij.MoveToDraining(nil /* err */)
   169  				return nil, ij.DrainHelper()
   170  			}
   171  			// Scan the primary index for this batch.
   172  			err := ij.fetcher.StartScan(
   173  				ij.Ctx, ij.FlowCtx.Txn, ij.spans, false /* limitBatches */, 0, /* limitHint */
   174  				ij.FlowCtx.TraceKV)
   175  			if err != nil {
   176  				ij.MoveToDraining(err)
   177  				return nil, ij.DrainHelper()
   178  			}
   179  			ij.fetcherReady = true
   180  			ij.spans = ij.spans[:0]
   181  		}
   182  		row, _, _, err := ij.fetcher.NextRow(ij.Ctx)
   183  		if err != nil {
   184  			ij.MoveToDraining(scrub.UnwrapScrubError(err))
   185  			return nil, ij.DrainHelper()
   186  		}
   187  		if row == nil {
   188  			// Done with this batch.
   189  			ij.fetcherReady = false
   190  		} else if outRow := ij.ProcessRowHelper(row); outRow != nil {
   191  			return outRow, nil
   192  		}
   193  	}
   194  	return nil, ij.DrainHelper()
   195  }
   196  
// ConsumerClosed is part of the RowSource interface. It closes the processor
// via InternalClose.
func (ij *indexJoiner) ConsumerClosed() {
	// The consumer is done, Next() will not be called again.
	ij.InternalClose()
}
   202  
   203  func (ij *indexJoiner) generateSpans(row sqlbase.EncDatumRow) (roachpb.Spans, error) {
   204  	numKeyCols := len(ij.desc.PrimaryIndex.ColumnIDs)
   205  	if len(row) < numKeyCols {
   206  		return nil, errors.Errorf(
   207  			"index join input has %d columns, expected at least %d", len(row), numKeyCols)
   208  	}
   209  	// There may be extra values on the row, e.g. to allow an ordered
   210  	// synchronizer to interleave multiple input streams. Will need at most
   211  	// numKeyCols.
   212  	span, containsNull, err := ij.spanBuilder.SpanFromEncDatums(row, numKeyCols)
   213  	if err != nil {
   214  		return nil, err
   215  	}
   216  	return ij.spanBuilder.MaybeSplitSpanIntoSeparateFamilies(
   217  		nil /* appendTo */, span, numKeyCols, containsNull,
   218  	), nil
   219  }
   220  
   221  // outputStatsToTrace outputs the collected indexJoiner stats to the trace. Will
   222  // fail silently if the indexJoiner is not collecting stats.
   223  func (ij *indexJoiner) outputStatsToTrace() {
   224  	is, ok := getInputStats(ij.FlowCtx, ij.input)
   225  	if !ok {
   226  		return
   227  	}
   228  	ils, ok := getFetcherInputStats(ij.FlowCtx, ij.fetcher)
   229  	if !ok {
   230  		return
   231  	}
   232  	jrs := &JoinReaderStats{
   233  		InputStats:       is,
   234  		IndexLookupStats: ils,
   235  	}
   236  	if sp := opentracing.SpanFromContext(ij.Ctx); sp != nil {
   237  		tracing.SetSpanStats(sp, jrs)
   238  	}
   239  }
   240  
   241  func (ij *indexJoiner) generateMeta(ctx context.Context) []execinfrapb.ProducerMetadata {
   242  	if tfs := execinfra.GetLeafTxnFinalState(ctx, ij.FlowCtx.Txn); tfs != nil {
   243  		return []execinfrapb.ProducerMetadata{{LeafTxnFinalState: tfs}}
   244  	}
   245  	return nil
   246  }
   247  
// DrainMeta is part of the MetadataSource interface. It returns whatever
// generateMeta produces for ctx.
func (ij *indexJoiner) DrainMeta(ctx context.Context) []execinfrapb.ProducerMetadata {
	return ij.generateMeta(ctx)
}
   252  
   253  // ChildCount is part of the execinfra.OpNode interface.
   254  func (ij *indexJoiner) ChildCount(verbose bool) int {
   255  	if _, ok := ij.input.(execinfra.OpNode); ok {
   256  		return 1
   257  	}
   258  	return 0
   259  }
   260  
   261  // Child is part of the execinfra.OpNode interface.
   262  func (ij *indexJoiner) Child(nth int, verbose bool) execinfra.OpNode {
   263  	if nth == 0 {
   264  		if n, ok := ij.input.(execinfra.OpNode); ok {
   265  			return n
   266  		}
   267  		panic("input to indexJoiner is not an execinfra.OpNode")
   268  	}
   269  	panic(fmt.Sprintf("invalid index %d", nth))
   270  }