github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/rowexec/indexjoiner.go (about) 1 // Copyright 2018 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package rowexec 12 13 import ( 14 "context" 15 "fmt" 16 17 "github.com/cockroachdb/cockroach/pkg/roachpb" 18 "github.com/cockroachdb/cockroach/pkg/sql/execinfra" 19 "github.com/cockroachdb/cockroach/pkg/sql/execinfrapb" 20 "github.com/cockroachdb/cockroach/pkg/sql/row" 21 "github.com/cockroachdb/cockroach/pkg/sql/scrub" 22 "github.com/cockroachdb/cockroach/pkg/sql/span" 23 "github.com/cockroachdb/cockroach/pkg/sql/sqlbase" 24 "github.com/cockroachdb/cockroach/pkg/util/tracing" 25 "github.com/cockroachdb/errors" 26 "github.com/opentracing/opentracing-go" 27 ) 28 29 const indexJoinerBatchSize = 10000 30 31 // indexJoiner performs a join between a secondary index, the `input`, and the 32 // primary index of the same table, `desc`, to retrieve columns which are not 33 // stored in the secondary index. 34 type indexJoiner struct { 35 execinfra.ProcessorBase 36 37 input execinfra.RowSource 38 desc sqlbase.TableDescriptor 39 40 // fetcher wraps the row.Fetcher used to perform lookups. This enables the 41 // indexJoiner to wrap the fetcher with a stat collector when necessary. 42 fetcher rowFetcher 43 // fetcherReady indicates that we have started an index scan and there are 44 // potentially more rows to retrieve. 45 fetcherReady bool 46 // Batch size for fetches. Not a constant so we can lower for testing. 47 batchSize int 48 49 // spans is the batch of spans we will next retrieve from the index. 50 spans roachpb.Spans 51 52 alloc sqlbase.DatumAlloc 53 54 spanBuilder *span.Builder 55 } 56 57 var _ execinfra.Processor = &indexJoiner{} 58 var _ execinfra.RowSource = &indexJoiner{} 59 var _ execinfrapb.MetadataSource = &indexJoiner{} 60 var _ execinfra.OpNode = &indexJoiner{} 61 62 const indexJoinerProcName = "index joiner" 63 64 // newIndexJoiner returns a new indexJoiner. 65 func newIndexJoiner( 66 flowCtx *execinfra.FlowCtx, 67 processorID int32, 68 spec *execinfrapb.JoinReaderSpec, 69 input execinfra.RowSource, 70 post *execinfrapb.PostProcessSpec, 71 output execinfra.RowReceiver, 72 ) (execinfra.RowSourcedProcessor, error) { 73 if spec.IndexIdx != 0 { 74 return nil, errors.Errorf("index join must be against primary index") 75 } 76 ij := &indexJoiner{ 77 input: input, 78 desc: spec.Table, 79 batchSize: indexJoinerBatchSize, 80 } 81 needMutations := spec.Visibility == execinfra.ScanVisibilityPublicAndNotPublic 82 if err := ij.Init( 83 ij, 84 post, 85 ij.desc.ColumnTypesWithMutations(needMutations), 86 flowCtx, 87 processorID, 88 output, 89 nil, /* memMonitor */ 90 execinfra.ProcStateOpts{ 91 InputsToDrain: []execinfra.RowSource{ij.input}, 92 TrailingMetaCallback: func(ctx context.Context) []execinfrapb.ProducerMetadata { 93 ij.InternalClose() 94 return ij.generateMeta(ctx) 95 }, 96 }, 97 ); err != nil { 98 return nil, err 99 } 100 var fetcher row.Fetcher 101 if _, _, err := initRowFetcher( 102 flowCtx, 103 &fetcher, 104 &ij.desc, 105 0, /* primary index */ 106 ij.desc.ColumnIdxMapWithMutations(needMutations), 107 false, /* reverse */ 108 ij.Out.NeededColumns(), 109 false, /* isCheck */ 110 &ij.alloc, 111 spec.Visibility, 112 spec.LockingStrength, 113 ); err != nil { 114 return nil, err 115 } 116 117 if sp := opentracing.SpanFromContext(flowCtx.EvalCtx.Ctx()); sp != nil && tracing.IsRecording(sp) { 118 // Enable stats collection. 119 ij.input = newInputStatCollector(ij.input) 120 ij.fetcher = newRowFetcherStatCollector(&fetcher) 121 ij.FinishTrace = ij.outputStatsToTrace 122 } else { 123 ij.fetcher = &fetcher 124 } 125 126 ij.spanBuilder = span.MakeBuilder(flowCtx.Codec(), &spec.Table, &spec.Table.PrimaryIndex) 127 ij.spanBuilder.SetNeededColumns(ij.Out.NeededColumns()) 128 129 return ij, nil 130 } 131 132 // SetBatchSize sets the desired batch size. It should only be used in tests. 133 func (ij *indexJoiner) SetBatchSize(batchSize int) { 134 ij.batchSize = batchSize 135 } 136 137 // Start is part of the RowSource interface. 138 func (ij *indexJoiner) Start(ctx context.Context) context.Context { 139 ij.input.Start(ctx) 140 return ij.StartInternal(ctx, indexJoinerProcName) 141 } 142 143 // Next is part of the RowSource interface. 144 func (ij *indexJoiner) Next() (sqlbase.EncDatumRow, *execinfrapb.ProducerMetadata) { 145 for ij.State == execinfra.StateRunning { 146 if !ij.fetcherReady { 147 // Retrieve a batch of rows from the input. 148 for len(ij.spans) < ij.batchSize { 149 row, meta := ij.input.Next() 150 if meta != nil { 151 if meta.Err != nil { 152 ij.MoveToDraining(nil /* err */) 153 } 154 return nil, meta 155 } 156 if row == nil { 157 break 158 } 159 spans, err := ij.generateSpans(row) 160 if err != nil { 161 ij.MoveToDraining(err) 162 return nil, ij.DrainHelper() 163 } 164 ij.spans = append(ij.spans, spans...) 165 } 166 if len(ij.spans) == 0 { 167 // All done. 168 ij.MoveToDraining(nil /* err */) 169 return nil, ij.DrainHelper() 170 } 171 // Scan the primary index for this batch. 172 err := ij.fetcher.StartScan( 173 ij.Ctx, ij.FlowCtx.Txn, ij.spans, false /* limitBatches */, 0, /* limitHint */ 174 ij.FlowCtx.TraceKV) 175 if err != nil { 176 ij.MoveToDraining(err) 177 return nil, ij.DrainHelper() 178 } 179 ij.fetcherReady = true 180 ij.spans = ij.spans[:0] 181 } 182 row, _, _, err := ij.fetcher.NextRow(ij.Ctx) 183 if err != nil { 184 ij.MoveToDraining(scrub.UnwrapScrubError(err)) 185 return nil, ij.DrainHelper() 186 } 187 if row == nil { 188 // Done with this batch. 189 ij.fetcherReady = false 190 } else if outRow := ij.ProcessRowHelper(row); outRow != nil { 191 return outRow, nil 192 } 193 } 194 return nil, ij.DrainHelper() 195 } 196 197 // ConsumerClosed is part of the RowSource interface. 198 func (ij *indexJoiner) ConsumerClosed() { 199 // The consumer is done, Next() will not be called again. 200 ij.InternalClose() 201 } 202 203 func (ij *indexJoiner) generateSpans(row sqlbase.EncDatumRow) (roachpb.Spans, error) { 204 numKeyCols := len(ij.desc.PrimaryIndex.ColumnIDs) 205 if len(row) < numKeyCols { 206 return nil, errors.Errorf( 207 "index join input has %d columns, expected at least %d", len(row), numKeyCols) 208 } 209 // There may be extra values on the row, e.g. to allow an ordered 210 // synchronizer to interleave multiple input streams. Will need at most 211 // numKeyCols. 212 span, containsNull, err := ij.spanBuilder.SpanFromEncDatums(row, numKeyCols) 213 if err != nil { 214 return nil, err 215 } 216 return ij.spanBuilder.MaybeSplitSpanIntoSeparateFamilies( 217 nil /* appendTo */, span, numKeyCols, containsNull, 218 ), nil 219 } 220 221 // outputStatsToTrace outputs the collected indexJoiner stats to the trace. Will 222 // fail silently if the indexJoiner is not collecting stats. 223 func (ij *indexJoiner) outputStatsToTrace() { 224 is, ok := getInputStats(ij.FlowCtx, ij.input) 225 if !ok { 226 return 227 } 228 ils, ok := getFetcherInputStats(ij.FlowCtx, ij.fetcher) 229 if !ok { 230 return 231 } 232 jrs := &JoinReaderStats{ 233 InputStats: is, 234 IndexLookupStats: ils, 235 } 236 if sp := opentracing.SpanFromContext(ij.Ctx); sp != nil { 237 tracing.SetSpanStats(sp, jrs) 238 } 239 } 240 241 func (ij *indexJoiner) generateMeta(ctx context.Context) []execinfrapb.ProducerMetadata { 242 if tfs := execinfra.GetLeafTxnFinalState(ctx, ij.FlowCtx.Txn); tfs != nil { 243 return []execinfrapb.ProducerMetadata{{LeafTxnFinalState: tfs}} 244 } 245 return nil 246 } 247 248 // DrainMeta is part of the MetadataSource interface. 249 func (ij *indexJoiner) DrainMeta(ctx context.Context) []execinfrapb.ProducerMetadata { 250 return ij.generateMeta(ctx) 251 } 252 253 // ChildCount is part of the execinfra.OpNode interface. 254 func (ij *indexJoiner) ChildCount(verbose bool) int { 255 if _, ok := ij.input.(execinfra.OpNode); ok { 256 return 1 257 } 258 return 0 259 } 260 261 // Child is part of the execinfra.OpNode interface. 262 func (ij *indexJoiner) Child(nth int, verbose bool) execinfra.OpNode { 263 if nth == 0 { 264 if n, ok := ij.input.(execinfra.OpNode); ok { 265 return n 266 } 267 panic("input to indexJoiner is not an execinfra.OpNode") 268 } 269 panic(fmt.Sprintf("invalid index %d", nth)) 270 }