github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/rowexec/tablereader.go (about) 1 // Copyright 2016 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package rowexec 12 13 import ( 14 "context" 15 "fmt" 16 "sync" 17 "time" 18 19 "github.com/cockroachdb/cockroach/pkg/roachpb" 20 "github.com/cockroachdb/cockroach/pkg/sql/execinfra" 21 "github.com/cockroachdb/cockroach/pkg/sql/execinfrapb" 22 "github.com/cockroachdb/cockroach/pkg/sql/row" 23 "github.com/cockroachdb/cockroach/pkg/sql/sqlbase" 24 "github.com/cockroachdb/cockroach/pkg/util/humanizeutil" 25 "github.com/cockroachdb/cockroach/pkg/util/log" 26 "github.com/cockroachdb/cockroach/pkg/util/tracing" 27 "github.com/cockroachdb/errors" 28 "github.com/opentracing/opentracing-go" 29 ) 30 31 // tableReader is the start of a computation flow; it performs KV operations to 32 // retrieve rows for a table, runs a filter expression, and passes rows with the 33 // desired column values to an output RowReceiver. 34 // See docs/RFCS/distributed_sql.md 35 type tableReader struct { 36 execinfra.ProcessorBase 37 38 spans roachpb.Spans 39 limitHint int64 40 41 // maxResults is non-zero if there is a limit on the total number of rows 42 // that the tableReader will read. 43 maxResults uint64 44 45 // See TableReaderSpec.MaxTimestampAgeNanos. 46 maxTimestampAge time.Duration 47 48 ignoreMisplannedRanges bool 49 50 // fetcher wraps a row.Fetcher, allowing the tableReader to add a stat 51 // collection layer. 52 fetcher rowFetcher 53 alloc sqlbase.DatumAlloc 54 55 // rowsRead is the number of rows read and is tracked unconditionally. 56 rowsRead int64 57 } 58 59 var _ execinfra.Processor = &tableReader{} 60 var _ execinfra.RowSource = &tableReader{} 61 var _ execinfrapb.MetadataSource = &tableReader{} 62 var _ execinfra.Releasable = &tableReader{} 63 var _ execinfra.OpNode = &tableReader{} 64 65 const tableReaderProcName = "table reader" 66 67 var trPool = sync.Pool{ 68 New: func() interface{} { 69 return &tableReader{} 70 }, 71 } 72 73 // newTableReader creates a tableReader. 74 func newTableReader( 75 flowCtx *execinfra.FlowCtx, 76 processorID int32, 77 spec *execinfrapb.TableReaderSpec, 78 post *execinfrapb.PostProcessSpec, 79 output execinfra.RowReceiver, 80 ) (*tableReader, error) { 81 // NB: we hit this with a zero NodeID (but !ok) with multi-tenancy. 82 if nodeID, ok := flowCtx.NodeID.OptionalNodeID(); ok && nodeID == 0 { 83 return nil, errors.Errorf("attempting to create a tableReader with uninitialized NodeID") 84 } 85 86 tr := trPool.Get().(*tableReader) 87 88 tr.limitHint = execinfra.LimitHint(spec.LimitHint, post) 89 tr.maxResults = spec.MaxResults 90 tr.maxTimestampAge = time.Duration(spec.MaxTimestampAgeNanos) 91 92 returnMutations := spec.Visibility == execinfra.ScanVisibilityPublicAndNotPublic 93 types := spec.Table.ColumnTypesWithMutations(returnMutations) 94 tr.ignoreMisplannedRanges = flowCtx.Local 95 if err := tr.Init( 96 tr, 97 post, 98 types, 99 flowCtx, 100 processorID, 101 output, 102 nil, /* memMonitor */ 103 execinfra.ProcStateOpts{ 104 // We don't pass tr.input as an inputToDrain; tr.input is just an adapter 105 // on top of a Fetcher; draining doesn't apply to it. Moreover, Andrei 106 // doesn't trust that the adapter will do the right thing on a Next() call 107 // after it had previously returned an error. 108 InputsToDrain: nil, 109 TrailingMetaCallback: tr.generateTrailingMeta, 110 }, 111 ); err != nil { 112 return nil, err 113 } 114 115 neededColumns := tr.Out.NeededColumns() 116 117 var fetcher row.Fetcher 118 columnIdxMap := spec.Table.ColumnIdxMapWithMutations(returnMutations) 119 if _, _, err := initRowFetcher( 120 flowCtx, &fetcher, &spec.Table, int(spec.IndexIdx), columnIdxMap, spec.Reverse, 121 neededColumns, spec.IsCheck, &tr.alloc, spec.Visibility, spec.LockingStrength, 122 ); err != nil { 123 return nil, err 124 } 125 126 nSpans := len(spec.Spans) 127 if cap(tr.spans) >= nSpans { 128 tr.spans = tr.spans[:nSpans] 129 } else { 130 tr.spans = make(roachpb.Spans, nSpans) 131 } 132 for i, s := range spec.Spans { 133 tr.spans[i] = s.Span 134 } 135 136 if sp := opentracing.SpanFromContext(flowCtx.EvalCtx.Ctx()); sp != nil && tracing.IsRecording(sp) { 137 tr.fetcher = newRowFetcherStatCollector(&fetcher) 138 tr.FinishTrace = tr.outputStatsToTrace 139 } else { 140 tr.fetcher = &fetcher 141 } 142 143 return tr, nil 144 } 145 146 func (tr *tableReader) generateTrailingMeta(ctx context.Context) []execinfrapb.ProducerMetadata { 147 trailingMeta := tr.generateMeta(ctx) 148 tr.InternalClose() 149 return trailingMeta 150 } 151 152 // Start is part of the RowSource interface. 153 func (tr *tableReader) Start(ctx context.Context) context.Context { 154 if tr.FlowCtx.Txn == nil { 155 log.Fatalf(ctx, "tableReader outside of txn") 156 } 157 158 ctx = tr.StartInternal(ctx, tableReaderProcName) 159 160 limitBatches := execinfra.ScanShouldLimitBatches(tr.maxResults, tr.limitHint, tr.FlowCtx) 161 log.VEventf(ctx, 1, "starting scan with limitBatches %t", limitBatches) 162 var err error 163 if tr.maxTimestampAge == 0 { 164 err = tr.fetcher.StartScan( 165 ctx, tr.FlowCtx.Txn, tr.spans, 166 limitBatches, tr.limitHint, tr.FlowCtx.TraceKV, 167 ) 168 } else { 169 initialTS := tr.FlowCtx.Txn.ReadTimestamp() 170 err = tr.fetcher.StartInconsistentScan( 171 ctx, tr.FlowCtx.Cfg.DB, initialTS, 172 tr.maxTimestampAge, tr.spans, 173 limitBatches, tr.limitHint, tr.FlowCtx.TraceKV, 174 ) 175 } 176 177 if err != nil { 178 tr.MoveToDraining(err) 179 } 180 return ctx 181 } 182 183 // Release releases this tableReader back to the pool. 184 func (tr *tableReader) Release() { 185 tr.ProcessorBase.Reset() 186 tr.fetcher.Reset() 187 *tr = tableReader{ 188 ProcessorBase: tr.ProcessorBase, 189 fetcher: tr.fetcher, 190 spans: tr.spans[:0], 191 rowsRead: 0, 192 } 193 trPool.Put(tr) 194 } 195 196 var tableReaderProgressFrequency int64 = 5000 197 198 // TestingSetScannedRowProgressFrequency changes the frequency at which 199 // row-scanned progress metadata is emitted by table readers. 200 func TestingSetScannedRowProgressFrequency(val int64) func() { 201 oldVal := tableReaderProgressFrequency 202 tableReaderProgressFrequency = val 203 return func() { tableReaderProgressFrequency = oldVal } 204 } 205 206 // Next is part of the RowSource interface. 207 func (tr *tableReader) Next() (sqlbase.EncDatumRow, *execinfrapb.ProducerMetadata) { 208 for tr.State == execinfra.StateRunning { 209 // Check if it is time to emit a progress update. 210 if tr.rowsRead >= tableReaderProgressFrequency { 211 meta := execinfrapb.GetProducerMeta() 212 meta.Metrics = execinfrapb.GetMetricsMeta() 213 meta.Metrics.RowsRead = tr.rowsRead 214 tr.rowsRead = 0 215 return nil, meta 216 } 217 218 row, _, _, err := tr.fetcher.NextRow(tr.Ctx) 219 if row == nil || err != nil { 220 tr.MoveToDraining(err) 221 break 222 } 223 224 // When tracing is enabled, number of rows read is tracked twice (once 225 // here, and once through InputStats). This is done so that non-tracing 226 // case can avoid tracking of the stall time which gives a noticeable 227 // performance hit. 228 tr.rowsRead++ 229 if outRow := tr.ProcessRowHelper(row); outRow != nil { 230 return outRow, nil 231 } 232 } 233 return nil, tr.DrainHelper() 234 } 235 236 // ConsumerClosed is part of the RowSource interface. 237 func (tr *tableReader) ConsumerClosed() { 238 // The consumer is done, Next() will not be called again. 239 tr.InternalClose() 240 } 241 242 var _ execinfrapb.DistSQLSpanStats = &TableReaderStats{} 243 244 const tableReaderTagPrefix = "tablereader." 245 246 // Stats implements the SpanStats interface. 247 func (trs *TableReaderStats) Stats() map[string]string { 248 inputStatsMap := trs.InputStats.Stats(tableReaderTagPrefix) 249 inputStatsMap[tableReaderTagPrefix+bytesReadTagSuffix] = humanizeutil.IBytes(trs.BytesRead) 250 return inputStatsMap 251 } 252 253 // StatsForQueryPlan implements the DistSQLSpanStats interface. 254 func (trs *TableReaderStats) StatsForQueryPlan() []string { 255 return append( 256 trs.InputStats.StatsForQueryPlan("" /* prefix */), 257 fmt.Sprintf("%s: %s", bytesReadQueryPlanSuffix, humanizeutil.IBytes(trs.BytesRead)), 258 ) 259 } 260 261 // outputStatsToTrace outputs the collected tableReader stats to the trace. Will 262 // fail silently if the tableReader is not collecting stats. 263 func (tr *tableReader) outputStatsToTrace() { 264 is, ok := getFetcherInputStats(tr.FlowCtx, tr.fetcher) 265 if !ok { 266 return 267 } 268 if sp := opentracing.SpanFromContext(tr.Ctx); sp != nil { 269 tracing.SetSpanStats(sp, &TableReaderStats{ 270 InputStats: is, 271 BytesRead: tr.fetcher.GetBytesRead(), 272 }) 273 } 274 } 275 276 func (tr *tableReader) generateMeta(ctx context.Context) []execinfrapb.ProducerMetadata { 277 var trailingMeta []execinfrapb.ProducerMetadata 278 if !tr.ignoreMisplannedRanges { 279 nodeID, ok := tr.FlowCtx.NodeID.OptionalNodeID() 280 if ok { 281 ranges := execinfra.MisplannedRanges(ctx, tr.fetcher.GetRangesInfo(), nodeID) 282 if ranges != nil { 283 trailingMeta = append(trailingMeta, execinfrapb.ProducerMetadata{Ranges: ranges}) 284 } 285 } 286 } 287 if tfs := execinfra.GetLeafTxnFinalState(ctx, tr.FlowCtx.Txn); tfs != nil { 288 trailingMeta = append(trailingMeta, execinfrapb.ProducerMetadata{LeafTxnFinalState: tfs}) 289 } 290 291 meta := execinfrapb.GetProducerMeta() 292 meta.Metrics = execinfrapb.GetMetricsMeta() 293 meta.Metrics.BytesRead, meta.Metrics.RowsRead = tr.fetcher.GetBytesRead(), tr.rowsRead 294 trailingMeta = append(trailingMeta, *meta) 295 return trailingMeta 296 } 297 298 // DrainMeta is part of the MetadataSource interface. 299 func (tr *tableReader) DrainMeta(ctx context.Context) []execinfrapb.ProducerMetadata { 300 return tr.generateMeta(ctx) 301 } 302 303 // ChildCount is part of the execinfra.OpNode interface. 304 func (tr *tableReader) ChildCount(bool) int { 305 return 0 306 } 307 308 // Child is part of the execinfra.OpNode interface. 309 func (tr *tableReader) Child(nth int, _ bool) execinfra.OpNode { 310 panic(fmt.Sprintf("invalid index %d", nth)) 311 }