github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/colexec/cfetcher.go

github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/colexec/cfetcher.go (about)

     1  // Copyright 2018 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package colexec
    12  
    13  import (
    14  	"bytes"
    15  	"context"
    16  	"fmt"
    17  	"sort"
    18  	"strings"
    19  
    20  	"github.com/cockroachdb/cockroach/pkg/col/coldata"
    21  	"github.com/cockroachdb/cockroach/pkg/keys"
    22  	"github.com/cockroachdb/cockroach/pkg/kv"
    23  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    24  	"github.com/cockroachdb/cockroach/pkg/sql/colencoding"
    25  	"github.com/cockroachdb/cockroach/pkg/sql/colexecbase/colexecerror"
    26  	"github.com/cockroachdb/cockroach/pkg/sql/colmem"
    27  	"github.com/cockroachdb/cockroach/pkg/sql/row"
    28  	"github.com/cockroachdb/cockroach/pkg/sql/scrub"
    29  	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
    30  	"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
    31  	"github.com/cockroachdb/cockroach/pkg/sql/types"
    32  	"github.com/cockroachdb/cockroach/pkg/util"
    33  	"github.com/cockroachdb/cockroach/pkg/util/encoding"
    34  	"github.com/cockroachdb/cockroach/pkg/util/log"
    35  	"github.com/cockroachdb/errors"
    36  )
    37  
    38  // Only unique secondary indexes have extra columns to decode (namely the
    39  // primary index columns).
    40  func cHasExtraCols(table *cTableInfo) bool {
    41  	return table.isSecondaryIndex && table.index.Unique
    42  }
    43  
// cTableInfo holds the per-table state of a cFetcher: the descriptors of the
// table and index being scanned, plus the lookup structures that
// cFetcher.Init derives from them.
type cTableInfo struct {
	// -- Fields initialized once --

	// Used to determine whether a key retrieved belongs to the span we
	// want to scan.
	spans            roachpb.Spans
	desc             *sqlbase.ImmutableTableDescriptor
	index            *sqlbase.IndexDescriptor
	isSecondaryIndex bool
	indexColumnDirs  []sqlbase.IndexDescriptor_Direction

	// The table columns to use for fetching, possibly including ones currently in
	// schema changes.
	cols []sqlbase.ColumnDescriptor

	// The list of ColumnIDs that are required, stored as ints and sorted in
	// ascending order.
	neededColsList []int

	// The set of required value-component column ordinals in the table. Init
	// starts from the full set of needed ordinals and removes the needed
	// index-key (and, for unique secondary indexes, extra) columns that are
	// not composite.
	neededValueColsByIdx util.FastIntSet

	// Map used to get the index for columns in cols.
	colIdxMap colIdxMap

	// One value per column that is part of the key; each value is a column
	// index (into cols); -1 if we don't need the value for that column.
	indexColOrdinals []int
	// allIndexColOrdinals is the same as indexColOrdinals but
	// does not contain any -1's. It is meant to be used only in logging.
	allIndexColOrdinals []int

	// The set of column ordinals which are both composite and part of the index
	// key.
	compositeIndexColOrdinals util.FastIntSet

	// One value per extra column of the index (index.ExtraColumnIDs); each
	// value is a column index (into cols); -1 if we don't need the value for
	// that column. Only populated when cHasExtraCols is true, i.e. for unique
	// secondary indexes.
	extraValColOrdinals []int
	// allExtraValColOrdinals is the same as extraValColOrdinals but
	// does not contain any -1's. It is meant to be used only in logging.
	allExtraValColOrdinals []int

	// maxColumnFamilyID is the maximum possible family id for the configured
	// table.
	maxColumnFamilyID sqlbase.FamilyID

	// knownPrefixLength is the number of bytes in the index key prefix this
	// Fetcher is configured for. The index key prefix is the table id, index
	// id pair at the start of the key.
	knownPrefixLength int

	// keyValTypes holds the types of the index key columns and extraTypes the
	// types of the index's extra columns; both are obtained from
	// sqlbase.GetColumnTypes. extraTypes is only set when cHasExtraCols is
	// true.
	keyValTypes []*types.T
	extraTypes  []*types.T

	// da is the datum allocator handed to the index key decoder.
	da sqlbase.DatumAlloc
}
   100  
// colIdxMap is a "map" that contains the ordinal in cols for each ColumnID
// in the table to fetch. This map is used to figure out what index within a
// row a particular value-component column goes into. Value-component columns
// are encoded with a column id prefix, with the guarantee that within any
// given row, the column ids are always increasing. Because of this guarantee,
// we can store this map as two sorted lists that the fetcher keeps an index
// into, giving fast access during decoding.
//
// It implements sort.Interface to be sortable on vals, while keeping ords
// matched up to the order of vals.
type colIdxMap struct {
	// vals is the sorted list of sqlbase.ColumnIDs in the table to fetch.
	vals sqlbase.ColumnIDs
	// ords is the list of ordinals in cols for each column in vals. The ith
	// entry in ords is the ordinal within cols for the ith column in vals.
	ords []int
}
   119  
   120  // Len implements sort.Interface.
   121  func (m colIdxMap) Len() int {
   122  	return len(m.vals)
   123  }
   124  
   125  // Less implements sort.Interface.
   126  func (m colIdxMap) Less(i, j int) bool {
   127  	return m.vals[i] < m.vals[j]
   128  }
   129  
   130  // Swap implements sort.Interface.
   131  func (m colIdxMap) Swap(i, j int) {
   132  	m.vals[i], m.vals[j] = m.vals[j], m.vals[i]
   133  	m.ords[i], m.ords[j] = m.ords[j], m.ords[i]
   134  }
   135  
   136  func (m colIdxMap) get(c sqlbase.ColumnID) (int, bool) {
   137  	for i, v := range m.vals {
   138  		if v == c {
   139  			return m.ords[i], true
   140  		}
   141  	}
   142  	return 0, false
   143  }
   144  
// cFetcher handles fetching kvs and forming table rows in columnar batches.
// Init accepts a variadic list of tables but currently returns an error when
// more than one is supplied.
// Usage:
//   var rf cFetcher
//   err := rf.Init(..)
//   // Handle err
//   err = rf.StartScan(..)
//   // Handle err
//   for {
//      batch, err := rf.NextBatch(ctx)
//      // Handle err
//      if batch.Length() == 0 {
//         // Done
//         break
//      }
//      // Process batch
//   }
type cFetcher struct {
	// table is the table that's configured for fetching.
	table *cTableInfo

	// reverse denotes whether or not the spans should be read in reverse
	// or not when StartScan is invoked.
	reverse bool

	// maxKeysPerRow memoizes the maximum number of keys per row
	// out of all the tables. This is used to calculate the kvBatchFetcher's
	// firstBatchLimit.
	maxKeysPerRow int

	// True if the index key must be decoded.
	// If there is more than one table, the index key must always be decoded.
	// This is only false if there are no needed columns and the (single)
	// table has no interleave children.
	mustDecodeIndexKey bool

	// lockStr represents the row-level locking mode to use when fetching rows.
	lockStr sqlbase.ScanLockingStrength

	// returnRangeInfo, if set, causes the underlying kvBatchFetcher to return
	// information about the ranges descriptors/leases uses in servicing the
	// requests. This has some cost, so it's only enabled by DistSQL when this
	// info is actually useful for correcting the plan (e.g. not for the PK-side
	// of an index-join).
	// If set, GetRangesInfo() can be used to retrieve the accumulated info.
	returnRangeInfo bool

	// traceKV indicates whether or not session tracing is enabled. It is set
	// when beginning a new scan.
	traceKV bool

	// fetcher is the underlying fetcher that provides KVs.
	fetcher *row.KVFetcher

	// machine contains fields that get updated during the run of the fetcher.
	machine struct {
		// state is the queue of next states of the state machine. The 0th entry
		// is the next state.
		state [3]fetcherState
		// rowIdx is always set to the ordinal of the row we're currently writing to
		// within the current batch. It's incremented as soon as we detect that a row
		// is finished.
		rowIdx int
		// curSpan is the current span that the kv fetcher just returned data from.
		curSpan roachpb.Span
		// nextKV is the kv to process next.
		nextKV roachpb.KeyValue
		// seekPrefix is the prefix to seek to in stateSeekPrefix.
		seekPrefix roachpb.Key

		// remainingValueColsByIdx is the set of value columns that are yet to be
		// seen during the decoding of the current row.
		remainingValueColsByIdx util.FastIntSet
		// lastRowPrefix is the row prefix for the last row we saw a key for. New
		// keys are compared against this prefix to determine whether they're part
		// of a new row or not.
		lastRowPrefix roachpb.Key
		// prettyValueBuf is a temp buffer used to create strings for tracing.
		prettyValueBuf *bytes.Buffer

		// batch is the output batch the fetcher writes to.
		batch coldata.Batch

		// colvecs is a slice of the ColVecs within batch, pulled out to avoid
		// having to call batch.Vec too often in the tight loop.
		colvecs []coldata.Vec
	}

	// adapter is a utility struct that helps with memory accounting. NextBatch
	// stores its context here, and nextAdapter stores the result of nextBatch
	// in batch and err so that it can be invoked as a parameterless callback.
	adapter struct {
		ctx       context.Context
		allocator *colmem.Allocator
		batch     coldata.Batch
		err       error
	}
}
   241  
   242  // Init sets up a Fetcher for a given table and index. If we are using a
   243  // non-primary index, tables.ValNeededForCol can only refer to columns in the
   244  // index.
   245  func (rf *cFetcher) Init(
   246  	codec keys.SQLCodec,
   247  	allocator *colmem.Allocator,
   248  	reverse bool,
   249  	lockStr sqlbase.ScanLockingStrength,
   250  	returnRangeInfo bool,
   251  	isCheck bool,
   252  	tables ...row.FetcherTableArgs,
   253  ) error {
   254  	rf.adapter.allocator = allocator
   255  	if len(tables) == 0 {
   256  		return errors.AssertionFailedf("no tables to fetch from")
   257  	}
   258  
   259  	rf.reverse = reverse
   260  	rf.lockStr = lockStr
   261  	rf.returnRangeInfo = returnRangeInfo
   262  
   263  	if len(tables) > 1 {
   264  		return errors.New("multiple tables not supported in cfetcher")
   265  	}
   266  
   267  	tableArgs := tables[0]
   268  
   269  	m := colIdxMap{
   270  		vals: make(sqlbase.ColumnIDs, 0, len(tableArgs.ColIdxMap)),
   271  		ords: make([]int, 0, len(tableArgs.ColIdxMap)),
   272  	}
   273  	for k, v := range tableArgs.ColIdxMap {
   274  		m.vals = append(m.vals, k)
   275  		m.ords = append(m.ords, v)
   276  	}
   277  	sort.Sort(m)
   278  	colDescriptors := tableArgs.Cols
   279  	table := &cTableInfo{
   280  		spans:            tableArgs.Spans,
   281  		desc:             tableArgs.Desc,
   282  		colIdxMap:        m,
   283  		index:            tableArgs.Index,
   284  		isSecondaryIndex: tableArgs.IsSecondaryIndex,
   285  		cols:             colDescriptors,
   286  	}
   287  
   288  	typs := make([]*types.T, len(colDescriptors))
   289  	for i := range typs {
   290  		typs[i] = colDescriptors[i].Type
   291  	}
   292  
   293  	rf.machine.batch = allocator.NewMemBatch(typs)
   294  	rf.machine.colvecs = rf.machine.batch.ColVecs()
   295  
   296  	var err error
   297  
   298  	var neededCols util.FastIntSet
   299  	// Scan through the entire columns map to see which columns are
   300  	// required.
   301  	table.neededColsList = make([]int, 0, tableArgs.ValNeededForCol.Len())
   302  	for col, idx := range tableArgs.ColIdxMap {
   303  		if tableArgs.ValNeededForCol.Contains(idx) {
   304  			// The idx-th column is required.
   305  			neededCols.Add(int(col))
   306  			table.neededColsList = append(table.neededColsList, int(col))
   307  		}
   308  	}
   309  	sort.Ints(table.neededColsList)
   310  
   311  	table.knownPrefixLength = len(sqlbase.MakeIndexKeyPrefix(codec, table.desc.TableDesc(), table.index.ID))
   312  
   313  	var indexColumnIDs []sqlbase.ColumnID
   314  	indexColumnIDs, table.indexColumnDirs = table.index.FullColumnIDs()
   315  
   316  	compositeColumnIDs := util.MakeFastIntSet()
   317  	for _, id := range table.index.CompositeColumnIDs {
   318  		compositeColumnIDs.Add(int(id))
   319  	}
   320  
   321  	table.neededValueColsByIdx = tableArgs.ValNeededForCol.Copy()
   322  	neededIndexCols := 0
   323  	nIndexCols := len(indexColumnIDs)
   324  	if cap(table.indexColOrdinals) >= nIndexCols {
   325  		table.indexColOrdinals = table.indexColOrdinals[:nIndexCols]
   326  	} else {
   327  		table.indexColOrdinals = make([]int, nIndexCols)
   328  	}
   329  	if cap(table.allIndexColOrdinals) >= nIndexCols {
   330  		table.allIndexColOrdinals = table.allIndexColOrdinals[:nIndexCols]
   331  	} else {
   332  		table.allIndexColOrdinals = make([]int, nIndexCols)
   333  	}
   334  	for i, id := range indexColumnIDs {
   335  		colIdx, ok := tableArgs.ColIdxMap[id]
   336  		table.allIndexColOrdinals[i] = colIdx
   337  		if ok && neededCols.Contains(int(id)) {
   338  			table.indexColOrdinals[i] = colIdx
   339  			neededIndexCols++
   340  			// A composite column might also have a value encoding which must be
   341  			// decoded. Others can be removed from neededValueColsByIdx.
   342  			if compositeColumnIDs.Contains(int(id)) {
   343  				table.compositeIndexColOrdinals.Add(colIdx)
   344  			} else {
   345  				table.neededValueColsByIdx.Remove(colIdx)
   346  			}
   347  		} else {
   348  			table.indexColOrdinals[i] = -1
   349  			if neededCols.Contains(int(id)) {
   350  				return errors.AssertionFailedf("needed column %d not in colIdxMap", id)
   351  			}
   352  		}
   353  	}
   354  	// Unique secondary indexes contain the extra column IDs as part of
   355  	// the value component. We process these separately, so we need to know
   356  	// what extra columns are composite or not.
   357  	if table.isSecondaryIndex && table.index.Unique {
   358  		for _, id := range table.index.ExtraColumnIDs {
   359  			colIdx, ok := tableArgs.ColIdxMap[id]
   360  			if ok && neededCols.Contains(int(id)) {
   361  				if compositeColumnIDs.Contains(int(id)) {
   362  					table.compositeIndexColOrdinals.Add(colIdx)
   363  				} else {
   364  					table.neededValueColsByIdx.Remove(colIdx)
   365  				}
   366  			}
   367  		}
   368  	}
   369  
   370  	// - If there are interleaves, we need to read the index key in order to
   371  	//   determine whether this row is actually part of the index we're scanning.
   372  	// - If there are needed columns from the index key, we need to read it.
   373  	//
   374  	// Otherwise, we can completely avoid decoding the index key.
   375  	if neededIndexCols > 0 || len(table.index.InterleavedBy) > 0 || len(table.index.Interleave.Ancestors) > 0 {
   376  		rf.mustDecodeIndexKey = true
   377  	}
   378  
   379  	if table.isSecondaryIndex {
   380  		for i := range table.cols {
   381  			if neededCols.Contains(int(table.cols[i].ID)) && !table.index.ContainsColumnID(table.cols[i].ID) {
   382  				return errors.Errorf("requested column %s not in index", table.cols[i].Name)
   383  			}
   384  		}
   385  	}
   386  
   387  	// Prepare our index key vals slice.
   388  	table.keyValTypes, err = sqlbase.GetColumnTypes(table.desc.TableDesc(), indexColumnIDs)
   389  	if err != nil {
   390  		return err
   391  	}
   392  	if cHasExtraCols(table) {
   393  		// Unique secondary indexes have a value that is the
   394  		// primary index key.
   395  		// Primary indexes only contain ascendingly-encoded
   396  		// values. If this ever changes, we'll probably have to
   397  		// figure out the directions here too.
   398  		table.extraTypes, err = sqlbase.GetColumnTypes(table.desc.TableDesc(), table.index.ExtraColumnIDs)
   399  		nExtraColumns := len(table.index.ExtraColumnIDs)
   400  		if cap(table.extraValColOrdinals) >= nExtraColumns {
   401  			table.extraValColOrdinals = table.extraValColOrdinals[:nExtraColumns]
   402  		} else {
   403  			table.extraValColOrdinals = make([]int, nExtraColumns)
   404  		}
   405  
   406  		if cap(table.allExtraValColOrdinals) >= nExtraColumns {
   407  			table.allExtraValColOrdinals = table.allExtraValColOrdinals[:nExtraColumns]
   408  		} else {
   409  			table.allExtraValColOrdinals = make([]int, nExtraColumns)
   410  		}
   411  
   412  		for i, id := range table.index.ExtraColumnIDs {
   413  			table.allExtraValColOrdinals[i] = tableArgs.ColIdxMap[id]
   414  			if neededCols.Contains(int(id)) {
   415  				table.extraValColOrdinals[i] = tableArgs.ColIdxMap[id]
   416  			} else {
   417  				table.extraValColOrdinals[i] = -1
   418  			}
   419  		}
   420  		if err != nil {
   421  			return err
   422  		}
   423  	}
   424  
   425  	// Keep track of the maximum keys per row to accommodate a
   426  	// limitHint when StartScan is invoked.
   427  	keysPerRow, err := table.desc.KeysPerRow(table.index.ID)
   428  	if err != nil {
   429  		return err
   430  	}
   431  	if keysPerRow > rf.maxKeysPerRow {
   432  		rf.maxKeysPerRow = keysPerRow
   433  	}
   434  
   435  	for i := range table.desc.Families {
   436  		id := table.desc.Families[i].ID
   437  		if id > table.maxColumnFamilyID {
   438  			table.maxColumnFamilyID = id
   439  		}
   440  	}
   441  
   442  	rf.table = table
   443  
   444  	return nil
   445  }
   446  
   447  // StartScan initializes and starts the key-value scan. Can be used multiple
   448  // times.
   449  func (rf *cFetcher) StartScan(
   450  	ctx context.Context,
   451  	txn *kv.Txn,
   452  	spans roachpb.Spans,
   453  	limitBatches bool,
   454  	limitHint int64,
   455  	traceKV bool,
   456  ) error {
   457  	if len(spans) == 0 {
   458  		return errors.AssertionFailedf("no spans")
   459  	}
   460  
   461  	rf.traceKV = traceKV
   462  
   463  	// If we have a limit hint, we limit the first batch size. Subsequent
   464  	// batches get larger to avoid making things too slow (e.g. in case we have
   465  	// a very restrictive filter and actually have to retrieve a lot of rows).
   466  	firstBatchLimit := limitHint
   467  	if firstBatchLimit != 0 {
   468  		// The limitHint is a row limit, but each row could be made up
   469  		// of more than one key. We take the maximum possible keys
   470  		// per row out of all the table rows we could potentially
   471  		// scan over.
   472  		firstBatchLimit = limitHint * int64(rf.maxKeysPerRow)
   473  		// We need an extra key to make sure we form the last row.
   474  		firstBatchLimit++
   475  	}
   476  
   477  	f, err := row.NewKVFetcher(
   478  		txn, spans, rf.reverse, limitBatches, firstBatchLimit, rf.lockStr, rf.returnRangeInfo,
   479  	)
   480  	if err != nil {
   481  		return err
   482  	}
   483  	rf.fetcher = f
   484  	rf.machine.lastRowPrefix = nil
   485  	rf.machine.state[0] = stateInitFetch
   486  	return nil
   487  }
   488  
// fetcherState is the state enum for nextBatch.
type fetcherState int

//go:generate stringer -type=fetcherState

const (
	// stateInvalid is the zero value of fetcherState; nextBatch returns an
	// error when it encounters it.
	stateInvalid fetcherState = iota

	// stateInitFetch is the empty state of a fetcher: there is no current KV to
	// look at, and there's no current row, either because the fetcher has just
	// started, or because the last row was already finalized.
	//
	//   1. fetch next kv into nextKV buffer
	//     -> decodeFirstKVOfRow
	stateInitFetch

	// stateResetBatch resets the batch of a fetcher, removing nulls and the
	// selection vector.
	stateResetBatch

	// stateDecodeFirstKVOfRow is the state of looking at a key that is part of
	// a row that the fetcher hasn't processed before. rf.machine.nextKV must be
	// set.
	//   1. skip common prefix
	//   2. parse key (past common prefix) into row buffer, setting last row prefix buffer
	//   3. interleave detected?
	//      - set skip prefix
	//      -> seekPrefix(decodeFirstKVOfRow)
	//   4. parse value into row buffer.
	//   5. table has a single column family?
	//     -> finalizeRow(initFetch)
	//   else:
	//     -> fetchNextKVWithUnfinishedRow
	stateDecodeFirstKVOfRow

	// stateSeekPrefix is the state of skipping all keys that sort before
	// (or after, in the case of a reverse scan) a prefix. rf.machine.seekPrefix
	// must be set to the prefix to seek to. state[1] must be set, and seekPrefix
	// will transition to that state once it finds the first key with that prefix.
	//   1. fetch next kv into nextKV buffer
	//   2. kv doesn't match seek prefix?
	//     -> seekPrefix
	//   else:
	//     -> nextState
	stateSeekPrefix

	// stateFetchNextKVWithUnfinishedRow is the state of getting a new key for
	// the current row. The machine will read a new key from the underlying
	// fetcher, process it, and either add the results to the current row, or
	// shift to a new row.
	//   1. fetch next kv into nextKV buffer
	//   2. skip common prefix
	//   3. check equality to last row prefix buffer
	//   4. no?
	//     -> finalizeRow(decodeFirstKVOfRow)
	//   5. skip to end of last row prefix buffer
	//   6. interleave detected?
	//     - set skip prefix
	//     -> finalizeRow(seekPrefix(decodeFirstKVOfRow))
	//   7. parse value into row buffer
	//   8. -> fetchNextKVWithUnfinishedRow
	stateFetchNextKVWithUnfinishedRow

	// stateFinalizeRow is the state of finalizing a row. It assumes that no more
	// keys for the current row are present.
	// state[1] must be set, and stateFinalizeRow will transition to that state
	// once it finishes finalizing the row.
	//   1. fill missing nulls
	//   2. bump rowIdx
	//   -> nextState and optionally return if row-by-row or batch full
	stateFinalizeRow

	// stateEmitLastBatch emits the current batch and then transitions to
	// stateFinished.
	stateEmitLastBatch

	// stateFinished is the end state of the state machine - it causes nextBatch
	// to return empty batches forever.
	stateFinished
)
   569  
// debugState enables super verbose logging of the fetcher state machine when
// turned on: nextBatch then logs the current state on every iteration, as
// well as the keys it decodes and seeks to.
const debugState = false
   572  
   573  // NextBatch is nextBatch with the addition of memory accounting.
   574  func (rf *cFetcher) NextBatch(ctx context.Context) (coldata.Batch, error) {
   575  	rf.adapter.ctx = ctx
   576  	rf.adapter.allocator.PerformOperation(
   577  		rf.machine.colvecs,
   578  		rf.nextAdapter,
   579  	)
   580  	return rf.adapter.batch, rf.adapter.err
   581  }
   582  
   583  func (rf *cFetcher) nextAdapter() {
   584  	rf.adapter.batch, rf.adapter.err = rf.nextBatch(rf.adapter.ctx)
   585  }
   586  
   587  // nextBatch processes keys until we complete one batch of rows,
   588  // coldata.BatchSize() in length, which are returned in columnar format as a
   589  // coldata.Batch. The batch contains one Vec per table column, regardless of
   590  // the index used; columns that are not needed (as per neededCols) are empty.
   591  // The Batch should not be modified and is only valid until the next call.
   592  // When there are no more rows, the Batch.Length is 0.
   593  func (rf *cFetcher) nextBatch(ctx context.Context) (coldata.Batch, error) {
   594  	for {
   595  		if debugState {
   596  			log.Infof(ctx, "State %s", rf.machine.state[0])
   597  		}
   598  		switch rf.machine.state[0] {
   599  		case stateInvalid:
   600  			return nil, errors.New("invalid fetcher state")
   601  		case stateInitFetch:
   602  			moreKeys, kv, newSpan, err := rf.fetcher.NextKV(ctx)
   603  			if err != nil {
   604  				return nil, colexecerror.NewStorageError(err)
   605  			}
   606  			if !moreKeys {
   607  				rf.machine.state[0] = stateEmitLastBatch
   608  				continue
   609  			}
   610  			if newSpan {
   611  				rf.machine.curSpan = rf.fetcher.Span
   612  				// TODO(jordan): parse the logical longest common prefix of the span
   613  				// into a buffer. The logical longest common prefix is the longest
   614  				// common prefix that contains only full key components. For example,
   615  				// the keys /Table/53/1/foo/bar/10 and /Table/53/1/foo/bop/10 would
   616  				// have LLCS of /Table/53/1/foo, even though they share a b prefix of
   617  				// the next key, since that prefix isn't a complete key component.
   618  				/*
   619  					lcs := rf.fetcher.span.LongestCommonPrefix()
   620  					// parse lcs into stuff
   621  					key, matches, err := sqlbase.DecodeIndexKeyWithoutTableIDIndexIDPrefix(
   622  						rf.table.desc, rf.table.info.index, rf.table.info.keyValTypes,
   623  						rf.table.keyVals, rf.table.info.indexColumnDirs, kv.Key[rf.table.info.knownPrefixLength:],
   624  					)
   625  					if err != nil {
   626  						// This is expected - the longest common prefix of the keyspan might
   627  						// end half way through a key. Suppress the error and set the actual
   628  						// LCS we'll use later to the decodable components of the key.
   629  					}
   630  				*/
   631  			}
   632  
   633  			rf.machine.nextKV = kv
   634  			rf.machine.state[0] = stateDecodeFirstKVOfRow
   635  
   636  		case stateResetBatch:
   637  			rf.machine.batch.ResetInternalBatch()
   638  			rf.shiftState()
   639  		case stateDecodeFirstKVOfRow:
   640  			// foundNull is set when decoding a new index key for a row finds a NULL value
   641  			// in the index key. This is used when decoding unique secondary indexes in order
   642  			// to tell whether they have extra columns appended to the key.
   643  			var foundNull bool
   644  			if rf.mustDecodeIndexKey || rf.traceKV {
   645  				if debugState {
   646  					log.Infof(ctx, "Decoding first key %s", rf.machine.nextKV.Key)
   647  				}
   648  				var (
   649  					key     []byte
   650  					matches bool
   651  					err     error
   652  				)
   653  				indexOrds := rf.table.indexColOrdinals
   654  				if rf.traceKV {
   655  					indexOrds = rf.table.allIndexColOrdinals
   656  				}
   657  				key, matches, foundNull, err = colencoding.DecodeIndexKeyToCols(
   658  					&rf.table.da,
   659  					rf.machine.colvecs,
   660  					rf.machine.rowIdx,
   661  					rf.table.desc,
   662  					rf.table.index,
   663  					indexOrds,
   664  					rf.table.keyValTypes,
   665  					rf.table.indexColumnDirs,
   666  					rf.machine.nextKV.Key[rf.table.knownPrefixLength:],
   667  				)
   668  				if err != nil {
   669  					return nil, err
   670  				}
   671  				if !matches {
   672  					// We found an interleave. Set our skip prefix.
   673  					seekPrefix := rf.machine.nextKV.Key[:len(key)+rf.table.knownPrefixLength]
   674  					if debugState {
   675  						log.Infof(ctx, "Setting seek prefix to %s", seekPrefix)
   676  					}
   677  					rf.machine.seekPrefix = seekPrefix
   678  					rf.machine.state[0] = stateSeekPrefix
   679  					rf.machine.state[1] = stateDecodeFirstKVOfRow
   680  					continue
   681  				}
   682  				prefix := rf.machine.nextKV.Key[:len(rf.machine.nextKV.Key)-len(key)]
   683  				rf.machine.lastRowPrefix = prefix
   684  			} else {
   685  				// If mustDecodeIndexKey was false, we can't possibly have an
   686  				// interleaved row on our hands, so we can figure out our row prefix
   687  				// without parsing any keys by using GetRowPrefixLength.
   688  				prefixLen, err := keys.GetRowPrefixLength(rf.machine.nextKV.Key)
   689  				if err != nil {
   690  					return nil, err
   691  				}
   692  				rf.machine.lastRowPrefix = rf.machine.nextKV.Key[:prefixLen]
   693  			}
   694  
   695  			// For unique secondary indexes, the index-key does not distinguish one row
   696  			// from the next if both rows contain identical values along with a NULL.
   697  			// Consider the keys:
   698  			//
   699  			//   /test/unique_idx/NULL/0
   700  			//   /test/unique_idx/NULL/1
   701  			//
   702  			// The index-key extracted from the above keys is /test/unique_idx/NULL. The
   703  			// trailing /0 and /1 are the primary key used to unique-ify the keys when a
   704  			// NULL is present. When a null is present in the index key, we cut off more
   705  			// of the index key so that the prefix includes the primary key columns.
   706  			//
   707  			// Note that we do not need to do this for non-unique secondary indexes because
   708  			// the extra columns in the primary key will _always_ be there, so we can decode
   709  			// them when processing the index. The difference with unique secondary indexes
   710  			// is that the extra columns are not always there, and are used to unique-ify
   711  			// the index key, rather than provide the primary key column values.
   712  			if foundNull && rf.table.isSecondaryIndex && rf.table.index.Unique && len(rf.table.desc.Families) != 1 {
   713  				// We get the remaining bytes after the computed prefix, and then
   714  				// slice off the extra encoded columns from those bytes. We calculate
   715  				// how many bytes were sliced away, and then extend lastRowPrefix
   716  				// by that amount.
   717  				prefixLen := len(rf.machine.lastRowPrefix)
   718  				remainingBytes := rf.machine.nextKV.Key[prefixLen:]
   719  				origRemainingBytesLen := len(remainingBytes)
   720  				for range rf.table.index.ExtraColumnIDs {
   721  					var err error
   722  					// Slice off an extra encoded column from remainingBytes.
   723  					remainingBytes, err = sqlbase.SkipTableKey(remainingBytes)
   724  					if err != nil {
   725  						return nil, err
   726  					}
   727  				}
   728  				rf.machine.lastRowPrefix = rf.machine.nextKV.Key[:prefixLen+(origRemainingBytesLen-len(remainingBytes))]
   729  			}
   730  
   731  			familyID, err := rf.getCurrentColumnFamilyID()
   732  			if err != nil {
   733  				return nil, err
   734  			}
   735  			rf.machine.remainingValueColsByIdx.CopyFrom(rf.table.neededValueColsByIdx)
   736  			// Process the current KV's value component.
   737  			prettyKey, prettyVal, err := rf.processValue(ctx, familyID)
   738  			if err != nil {
   739  				return nil, err
   740  			}
   741  			if rf.traceKV {
   742  				log.VEventf(ctx, 2, "fetched: %s -> %s", prettyKey, prettyVal)
   743  			}
   744  			if len(rf.table.desc.Families) == 1 {
   745  				rf.machine.state[0] = stateFinalizeRow
   746  				rf.machine.state[1] = stateInitFetch
   747  				continue
   748  			}
   749  			rf.machine.state[0] = stateFetchNextKVWithUnfinishedRow
   750  		case stateSeekPrefix:
   751  			for {
   752  				moreRows, kv, _, err := rf.fetcher.NextKV(ctx)
   753  				if err != nil {
   754  					return nil, colexecerror.NewStorageError(err)
   755  				}
   756  				if debugState {
   757  					log.Infof(ctx, "found kv %s, seeking to prefix %s", kv.Key, rf.machine.seekPrefix)
   758  				}
   759  				if !moreRows {
   760  					// We ran out of data, so ignore whatever our next state was going to
   761  					// be and emit the final batch.
   762  					rf.machine.state[1] = stateEmitLastBatch
   763  					break
   764  				}
   765  				// The order we perform the comparison in depends on whether we are
   766  				// performing a reverse scan or not. If we are performing a reverse
   767  				// scan, then we want to seek until we find a key less than seekPrefix.
   768  				var comparison int
   769  				if rf.reverse {
   770  					comparison = bytes.Compare(rf.machine.seekPrefix, kv.Key)
   771  				} else {
   772  					comparison = bytes.Compare(kv.Key, rf.machine.seekPrefix)
   773  				}
   774  				// TODO(jordan): if nextKV returns newSpan = true, set the new span
   775  				//  prefix and indicate that it needs decoding.
   776  				if comparison >= 0 {
   777  					rf.machine.nextKV = kv
   778  					break
   779  				}
   780  			}
   781  			rf.shiftState()
   782  
   783  		case stateFetchNextKVWithUnfinishedRow:
   784  			moreKVs, kv, _, err := rf.fetcher.NextKV(ctx)
   785  			if err != nil {
   786  				return nil, colexecerror.NewStorageError(err)
   787  			}
   788  			if !moreKVs {
   789  				// No more data. Finalize the row and exit.
   790  				rf.machine.state[0] = stateFinalizeRow
   791  				rf.machine.state[1] = stateEmitLastBatch
   792  				continue
   793  			}
   794  			// TODO(jordan): if nextKV returns newSpan = true, set the new span
   795  			// prefix and indicate that it needs decoding.
   796  			rf.machine.nextKV = kv
   797  			if debugState {
   798  				log.Infof(ctx, "Decoding next key %s", rf.machine.nextKV.Key)
   799  			}
   800  
   801  			// TODO(jordan): optimize this prefix check by skipping span prefix.
   802  			if !bytes.HasPrefix(kv.Key, rf.machine.lastRowPrefix) {
   803  				// The kv we just found is from a different row.
   804  				rf.machine.state[0] = stateFinalizeRow
   805  				rf.machine.state[1] = stateDecodeFirstKVOfRow
   806  				continue
   807  			}
   808  
   809  			key := kv.Key[len(rf.machine.lastRowPrefix):]
   810  			_, foundInterleave := encoding.DecodeIfInterleavedSentinel(key)
   811  
   812  			if foundInterleave {
   813  				// The key we just found isn't relevant to the current row, so finalize
   814  				// the current row, then skip all KVs with the current interleave prefix.
   815  				rf.machine.state[0] = stateFinalizeRow
   816  				rf.machine.state[1] = stateSeekPrefix
   817  				rf.machine.state[2] = stateDecodeFirstKVOfRow
   818  				continue
   819  			}
   820  
   821  			familyID, err := rf.getCurrentColumnFamilyID()
   822  			if err != nil {
   823  				return nil, err
   824  			}
   825  
   826  			// Process the current KV's value component.
   827  			prettyKey, prettyVal, err := rf.processValue(ctx, familyID)
   828  			if err != nil {
   829  				return nil, err
   830  			}
   831  			if rf.traceKV {
   832  				log.VEventf(ctx, 2, "fetched: %s -> %s", prettyKey, prettyVal)
   833  			}
   834  
   835  			if familyID == rf.table.maxColumnFamilyID {
   836  				// We know the row can't have any more keys, so finalize the row.
   837  				rf.machine.state[0] = stateFinalizeRow
   838  				rf.machine.state[1] = stateInitFetch
   839  			} else {
   840  				// Continue with current state.
   841  				rf.machine.state[0] = stateFetchNextKVWithUnfinishedRow
   842  			}
   843  
   844  		case stateFinalizeRow:
   845  			// We're finished with a row. Bump the row index, fill the row in with
   846  			// nulls if necessary, emit the batch if necessary, and move to the next
   847  			// state.
   848  			if err := rf.fillNulls(); err != nil {
   849  				return nil, err
   850  			}
   851  			rf.machine.rowIdx++
   852  			rf.shiftState()
   853  			if rf.machine.rowIdx >= coldata.BatchSize() {
   854  				rf.pushState(stateResetBatch)
   855  				rf.machine.batch.SetLength(rf.machine.rowIdx)
   856  				rf.machine.rowIdx = 0
   857  				return rf.machine.batch, nil
   858  			}
   859  
   860  		case stateEmitLastBatch:
   861  			rf.machine.state[0] = stateFinished
   862  			rf.machine.batch.SetLength(rf.machine.rowIdx)
   863  			rf.machine.rowIdx = 0
   864  			return rf.machine.batch, nil
   865  
   866  		case stateFinished:
   867  			return coldata.ZeroBatch, nil
   868  		}
   869  	}
   870  }
   871  
   872  // shiftState shifts the state queue to the left, removing the first element and
   873  // clearing the last element.
   874  func (rf *cFetcher) shiftState() {
   875  	copy(rf.machine.state[:2], rf.machine.state[1:])
   876  	rf.machine.state[2] = stateInvalid
   877  }
   878  
   879  func (rf *cFetcher) pushState(state fetcherState) {
   880  	copy(rf.machine.state[1:], rf.machine.state[:2])
   881  	rf.machine.state[0] = state
   882  }
   883  
   884  // getDatumAt returns the converted datum object at the given (colIdx, rowIdx).
   885  // This function is meant for tracing and should not be used in hot paths.
   886  func (rf *cFetcher) getDatumAt(colIdx int, rowIdx int, typ *types.T) tree.Datum {
   887  	return PhysicalTypeColElemToDatum(rf.machine.colvecs[colIdx], rowIdx, &rf.table.da, typ)
   888  }
   889  
   890  // processValue processes the state machine's current value component, setting
   891  // columns in the rowIdx'th tuple in the current batch depending on what data
   892  // is found in the current value component.
   893  // If debugStrings is true, returns pretty printed key and value
   894  // information in prettyKey/prettyValue (otherwise they are empty strings).
   895  func (rf *cFetcher) processValue(
   896  	ctx context.Context, familyID sqlbase.FamilyID,
   897  ) (prettyKey string, prettyValue string, err error) {
   898  	table := rf.table
   899  
   900  	if rf.traceKV {
   901  		var buf strings.Builder
   902  		buf.WriteByte('/')
   903  		buf.WriteString(rf.table.desc.Name)
   904  		buf.WriteByte('/')
   905  		buf.WriteString(rf.table.index.Name)
   906  		for _, idx := range rf.table.allIndexColOrdinals {
   907  			buf.WriteByte('/')
   908  			if idx != -1 {
   909  				buf.WriteString(rf.getDatumAt(idx, rf.machine.rowIdx, rf.table.cols[idx].Type).String())
   910  			} else {
   911  				buf.WriteByte('?')
   912  			}
   913  		}
   914  		prettyKey = buf.String()
   915  	}
   916  
   917  	if len(table.neededColsList) == 0 {
   918  		// We don't need to decode any values.
   919  		if rf.traceKV {
   920  			prettyValue = tree.DNull.String()
   921  		}
   922  		return prettyKey, prettyValue, nil
   923  	}
   924  
   925  	val := rf.machine.nextKV.Value
   926  	if !table.isSecondaryIndex || table.index.EncodingType == sqlbase.PrimaryIndexEncoding {
   927  		// If familyID is 0, kv.Value contains values for composite key columns.
   928  		// These columns already have a table.row value assigned above, but that value
   929  		// (obtained from the key encoding) might not be correct (e.g. for decimals,
   930  		// it might not contain the right number of trailing 0s; for collated
   931  		// strings, it is one of potentially many strings with the same collation
   932  		// key).
   933  		//
   934  		// In these cases, the correct value will be present in family 0 and the
   935  		// table.row value gets overwritten.
   936  
   937  		switch val.GetTag() {
   938  		case roachpb.ValueType_TUPLE:
   939  			// In this case, we don't need to decode the column family ID, because
   940  			// the ValueType_TUPLE encoding includes the column id with every encoded
   941  			// column value.
   942  			tupleBytes, err := val.GetTuple()
   943  			if err != nil {
   944  				return "", "", err
   945  			}
   946  			prettyKey, prettyValue, err = rf.processValueTuple(ctx, table, tupleBytes, prettyKey)
   947  			if err != nil {
   948  				return "", "", err
   949  			}
   950  		default:
   951  			var family *sqlbase.ColumnFamilyDescriptor
   952  			family, err = table.desc.FindFamilyByID(familyID)
   953  			if err != nil {
   954  				return "", "", scrub.WrapError(scrub.IndexKeyDecodingError, err)
   955  			}
   956  
   957  			prettyKey, prettyValue, err = rf.processValueSingle(ctx, table, family, prettyKey)
   958  			if err != nil {
   959  				return "", "", err
   960  			}
   961  		}
   962  		if err != nil {
   963  			return "", "", scrub.WrapError(scrub.IndexValueDecodingError, err)
   964  		}
   965  	} else {
   966  		tag := val.GetTag()
   967  		var valueBytes []byte
   968  		switch tag {
   969  		case roachpb.ValueType_BYTES:
   970  			// If we have the ValueType_BYTES on a secondary index, then we know we
   971  			// are looking at column family 0. Column family 0 stores the extra primary
   972  			// key columns if they are present, so we decode them here.
   973  			valueBytes, err = val.GetBytes()
   974  			if err != nil {
   975  				return "", "", scrub.WrapError(scrub.IndexValueDecodingError, err)
   976  			}
   977  
   978  			if cHasExtraCols(table) {
   979  				// This is a unique secondary index; decode the extra
   980  				// column values from the value.
   981  				var err error
   982  				extraColOrds := table.extraValColOrdinals
   983  				if rf.traceKV {
   984  					extraColOrds = table.allExtraValColOrdinals
   985  				}
   986  				valueBytes, _, err = colencoding.DecodeKeyValsToCols(
   987  					&table.da,
   988  					rf.machine.colvecs,
   989  					rf.machine.rowIdx,
   990  					extraColOrds,
   991  					table.extraTypes,
   992  					nil,
   993  					&rf.machine.remainingValueColsByIdx,
   994  					valueBytes,
   995  				)
   996  				if err != nil {
   997  					return "", "", scrub.WrapError(scrub.SecondaryIndexKeyExtraValueDecodingError, err)
   998  				}
   999  				if rf.traceKV {
  1000  					var buf strings.Builder
  1001  					for j := range table.extraTypes {
  1002  						idx := table.allExtraValColOrdinals[j]
  1003  						buf.WriteByte('/')
  1004  						buf.WriteString(rf.getDatumAt(idx, rf.machine.rowIdx, rf.table.cols[idx].Type).String())
  1005  					}
  1006  					prettyValue = buf.String()
  1007  				}
  1008  			}
  1009  		case roachpb.ValueType_TUPLE:
  1010  			valueBytes, err = val.GetTuple()
  1011  			if err != nil {
  1012  				return "", "", scrub.WrapError(scrub.IndexValueDecodingError, err)
  1013  			}
  1014  		}
  1015  
  1016  		if len(valueBytes) > 0 {
  1017  			prettyKey, prettyValue, err = rf.processValueBytes(
  1018  				ctx, table, valueBytes, prettyKey,
  1019  			)
  1020  			if err != nil {
  1021  				return "", "", scrub.WrapError(scrub.IndexValueDecodingError, err)
  1022  			}
  1023  		}
  1024  	}
  1025  
  1026  	if rf.traceKV && prettyValue == "" {
  1027  		prettyValue = tree.DNull.String()
  1028  	}
  1029  
  1030  	return prettyKey, prettyValue, nil
  1031  }
  1032  
  1033  // processValueSingle processes the given value (of column
  1034  // family.DefaultColumnID), setting values in table.row accordingly. The key is
  1035  // only used for logging.
  1036  func (rf *cFetcher) processValueSingle(
  1037  	ctx context.Context,
  1038  	table *cTableInfo,
  1039  	family *sqlbase.ColumnFamilyDescriptor,
  1040  	prettyKeyPrefix string,
  1041  ) (prettyKey string, prettyValue string, err error) {
  1042  	prettyKey = prettyKeyPrefix
  1043  	// If this is the row sentinel (in the legacy pre-family format),
  1044  	// a value is not expected, so we're done.
  1045  	if family.ID == 0 {
  1046  		return "", "", nil
  1047  	}
  1048  
  1049  	colID := family.DefaultColumnID
  1050  	if colID == 0 {
  1051  		return "", "", errors.Errorf("single entry value with no default column id")
  1052  	}
  1053  
  1054  	var needDecode bool
  1055  	if rf.traceKV {
  1056  		needDecode = true
  1057  	} else {
  1058  		for i := range table.neededColsList {
  1059  			if table.neededColsList[i] == int(colID) {
  1060  				needDecode = true
  1061  				break
  1062  			}
  1063  		}
  1064  	}
  1065  
  1066  	if needDecode {
  1067  		if idx, ok := table.colIdxMap.get(colID); ok {
  1068  			if rf.traceKV {
  1069  				prettyKey = fmt.Sprintf("%s/%s", prettyKey, table.desc.Columns[idx].Name)
  1070  			}
  1071  			val := rf.machine.nextKV.Value
  1072  			if len(val.RawBytes) == 0 {
  1073  				return prettyKey, "", nil
  1074  			}
  1075  			typ := table.cols[idx].Type
  1076  			err := colencoding.UnmarshalColumnValueToCol(
  1077  				&table.da, rf.machine.colvecs[idx], rf.machine.rowIdx, typ, val,
  1078  			)
  1079  			if err != nil {
  1080  				return "", "", err
  1081  			}
  1082  			rf.machine.remainingValueColsByIdx.Remove(idx)
  1083  
  1084  			if rf.traceKV {
  1085  				prettyValue = rf.getDatumAt(idx, rf.machine.rowIdx, typ).String()
  1086  			}
  1087  			if row.DebugRowFetch {
  1088  				log.Infof(ctx, "Scan %s -> %v", rf.machine.nextKV.Key, "?")
  1089  			}
  1090  			return prettyKey, prettyValue, nil
  1091  		}
  1092  	}
  1093  
  1094  	// No need to unmarshal the column value. Either the column was part of
  1095  	// the index key or it isn't needed.
  1096  	if row.DebugRowFetch {
  1097  		log.Infof(ctx, "Scan %s -> [%d] (skipped)", rf.machine.nextKV.Key, colID)
  1098  	}
  1099  	return "", "", nil
  1100  }
  1101  
  1102  func (rf *cFetcher) processValueBytes(
  1103  	ctx context.Context, table *cTableInfo, valueBytes []byte, prettyKeyPrefix string,
  1104  ) (prettyKey string, prettyValue string, err error) {
  1105  	prettyKey = prettyKeyPrefix
  1106  	if rf.traceKV {
  1107  		if rf.machine.prettyValueBuf == nil {
  1108  			rf.machine.prettyValueBuf = &bytes.Buffer{}
  1109  		}
  1110  		rf.machine.prettyValueBuf.Reset()
  1111  	}
  1112  
  1113  	// Composite columns that are key encoded in the value (like the pk columns
  1114  	// in a unique secondary index) have gotten removed from the set of
  1115  	// remaining value columns. So, we need to add them back in here in case
  1116  	// they have full value encoded composite values.
  1117  	rf.table.compositeIndexColOrdinals.ForEach(func(i int) {
  1118  		rf.machine.remainingValueColsByIdx.Add(i)
  1119  	})
  1120  
  1121  	var (
  1122  		colIDDiff          uint32
  1123  		lastColID          sqlbase.ColumnID
  1124  		dataOffset         int
  1125  		typ                encoding.Type
  1126  		lastColIDIndex     int
  1127  		lastNeededColIndex int
  1128  	)
  1129  	for len(valueBytes) > 0 && rf.machine.remainingValueColsByIdx.Len() > 0 {
  1130  		_, dataOffset, colIDDiff, typ, err = encoding.DecodeValueTag(valueBytes)
  1131  		if err != nil {
  1132  			return "", "", err
  1133  		}
  1134  		colID := lastColID + sqlbase.ColumnID(colIDDiff)
  1135  		lastColID = colID
  1136  		var colIsNeeded bool
  1137  		for ; lastNeededColIndex < len(table.neededColsList); lastNeededColIndex++ {
  1138  			nextNeededColID := table.neededColsList[lastNeededColIndex]
  1139  			if nextNeededColID == int(colID) {
  1140  				colIsNeeded = true
  1141  				break
  1142  			} else if nextNeededColID > int(colID) {
  1143  				break
  1144  			}
  1145  		}
  1146  		if !colIsNeeded {
  1147  			// This column wasn't requested, so read its length and skip it.
  1148  			len, err := encoding.PeekValueLengthWithOffsetsAndType(valueBytes, dataOffset, typ)
  1149  			if err != nil {
  1150  				return "", "", err
  1151  			}
  1152  			valueBytes = valueBytes[len:]
  1153  			if row.DebugRowFetch {
  1154  				log.Infof(ctx, "Scan %s -> [%d] (skipped)", rf.machine.nextKV.Key, colID)
  1155  			}
  1156  			continue
  1157  		}
  1158  		idx := -1
  1159  		for ; lastColIDIndex < len(table.colIdxMap.vals); lastColIDIndex++ {
  1160  			if table.colIdxMap.vals[lastColIDIndex] == colID {
  1161  				idx = table.colIdxMap.ords[lastColIDIndex]
  1162  				break
  1163  			}
  1164  		}
  1165  		if idx == -1 {
  1166  			return "", "", errors.Errorf("missing colid %d", colID)
  1167  		}
  1168  
  1169  		if rf.traceKV {
  1170  			prettyKey = fmt.Sprintf("%s/%s", prettyKey, table.desc.Columns[idx].Name)
  1171  		}
  1172  
  1173  		vec := rf.machine.colvecs[idx]
  1174  
  1175  		valTyp := table.cols[idx].Type
  1176  		valueBytes, err = colencoding.DecodeTableValueToCol(
  1177  			&table.da, vec, rf.machine.rowIdx, typ, dataOffset, valTyp, valueBytes,
  1178  		)
  1179  		if err != nil {
  1180  			return "", "", err
  1181  		}
  1182  		rf.machine.remainingValueColsByIdx.Remove(idx)
  1183  		if rf.traceKV {
  1184  			dVal := rf.getDatumAt(idx, rf.machine.rowIdx, valTyp)
  1185  			if _, err := fmt.Fprintf(rf.machine.prettyValueBuf, "/%v", dVal.String()); err != nil {
  1186  				return "", "", err
  1187  			}
  1188  		}
  1189  	}
  1190  	if rf.traceKV {
  1191  		prettyValue = rf.machine.prettyValueBuf.String()
  1192  	}
  1193  	return prettyKey, prettyValue, nil
  1194  }
  1195  
  1196  // processValueTuple processes the given values (of columns family.ColumnIDs),
  1197  // setting values in the rf.row accordingly. The key is only used for logging.
  1198  func (rf *cFetcher) processValueTuple(
  1199  	ctx context.Context, table *cTableInfo, tupleBytes []byte, prettyKeyPrefix string,
  1200  ) (prettyKey string, prettyValue string, err error) {
  1201  	return rf.processValueBytes(ctx, table, tupleBytes, prettyKeyPrefix)
  1202  }
  1203  
  1204  func (rf *cFetcher) fillNulls() error {
  1205  	table := rf.table
  1206  	if rf.machine.remainingValueColsByIdx.Empty() {
  1207  		return nil
  1208  	}
  1209  	for i, ok := rf.machine.remainingValueColsByIdx.Next(0); ok; i, ok = rf.machine.remainingValueColsByIdx.Next(i + 1) {
  1210  		// Composite index columns may have a key but no value. Ignore them so we
  1211  		// don't incorrectly mark them as null.
  1212  		if table.compositeIndexColOrdinals.Contains(i) {
  1213  			continue
  1214  		}
  1215  		if !table.cols[i].Nullable {
  1216  			var indexColValues []string
  1217  			for _, idx := range table.indexColOrdinals {
  1218  				if idx != -1 {
  1219  					indexColValues = append(indexColValues, rf.getDatumAt(idx, rf.machine.rowIdx, rf.table.cols[idx].Type).String())
  1220  				} else {
  1221  					indexColValues = append(indexColValues, "?")
  1222  				}
  1223  				return scrub.WrapError(scrub.UnexpectedNullValueError, errors.Errorf(
  1224  					"Non-nullable column \"%s:%s\" with no value! Index scanned was %q with the index key columns (%s) and the values (%s)",
  1225  					table.desc.Name, table.cols[i].Name, table.index.Name,
  1226  					strings.Join(table.index.ColumnNames, ","), strings.Join(indexColValues, ",")))
  1227  			}
  1228  		}
  1229  		rf.machine.colvecs[i].Nulls().SetNull(rf.machine.rowIdx)
  1230  	}
  1231  	return nil
  1232  }
  1233  
  1234  // GetRangesInfo returns information about the ranges where the rows came from.
  1235  // The RangeInfo's are deduped and not ordered.
  1236  func (rf *cFetcher) GetRangesInfo() []roachpb.RangeInfo {
  1237  	f := rf.fetcher
  1238  	if f == nil {
  1239  		// Not yet initialized.
  1240  		return nil
  1241  	}
  1242  	return rf.fetcher.GetRangesInfo()
  1243  }
  1244  
  1245  // getCurrentColumnFamilyID returns the column family id of the key in
  1246  // rf.machine.nextKV.Key.
  1247  func (rf *cFetcher) getCurrentColumnFamilyID() (sqlbase.FamilyID, error) {
  1248  	// If the table only has 1 column family, and its ID is 0, we know that the
  1249  	// key has to be the 0th column family.
  1250  	if rf.table.maxColumnFamilyID == 0 {
  1251  		return 0, nil
  1252  	}
  1253  	// The column family is encoded in the final bytes of the key. The last
  1254  	// byte of the key is the length of the column family id encoding
  1255  	// itself. See encoding.md for more details, and see MakeFamilyKey for
  1256  	// the routine that performs this encoding.
  1257  	var id uint64
  1258  	_, id, err := encoding.DecodeUvarintAscending(rf.machine.nextKV.Key[len(rf.machine.lastRowPrefix):])
  1259  	if err != nil {
  1260  		return 0, scrub.WrapError(scrub.IndexKeyDecodingError, err)
  1261  	}
  1262  	return sqlbase.FamilyID(id), nil
  1263  }