github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/sqlbase/index_encoding.go (about)

     1  // Copyright 2018 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package sqlbase
    12  
    13  import (
    14  	"context"
    15  	"fmt"
    16  	"sort"
    17  
    18  	"github.com/cockroachdb/cockroach/pkg/geo/geoindex"
    19  	"github.com/cockroachdb/cockroach/pkg/keys"
    20  	"github.com/cockroachdb/cockroach/pkg/kv"
    21  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    22  	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
    23  	"github.com/cockroachdb/cockroach/pkg/sql/types"
    24  	"github.com/cockroachdb/cockroach/pkg/util"
    25  	"github.com/cockroachdb/cockroach/pkg/util/encoding"
    26  	"github.com/cockroachdb/cockroach/pkg/util/json"
    27  	"github.com/cockroachdb/cockroach/pkg/util/unique"
    28  	"github.com/cockroachdb/errors"
    29  )
    30  
    31  // This file contains facilities to encode primary and secondary
    32  // indexes on SQL tables.
    33  
    34  // MakeIndexKeyPrefix returns the key prefix used for the index's data. If you
    35  // need the corresponding Span, prefer desc.IndexSpan(indexID) or
    36  // desc.PrimaryIndexSpan().
    37  func MakeIndexKeyPrefix(codec keys.SQLCodec, desc *TableDescriptor, indexID IndexID) []byte {
    38  	if i, err := desc.FindIndexByID(indexID); err == nil && len(i.Interleave.Ancestors) > 0 {
    39  		ancestor := &i.Interleave.Ancestors[0]
    40  		return codec.IndexPrefix(uint32(ancestor.TableID), uint32(ancestor.IndexID))
    41  	}
    42  	return codec.IndexPrefix(uint32(desc.ID), uint32(indexID))
    43  }
    44  
    45  // EncodeIndexKey creates a key by concatenating keyPrefix with the
    46  // encodings of the columns in the index, and returns the key and
    47  // whether any of the encoded values were NULLs.
    48  //
    49  // If a table or index is interleaved, `encoding.interleavedSentinel`
    50  // is used in place of the family id (a varint) to signal the next
    51  // component of the key.  An example of one level of interleaving (a
    52  // parent):
    53  // /<parent_table_id>/<parent_index_id>/<field_1>/<field_2>/NullDesc/<table_id>/<index_id>/<field_3>/<family>
    54  //
    55  // Note that ExtraColumnIDs are not encoded, so the result isn't always a
    56  // full index key.
    57  func EncodeIndexKey(
    58  	tableDesc *TableDescriptor,
    59  	index *IndexDescriptor,
    60  	colMap map[ColumnID]int,
    61  	values []tree.Datum,
    62  	keyPrefix []byte,
    63  ) (key []byte, containsNull bool, err error) {
    64  	return EncodePartialIndexKey(
    65  		tableDesc,
    66  		index,
    67  		len(index.ColumnIDs), /* encode all columns */
    68  		colMap,
    69  		values,
    70  		keyPrefix,
    71  	)
    72  }
    73  
    74  // EncodePartialIndexSpan creates the minimal key span for the key specified by the
    75  // given table, index, and values, with the same method as
    76  // EncodePartialIndexKey.
    77  func EncodePartialIndexSpan(
    78  	tableDesc *TableDescriptor,
    79  	index *IndexDescriptor,
    80  	numCols int,
    81  	colMap map[ColumnID]int,
    82  	values []tree.Datum,
    83  	keyPrefix []byte,
    84  ) (span roachpb.Span, containsNull bool, err error) {
    85  	var key roachpb.Key
    86  	var endKey roachpb.Key
    87  	key, containsNull, err = EncodePartialIndexKey(tableDesc, index, numCols, colMap, values, keyPrefix)
    88  	if err != nil {
    89  		return span, containsNull, err
    90  	}
    91  	if numCols == len(index.ColumnIDs) {
    92  		// If all values in the input index were specified, append an interleave
    93  		// marker instead of PrefixEnding the key, to avoid including any child
    94  		// interleaves of the input key.
    95  		endKey = encoding.EncodeInterleavedSentinel(key)
    96  	} else {
    97  		endKey = key.PrefixEnd()
    98  	}
    99  	return roachpb.Span{Key: key, EndKey: endKey}, containsNull, nil
   100  }
   101  
   102  // EncodePartialIndexKey encodes a partial index key; only the first numCols of
   103  // the index key columns are encoded. The index key columns are
   104  //  - index.ColumnIDs for unique indexes, and
   105  //  - append(index.ColumnIDs, index.ExtraColumnIDs) for non-unique indexes.
   106  func EncodePartialIndexKey(
   107  	tableDesc *TableDescriptor,
   108  	index *IndexDescriptor,
   109  	numCols int,
   110  	colMap map[ColumnID]int,
   111  	values []tree.Datum,
   112  	keyPrefix []byte,
   113  ) (key []byte, containsNull bool, err error) {
   114  	var colIDs, extraColIDs []ColumnID
   115  	if numCols <= len(index.ColumnIDs) {
   116  		colIDs = index.ColumnIDs[:numCols]
   117  	} else {
   118  		if index.Unique || numCols > len(index.ColumnIDs)+len(index.ExtraColumnIDs) {
   119  			return nil, false, errors.Errorf("encoding too many columns (%d)", numCols)
   120  		}
   121  		colIDs = index.ColumnIDs
   122  		extraColIDs = index.ExtraColumnIDs[:numCols-len(index.ColumnIDs)]
   123  	}
   124  
   125  	// We know we will append to the key which will cause the capacity to grow so
   126  	// make it bigger from the get-go.
   127  	// Add twice the key prefix as an initial guess.
   128  	// Add 3 bytes for every ancestor: table,index id + interleave sentinel.
   129  	// Add 2 bytes for every column value. An underestimate for all but low integers.
   130  	key = make([]byte, len(keyPrefix), 2*len(keyPrefix)+3*len(index.Interleave.Ancestors)+2*len(values))
   131  	copy(key, keyPrefix)
   132  
   133  	dirs := directions(index.ColumnDirections)
   134  
   135  	if len(index.Interleave.Ancestors) > 0 {
   136  		for i, ancestor := range index.Interleave.Ancestors {
   137  			// The first ancestor is assumed to already be encoded in keyPrefix.
   138  			if i != 0 {
   139  				key = EncodePartialTableIDIndexID(key, ancestor.TableID, ancestor.IndexID)
   140  			}
   141  
   142  			partial := false
   143  			length := int(ancestor.SharedPrefixLen)
   144  			if length > len(colIDs) {
   145  				length = len(colIDs)
   146  				partial = true
   147  			}
   148  			var n bool
   149  			key, n, err = EncodeColumns(colIDs[:length], dirs[:length], colMap, values, key)
   150  			if err != nil {
   151  				return nil, false, err
   152  			}
   153  			containsNull = containsNull || n
   154  			if partial {
   155  				// Early stop. Note that if we had exactly SharedPrefixLen columns
   156  				// remaining, we want to append the next tableID/indexID pair because
   157  				// that results in a more specific key.
   158  				return key, containsNull, nil
   159  			}
   160  			colIDs, dirs = colIDs[length:], dirs[length:]
   161  			// Each ancestor is separated by an interleaved
   162  			// sentinel (0xfe).
   163  			key = encoding.EncodeInterleavedSentinel(key)
   164  		}
   165  
   166  		key = EncodePartialTableIDIndexID(key, tableDesc.ID, index.ID)
   167  	}
   168  
   169  	var n bool
   170  	key, n, err = EncodeColumns(colIDs, dirs, colMap, values, key)
   171  	if err != nil {
   172  		return nil, false, err
   173  	}
   174  	containsNull = containsNull || n
   175  
   176  	key, n, err = EncodeColumns(extraColIDs, nil /* directions */, colMap, values, key)
   177  	if err != nil {
   178  		return nil, false, err
   179  	}
   180  	containsNull = containsNull || n
   181  	return key, containsNull, nil
   182  }
   183  
   184  type directions []IndexDescriptor_Direction
   185  
   186  func (d directions) get(i int) (encoding.Direction, error) {
   187  	if i < len(d) {
   188  		return d[i].ToEncodingDirection()
   189  	}
   190  	return encoding.Ascending, nil
   191  }
   192  
   193  // MakeSpanFromEncDatums creates a minimal index key span on the input
   194  // values. A minimal index key span is a span that includes the fewest possible
   195  // keys after the start key generated by the input values.
   196  //
   197  // The start key is generated by concatenating keyPrefix with the encodings of
   198  // the given EncDatum values. The values, types, and dirs parameters should be
   199  // specified in the same order as the index key columns and may be a prefix.
   200  //
   201  // If a table or index is interleaved, `encoding.interleavedSentinel` is used
   202  // in place of the family id (a varint) to signal the next component of the
   203  // key.  An example of one level of interleaving (a parent):
   204  // /<parent_table_id>/<parent_index_id>/<field_1>/<field_2>/NullDesc/<table_id>/<index_id>/<field_3>/<family>
   205  func MakeSpanFromEncDatums(
   206  	values EncDatumRow,
   207  	types []*types.T,
   208  	dirs []IndexDescriptor_Direction,
   209  	tableDesc *TableDescriptor,
   210  	index *IndexDescriptor,
   211  	alloc *DatumAlloc,
   212  	keyPrefix []byte,
   213  ) (_ roachpb.Span, containsNull bool, _ error) {
   214  	startKey, complete, containsNull, err := makeKeyFromEncDatums(values, types, dirs, tableDesc, index, alloc, keyPrefix)
   215  	if err != nil {
   216  		return roachpb.Span{}, false, err
   217  	}
   218  
   219  	var endKey roachpb.Key
   220  	if complete && index.Unique {
   221  		// If all values in the input index were specified and the input index is
   222  		// unique, indicating that it might have child interleaves, append an
   223  		// interleave marker instead of PrefixEnding the key, to avoid including
   224  		// any child interleaves of the input key.
   225  		//
   226  		// Note that currently only primary indexes can contain interleaved
   227  		// tables or indexes, so this condition is broader than necessary in
   228  		// case one day we permit interleaving into arbitrary unique indexes.
   229  		// Note also that we could precisely only emit an interleaved sentinel
   230  		// if this index does in fact have interleaves - we choose not to do
   231  		// that to make testing simpler and traces and spans more consistent.
   232  		endKey = encoding.EncodeInterleavedSentinel(startKey)
   233  	} else {
   234  		endKey = startKey.PrefixEnd()
   235  	}
   236  	return roachpb.Span{Key: startKey, EndKey: endKey}, containsNull, nil
   237  }
   238  
   239  // NeededColumnFamilyIDs returns the minimal set of column families required to
   240  // retrieve neededCols for the specified table and index. The returned FamilyIDs
   241  // are in sorted order.
   242  func NeededColumnFamilyIDs(
   243  	neededCols util.FastIntSet, table *TableDescriptor, index *IndexDescriptor,
   244  ) []FamilyID {
   245  	if len(table.Families) == 1 {
   246  		return []FamilyID{table.Families[0].ID}
   247  	}
   248  
   249  	// Build some necessary data structures for column metadata.
   250  	columns := table.ColumnsWithMutations(true)
   251  	colIdxMap := table.ColumnIdxMapWithMutations(true)
   252  	var indexedCols util.FastIntSet
   253  	var compositeCols util.FastIntSet
   254  	var extraCols util.FastIntSet
   255  	for _, columnID := range index.ColumnIDs {
   256  		columnOrdinal := colIdxMap[columnID]
   257  		indexedCols.Add(columnOrdinal)
   258  	}
   259  	for _, columnID := range index.CompositeColumnIDs {
   260  		columnOrdinal := colIdxMap[columnID]
   261  		compositeCols.Add(columnOrdinal)
   262  	}
   263  	for _, columnID := range index.ExtraColumnIDs {
   264  		columnOrdinal := colIdxMap[columnID]
   265  		extraCols.Add(columnOrdinal)
   266  	}
   267  
   268  	// The column family with ID 0 is special because it always has a KV entry.
   269  	// Other column families will omit a value if all their columns are null, so
   270  	// we may need to retrieve family 0 to use as a sentinel for distinguishing
   271  	// between null values and the absence of a row. Also, secondary indexes store
   272  	// values here for composite and "extra" columns. ("Extra" means primary key
   273  	// columns which are not indexed.)
   274  	var family0 *ColumnFamilyDescriptor
   275  	hasSecondaryEncoding := index.GetEncodingType(table.PrimaryIndex.ID) == SecondaryIndexEncoding
   276  
   277  	// First iterate over the needed columns and look for a few special cases:
   278  	// columns which can be decoded from the key and columns whose value is stored
   279  	// in family 0.
   280  	family0Needed := false
   281  	nc := neededCols.Copy()
   282  	neededCols.ForEach(func(columnOrdinal int) {
   283  		if indexedCols.Contains(columnOrdinal) && !compositeCols.Contains(columnOrdinal) {
   284  			// We can decode this column from the index key, so no particular family
   285  			// is needed.
   286  			nc.Remove(columnOrdinal)
   287  		}
   288  		if hasSecondaryEncoding && (compositeCols.Contains(columnOrdinal) ||
   289  			extraCols.Contains(columnOrdinal)) {
   290  			// Secondary indexes store composite and "extra" column values in family
   291  			// 0.
   292  			family0Needed = true
   293  			nc.Remove(columnOrdinal)
   294  		}
   295  	})
   296  
   297  	// Iterate over the column families to find which ones contain needed columns.
   298  	// We also keep track of whether all of the needed families' columns are
   299  	// nullable, since this means we need column family 0 as a sentinel, even if
   300  	// none of its columns are needed.
   301  	var neededFamilyIDs []FamilyID
   302  	allFamiliesNullable := true
   303  	for i := range table.Families {
   304  		family := &table.Families[i]
   305  		needed := false
   306  		nullable := true
   307  		if family.ID == 0 {
   308  			// Set column family 0 aside in case we need it as a sentinel.
   309  			family0 = family
   310  			if family0Needed {
   311  				needed = true
   312  			}
   313  			nullable = false
   314  		}
   315  		for _, columnID := range family.ColumnIDs {
   316  			if needed && !nullable {
   317  				// Nothing left to check.
   318  				break
   319  			}
   320  			columnOrdinal := colIdxMap[columnID]
   321  			if nc.Contains(columnOrdinal) {
   322  				needed = true
   323  			}
   324  			if !columns[columnOrdinal].Nullable && (!indexedCols.Contains(columnOrdinal) ||
   325  				compositeCols.Contains(columnOrdinal) && !hasSecondaryEncoding) {
   326  				// The column is non-nullable and cannot be decoded from a different
   327  				// family, so this column family must have a KV entry for every row.
   328  				nullable = false
   329  			}
   330  		}
   331  		if needed {
   332  			neededFamilyIDs = append(neededFamilyIDs, family.ID)
   333  			if !nullable {
   334  				allFamiliesNullable = false
   335  			}
   336  		}
   337  	}
   338  	if family0 == nil {
   339  		panic("column family 0 not found")
   340  	}
   341  
   342  	// If all the needed families are nullable, we also need family 0 as a
   343  	// sentinel. Note that this is only the case if family 0 was not already added
   344  	// to neededFamilyIDs.
   345  	if allFamiliesNullable {
   346  		// Prepend family 0.
   347  		neededFamilyIDs = append(neededFamilyIDs, 0)
   348  		copy(neededFamilyIDs[1:], neededFamilyIDs)
   349  		neededFamilyIDs[0] = family0.ID
   350  	}
   351  
   352  	return neededFamilyIDs
   353  }
   354  
   355  // SplitSpanIntoSeparateFamilies splits a span representing a single row point
   356  // lookup into separate disjoint spans that request only the particular column
   357  // families from neededFamilies instead of requesting all the families. It is up
   358  // to the client to ensure the requested span represents a single row lookup and
   359  // that the span splitting is appropriate (see CanSplitSpanIntoSeparateFamilies).
   360  //
   361  // The function accepts a slice of spans to append to.
   362  func SplitSpanIntoSeparateFamilies(
   363  	appendTo roachpb.Spans, span roachpb.Span, neededFamilies []FamilyID,
   364  ) roachpb.Spans {
   365  	span.Key = span.Key[:len(span.Key):len(span.Key)] // avoid mutation and aliasing
   366  	for i, familyID := range neededFamilies {
   367  		var famSpan roachpb.Span
   368  		famSpan.Key = keys.MakeFamilyKey(span.Key, uint32(familyID))
   369  		famSpan.EndKey = famSpan.Key.PrefixEnd()
   370  		if i > 0 && familyID == neededFamilies[i-1]+1 {
   371  			// This column family is adjacent to the previous one. We can merge
   372  			// the two spans into one.
   373  			appendTo[len(appendTo)-1].EndKey = famSpan.EndKey
   374  		} else {
   375  			appendTo = append(appendTo, famSpan)
   376  		}
   377  	}
   378  	return appendTo
   379  }
   380  
   381  // makeKeyFromEncDatums creates an index key by concatenating keyPrefix with the
   382  // encodings of the given EncDatum values. The values, types, and dirs
   383  // parameters should be specified in the same order as the index key columns and
   384  // may be a prefix. The complete return value is true if the resultant key
   385  // fully constrains the index.
   386  //
   387  // If a table or index is interleaved, `encoding.interleavedSentinel` is used
   388  // in place of the family id (a varint) to signal the next component of the
   389  // key.  An example of one level of interleaving (a parent):
   390  // /<parent_table_id>/<parent_index_id>/<field_1>/<field_2>/NullDesc/<table_id>/<index_id>/<field_3>/<family>
   391  func makeKeyFromEncDatums(
   392  	values EncDatumRow,
   393  	types []*types.T,
   394  	dirs []IndexDescriptor_Direction,
   395  	tableDesc *TableDescriptor,
   396  	index *IndexDescriptor,
   397  	alloc *DatumAlloc,
   398  	keyPrefix []byte,
   399  ) (_ roachpb.Key, complete bool, containsNull bool, _ error) {
   400  	// Values may be a prefix of the index columns.
   401  	if len(values) > len(dirs) {
   402  		return nil, false, false, errors.Errorf("%d values, %d directions", len(values), len(dirs))
   403  	}
   404  	if len(values) != len(types) {
   405  		return nil, false, false, errors.Errorf("%d values, %d types", len(values), len(types))
   406  	}
   407  	// We know we will append to the key which will cause the capacity to grow
   408  	// so make it bigger from the get-go.
   409  	key := make(roachpb.Key, len(keyPrefix), len(keyPrefix)*2)
   410  	copy(key, keyPrefix)
   411  
   412  	if len(index.Interleave.Ancestors) > 0 {
   413  		for i, ancestor := range index.Interleave.Ancestors {
   414  			// The first ancestor is assumed to already be encoded in keyPrefix.
   415  			if i != 0 {
   416  				key = EncodePartialTableIDIndexID(key, ancestor.TableID, ancestor.IndexID)
   417  			}
   418  
   419  			partial := false
   420  			length := int(ancestor.SharedPrefixLen)
   421  			if length > len(types) {
   422  				length = len(types)
   423  				partial = true
   424  			}
   425  			var (
   426  				err error
   427  				n   bool
   428  			)
   429  			key, n, err = appendEncDatumsToKey(key, types[:length], values[:length], dirs[:length], alloc)
   430  			if err != nil {
   431  				return nil, false, false, err
   432  			}
   433  			containsNull = containsNull || n
   434  			if partial {
   435  				// Early stop - the number of desired columns was fewer than the number
   436  				// left in the current interleave.
   437  				return key, false, false, nil
   438  			}
   439  			types, values, dirs = types[length:], values[length:], dirs[length:]
   440  
   441  			// Each ancestor is separated by an interleaved
   442  			// sentinel (0xfe).
   443  			key = encoding.EncodeInterleavedSentinel(key)
   444  		}
   445  
   446  		key = EncodePartialTableIDIndexID(key, tableDesc.ID, index.ID)
   447  	}
   448  	var (
   449  		err error
   450  		n   bool
   451  	)
   452  	key, n, err = appendEncDatumsToKey(key, types, values, dirs, alloc)
   453  	if err != nil {
   454  		return key, false, false, err
   455  	}
   456  	containsNull = containsNull || n
   457  	return key, len(types) == len(index.ColumnIDs), containsNull, err
   458  }
   459  
   460  // findColumnValue returns the value corresponding to the column. If
   461  // the column isn't present return a NULL value.
   462  func findColumnValue(column ColumnID, colMap map[ColumnID]int, values []tree.Datum) tree.Datum {
   463  	if i, ok := colMap[column]; ok {
   464  		// TODO(pmattis): Need to convert the values[i] value to the type
   465  		// expected by the column.
   466  		return values[i]
   467  	}
   468  	return tree.DNull
   469  }
   470  
   471  // appendEncDatumsToKey concatenates the encoded representations of
   472  // the datums at the end of the given roachpb.Key.
   473  func appendEncDatumsToKey(
   474  	key roachpb.Key,
   475  	types []*types.T,
   476  	values EncDatumRow,
   477  	dirs []IndexDescriptor_Direction,
   478  	alloc *DatumAlloc,
   479  ) (_ roachpb.Key, containsNull bool, _ error) {
   480  	for i, val := range values {
   481  		encoding := DatumEncoding_ASCENDING_KEY
   482  		if dirs[i] == IndexDescriptor_DESC {
   483  			encoding = DatumEncoding_DESCENDING_KEY
   484  		}
   485  		if val.IsNull() {
   486  			containsNull = true
   487  		}
   488  		var err error
   489  		key, err = val.Encode(types[i], alloc, encoding, key)
   490  		if err != nil {
   491  			return nil, false, err
   492  		}
   493  	}
   494  	return key, containsNull, nil
   495  }
   496  
   497  // EncodePartialTableIDIndexID encodes a table id followed by an index id to an
   498  // existing key. The key must already contain a tenant id.
   499  func EncodePartialTableIDIndexID(key []byte, tableID ID, indexID IndexID) []byte {
   500  	return keys.MakeTableIDIndexID(key, uint32(tableID), uint32(indexID))
   501  }
   502  
   503  // DecodePartialTableIDIndexID decodes a table id followed by an index id. The
   504  // input key must already have its tenant id removed.
   505  func DecodePartialTableIDIndexID(key []byte) ([]byte, ID, IndexID, error) {
   506  	key, tableID, indexID, err := keys.DecodeTableIDIndexID(key)
   507  	return key, ID(tableID), IndexID(indexID), err
   508  }
   509  
   510  // DecodeIndexKeyPrefix decodes the prefix of an index key and returns the
   511  // index id and a slice for the rest of the key.
   512  //
   513  // Don't use this function in the scan "hot path".
   514  func DecodeIndexKeyPrefix(
   515  	codec keys.SQLCodec, desc *TableDescriptor, key []byte,
   516  ) (indexID IndexID, remaining []byte, err error) {
   517  	key, err = codec.StripTenantPrefix(key)
   518  	if err != nil {
   519  		return 0, nil, err
   520  	}
   521  
   522  	// TODO(dan): This whole operation is n^2 because of the interleaves
   523  	// bookkeeping. We could improve it to n with a prefix tree of components.
   524  
   525  	interleaves := append([]IndexDescriptor{desc.PrimaryIndex}, desc.Indexes...)
   526  
   527  	for component := 0; ; component++ {
   528  		var tableID ID
   529  		key, tableID, indexID, err = DecodePartialTableIDIndexID(key)
   530  		if err != nil {
   531  			return 0, nil, err
   532  		}
   533  		if tableID == desc.ID {
   534  			// Once desc's table id has been decoded, there can be no more
   535  			// interleaves.
   536  			break
   537  		}
   538  
   539  		for i := len(interleaves) - 1; i >= 0; i-- {
   540  			if len(interleaves[i].Interleave.Ancestors) <= component ||
   541  				interleaves[i].Interleave.Ancestors[component].TableID != tableID ||
   542  				interleaves[i].Interleave.Ancestors[component].IndexID != indexID {
   543  
   544  				// This component, and thus this interleave, doesn't match what was
   545  				// decoded, remove it.
   546  				copy(interleaves[i:], interleaves[i+1:])
   547  				interleaves = interleaves[:len(interleaves)-1]
   548  			}
   549  		}
   550  		// The decoded key doesn't many any known interleaves
   551  		if len(interleaves) == 0 {
   552  			return 0, nil, errors.Errorf("no known interleaves for key")
   553  		}
   554  
   555  		// Anything left has the same SharedPrefixLen at index `component`, so just
   556  		// use the first one.
   557  		for i := uint32(0); i < interleaves[0].Interleave.Ancestors[component].SharedPrefixLen; i++ {
   558  			l, err := encoding.PeekLength(key)
   559  			if err != nil {
   560  				return 0, nil, err
   561  			}
   562  			key = key[l:]
   563  		}
   564  
   565  		// Consume the interleaved sentinel.
   566  		var ok bool
   567  		key, ok = encoding.DecodeIfInterleavedSentinel(key)
   568  		if !ok {
   569  			return 0, nil, errors.Errorf("invalid interleave key")
   570  		}
   571  	}
   572  
   573  	return indexID, key, err
   574  }
   575  
   576  // DecodeIndexKey decodes the values that are a part of the specified index
   577  // key (setting vals).
   578  //
   579  // The remaining bytes in the index key are returned which will either be an
   580  // encoded column ID for the primary key index, the primary key suffix for
   581  // non-unique secondary indexes or unique secondary indexes containing NULL or
   582  // empty. If the given descriptor does not match the key, false is returned with
   583  // no error.
   584  func DecodeIndexKey(
   585  	codec keys.SQLCodec,
   586  	desc *TableDescriptor,
   587  	index *IndexDescriptor,
   588  	types []*types.T,
   589  	vals []EncDatum,
   590  	colDirs []IndexDescriptor_Direction,
   591  	key []byte,
   592  ) (remainingKey []byte, matches bool, foundNull bool, _ error) {
   593  	key, err := codec.StripTenantPrefix(key)
   594  	if err != nil {
   595  		return nil, false, false, err
   596  	}
   597  	key, _, _, err = DecodePartialTableIDIndexID(key)
   598  	if err != nil {
   599  		return nil, false, false, err
   600  	}
   601  	return DecodeIndexKeyWithoutTableIDIndexIDPrefix(desc, index, types, vals, colDirs, key)
   602  }
   603  
   604  // DecodeIndexKeyWithoutTableIDIndexIDPrefix is the same as DecodeIndexKey,
   605  // except it expects its index key is missing in its tenant id and first table
   606  // id / index id key prefix.
   607  func DecodeIndexKeyWithoutTableIDIndexIDPrefix(
   608  	desc *TableDescriptor,
   609  	index *IndexDescriptor,
   610  	types []*types.T,
   611  	vals []EncDatum,
   612  	colDirs []IndexDescriptor_Direction,
   613  	key []byte,
   614  ) (remainingKey []byte, matches bool, foundNull bool, _ error) {
   615  	var decodedTableID ID
   616  	var decodedIndexID IndexID
   617  	var err error
   618  
   619  	if len(index.Interleave.Ancestors) > 0 {
   620  		for i, ancestor := range index.Interleave.Ancestors {
   621  			// Our input key had its first table id / index id chopped off, so
   622  			// don't try to decode those for the first ancestor.
   623  			if i != 0 {
   624  				key, decodedTableID, decodedIndexID, err = DecodePartialTableIDIndexID(key)
   625  				if err != nil {
   626  					return nil, false, false, err
   627  				}
   628  				if decodedTableID != ancestor.TableID || decodedIndexID != ancestor.IndexID {
   629  					return nil, false, false, nil
   630  				}
   631  			}
   632  
   633  			length := int(ancestor.SharedPrefixLen)
   634  			var isNull bool
   635  			key, isNull, err = DecodeKeyVals(types[:length], vals[:length], colDirs[:length], key)
   636  			if err != nil {
   637  				return nil, false, false, err
   638  			}
   639  			types, vals, colDirs = types[length:], vals[length:], colDirs[length:]
   640  			foundNull = foundNull || isNull
   641  
   642  			// Consume the interleaved sentinel.
   643  			var ok bool
   644  			key, ok = encoding.DecodeIfInterleavedSentinel(key)
   645  			if !ok {
   646  				return nil, false, false, nil
   647  			}
   648  		}
   649  
   650  		key, decodedTableID, decodedIndexID, err = DecodePartialTableIDIndexID(key)
   651  		if err != nil {
   652  			return nil, false, false, err
   653  		}
   654  		if decodedTableID != desc.ID || decodedIndexID != index.ID {
   655  			return nil, false, false, nil
   656  		}
   657  	}
   658  
   659  	var isNull bool
   660  	key, isNull, err = DecodeKeyVals(types, vals, colDirs, key)
   661  	if err != nil {
   662  		return nil, false, false, err
   663  	}
   664  	foundNull = foundNull || isNull
   665  
   666  	// We're expecting a column family id next (a varint). If
   667  	// interleavedSentinel is actually next, then this key is for a child
   668  	// table.
   669  	if _, ok := encoding.DecodeIfInterleavedSentinel(key); ok {
   670  		return nil, false, false, nil
   671  	}
   672  
   673  	return key, true, foundNull, nil
   674  }
   675  
   676  // DecodeKeyVals decodes the values that are part of the key. The decoded
   677  // values are stored in the vals. If this slice is nil, the direction
   678  // used will default to encoding.Ascending.
   679  // DecodeKeyVals returns whether or not NULL was encountered in the key.
   680  func DecodeKeyVals(
   681  	types []*types.T, vals []EncDatum, directions []IndexDescriptor_Direction, key []byte,
   682  ) ([]byte, bool, error) {
   683  	if directions != nil && len(directions) != len(vals) {
   684  		return nil, false, errors.Errorf("encoding directions doesn't parallel vals: %d vs %d.",
   685  			len(directions), len(vals))
   686  	}
   687  	foundNull := false
   688  	for j := range vals {
   689  		enc := DatumEncoding_ASCENDING_KEY
   690  		if directions != nil && (directions[j] == IndexDescriptor_DESC) {
   691  			enc = DatumEncoding_DESCENDING_KEY
   692  		}
   693  		var err error
   694  		vals[j], key, err = EncDatumFromBuffer(types[j], enc, key)
   695  		if err != nil {
   696  			return nil, false, err
   697  		}
   698  		if vals[j].IsNull() {
   699  			foundNull = true
   700  		}
   701  	}
   702  	return key, foundNull, nil
   703  }
   704  
   705  // ExtractIndexKey constructs the index (primary) key for a row from any index
   706  // key/value entry, including secondary indexes.
   707  //
   708  // Don't use this function in the scan "hot path".
   709  func ExtractIndexKey(
   710  	a *DatumAlloc, codec keys.SQLCodec, tableDesc *TableDescriptor, entry kv.KeyValue,
   711  ) (roachpb.Key, error) {
   712  	indexID, key, err := DecodeIndexKeyPrefix(codec, tableDesc, entry.Key)
   713  	if err != nil {
   714  		return nil, err
   715  	}
   716  	if indexID == tableDesc.PrimaryIndex.ID {
   717  		return entry.Key, nil
   718  	}
   719  
   720  	index, err := tableDesc.FindIndexByID(indexID)
   721  	if err != nil {
   722  		return nil, err
   723  	}
   724  
   725  	// Extract the values for index.ColumnIDs.
   726  	indexTypes, err := GetColumnTypes(tableDesc, index.ColumnIDs)
   727  	if err != nil {
   728  		return nil, err
   729  	}
   730  	values := make([]EncDatum, len(index.ColumnIDs))
   731  	dirs := index.ColumnDirections
   732  	if len(index.Interleave.Ancestors) > 0 {
   733  		// TODO(dan): In the interleaved index case, we parse the key twice; once to
   734  		// find the index id so we can look up the descriptor, and once to extract
   735  		// the values. Only parse once.
   736  		var ok bool
   737  		_, ok, _, err = DecodeIndexKey(codec, tableDesc, index, indexTypes, values, dirs, entry.Key)
   738  		if err != nil {
   739  			return nil, err
   740  		}
   741  		if !ok {
   742  			return nil, errors.Errorf("descriptor did not match key")
   743  		}
   744  	} else {
   745  		key, _, err = DecodeKeyVals(indexTypes, values, dirs, key)
   746  		if err != nil {
   747  			return nil, err
   748  		}
   749  	}
   750  
   751  	// Extract the values for index.ExtraColumnIDs
   752  	extraTypes, err := GetColumnTypes(tableDesc, index.ExtraColumnIDs)
   753  	if err != nil {
   754  		return nil, err
   755  	}
   756  	extraValues := make([]EncDatum, len(index.ExtraColumnIDs))
   757  	dirs = make([]IndexDescriptor_Direction, len(index.ExtraColumnIDs))
   758  	for i := range index.ExtraColumnIDs {
   759  		// Implicit columns are always encoded Ascending.
   760  		dirs[i] = IndexDescriptor_ASC
   761  	}
   762  	extraKey := key
   763  	if index.Unique {
   764  		extraKey, err = entry.Value.GetBytes()
   765  		if err != nil {
   766  			return nil, err
   767  		}
   768  	}
   769  	_, _, err = DecodeKeyVals(extraTypes, extraValues, dirs, extraKey)
   770  	if err != nil {
   771  		return nil, err
   772  	}
   773  
   774  	// Encode the index key from its components.
   775  	colMap := make(map[ColumnID]int)
   776  	for i, columnID := range index.ColumnIDs {
   777  		colMap[columnID] = i
   778  	}
   779  	for i, columnID := range index.ExtraColumnIDs {
   780  		colMap[columnID] = i + len(index.ColumnIDs)
   781  	}
   782  	indexKeyPrefix := MakeIndexKeyPrefix(codec, tableDesc, tableDesc.PrimaryIndex.ID)
   783  
   784  	decodedValues := make([]tree.Datum, len(values)+len(extraValues))
   785  	for i, value := range values {
   786  		err := value.EnsureDecoded(indexTypes[i], a)
   787  		if err != nil {
   788  			return nil, err
   789  		}
   790  		decodedValues[i] = value.Datum
   791  	}
   792  	for i, value := range extraValues {
   793  		err := value.EnsureDecoded(extraTypes[i], a)
   794  		if err != nil {
   795  			return nil, err
   796  		}
   797  		decodedValues[len(values)+i] = value.Datum
   798  	}
   799  	indexKey, _, err := EncodeIndexKey(
   800  		tableDesc, &tableDesc.PrimaryIndex, colMap, decodedValues, indexKeyPrefix)
   801  	return indexKey, err
   802  }
   803  
   804  // IndexEntry represents an encoded key/value for an index entry.
   805  type IndexEntry struct {
   806  	Key   roachpb.Key
   807  	Value roachpb.Value
   808  	// Only used for forward indexes.
   809  	Family FamilyID
   810  }
   811  
   812  // valueEncodedColumn represents a composite or stored column of a secondary
   813  // index.
   814  type valueEncodedColumn struct {
   815  	id          ColumnID
   816  	isComposite bool
   817  }
   818  
   819  // byID implements sort.Interface for []valueEncodedColumn based on the id
   820  // field.
   821  type byID []valueEncodedColumn
   822  
   823  func (a byID) Len() int           { return len(a) }
   824  func (a byID) Swap(i, j int)      { a[i], a[j] = a[j], a[i] }
   825  func (a byID) Less(i, j int) bool { return a[i].id < a[j].id }
   826  
   827  // EncodeInvertedIndexKeys creates a list of inverted index keys by
   828  // concatenating keyPrefix with the encodings of the column in the
   829  // index.
   830  func EncodeInvertedIndexKeys(
   831  	tableDesc *TableDescriptor,
   832  	index *IndexDescriptor,
   833  	colMap map[ColumnID]int,
   834  	values []tree.Datum,
   835  	keyPrefix []byte,
   836  ) (key [][]byte, err error) {
   837  	if len(index.ColumnIDs) > 1 {
   838  		return nil, errors.AssertionFailedf("trying to apply inverted index to more than one column")
   839  	}
   840  
   841  	var val tree.Datum
   842  	if i, ok := colMap[index.ColumnIDs[0]]; ok {
   843  		val = values[i]
   844  	} else {
   845  		val = tree.DNull
   846  	}
   847  	if !geoindex.IsEmptyConfig(&index.GeoConfig) {
   848  		return EncodeGeoInvertedIndexTableKeys(val, keyPrefix, index)
   849  	}
   850  	return EncodeInvertedIndexTableKeys(val, keyPrefix)
   851  }
   852  
   853  // EncodeInvertedIndexTableKeys produces one inverted index key per element in
   854  // the input datum, which should be a container (either JSON or Array). For
   855  // JSON, "element" means unique path through the document. Each output key is
   856  // prefixed by inKey, and is guaranteed to be lexicographically sortable, but
   857  // not guaranteed to be round-trippable during decoding. If the input Datum
   858  // is (SQL) NULL, no inverted index keys will be produced, because inverted
   859  // indexes cannot and do not need to satisfy the predicate col IS NULL.
   860  func EncodeInvertedIndexTableKeys(val tree.Datum, inKey []byte) (key [][]byte, err error) {
   861  	if val == tree.DNull {
   862  		return nil, nil
   863  	}
   864  	datum := tree.UnwrapDatum(nil, val)
   865  	switch val.ResolvedType().Family() {
   866  	case types.JsonFamily:
   867  		return json.EncodeInvertedIndexKeys(inKey, val.(*tree.DJSON).JSON)
   868  	case types.ArrayFamily:
   869  		return encodeArrayInvertedIndexTableKeys(val.(*tree.DArray), inKey)
   870  	}
   871  	return nil, errors.AssertionFailedf("trying to apply inverted index to unsupported type %s", datum.ResolvedType())
   872  }
   873  
   874  // encodeArrayInvertedIndexTableKeys returns a list of inverted index keys for
   875  // the given input array, one per entry in the array. The input inKey is
   876  // prefixed to all returned keys.
   877  // N.B.: This won't return any keys for
   878  func encodeArrayInvertedIndexTableKeys(val *tree.DArray, inKey []byte) (key [][]byte, err error) {
   879  	outKeys := make([][]byte, 0, len(val.Array))
   880  	for i := range val.Array {
   881  		d := val.Array[i]
   882  		if d == tree.DNull {
   883  			// We don't need to make keys for NULL, since in SQL:
   884  			// SELECT ARRAY[1, NULL, 2] @> ARRAY[NULL]
   885  			// returns false.
   886  			continue
   887  		}
   888  		outKey := make([]byte, len(inKey))
   889  		copy(outKey, inKey)
   890  		newKey, err := EncodeTableKey(outKey, d, encoding.Ascending)
   891  		if err != nil {
   892  			return nil, err
   893  		}
   894  		outKeys = append(outKeys, newKey)
   895  	}
   896  	outKeys = unique.UniquifyByteSlices(outKeys)
   897  	return outKeys, nil
   898  }
   899  
   900  // EncodeGeoInvertedIndexTableKeys is the equivalent of EncodeInvertedIndexTableKeys
   901  // for Geography and Geometry.
   902  func EncodeGeoInvertedIndexTableKeys(
   903  	val tree.Datum, inKey []byte, index *IndexDescriptor,
   904  ) (key [][]byte, err error) {
   905  	if val == tree.DNull {
   906  		return nil, nil
   907  	}
   908  	switch val.ResolvedType().Family() {
   909  	case types.GeographyFamily:
   910  		index := geoindex.NewS2GeographyIndex(*index.GeoConfig.S2Geography)
   911  		intKeys, err := index.InvertedIndexKeys(context.TODO(), val.(*tree.DGeography).Geography)
   912  		if err != nil {
   913  			return nil, err
   914  		}
   915  		return encodeGeoKeys(inKey, intKeys)
   916  	case types.GeometryFamily:
   917  		index := geoindex.NewS2GeometryIndex(*index.GeoConfig.S2Geometry)
   918  		intKeys, err := index.InvertedIndexKeys(context.TODO(), val.(*tree.DGeometry).Geometry)
   919  		if err != nil {
   920  			return nil, err
   921  		}
   922  		return encodeGeoKeys(inKey, intKeys)
   923  	default:
   924  		return nil, errors.Errorf("internal error: unexpected type: %s", val.ResolvedType().Family())
   925  	}
   926  }
   927  
   928  func encodeGeoKeys(inKey []byte, geoKeys []geoindex.Key) (keys [][]byte, err error) {
   929  	keys = make([][]byte, 0, len(geoKeys))
   930  	for _, k := range geoKeys {
   931  		outKey := make([]byte, len(inKey))
   932  		copy(outKey, inKey)
   933  		d := (tree.DInt)(k)
   934  		newKey, err := EncodeTableKey(outKey, &d, encoding.Ascending)
   935  		if err != nil {
   936  			return nil, err
   937  		}
   938  		keys = append(keys, newKey)
   939  	}
   940  	return keys, nil
   941  }
   942  
   943  // EncodePrimaryIndex constructs a list of k/v pairs for a
   944  // row encoded as a primary index. This function mirrors the encoding
   945  // logic in prepareInsertOrUpdateBatch in pkg/sql/row/writer.go.
   946  // It is somewhat duplicated here due to the different arguments
   947  // that prepareOrInsertUpdateBatch needs and uses to generate
   948  // the k/v's for the row it inserts. includeEmpty controls
   949  // whether or not k/v's with empty values should be returned.
   950  // It returns indexEntries in family sorted order.
   951  func EncodePrimaryIndex(
   952  	codec keys.SQLCodec,
   953  	tableDesc *TableDescriptor,
   954  	index *IndexDescriptor,
   955  	colMap map[ColumnID]int,
   956  	values []tree.Datum,
   957  	includeEmpty bool,
   958  ) ([]IndexEntry, error) {
   959  	keyPrefix := MakeIndexKeyPrefix(codec, tableDesc, index.ID)
   960  	indexKey, _, err := EncodeIndexKey(tableDesc, index, colMap, values, keyPrefix)
   961  	if err != nil {
   962  		return nil, err
   963  	}
   964  	// This information should be precomputed on the table descriptor.
   965  	indexedColumns := map[ColumnID]struct{}{}
   966  	for _, colID := range index.ColumnIDs {
   967  		indexedColumns[colID] = struct{}{}
   968  	}
   969  	var entryValue []byte
   970  	indexEntries := make([]IndexEntry, 0, len(tableDesc.Families))
   971  	var columnsToEncode []valueEncodedColumn
   972  
   973  	for i := range tableDesc.Families {
   974  		var err error
   975  		family := &tableDesc.Families[i]
   976  		if i > 0 {
   977  			indexKey = indexKey[:len(indexKey):len(indexKey)]
   978  			entryValue = entryValue[:0]
   979  			columnsToEncode = columnsToEncode[:0]
   980  		}
   981  		familyKey := keys.MakeFamilyKey(indexKey, uint32(family.ID))
   982  		// The decoders expect that column family 0 is encoded with a TUPLE value tag, so we
   983  		// don't want to use the untagged value encoding.
   984  		if len(family.ColumnIDs) == 1 && family.ColumnIDs[0] == family.DefaultColumnID && family.ID != 0 {
   985  			datum := findColumnValue(family.DefaultColumnID, colMap, values)
   986  			// We want to include this column if its value is non-null or
   987  			// we were requested to include all of the columns.
   988  			if datum != tree.DNull || includeEmpty {
   989  				col, err := tableDesc.FindColumnByID(family.DefaultColumnID)
   990  				if err != nil {
   991  					return nil, err
   992  				}
   993  				value, err := MarshalColumnValue(col, datum)
   994  				if err != nil {
   995  					return nil, err
   996  				}
   997  				indexEntries = append(indexEntries, IndexEntry{Key: familyKey, Value: value, Family: family.ID})
   998  			}
   999  			continue
  1000  		}
  1001  
  1002  		for _, colID := range family.ColumnIDs {
  1003  			if _, ok := indexedColumns[colID]; !ok {
  1004  				columnsToEncode = append(columnsToEncode, valueEncodedColumn{id: colID})
  1005  				continue
  1006  			}
  1007  			if cdatum, ok := values[colMap[colID]].(tree.CompositeDatum); ok {
  1008  				if cdatum.IsComposite() {
  1009  					columnsToEncode = append(columnsToEncode, valueEncodedColumn{id: colID, isComposite: true})
  1010  					continue
  1011  				}
  1012  			}
  1013  		}
  1014  		sort.Sort(byID(columnsToEncode))
  1015  		entryValue, err = writeColumnValues(entryValue, colMap, values, columnsToEncode)
  1016  		if err != nil {
  1017  			return nil, err
  1018  		}
  1019  		if family.ID != 0 && len(entryValue) == 0 && !includeEmpty {
  1020  			continue
  1021  		}
  1022  		entry := IndexEntry{Key: familyKey, Family: family.ID}
  1023  		entry.Value.SetTuple(entryValue)
  1024  		indexEntries = append(indexEntries, entry)
  1025  	}
  1026  	return indexEntries, nil
  1027  }
  1028  
  1029  // EncodeSecondaryIndex encodes key/values for a secondary
  1030  // index. colMap maps ColumnIDs to indices in `values`. This returns a
  1031  // slice of IndexEntry. includeEmpty controls whether or not
  1032  // EncodeSecondaryIndex should return k/v's that contain
  1033  // empty values. For forward indexes the returned list of
  1034  // index entries is in family sorted order.
  1035  func EncodeSecondaryIndex(
  1036  	codec keys.SQLCodec,
  1037  	tableDesc *TableDescriptor,
  1038  	secondaryIndex *IndexDescriptor,
  1039  	colMap map[ColumnID]int,
  1040  	values []tree.Datum,
  1041  	includeEmpty bool,
  1042  ) ([]IndexEntry, error) {
  1043  	secondaryIndexKeyPrefix := MakeIndexKeyPrefix(codec, tableDesc, secondaryIndex.ID)
  1044  
  1045  	// Use the primary key encoding for covering indexes.
  1046  	if secondaryIndex.GetEncodingType(tableDesc.PrimaryIndex.ID) == PrimaryIndexEncoding {
  1047  		return EncodePrimaryIndex(codec, tableDesc, secondaryIndex, colMap, values, includeEmpty)
  1048  	}
  1049  
  1050  	var containsNull = false
  1051  	var secondaryKeys [][]byte
  1052  	var err error
  1053  	if secondaryIndex.Type == IndexDescriptor_INVERTED {
  1054  		secondaryKeys, err = EncodeInvertedIndexKeys(tableDesc, secondaryIndex, colMap, values, secondaryIndexKeyPrefix)
  1055  	} else {
  1056  		var secondaryIndexKey []byte
  1057  		secondaryIndexKey, containsNull, err = EncodeIndexKey(
  1058  			tableDesc, secondaryIndex, colMap, values, secondaryIndexKeyPrefix)
  1059  
  1060  		secondaryKeys = [][]byte{secondaryIndexKey}
  1061  	}
  1062  	if err != nil {
  1063  		return []IndexEntry{}, err
  1064  	}
  1065  
  1066  	// Add the extra columns - they are encoded in ascending order which is done
  1067  	// by passing nil for the encoding directions.
  1068  	extraKey, _, err := EncodeColumns(secondaryIndex.ExtraColumnIDs, nil,
  1069  		colMap, values, nil)
  1070  	if err != nil {
  1071  		return []IndexEntry{}, err
  1072  	}
  1073  
  1074  	// entries is the resulting array that we will return. We allocate upfront at least
  1075  	// len(secondaryKeys) positions to avoid allocations from appending.
  1076  	entries := make([]IndexEntry, 0, len(secondaryKeys))
  1077  	for _, key := range secondaryKeys {
  1078  		if !secondaryIndex.Unique || containsNull {
  1079  			// If the index is not unique or it contains a NULL value, append
  1080  			// extraKey to the key in order to make it unique.
  1081  			key = append(key, extraKey...)
  1082  		}
  1083  
  1084  		if len(tableDesc.Families) == 1 ||
  1085  			secondaryIndex.Type == IndexDescriptor_INVERTED ||
  1086  			secondaryIndex.Version == BaseIndexFormatVersion {
  1087  			// We do all computation that affects indexes with families in a separate code path to avoid performance
  1088  			// regression for tables without column families.
  1089  			entry, err := encodeSecondaryIndexNoFamilies(secondaryIndex, colMap, key, values, extraKey)
  1090  			if err != nil {
  1091  				return []IndexEntry{}, err
  1092  			}
  1093  			entries = append(entries, entry)
  1094  		} else {
  1095  			// This is only executed once as len(secondaryKeys) = 1 for non inverted secondary indexes.
  1096  			// Create a mapping of family ID to stored columns.
  1097  			// TODO (rohany): we want to share this information across calls to EncodeSecondaryIndex --
  1098  			//  its not easy to do this right now. It would be nice if the index descriptor or table descriptor
  1099  			//  had this information computed/cached for us.
  1100  			familyToColumns := make(map[FamilyID][]valueEncodedColumn)
  1101  			addToFamilyColMap := func(id FamilyID, column valueEncodedColumn) {
  1102  				if _, ok := familyToColumns[id]; !ok {
  1103  					familyToColumns[id] = []valueEncodedColumn{}
  1104  				}
  1105  				familyToColumns[id] = append(familyToColumns[id], column)
  1106  			}
  1107  			// Ensure that column family 0 always generates a k/v pair.
  1108  			familyToColumns[0] = []valueEncodedColumn{}
  1109  			// All composite columns are stored in family 0.
  1110  			for _, id := range secondaryIndex.CompositeColumnIDs {
  1111  				addToFamilyColMap(0, valueEncodedColumn{id: id, isComposite: true})
  1112  			}
  1113  			for _, family := range tableDesc.Families {
  1114  				for _, id := range secondaryIndex.StoreColumnIDs {
  1115  					for _, col := range family.ColumnIDs {
  1116  						if id == col {
  1117  							addToFamilyColMap(family.ID, valueEncodedColumn{id: id, isComposite: false})
  1118  						}
  1119  					}
  1120  				}
  1121  			}
  1122  			entries, err = encodeSecondaryIndexWithFamilies(
  1123  				familyToColumns, secondaryIndex, colMap, key, values, extraKey, entries, includeEmpty)
  1124  			if err != nil {
  1125  				return []IndexEntry{}, err
  1126  			}
  1127  		}
  1128  	}
  1129  	return entries, nil
  1130  }
  1131  
  1132  // encodeSecondaryIndexWithFamilies generates a k/v pair for
  1133  // each family/column pair in familyMap. The row parameter will be
  1134  // modified by the function, so copy it before using. includeEmpty
  1135  // controls whether or not k/v's with empty values will be returned.
  1136  // The returned indexEntries are in family sorted order.
  1137  func encodeSecondaryIndexWithFamilies(
  1138  	familyMap map[FamilyID][]valueEncodedColumn,
  1139  	index *IndexDescriptor,
  1140  	colMap map[ColumnID]int,
  1141  	key []byte,
  1142  	row []tree.Datum,
  1143  	extraKeyCols []byte,
  1144  	results []IndexEntry,
  1145  	includeEmpty bool,
  1146  ) ([]IndexEntry, error) {
  1147  	var (
  1148  		value []byte
  1149  		err   error
  1150  	)
  1151  	origKeyLen := len(key)
  1152  	// TODO (rohany): is there a natural way of caching this information as well?
  1153  	// We have to iterate over the map in sorted family order. Other parts of the code
  1154  	// depend on a per-call consistent order of keys generated.
  1155  	familyIDs := make([]int, 0, len(familyMap))
  1156  	for familyID := range familyMap {
  1157  		familyIDs = append(familyIDs, int(familyID))
  1158  	}
  1159  	sort.Ints(familyIDs)
  1160  	for _, familyID := range familyIDs {
  1161  		storedColsInFam := familyMap[FamilyID(familyID)]
  1162  		// Ensure that future appends to key will cause a copy and not overwrite
  1163  		// existing key values.
  1164  		key = key[:origKeyLen:origKeyLen]
  1165  
  1166  		// If we aren't storing any columns in this family and we are not the first family,
  1167  		// skip onto the next family. We need to write family 0 no matter what to ensure
  1168  		// that each row has at least one entry in the DB.
  1169  		if len(storedColsInFam) == 0 && familyID != 0 {
  1170  			continue
  1171  		}
  1172  
  1173  		sort.Sort(byID(storedColsInFam))
  1174  
  1175  		key = keys.MakeFamilyKey(key, uint32(familyID))
  1176  		if index.Unique && familyID == 0 {
  1177  			// Note that a unique secondary index that contains a NULL column value
  1178  			// will have extraKey appended to the key and stored in the value. We
  1179  			// require extraKey to be appended to the key in order to make the key
  1180  			// unique. We could potentially get rid of the duplication here but at
  1181  			// the expense of complicating scanNode when dealing with unique
  1182  			// secondary indexes.
  1183  			value = extraKeyCols
  1184  		} else {
  1185  			// The zero value for an index-value is a 0-length bytes value.
  1186  			value = []byte{}
  1187  		}
  1188  
  1189  		value, err = writeColumnValues(value, colMap, row, storedColsInFam)
  1190  		if err != nil {
  1191  			return []IndexEntry{}, err
  1192  		}
  1193  		entry := IndexEntry{Key: key, Family: FamilyID(familyID)}
  1194  		// If we aren't looking at family 0 and don't have a value,
  1195  		// don't include an entry for this k/v.
  1196  		if familyID != 0 && len(value) == 0 && !includeEmpty {
  1197  			continue
  1198  		}
  1199  		// If we are looking at family 0, encode the data as BYTES, as it might
  1200  		// include encoded primary key columns. For other families, use the
  1201  		// tuple encoding for the value.
  1202  		if familyID == 0 {
  1203  			entry.Value.SetBytes(value)
  1204  		} else {
  1205  			entry.Value.SetTuple(value)
  1206  		}
  1207  		results = append(results, entry)
  1208  	}
  1209  	return results, nil
  1210  }
  1211  
  1212  // encodeSecondaryIndexNoFamilies takes a mostly constructed
  1213  // secondary index key (without the family/sentinel at
  1214  // the end), and appends the 0 family sentinel to it, and
  1215  // constructs the value portion of the index. This function
  1216  // performs the index encoding version before column
  1217  // families were introduced onto secondary indexes.
  1218  func encodeSecondaryIndexNoFamilies(
  1219  	index *IndexDescriptor,
  1220  	colMap map[ColumnID]int,
  1221  	key []byte,
  1222  	row []tree.Datum,
  1223  	extraKeyCols []byte,
  1224  ) (IndexEntry, error) {
  1225  	var (
  1226  		value []byte
  1227  		err   error
  1228  	)
  1229  	// If we aren't encoding index keys with families, all index keys use the sentinel family 0.
  1230  	key = keys.MakeFamilyKey(key, 0)
  1231  	if index.Unique {
  1232  		// Note that a unique secondary index that contains a NULL column value
  1233  		// will have extraKey appended to the key and stored in the value. We
  1234  		// require extraKey to be appended to the key in order to make the key
  1235  		// unique. We could potentially get rid of the duplication here but at
  1236  		// the expense of complicating scanNode when dealing with unique
  1237  		// secondary indexes.
  1238  		value = append(value, extraKeyCols...)
  1239  	} else {
  1240  		// The zero value for an index-value is a 0-length bytes value.
  1241  		value = []byte{}
  1242  	}
  1243  	var cols []valueEncodedColumn
  1244  	// Since we aren't encoding data with families, we just encode all stored and composite columns in the value.
  1245  	for _, id := range index.StoreColumnIDs {
  1246  		cols = append(cols, valueEncodedColumn{id: id, isComposite: false})
  1247  	}
  1248  	for _, id := range index.CompositeColumnIDs {
  1249  		// Inverted indexes on a composite type (i.e. an array of composite types)
  1250  		// should not add the indexed column to the value.
  1251  		if index.Type == IndexDescriptor_INVERTED && id == index.ColumnIDs[0] {
  1252  			continue
  1253  		}
  1254  		cols = append(cols, valueEncodedColumn{id: id, isComposite: true})
  1255  	}
  1256  	sort.Sort(byID(cols))
  1257  	value, err = writeColumnValues(value, colMap, row, cols)
  1258  	if err != nil {
  1259  		return IndexEntry{}, err
  1260  	}
  1261  	entry := IndexEntry{Key: key, Family: 0}
  1262  	entry.Value.SetBytes(value)
  1263  	return entry, nil
  1264  }
  1265  
  1266  // writeColumnValues writes the value encoded versions of the desired columns from the input
  1267  // row of datums into the value byte slice.
  1268  func writeColumnValues(
  1269  	value []byte, colMap map[ColumnID]int, row []tree.Datum, columns []valueEncodedColumn,
  1270  ) ([]byte, error) {
  1271  	var lastColID ColumnID
  1272  	for _, col := range columns {
  1273  		val := findColumnValue(col.id, colMap, row)
  1274  		if val == tree.DNull || (col.isComposite && !val.(tree.CompositeDatum).IsComposite()) {
  1275  			continue
  1276  		}
  1277  		if lastColID > col.id {
  1278  			panic(fmt.Errorf("cannot write column id %d after %d", col.id, lastColID))
  1279  		}
  1280  		colIDDiff := col.id - lastColID
  1281  		lastColID = col.id
  1282  		var err error
  1283  		value, err = EncodeTableValue(value, colIDDiff, val, nil)
  1284  		if err != nil {
  1285  			return nil, err
  1286  		}
  1287  	}
  1288  	return value, nil
  1289  }
  1290  
  1291  // EncodeSecondaryIndexes encodes key/values for the secondary indexes. colMap
  1292  // maps ColumnIDs to indices in `values`. secondaryIndexEntries is the return
  1293  // value (passed as a parameter so the caller can reuse between rows) and is
  1294  // expected to be the same length as indexes.
  1295  func EncodeSecondaryIndexes(
  1296  	codec keys.SQLCodec,
  1297  	tableDesc *TableDescriptor,
  1298  	indexes []IndexDescriptor,
  1299  	colMap map[ColumnID]int,
  1300  	values []tree.Datum,
  1301  	secondaryIndexEntries []IndexEntry,
  1302  	includeEmpty bool,
  1303  ) ([]IndexEntry, error) {
  1304  	if len(secondaryIndexEntries) > 0 {
  1305  		panic("Length of secondaryIndexEntries was non-zero")
  1306  	}
  1307  	for i := range indexes {
  1308  		entries, err := EncodeSecondaryIndex(codec, tableDesc, &indexes[i], colMap, values, includeEmpty)
  1309  		if err != nil {
  1310  			return secondaryIndexEntries, err
  1311  		}
  1312  		// Normally, each index will have exactly one entry. However, inverted
  1313  		// indexes can have 0 or >1 entries, as well as secondary indexes which
  1314  		// store columns from multiple column families.
  1315  		secondaryIndexEntries = append(secondaryIndexEntries, entries...)
  1316  	}
  1317  	return secondaryIndexEntries, nil
  1318  }
  1319  
  1320  // IndexKeyEquivSignature parses an index key if and only if the index
  1321  // key belongs to a table where its equivalence signature and all its
  1322  // interleave ancestors' signatures can be found in
  1323  // validEquivSignatures.
  1324  //
  1325  // Its validEquivSignatures argument is a map containing equivalence
  1326  // signatures of valid ancestors of the desired table and of the
  1327  // desired table itself.
  1328  //
  1329  // IndexKeyEquivSignature returns whether or not the index key
  1330  // satisfies the above condition, the value mapped to by the desired
  1331  // table (could be a table index), and the rest of the key that's not
  1332  // part of the signature.
  1333  //
  1334  // It also requires two []byte buffers: one for the signature
  1335  // (signatureBuf) and one for the rest of the key (keyRestBuf).
  1336  //
  1337  // The equivalence signature defines the equivalence classes for the
  1338  // signature of potentially interleaved tables. For example, the
  1339  // equivalence signatures for the following interleaved indexes:
  1340  //
  1341  //    <parent@primary>
  1342  //    <child@secondary>
  1343  //
  1344  // and index keys
  1345  //    <parent index key>:   /<parent table id>/<parent index id>/<val 1>/<val 2>
  1346  //    <child index key>:    /<parent table id>/<parent index id>/<val 1>/<val 2>/#/<child table id>/child index id>/<val 3>/<val 4>
  1347  //
  1348  // correspond to the equivalence signatures
  1349  //    <parent@primary>:     /<parent table id>/<parent index id>
  1350  //    <child@secondary>:    /<parent table id>/<parent index id>/#/<child table id>/<child index id>
  1351  //
  1352  // Equivalence signatures allow us to associate an index key with its
  1353  // table without having to invoke DecodeIndexKey multiple times.
  1354  //
  1355  // IndexKeyEquivSignature will return false if the a table's
  1356  // ancestor's signature or the table's signature (table which the
  1357  // index key belongs to) is not mapped in validEquivSignatures.
  1358  //
  1359  // For example, suppose the given key is
  1360  //
  1361  //    /<t2 table id>/<t2 index id>/<val t2>/#/<t3 table id>/<t3 table id>/<val t3>
  1362  //
  1363  // and validEquivSignatures contains
  1364  //
  1365  //    /<t1 table id>/t1 index id>
  1366  //    /<t1 table id>/t1 index id>/#/<t4 table id>/<t4 index id>
  1367  //
  1368  // IndexKeyEquivSignature will short-circuit and return false once
  1369  //
  1370  //    /<t2 table id>/<t2 index id>
  1371  //
  1372  // is processed since t2's signature is not specified in validEquivSignatures.
  1373  func IndexKeyEquivSignature(
  1374  	key []byte, validEquivSignatures map[string]int, signatureBuf []byte, restBuf []byte,
  1375  ) (tableIdx int, restResult []byte, success bool, err error) {
  1376  	signatureBuf = signatureBuf[:0]
  1377  	restResult = restBuf[:0]
  1378  	for {
  1379  		// Well-formed key is guaranteed to to have 2 varints for every
  1380  		// ancestor: the TableID and IndexID.
  1381  		// We extract these out and add them to our buffer.
  1382  		for i := 0; i < 2; i++ {
  1383  			idLen, err := encoding.PeekLength(key)
  1384  			if err != nil {
  1385  				return 0, nil, false, err
  1386  			}
  1387  			signatureBuf = append(signatureBuf, key[:idLen]...)
  1388  			key = key[idLen:]
  1389  		}
  1390  
  1391  		// The current signature (either an ancestor table's or the key's)
  1392  		// is not one of the validEquivSignatures.
  1393  		// We can short-circuit and return false.
  1394  		recentTableIdx, found := validEquivSignatures[string(signatureBuf)]
  1395  		if !found {
  1396  			return 0, nil, false, nil
  1397  		}
  1398  
  1399  		var isSentinel bool
  1400  		// Peek and discard encoded index values.
  1401  		for {
  1402  			key, isSentinel = encoding.DecodeIfInterleavedSentinel(key)
  1403  			// We stop once the key is empty or if we encounter a
  1404  			// sentinel for the next TableID-IndexID pair.
  1405  			if len(key) == 0 || isSentinel {
  1406  				break
  1407  			}
  1408  			len, err := encoding.PeekLength(key)
  1409  			if err != nil {
  1410  				return 0, nil, false, err
  1411  			}
  1412  			// Append any other bytes (column values initially,
  1413  			// then family ID and timestamp) to return.
  1414  			restResult = append(restResult, key[:len]...)
  1415  			key = key[len:]
  1416  		}
  1417  
  1418  		if !isSentinel {
  1419  			// The key has been fully decomposed and is valid up to
  1420  			// this point.
  1421  			// Return the most recent table index from
  1422  			// validEquivSignatures.
  1423  			return recentTableIdx, restResult, true, nil
  1424  		}
  1425  		// If there was a sentinel, we know there are more
  1426  		// descendant(s).
  1427  		// We insert an interleave sentinel and continue extracting the
  1428  		// next descendant's IDs.
  1429  		signatureBuf = encoding.EncodeInterleavedSentinel(signatureBuf)
  1430  	}
  1431  }
  1432  
  1433  // TableEquivSignatures returns the equivalence signatures for each interleave
  1434  // ancestor and itself. See IndexKeyEquivSignature for more info.
  1435  func TableEquivSignatures(
  1436  	desc *TableDescriptor, index *IndexDescriptor,
  1437  ) (signatures [][]byte, err error) {
  1438  	// signatures contains the slice reference to the signature of every
  1439  	// ancestor of the current table-index.
  1440  	// The last slice reference is the given table-index's signature.
  1441  	signatures = make([][]byte, len(index.Interleave.Ancestors)+1)
  1442  	// fullSignature is the backing byte slice for each individual signature
  1443  	// as it buffers each block of table and index IDs.
  1444  	// We eagerly allocate 4 bytes for each of the two IDs per ancestor
  1445  	// (which can fit Uvarint IDs up to 2^17-1 without another allocation),
  1446  	// 1 byte for each interleave sentinel, and 4 bytes each for the given
  1447  	// table's and index's ID.
  1448  	fullSignature := make([]byte, 0, len(index.Interleave.Ancestors)*9+8)
  1449  
  1450  	// Encode the table's ancestors' TableIDs and IndexIDs.
  1451  	for i, ancestor := range index.Interleave.Ancestors {
  1452  		fullSignature = EncodePartialTableIDIndexID(fullSignature, ancestor.TableID, ancestor.IndexID)
  1453  		// Create a reference up to this point for the ancestor's
  1454  		// signature.
  1455  		signatures[i] = fullSignature
  1456  		// Append Interleave sentinel after every ancestor.
  1457  		fullSignature = encoding.EncodeInterleavedSentinel(fullSignature)
  1458  	}
  1459  
  1460  	// Encode the table's table and index IDs.
  1461  	fullSignature = EncodePartialTableIDIndexID(fullSignature, desc.ID, index.ID)
  1462  	// Create a reference for the given table's signature as the last
  1463  	// element of signatures.
  1464  	signatures[len(signatures)-1] = fullSignature
  1465  
  1466  	return signatures, nil
  1467  }
  1468  
  1469  // maxKeyTokens returns the maximum number of key tokens in an index's key,
  1470  // including the table ID, index ID, and index column values (including extra
  1471  // columns that may be stored in the key).
  1472  // It requires knowledge of whether the key will or might contain a NULL value:
  1473  // if uncertain, pass in true to 'overestimate' the maxKeyTokens.
  1474  //
  1475  // In general, a key belonging to an interleaved index grandchild is encoded as:
  1476  //
  1477  //    /table/index/<parent-pk1>/.../<parent-pkX>/#/table/index/<child-pk1>/.../<child-pkY>/#/table/index/<grandchild-pk1>/.../<grandchild-pkZ>
  1478  //
  1479  // The part of the key with respect to the grandchild index would be
  1480  // the entire key since there are no grand-grandchild table/index IDs or
  1481  // <grandgrandchild-pk>. The maximal prefix of the key that belongs to child is
  1482  //
  1483  //    /table/index/<parent-pk1>/.../<parent-pkX>/#/table/index/<child-pk1>/.../<child-pkY>
  1484  //
  1485  // and the maximal prefix of the key that belongs to parent is
  1486  //
  1487  //    /table/index/<parent-pk1>/.../<parent-pkX>
  1488  //
  1489  // This returns the maximum number of <tokens> in this prefix.
  1490  func maxKeyTokens(index *IndexDescriptor, containsNull bool) int {
  1491  	nTables := len(index.Interleave.Ancestors) + 1
  1492  	nKeyCols := len(index.ColumnIDs)
  1493  
  1494  	// Non-unique secondary indexes or unique secondary indexes with a NULL
  1495  	// value have additional columns in the key that may appear in a span
  1496  	// (e.g. primary key columns not part of the index).
  1497  	// See EncodeSecondaryIndex.
  1498  	if !index.Unique || containsNull {
  1499  		nKeyCols += len(index.ExtraColumnIDs)
  1500  	}
  1501  
  1502  	// To illustrate how we compute max # of key tokens, take the
  1503  	// key in the example above and let the respective index be child.
  1504  	// We'd like to return the number of bytes in
  1505  	//
  1506  	//    /table/index/<parent-pk1>/.../<parent-pkX>/#/table/index/<child-pk1>/.../<child-pkY>
  1507  	// For each table-index, there is
  1508  	//    1. table ID
  1509  	//    2. index ID
  1510  	//    3. interleave sentinel
  1511  	// or 3 * nTables.
  1512  	// Each <parent-pkX> must be a part of the index's columns (nKeys).
  1513  	// Finally, we do not want to include the interleave sentinel for the
  1514  	// current index (-1).
  1515  	return 3*nTables + nKeyCols - 1
  1516  }
  1517  
  1518  // AdjustStartKeyForInterleave adjusts the start key to skip unnecessary
  1519  // interleaved sections.
  1520  //
  1521  // For example, if child is interleaved into parent, a typical parent
  1522  // span might look like
  1523  //    /1 - /3
  1524  // and a typical child span might look like
  1525  //    /1/#/2 - /2/#/5
  1526  // Suppose the parent span is
  1527  //    /1/#/2 - /3
  1528  // where the start key is a child's index key. Notice that the first parent
  1529  // key read actually starts at /2 since all the parent keys with the prefix
  1530  // /1 come before the child key /1/#/2 (and is not read in the span).
  1531  // We can thus push forward the start key from /1/#/2 to /2. If the start key
  1532  // was /1, we cannot push this forwards since that is the first key we want
  1533  // to read.
  1534  func AdjustStartKeyForInterleave(
  1535  	codec keys.SQLCodec, index *IndexDescriptor, start roachpb.Key,
  1536  ) (roachpb.Key, error) {
  1537  	// Remove the tenant prefix before decomposing.
  1538  	strippedStart, err := codec.StripTenantPrefix(start)
  1539  	if err != nil {
  1540  		return roachpb.Key{}, err
  1541  	}
  1542  
  1543  	keyTokens, containsNull, err := encoding.DecomposeKeyTokens(strippedStart)
  1544  	if err != nil {
  1545  		return roachpb.Key{}, err
  1546  	}
  1547  	nIndexTokens := maxKeyTokens(index, containsNull)
  1548  
  1549  	// This is either the index's own key or one of its ancestor's key.
  1550  	// Nothing to do.
  1551  	if len(keyTokens) <= nIndexTokens {
  1552  		return start, nil
  1553  	}
  1554  
  1555  	// len(keyTokens) > nIndexTokens, so this must be a child key.
  1556  	// Transform /1/#/2 --> /2.
  1557  	firstNTokenLen := 0
  1558  	for _, token := range keyTokens[:nIndexTokens] {
  1559  		firstNTokenLen += len(token)
  1560  	}
  1561  
  1562  	return start[:firstNTokenLen].PrefixEnd(), nil
  1563  }
  1564  
  1565  // AdjustEndKeyForInterleave returns an exclusive end key. It does two things:
  1566  //    - determines the end key based on the prior: inclusive vs exclusive
  1567  //    - adjusts the end key to skip unnecessary interleaved sections
  1568  //
  1569  // For example, the parent span composed from the filter PK >= 1 and PK < 3 is
  1570  //    /1 - /3
  1571  // This reads all keys up to the first parent key for PK = 3. If parent had
  1572  // interleaved tables and keys, it would unnecessarily scan over interleaved
  1573  // rows under PK2 (e.g. /2/#/5).
  1574  // We can instead "tighten" or adjust the end key from /3 to /2/#.
  1575  // DO NOT pass in any keys that have been invoked with PrefixEnd: this may
  1576  // cause issues when trying to decode the key tokens.
  1577  // AdjustEndKeyForInterleave is idempotent upon successive invocation(s).
  1578  func AdjustEndKeyForInterleave(
  1579  	codec keys.SQLCodec,
  1580  	table *TableDescriptor,
  1581  	index *IndexDescriptor,
  1582  	end roachpb.Key,
  1583  	inclusive bool,
  1584  ) (roachpb.Key, error) {
  1585  	if index.Type == IndexDescriptor_INVERTED {
  1586  		return end.PrefixEnd(), nil
  1587  	}
  1588  
  1589  	// Remove the tenant prefix before decomposing.
  1590  	strippedEnd, err := codec.StripTenantPrefix(end)
  1591  	if err != nil {
  1592  		return roachpb.Key{}, err
  1593  	}
  1594  
  1595  	// To illustrate, suppose we have the interleaved hierarchy
  1596  	//    parent
  1597  	//	child
  1598  	//	  grandchild
  1599  	// Suppose our target index is child.
  1600  	keyTokens, containsNull, err := encoding.DecomposeKeyTokens(strippedEnd)
  1601  	if err != nil {
  1602  		return roachpb.Key{}, err
  1603  	}
  1604  	nIndexTokens := maxKeyTokens(index, containsNull)
  1605  
  1606  	// Sibling/nibling keys: it is possible for this key to be part
  1607  	// of a sibling tree in the interleaved hierarchy, especially after
  1608  	// partitioning on range split keys.
  1609  	// As such, a sibling may be interpretted as an ancestor (if the sibling
  1610  	// has fewer key-encoded columns) or a descendant (if the sibling has
  1611  	// more key-encoded columns). Similarly for niblings.
  1612  	// This is fine because if the sibling is sorted before or after the
  1613  	// current index (child in our example), it is not possible for us to
  1614  	// adjust the sibling key such that we add or remove child (the current
  1615  	// index's) rows from our span.
  1616  
  1617  	if index.ID != table.PrimaryIndex.ID || len(keyTokens) < nIndexTokens {
  1618  		// Case 1: secondary index, parent key or partial child key:
  1619  		// Secondary indexes cannot have interleaved rows.
  1620  		// We cannot adjust or tighten parent keys with respect to a
  1621  		// child index.
  1622  		// Partial child keys e.g. /1/#/1 vs /1/#/1/2 cannot have
  1623  		// interleaved rows.
  1624  		// Nothing to do besides making the end key exclusive if it was
  1625  		// initially inclusive.
  1626  		if inclusive {
  1627  			end = end.PrefixEnd()
  1628  		}
  1629  		return end, nil
  1630  	}
  1631  
  1632  	if len(keyTokens) == nIndexTokens {
  1633  		// Case 2: child key
  1634  
  1635  		lastToken := keyTokens[len(keyTokens)-1]
  1636  		_, isNotNullDesc := encoding.DecodeIfNotNullDescending(lastToken)
  1637  		// If this is the child's key and the last value in the key is
  1638  		// NotNullDesc, then it does not need (read: shouldn't) to be
  1639  		// tightened.
  1640  		// For example, the query with IS NOT NULL may generate
  1641  		// the end key
  1642  		//    /1/#/NOTNULLDESC
  1643  		if isNotNullDesc {
  1644  			if inclusive {
  1645  				end = end.PrefixEnd()
  1646  			}
  1647  			return end, nil
  1648  		}
  1649  
  1650  		// We only want to UndoPrefixEnd if the end key passed is not
  1651  		// inclusive initially.
  1652  		if !inclusive {
  1653  			lastType := encoding.PeekType(lastToken)
  1654  			if lastType == encoding.Bytes || lastType == encoding.BytesDesc || lastType == encoding.Decimal {
  1655  				// If the last value is of type Decimals or
  1656  				// Bytes then this is more difficult since the
  1657  				// escape term is the last value.
  1658  				// TODO(richardwu): Figure out how to go back 1
  1659  				// logical bytes/decimal value.
  1660  				return end, nil
  1661  			}
  1662  
  1663  			// We first iterate back to the previous key value
  1664  			//    /1/#/1 --> /1/#/0
  1665  			undoPrefixEnd, ok := encoding.UndoPrefixEnd(end)
  1666  			if !ok {
  1667  				return end, nil
  1668  			}
  1669  			end = undoPrefixEnd
  1670  		}
  1671  
  1672  		// /1/#/0 --> /1/#/0/#
  1673  		return encoding.EncodeInterleavedSentinel(end), nil
  1674  	}
  1675  
  1676  	// len(keyTokens) > nIndexTokens
  1677  	// Case 3: tightened child, sibling/nibling, or grandchild key
  1678  
  1679  	// Case 3a: tightened child key
  1680  	// This could from a previous invocation of AdjustEndKeyForInterleave.
  1681  	// For example, if during index selection the key for child was
  1682  	// tightened
  1683  	//	/1/#/2 --> /1/#/1/#
  1684  	// We don't really want to tighten on '#' again.
  1685  	if _, isSentinel := encoding.DecodeIfInterleavedSentinel(keyTokens[nIndexTokens]); isSentinel && len(keyTokens)-1 == nIndexTokens {
  1686  		if inclusive {
  1687  			end = end.PrefixEnd()
  1688  		}
  1689  		return end, nil
  1690  	}
  1691  
  1692  	// Case 3b/c: sibling/nibling or grandchild key
  1693  	// Ideally, we want to form
  1694  	//    /1/#/2/#/3 --> /1/#/2/#
  1695  	// We truncate up to and including the interleave sentinel (or next
  1696  	// sibling/nibling column value) after the last index key token.
  1697  	firstNTokenLen := 0
  1698  	for _, token := range keyTokens[:nIndexTokens] {
  1699  		firstNTokenLen += len(token)
  1700  	}
  1701  
  1702  	return end[:firstNTokenLen+1], nil
  1703  }