github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/rowcontainer/disk_row_container.go

// Copyright 2017 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package rowcontainer

import (
	"bytes"
	"context"

	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/diskmap"
	"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode"
	"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror"
	"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
	"github.com/cockroachdb/cockroach/pkg/sql/types"
	"github.com/cockroachdb/cockroach/pkg/util/encoding"
	"github.com/cockroachdb/cockroach/pkg/util/log"
	"github.com/cockroachdb/cockroach/pkg/util/mon"
	"github.com/cockroachdb/errors"
)

// DiskRowContainer is a SortableRowContainer that stores rows on disk according
// to the ordering specified in DiskRowContainer.ordering. The underlying store
// is a SortedDiskMap so the sorting itself is delegated. Use an iterator
// created through NewIterator() to read the rows in sorted order.
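//
// A minimal read-loop sketch (hypothetical usage; error handling elided):
//
// 	i := d.NewIterator(ctx)
// 	defer i.Close()
// 	for i.Rewind(); ; i.Next() {
// 		if ok, err := i.Valid(); err != nil || !ok {
// 			break
// 		}
// 		row, _ := i.Row() // valid only until the next call to Row()
// 		_ = row
// 	}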
type DiskRowContainer struct {
	diskMap diskmap.SortedDiskMap
	// diskAcc keeps track of disk usage.
	diskAcc mon.BoundAccount
	// bufferedRows buffers writes to the diskMap.
	bufferedRows  diskmap.SortedDiskMapBatchWriter
	scratchKey    []byte
	scratchVal    []byte
	scratchEncRow sqlbase.EncDatumRow

	// For computing mean encoded row bytes.
	totalEncodedRowBytes uint64

	// lastReadKey is used to implement NewFinalIterator. Refer to the method's
	// comment for more information.
	lastReadKey []byte

	// topK is set by callers through InitTopK. Since rows are kept in sorted
	// order, topK will simply limit iterators to read the first k rows.
	topK int

	// rowID is used as a key suffix to prevent duplicate rows from overwriting
	// each other.
	rowID uint64

	// types is the schema of rows in the container.
	types []*types.T
	// ordering is the order in which rows should be sorted.
	ordering sqlbase.ColumnOrdering
	// encodings keeps around the DatumEncoding equivalents of the encoding
	// directions in ordering to avoid conversions in hot paths.
	encodings []sqlbase.DatumEncoding
	// valueIdxs holds the indexes of the columns that we encode as values. The
	// columns described by ordering will be encoded as keys. See
	// MakeDiskRowContainer() for more encoding specifics.
	valueIdxs []int

	// See comment in DoDeDuplicate().
	deDuplicate bool
	// deDupCache maps a key to the dense row index assigned to that key. It
	// contains all the key strings that are potentially buffered in
	// bufferedRows. Since we need to de-duplicate on every insert attempt,
	// this cache saves us from flushing bufferedRows after every insert just
	// to make the buffered keys visible to an iterator.
	// There is currently no memory accounting for the deDupCache, just like
	// there is none for the bufferedRows. Both will be approximately the same
	// size.
	deDupCache map[string]int

	diskMonitor *mon.BytesMonitor
	engine      diskmap.Factory

	datumAlloc *sqlbase.DatumAlloc
}

var _ SortableRowContainer = &DiskRowContainer{}
var _ DeDupingRowContainer = &DiskRowContainer{}

// MakeDiskRowContainer creates a DiskRowContainer with the given engine as the
// underlying store that rows are stored on.
// Arguments:
// 	- diskMonitor is used to monitor this DiskRowContainer's disk usage.
// 	- types is the schema of rows that will be added to this container.
// 	- ordering is the output ordering; the order in which rows should be sorted.
// 	- e is the underlying store that rows are stored on.
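//
// A hypothetical construction sketch (diskMonitor, typs, ordering, and engine
// stand in for values owned by the calling processor):
//
// 	d := MakeDiskRowContainer(diskMonitor, typs, ordering, engine)
// 	defer d.Close(ctx)
// 	if err := d.AddRow(ctx, row); err != nil {
// 		return err
// 	}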
func MakeDiskRowContainer(
	diskMonitor *mon.BytesMonitor,
	types []*types.T,
	ordering sqlbase.ColumnOrdering,
	e diskmap.Factory,
) DiskRowContainer {
	diskMap := e.NewSortedDiskMap()
	d := DiskRowContainer{
		diskMap:       diskMap,
		diskAcc:       diskMonitor.MakeBoundAccount(),
		types:         types,
		ordering:      ordering,
		scratchEncRow: make(sqlbase.EncDatumRow, len(types)),
		diskMonitor:   diskMonitor,
		engine:        e,
		datumAlloc:    &sqlbase.DatumAlloc{},
	}
	d.bufferedRows = d.diskMap.NewBatchWriter()

	// The ordering is specified for a subset of the columns. These will be
	// encoded as a key in the given order according to the given direction so
	// that the sorting can be delegated to the underlying SortedDiskMap. To
	// avoid converting encoding.Direction to sqlbase.DatumEncoding we do this
	// once at initialization and store the conversions in d.encodings.
	// We encode the other columns as values. The indexes of these columns are
	// kept around in d.valueIdxs to have them ready in hot paths.
	// For composite columns that are specified in d.ordering, the Datum is
	// encoded both in the key for comparison and in the value for decoding.
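	//
	// To illustrate (a schematic layout, not exact bytes): with an ordering
	// on column 0 ASC over a two-column row, AddRow produces
	//   key   = <col0 key-encoded ASC> <rowID varint>
	//   value = <col1 value-encoded> (plus col0 again if it is composite)
	// so the SortedDiskMap's byte ordering yields the requested row ordering.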
	orderingIdxs := make(map[int]struct{})
	for _, orderInfo := range d.ordering {
		orderingIdxs[orderInfo.ColIdx] = struct{}{}
	}
	d.valueIdxs = make([]int, 0, len(d.types))
	for i := range d.types {
		// TODO(asubiotto): A datum of a type for which HasCompositeKeyEncoding
		// returns true may not necessarily need to be encoded in the value, so
		// make this more fine-grained. See IsComposite() methods in
		// pkg/sql/parser/datum.go.
		if _, ok := orderingIdxs[i]; !ok || sqlbase.HasCompositeKeyEncoding(d.types[i]) {
			d.valueIdxs = append(d.valueIdxs, i)
		}
	}

	d.encodings = make([]sqlbase.DatumEncoding, len(d.ordering))
	for i, orderInfo := range ordering {
		d.encodings[i] = sqlbase.EncodingDirToDatumEncoding(orderInfo.Direction)
	}

	return d
}

// DoDeDuplicate causes DiskRowContainer to behave as an implementation of
// DeDupingRowContainer. It should not be mixed with calls to AddRow() (except
// when the AddRow() calls already represent de-duplicated rows). It
// de-duplicates the keys such that only the first row with a given key will
// be stored. The index returned by AddRowWithDeDup() is a dense index
// starting from 0, representing when that key was first added. This feature
// does not combine with Sort(), Reorder() etc., and is only to be used for
// assignment of these dense indexes. The main reason to add this to
// DiskBackedRowContainer is to avoid significant code duplication in
// constructing another row container.
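//
// A hypothetical de-duplication sketch (rows with equal keys map to the same
// dense index):
//
// 	d := MakeDiskRowContainer(diskMonitor, typs, ordering, engine)
// 	d.DoDeDuplicate()
// 	idx1, _ := d.AddRowWithDeDup(ctx, row) // first time: assigns 0
// 	idx2, _ := d.AddRowWithDeDup(ctx, row) // duplicate: returns 0 again
// 	_, _ = idx1, idx2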
func (d *DiskRowContainer) DoDeDuplicate() {
	d.deDuplicate = true
	d.deDupCache = make(map[string]int)
}

// Len is part of the SortableRowContainer interface.
func (d *DiskRowContainer) Len() int {
	return int(d.rowID)
}

// AddRow is part of the SortableRowContainer interface.
//
// It is additionally used in de-duping mode by DiskBackedRowContainer when
// switching from a memory container to this disk container, since it is
// adding rows that are already de-duped. Once it has added all the already
// de-duped rows, it should switch to using AddRowWithDeDup() and never call
// AddRow() again.
//
// Note: if key calculation changes, computeKey() of hashMemRowIterator should
// be changed accordingly.
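//
// A hypothetical spill-transition sketch (dedupedRows stands in for rows the
// memory container already de-duplicated):
//
// 	d.DoDeDuplicate()
// 	for _, r := range dedupedRows {
// 		_ = d.AddRow(ctx, r) // rows are known to be unique
// 	}
// 	// From here on, only AddRowWithDeDup() may be used.
// 	idx, _ := d.AddRowWithDeDup(ctx, newRow)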
func (d *DiskRowContainer) AddRow(ctx context.Context, row sqlbase.EncDatumRow) error {
	if err := d.encodeRow(ctx, row); err != nil {
		return err
	}
	if err := d.diskAcc.Grow(ctx, int64(len(d.scratchKey)+len(d.scratchVal))); err != nil {
		return pgerror.Wrapf(err, pgcode.OutOfMemory,
			"this query requires additional disk space")
	}
	if err := d.bufferedRows.Put(d.scratchKey, d.scratchVal); err != nil {
		return err
	}
	// See comment above on when this is used for already de-duplicated
	// rows -- we need to track these in the de-dup cache so that later
	// calls to AddRowWithDeDup() de-duplicate with respect to this cache.
	if d.deDuplicate {
		if d.bufferedRows.NumPutsSinceFlush() == 0 {
			// The Put flushed the batch, so all buffered keys are now in the
			// diskMap and visible to iterators; the cache can be cleared.
			d.clearDeDupCache()
		} else {
			d.deDupCache[string(d.scratchKey)] = int(d.rowID)
		}
	}
	d.totalEncodedRowBytes += uint64(len(d.scratchKey) + len(d.scratchVal))
	d.scratchKey = d.scratchKey[:0]
	d.scratchVal = d.scratchVal[:0]
	d.rowID++
	return nil
}

// AddRowWithDeDup is part of the DeDupingRowContainer interface.
func (d *DiskRowContainer) AddRowWithDeDup(
	ctx context.Context, row sqlbase.EncDatumRow,
) (int, error) {
	if err := d.encodeRow(ctx, row); err != nil {
		return 0, err
	}
	defer func() {
		d.scratchKey = d.scratchKey[:0]
		d.scratchVal = d.scratchVal[:0]
	}()
	// First use the cache to de-dup.
	entry, ok := d.deDupCache[string(d.scratchKey)]
	if ok {
		return entry, nil
	}
	// The key is not in the cache, so we need to use an iterator to de-dup.
	// TODO(sumeer): this read is expensive:
	// - if there is a significant fraction of duplicates, we can do better
	//   with a larger cache
	// - if duplicates are rare, use a bloom filter for all the keys in the
	//   diskMap, since a miss in the bloom filter allows us to write to the
	//   diskMap without reading.
	iter := d.diskMap.NewIterator()
	defer iter.Close()
	iter.SeekGE(d.scratchKey)
	valid, err := iter.Valid()
	if err != nil {
		return 0, err
	}
	if valid && bytes.Equal(iter.UnsafeKey(), d.scratchKey) {
		// Found the key. Note that as documented in DeDupingRowContainer,
		// this feature is limited to the case where the whole row is
		// encoded into the key. The value only contains the dense RowID
		// assigned to the key.
		_, idx, err := encoding.DecodeUvarintAscending(iter.UnsafeValue())
		if err != nil {
			return 0, err
		}
		return int(idx), nil
	}
	if err := d.diskAcc.Grow(ctx, int64(len(d.scratchKey)+len(d.scratchVal))); err != nil {
		return 0, pgerror.Wrapf(err, pgcode.OutOfMemory,
			"this query requires additional disk space")
	}
	if err := d.bufferedRows.Put(d.scratchKey, d.scratchVal); err != nil {
		return 0, err
	}
	if d.bufferedRows.NumPutsSinceFlush() == 0 {
		// The Put flushed the batch; see the analogous case in AddRow().
		d.clearDeDupCache()
	} else {
		d.deDupCache[string(d.scratchKey)] = int(d.rowID)
	}
	d.totalEncodedRowBytes += uint64(len(d.scratchKey) + len(d.scratchVal))
	idx := int(d.rowID)
	d.rowID++
	return idx, nil
}

func (d *DiskRowContainer) clearDeDupCache() {
	for k := range d.deDupCache {
		delete(d.deDupCache, k)
	}
}

func (d *DiskRowContainer) testingFlushBuffer(ctx context.Context) {
	if err := d.bufferedRows.Flush(); err != nil {
		log.Fatalf(ctx, "%v", err)
	}
	d.clearDeDupCache()
}

func (d *DiskRowContainer) encodeRow(ctx context.Context, row sqlbase.EncDatumRow) error {
	if len(row) != len(d.types) {
		log.Fatalf(ctx, "invalid row length %d, expected %d", len(row), len(d.types))
	}

	for i, orderInfo := range d.ordering {
		col := orderInfo.ColIdx
		var err error
		d.scratchKey, err = row[col].Encode(d.types[col], d.datumAlloc, d.encodings[i], d.scratchKey)
		if err != nil {
			return err
		}
	}
	if !d.deDuplicate {
		for _, i := range d.valueIdxs {
			var err error
			d.scratchVal, err = row[i].Encode(d.types[i], d.datumAlloc, sqlbase.DatumEncoding_VALUE, d.scratchVal)
			if err != nil {
				return err
			}
		}
		// Append the unique rowID to the key so that duplicate rows do not
		// overwrite each other. Note that this will not mess with key decoding.
		d.scratchKey = encoding.EncodeUvarintAscending(d.scratchKey, d.rowID)
	} else {
		// Add the row id to the value. Note that in this de-duping case the
		// row id is the only thing in the value since the whole row is encoded
		// into the key. Note that the key could have types for which
		// HasCompositeKeyEncoding() returns true and we do not encode them
		// into the value (only in the key) for this DeDupingRowContainer. This
		// is ok since:
		// - The DeDupingRowContainer never needs to return the original row
		//   (there is no get method).
		// - The columns encoded into the key are the primary key columns
		//   of the original table, so the key encoding represents a unique
		//   row in the original table (the key encoding here is not only
		//   a determinant of sort ordering).
		d.scratchVal = encoding.EncodeUvarintAscending(d.scratchVal, d.rowID)
	}
	return nil
}

// Sort is a noop because the use of a SortedDiskMap as the underlying store
// keeps the rows in sorted order.
func (d *DiskRowContainer) Sort(context.Context) {}

// Reorder implements ReorderableRowContainer. It creates a new
// DiskRowContainer with the requested ordering and adds the rows one by one
// from the current DiskRowContainer; the latter is closed at the end.
func (d *DiskRowContainer) Reorder(ctx context.Context, ordering sqlbase.ColumnOrdering) error {
	// We need to create a new DiskRowContainer since its ordering can only be
	// changed at initialization.
	newContainer := MakeDiskRowContainer(d.diskMonitor, d.types, ordering, d.engine)
	i := d.NewFinalIterator(ctx)
	defer i.Close()
	for i.Rewind(); ; i.Next() {
		if ok, err := i.Valid(); err != nil {
			return err
		} else if !ok {
			break
		}
		row, err := i.Row()
		if err != nil {
			return err
		}
		if err := newContainer.AddRow(ctx, row); err != nil {
			return err
		}
	}
	d.Close(ctx)
	*d = newContainer
	return nil
}

// InitTopK limits iterators to read the first k rows, where k is the
// container's current length (Len()) at the time of the call.
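//
// A hypothetical top-k sketch: add the first k candidate rows, call
// InitTopK(), then offer the remaining rows through MaybeReplaceMax().
//
// 	for _, r := range rows[:k] {
// 		_ = d.AddRow(ctx, r)
// 	}
// 	d.InitTopK()
// 	for _, r := range rows[k:] {
// 		_ = d.MaybeReplaceMax(ctx, r)
// 	}
// 	// Iterators now stop after the first k rows in sorted order.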
func (d *DiskRowContainer) InitTopK() {
	d.topK = d.Len()
}

// MaybeReplaceMax adds row to the DiskRowContainer. The SortedDiskMap will
// sort this row into the top k if applicable.
func (d *DiskRowContainer) MaybeReplaceMax(ctx context.Context, row sqlbase.EncDatumRow) error {
	return d.AddRow(ctx, row)
}

// MeanEncodedRowBytes returns the mean bytes consumed by an encoded row stored in
// this container.
func (d *DiskRowContainer) MeanEncodedRowBytes() int {
	if d.rowID == 0 {
		return 0
	}
	return int(d.totalEncodedRowBytes / d.rowID)
}

// UnsafeReset is part of the SortableRowContainer interface.
func (d *DiskRowContainer) UnsafeReset(ctx context.Context) error {
	_ = d.bufferedRows.Close(ctx)
	if err := d.diskMap.Clear(); err != nil {
		return err
	}
	d.diskAcc.Clear(ctx)
	d.bufferedRows = d.diskMap.NewBatchWriter()
	d.clearDeDupCache()
	d.lastReadKey = nil
	d.rowID = 0
	d.totalEncodedRowBytes = 0
	return nil
}

// Close is part of the SortableRowContainer interface.
func (d *DiskRowContainer) Close(ctx context.Context) {
	// We can ignore the error here because the flushed data is immediately cleared
	// in the following Close.
	_ = d.bufferedRows.Close(ctx)
	d.diskMap.Close(ctx)
	d.diskAcc.Close(ctx)
}

// keyValToRow decodes a key and a value byte slice stored with AddRow() into
// a sqlbase.EncDatumRow. The returned EncDatumRow is only valid until the next
// call to keyValToRow().
func (d *DiskRowContainer) keyValToRow(k []byte, v []byte) (sqlbase.EncDatumRow, error) {
	for i, orderInfo := range d.ordering {
		// Types with composite key encodings are decoded from the value.
		if sqlbase.HasCompositeKeyEncoding(d.types[orderInfo.ColIdx]) {
			// Skip over the encoded key.
			encLen, err := encoding.PeekLength(k)
			if err != nil {
				return nil, err
			}
			k = k[encLen:]
			continue
		}
		var err error
		col := orderInfo.ColIdx
		d.scratchEncRow[col], k, err = sqlbase.EncDatumFromBuffer(d.types[col], d.encodings[i], k)
		if err != nil {
			return nil, errors.NewAssertionErrorWithWrappedErrf(err,
				"unable to decode row, column idx %d", errors.Safe(col))
		}
	}
	for _, i := range d.valueIdxs {
		var err error
		d.scratchEncRow[i], v, err = sqlbase.EncDatumFromBuffer(d.types[i], sqlbase.DatumEncoding_VALUE, v)
		if err != nil {
			return nil, errors.NewAssertionErrorWithWrappedErrf(err,
				"unable to decode row, value idx %d", errors.Safe(i))
		}
	}
	return d.scratchEncRow, nil
}

// diskRowIterator iterates over the rows in a DiskRowContainer.
type diskRowIterator struct {
	rowContainer *DiskRowContainer
	rowBuf       []byte
	diskmap.SortedDiskMapIterator
}

var _ RowIterator = &diskRowIterator{}

func (d *DiskRowContainer) newIterator(ctx context.Context) diskRowIterator {
	if err := d.bufferedRows.Flush(); err != nil {
		log.Fatalf(ctx, "%v", err)
	}
	return diskRowIterator{rowContainer: d, SortedDiskMapIterator: d.diskMap.NewIterator()}
}

// NewIterator is part of the SortableRowContainer interface.
func (d *DiskRowContainer) NewIterator(ctx context.Context) RowIterator {
	i := d.newIterator(ctx)
	if d.topK > 0 {
		return &diskRowTopKIterator{RowIterator: &i, k: d.topK}
	}
	return &i
}

// Row returns the current row. The returned sqlbase.EncDatumRow is only valid
// until the next call to Row().
func (r *diskRowIterator) Row() (sqlbase.EncDatumRow, error) {
	if ok, err := r.Valid(); err != nil {
		return nil, errors.NewAssertionErrorWithWrappedErrf(err, "unable to check row validity")
	} else if !ok {
		return nil, errors.AssertionFailedf("invalid row")
	}

	k := r.UnsafeKey()
	v := r.UnsafeValue()
	// TODO(asubiotto): the "true ||" should not be necessary. We should be
	// able to reuse rowBuf, yet doing so causes
	// TestDiskBackedIndexedRowContainer/ReorderingOnDisk, TestHashJoiner, and
	// TestSorter to fail. Some caller of Row() is presumably not making a copy
	// of the return value.
	if true || cap(r.rowBuf) < len(k)+len(v) {
		r.rowBuf = make([]byte, 0, len(k)+len(v))
	}
	r.rowBuf = r.rowBuf[:len(k)+len(v)]
	copy(r.rowBuf, k)
	copy(r.rowBuf[len(k):], v)
	k = r.rowBuf[:len(k)]
	v = r.rowBuf[len(k):]

	return r.rowContainer.keyValToRow(k, v)
}

func (r *diskRowIterator) Close() {
	if r.SortedDiskMapIterator != nil {
		r.SortedDiskMapIterator.Close()
	}
}

// numberedRowIterator is a specialization of diskRowIterator that is
// only for the case where the key is the rowID assigned in AddRow().
type numberedRowIterator struct {
	*diskRowIterator
	scratchKey []byte
}

func (d *DiskRowContainer) newNumberedIterator(ctx context.Context) *numberedRowIterator {
	i := d.newIterator(ctx)
	return &numberedRowIterator{diskRowIterator: &i}
}

func (n *numberedRowIterator) seekToIndex(idx int) {
	// Truncate and reuse scratchKey across seeks; the pointer receiver lets
	// the grown buffer persist between calls instead of being discarded.
	n.scratchKey = encoding.EncodeUvarintAscending(n.scratchKey[:0], uint64(idx))
	n.SeekGE(n.scratchKey)
}

type diskRowFinalIterator struct {
	diskRowIterator
}

var _ RowIterator = &diskRowFinalIterator{}

// NewFinalIterator returns an iterator that reads rows exactly once throughout
// the lifetime of a DiskRowContainer. Rows are not actually discarded from the
// DiskRowContainer, but lastReadKey is tracked so that it can serve as the
// start key for future diskRowFinalIterators.
// NOTE: Don't use NewFinalIterator if you passed in an ordering for the rows
// and will be adding rows between iterations. New rows could sort before the
// current row.
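//
// A hypothetical draining sketch (each pass resumes after the last row the
// previous pass read):
//
// 	i := d.NewFinalIterator(ctx)
// 	defer i.Close()
// 	for i.Rewind(); ; i.Next() {
// 		if ok, err := i.Valid(); err != nil || !ok {
// 			break
// 		}
// 		row, _ := i.Row() // advances lastReadKey
// 		_ = row
// 	}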
func (d *DiskRowContainer) NewFinalIterator(ctx context.Context) RowIterator {
	i := diskRowFinalIterator{diskRowIterator: d.newIterator(ctx)}
	if d.topK > 0 {
		return &diskRowTopKIterator{RowIterator: &i, k: d.topK}
	}
	return &i
}

func (r *diskRowFinalIterator) Rewind() {
	r.SeekGE(r.diskRowIterator.rowContainer.lastReadKey)
	if r.diskRowIterator.rowContainer.lastReadKey != nil {
		r.Next()
	}
}

func (r *diskRowFinalIterator) Row() (sqlbase.EncDatumRow, error) {
	row, err := r.diskRowIterator.Row()
	if err != nil {
		return nil, err
	}
	r.diskRowIterator.rowContainer.lastReadKey =
		append(r.diskRowIterator.rowContainer.lastReadKey[:0], r.UnsafeKey()...)
	return row, nil
}

type diskRowTopKIterator struct {
	RowIterator
	position int
	// k is the limit of rows to read.
	k int
}

var _ RowIterator = &diskRowTopKIterator{}

func (d *diskRowTopKIterator) Rewind() {
	d.RowIterator.Rewind()
	d.position = 0
}

func (d *diskRowTopKIterator) Valid() (bool, error) {
	if d.position >= d.k {
		return false, nil
	}
	return d.RowIterator.Valid()
}

func (d *diskRowTopKIterator) Next() {
	d.position++
	d.RowIterator.Next()
}