github.com/cockroachdb/pebble@v0.0.0-20231214172447-ab4952c5f87b/table_stats.go

     1  // Copyright 2020 The LevelDB-Go and Pebble Authors. All rights reserved. Use
     2  // of this source code is governed by a BSD-style license that can be found in
     3  // the LICENSE file.
     4  
     5  package pebble
     6  
     7  import (
     8  	"fmt"
     9  	"math"
    10  
    11  	"github.com/cockroachdb/errors"
    12  	"github.com/cockroachdb/pebble/internal/base"
    13  	"github.com/cockroachdb/pebble/internal/keyspan"
    14  	"github.com/cockroachdb/pebble/internal/manifest"
    15  	"github.com/cockroachdb/pebble/sstable"
    16  )
    17  
    18  // In-memory statistics about tables help inform compaction picking, but may
    19  // be expensive to calculate or load from disk. Every time a database is
    20  // opened, these statistics must be reloaded or recalculated. To minimize
    21  // impact on user activity and compactions, we load these statistics
    22  // asynchronously in the background and store loaded statistics in each
    23  // table's *FileMetadata.
    24  //
    25  // This file implements the asynchronous loading of statistics by maintaining
    26  // a list of files that require statistics, alongside their LSM levels.
    27  // Whenever new files are added to the LSM, the files are appended to
    28  // d.mu.tableStats.pending. If a stats collection job is not currently
    29  // running, one is started in a separate goroutine.
    30  //
    31  // The stats collection job grabs and clears the pending list, computes table
    32  // statistics relative to the current readState and updates the tables' file
    33  // metadata. New pending files may accumulate during a stats collection job,
    34  // so a completing job triggers a new job if necessary. Only one job runs at a
    35  // time.
    36  //
    37  // When an existing database is opened, all files lack in-memory statistics.
    38  // These files' stats are loaded incrementally whenever the pending list is
    39  // empty by scanning a current readState for files missing statistics. Once a
    40  // job completes a scan without finding any remaining files without
    41  // statistics, it flips a `loadedInitial` flag. From then on, the stats
    42  // collection job only needs to load statistics for new files appended to the
    43  // pending list.
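         //
         // In terms of the state used below: d.mu.tableStats.pending holds the
         // manifest.NewFileEntry values awaiting stats, d.mu.tableStats.loading
         // is set while the single collection goroutine runs,
         // d.mu.tableStats.loadedInitial records that the initial scan of the
         // LSM has completed, and d.mu.tableStats.cond is broadcast once a
         // batch of collected stats has been installed into the corresponding
         // *FileMetadata.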
    44  
    45  func (d *DB) maybeCollectTableStatsLocked() {
    46  	if d.shouldCollectTableStatsLocked() {
    47  		go d.collectTableStats()
    48  	}
    49  }
    50  
    51  // updateTableStatsLocked is called when new files are introduced, after the
    52  // read state has been updated. It may trigger a new stat collection.
    53  // DB.mu must be locked when calling.
    54  func (d *DB) updateTableStatsLocked(newFiles []manifest.NewFileEntry) {
    55  	var needStats bool
    56  	for _, nf := range newFiles {
    57  		if !nf.Meta.StatsValid() {
    58  			needStats = true
    59  			break
    60  		}
    61  	}
    62  	if !needStats {
    63  		return
    64  	}
    65  
    66  	d.mu.tableStats.pending = append(d.mu.tableStats.pending, newFiles...)
    67  	d.maybeCollectTableStatsLocked()
    68  }
    69  
    70  func (d *DB) shouldCollectTableStatsLocked() bool {
    71  	return !d.mu.tableStats.loading &&
    72  		d.closed.Load() == nil &&
    73  		!d.opts.private.disableTableStats &&
    74  		(len(d.mu.tableStats.pending) > 0 || !d.mu.tableStats.loadedInitial)
    75  }
    76  
    77  // collectTableStats runs a table stats collection job, returning true if the
    78  // invocation did the collection work, false otherwise (e.g. if another job was
    79  // already running).
    80  func (d *DB) collectTableStats() bool {
    81  	const maxTableStatsPerScan = 50
    82  
    83  	d.mu.Lock()
    84  	if !d.shouldCollectTableStatsLocked() {
    85  		d.mu.Unlock()
    86  		return false
    87  	}
    88  
    89  	pending := d.mu.tableStats.pending
    90  	d.mu.tableStats.pending = nil
    91  	d.mu.tableStats.loading = true
    92  	jobID := d.mu.nextJobID
    93  	d.mu.nextJobID++
    94  	loadedInitial := d.mu.tableStats.loadedInitial
    95  	// Drop DB.mu before performing IO.
    96  	d.mu.Unlock()
    97  
    98  	// Every run of collectTableStats either collects stats from the pending
    99  	// list (if non-empty) or from scanning the version (loadedInitial is
   100  	// false). This job only runs if at least one of those conditions holds.
   101  
   102  	// Grab a read state to scan for tables.
   103  	rs := d.loadReadState()
   104  	var collected []collectedStats
   105  	var hints []deleteCompactionHint
   106  	if len(pending) > 0 {
   107  		collected, hints = d.loadNewFileStats(rs, pending)
   108  	} else {
   109  		var moreRemain bool
   110  		var buf [maxTableStatsPerScan]collectedStats
   111  		collected, hints, moreRemain = d.scanReadStateTableStats(rs, buf[:0])
   112  		loadedInitial = !moreRemain
   113  	}
   114  	rs.unref()
   115  
   116  	// Update the FileMetadata with the loaded stats while holding d.mu.
   117  	d.mu.Lock()
   118  	defer d.mu.Unlock()
   119  	d.mu.tableStats.loading = false
   120  	if loadedInitial && !d.mu.tableStats.loadedInitial {
   121  		d.mu.tableStats.loadedInitial = loadedInitial
   122  		d.opts.EventListener.TableStatsLoaded(TableStatsInfo{
   123  			JobID: jobID,
   124  		})
   125  	}
   126  
   127  	maybeCompact := false
   128  	for _, c := range collected {
   129  		c.fileMetadata.Stats = c.TableStats
   130  		maybeCompact = maybeCompact || fileCompensation(c.fileMetadata) > 0
   131  		c.fileMetadata.StatsMarkValid()
   132  	}
   133  	d.mu.tableStats.cond.Broadcast()
   134  	d.maybeCollectTableStatsLocked()
   135  	if len(hints) > 0 && !d.opts.private.disableDeleteOnlyCompactions {
   136  		// Verify that all of the hint tombstones' files still exist in the
   137  		// current version. Otherwise, the tombstone itself may have been
   138  		// compacted into L6 and more recent keys may have had their sequence
   139  		// numbers zeroed.
   140  		//
   141  		// Note that it's possible that the tombstone file is being compacted
   142  		// presently. In that case, the file will be present in v. When the
   143  		// compaction finishes compacting the tombstone file, it will detect
   144  		// and clear the hint.
   145  		//
   146  		// See DB.maybeUpdateDeleteCompactionHints.
   147  		v := d.mu.versions.currentVersion()
   148  		keepHints := hints[:0]
   149  		for _, h := range hints {
   150  			if v.Contains(h.tombstoneLevel, d.cmp, h.tombstoneFile) {
   151  				keepHints = append(keepHints, h)
   152  			}
   153  		}
   154  		d.mu.compact.deletionHints = append(d.mu.compact.deletionHints, keepHints...)
   155  	}
   156  	if maybeCompact {
   157  		d.maybeScheduleCompaction()
   158  	}
   159  	return true
   160  }
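
         // Note on pacing: scanReadStateTableStats fills at most
         // maxTableStatsPerScan (50) tables per job, and a completing job
         // re-triggers itself via maybeCollectTableStatsLocked while more
         // remain. So a reopened database with, say, 10,000 tables lacking
         // stats (a hypothetical figure) takes on the order of 200 passes,
         // each against a freshly loaded readState, before loadedInitial is
         // set.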
   161  
   162  type collectedStats struct {
   163  	*fileMetadata
   164  	manifest.TableStats
   165  }
   166  
   167  func (d *DB) loadNewFileStats(
   168  	rs *readState, pending []manifest.NewFileEntry,
   169  ) ([]collectedStats, []deleteCompactionHint) {
   170  	var hints []deleteCompactionHint
   171  	collected := make([]collectedStats, 0, len(pending))
   172  	for _, nf := range pending {
   173  		// A file's stats might have been populated by an earlier call to
   174  		// loadNewFileStats if the file was moved.
   175  		// NB: We're not holding d.mu which protects f.Stats, but only
   176  		// collectTableStats updates f.Stats for active files, and we
   177  		// ensure only one goroutine runs it at a time through
   178  		// d.mu.tableStats.loading.
   179  		if nf.Meta.StatsValid() {
   180  			continue
   181  		}
   182  
   183  		// The file isn't guaranteed to still be live in the readState's
   184  		// version. It may have been deleted or moved. Skip it if it's not in
   185  		// the expected level.
   186  		if !rs.current.Contains(nf.Level, d.cmp, nf.Meta) {
   187  			continue
   188  		}
   189  
   190  		stats, newHints, err := d.loadTableStats(
   191  			rs.current, nf.Level,
   192  			nf.Meta,
   193  		)
   194  		if err != nil {
   195  			d.opts.EventListener.BackgroundError(err)
   196  			continue
   197  		}
   198  		// NB: We don't update the FileMetadata yet, because we aren't
   199  		// holding DB.mu. We'll copy it to the FileMetadata after we're
   200  		// finished with IO.
   201  		collected = append(collected, collectedStats{
   202  			fileMetadata: nf.Meta,
   203  			TableStats:   stats,
   204  		})
   205  		hints = append(hints, newHints...)
   206  	}
   207  	return collected, hints
   208  }
   209  
   210  // scanReadStateTableStats is run by an active stat collection job when there
   211  // are no pending new files, but there might be files that existed at Open for
   212  // which we haven't loaded table stats.
   213  func (d *DB) scanReadStateTableStats(
   214  	rs *readState, fill []collectedStats,
   215  ) ([]collectedStats, []deleteCompactionHint, bool) {
   216  	moreRemain := false
   217  	var hints []deleteCompactionHint
   218  	sizesChecked := make(map[base.DiskFileNum]struct{})
   219  	for l, levelMetadata := range rs.current.Levels {
   220  		iter := levelMetadata.Iter()
   221  		for f := iter.First(); f != nil; f = iter.Next() {
   222  			// NB: We're not holding d.mu which protects f.Stats, but only the
   223  			// active stats collection job updates f.Stats for active files,
   224  			// and we ensure only one goroutine runs it at a time through
   225  			// d.mu.tableStats.loading. This makes it safe to read validity
    226  			// through f.StatsValid() despite not holding d.mu.
   227  			if f.StatsValid() {
   228  				continue
   229  			}
   230  
   231  			// Limit how much work we do per read state. The older the read
   232  			// state is, the higher the likelihood files are no longer being
   233  			// used in the current version. If we've exhausted our allowance,
   234  			// return true for the last return value to signal there's more
   235  			// work to do.
   236  			if len(fill) == cap(fill) {
   237  				moreRemain = true
   238  				return fill, hints, moreRemain
   239  			}
   240  
   241  			// If the file is remote and not SharedForeign, we should check if its size
   242  			// matches. This is because checkConsistency skips over remote files.
   243  			//
   244  			// SharedForeign and External files are skipped as their sizes are allowed
   245  			// to have a mismatch; the size stored in the FileBacking is just the part
   246  			// of the file that is referenced by this Pebble instance, not the size of
   247  			// the whole object.
   248  			objMeta, err := d.objProvider.Lookup(fileTypeTable, f.FileBacking.DiskFileNum)
   249  			if err != nil {
   250  				// Set `moreRemain` so we'll try again.
   251  				moreRemain = true
   252  				d.opts.EventListener.BackgroundError(err)
   253  				continue
   254  			}
   255  
   256  			shouldCheckSize := objMeta.IsRemote() &&
   257  				!d.objProvider.IsSharedForeign(objMeta) &&
   258  				!objMeta.IsExternal()
   259  			if _, ok := sizesChecked[f.FileBacking.DiskFileNum]; !ok && shouldCheckSize {
   260  				size, err := d.objProvider.Size(objMeta)
   261  				fileSize := f.FileBacking.Size
   262  				if err != nil {
   263  					moreRemain = true
   264  					d.opts.EventListener.BackgroundError(err)
   265  					continue
   266  				}
   267  				if size != int64(fileSize) {
   268  					err := errors.Errorf(
   269  						"during consistency check in loadTableStats: L%d: %s: object size mismatch (%s): %d (provider) != %d (MANIFEST)",
   270  						errors.Safe(l), f.FileNum, d.objProvider.Path(objMeta),
   271  						errors.Safe(size), errors.Safe(fileSize))
   272  					d.opts.EventListener.BackgroundError(err)
   273  					d.opts.Logger.Fatalf("%s", err)
   274  				}
   275  
   276  				sizesChecked[f.FileBacking.DiskFileNum] = struct{}{}
   277  			}
   278  
   279  			stats, newHints, err := d.loadTableStats(
   280  				rs.current, l, f,
   281  			)
   282  			if err != nil {
   283  				// Set `moreRemain` so we'll try again.
   284  				moreRemain = true
   285  				d.opts.EventListener.BackgroundError(err)
   286  				continue
   287  			}
   288  			fill = append(fill, collectedStats{
   289  				fileMetadata: f,
   290  				TableStats:   stats,
   291  			})
   292  			hints = append(hints, newHints...)
   293  		}
   294  	}
   295  	return fill, hints, moreRemain
   296  }
   297  
   298  func (d *DB) loadTableStats(
   299  	v *version, level int, meta *fileMetadata,
   300  ) (manifest.TableStats, []deleteCompactionHint, error) {
   301  	var stats manifest.TableStats
   302  	var compactionHints []deleteCompactionHint
   303  	err := d.tableCache.withCommonReader(
   304  		meta, func(r sstable.CommonReader) (err error) {
   305  			props := r.CommonProperties()
   306  			stats.NumEntries = props.NumEntries
   307  			stats.NumDeletions = props.NumDeletions
   308  			if props.NumPointDeletions() > 0 {
   309  				if err = d.loadTablePointKeyStats(props, v, level, meta, &stats); err != nil {
   310  					return
   311  				}
   312  			}
   313  			if props.NumRangeDeletions > 0 || props.NumRangeKeyDels > 0 {
   314  				if compactionHints, err = d.loadTableRangeDelStats(
   315  					r, v, level, meta, &stats,
   316  				); err != nil {
   317  					return
   318  				}
   319  			}
   320  			// TODO(travers): Once we have real-world data, consider collecting
   321  			// additional stats that may provide improved heuristics for compaction
   322  			// picking.
   323  			stats.NumRangeKeySets = props.NumRangeKeySets
   324  			stats.ValueBlocksSize = props.ValueBlocksSize
   325  			return
   326  		})
   327  	if err != nil {
   328  		return stats, nil, err
   329  	}
   330  	return stats, compactionHints, nil
   331  }
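
         // Of the statistics set above, NumEntries, NumDeletions,
         // NumRangeKeySets and ValueBlocksSize are read directly from the
         // table's properties; only PointDeletionsBytesEstimate and
         // RangeDeletionsBytesEstimate (computed by the two helpers below)
         // require estimating data beneath the table and thus additional IO.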
   332  
   333  // loadTablePointKeyStats calculates the point key statistics for the given
   334  // table. The provided manifest.TableStats are updated.
   335  func (d *DB) loadTablePointKeyStats(
   336  	props *sstable.CommonProperties,
   337  	v *version,
   338  	level int,
   339  	meta *fileMetadata,
   340  	stats *manifest.TableStats,
   341  ) error {
   342  	// TODO(jackson): If the file has a wide keyspace, the average
   343  	// value size beneath the entire file might not be representative
   344  	// of the size of the keys beneath the point tombstones.
   345  	// We could write the ranges of 'clusters' of point tombstones to
   346  	// a sstable property and call averageValueSizeBeneath for each of
   347  	// these narrower ranges to improve the estimate.
   348  	avgValLogicalSize, compressionRatio, err := d.estimateSizesBeneath(v, level, meta, props)
   349  	if err != nil {
   350  		return err
   351  	}
   352  	stats.PointDeletionsBytesEstimate =
   353  		pointDeletionsBytesEstimate(meta.Size, props, avgValLogicalSize, compressionRatio)
   354  	return nil
   355  }
   356  
   357  // loadTableRangeDelStats calculates the range deletion and range key deletion
   358  // statistics for the given table.
   359  func (d *DB) loadTableRangeDelStats(
   360  	r sstable.CommonReader, v *version, level int, meta *fileMetadata, stats *manifest.TableStats,
   361  ) ([]deleteCompactionHint, error) {
   362  	iter, err := newCombinedDeletionKeyspanIter(d.opts.Comparer, r, meta)
   363  	if err != nil {
   364  		return nil, err
   365  	}
   366  	defer iter.Close()
   367  	var compactionHints []deleteCompactionHint
   368  	// We iterate over the defragmented range tombstones and range key deletions,
   369  	// which ensures we don't double count ranges deleted at different sequence
   370  	// numbers. Also, merging abutting tombstones reduces the number of calls to
   371  	// estimateReclaimedSizeBeneath which is costly, and improves the accuracy of
   372  	// our overall estimate.
   373  	for s := iter.First(); s != nil; s = iter.Next() {
   374  		start, end := s.Start, s.End
   375  		// We only need to consider deletion size estimates for tables that contain
   376  		// RANGEDELs.
   377  		var maxRangeDeleteSeqNum uint64
   378  		for _, k := range s.Keys {
   379  			if k.Kind() == base.InternalKeyKindRangeDelete && maxRangeDeleteSeqNum < k.SeqNum() {
   380  				maxRangeDeleteSeqNum = k.SeqNum()
   381  				break
   382  			}
   383  		}
   384  
   385  		// If the file is in the last level of the LSM, there is no data beneath
   386  		// it. The fact that there is still a range tombstone in a bottommost file
    387  		// indicates two possibilities:
   388  		//   1. an open snapshot kept the tombstone around, and the data the
   389  		//      tombstone deletes is contained within the file itself.
   390  		//   2. the file was ingested.
   391  		// In the first case, we'd like to estimate disk usage within the file
   392  		// itself since compacting the file will drop that covered data. In the
   393  		// second case, we expect that compacting the file will NOT drop any
   394  		// data and rewriting the file is a waste of write bandwidth. We can
   395  		// distinguish these cases by looking at the file metadata's sequence
   396  		// numbers. A file's range deletions can only delete data within the
   397  		// file at lower sequence numbers. All keys in an ingested sstable adopt
   398  		// the same sequence number, preventing tombstones from deleting keys
   399  		// within the same file. We check here if the largest RANGEDEL sequence
   400  		// number is greater than the file's smallest sequence number. If it is,
   401  		// the RANGEDEL could conceivably (although inconclusively) delete data
   402  		// within the same file.
   403  		//
   404  		// Note that this heuristic is imperfect. If a table containing a range
   405  		// deletion is ingested into L5 and subsequently compacted into L6 but
   406  		// an open snapshot prevents elision of covered keys in L6, the
   407  		// resulting RangeDeletionsBytesEstimate will incorrectly include all
   408  		// covered keys.
   409  		//
   410  		// TODO(jackson): We could prevent the above error in the heuristic by
   411  		// computing the file's RangeDeletionsBytesEstimate during the
   412  		// compaction itself. It's unclear how common this is.
   413  		//
   414  		// NOTE: If the span `s` wholly contains a table containing range keys,
   415  		// the returned size estimate will be slightly inflated by the range key
   416  		// block. However, in practice, range keys are expected to be rare, and
   417  		// the size of the range key block relative to the overall size of the
   418  		// table is expected to be small.
   419  		if level == numLevels-1 && meta.SmallestSeqNum < maxRangeDeleteSeqNum {
   420  			size, err := r.EstimateDiskUsage(start, end)
   421  			if err != nil {
   422  				return nil, err
   423  			}
   424  			stats.RangeDeletionsBytesEstimate += size
   425  
   426  			// As the file is in the bottommost level, there is no need to collect a
   427  			// deletion hint.
   428  			continue
   429  		}
   430  
   431  		// While the size estimates for point keys should only be updated if this
   432  		// span contains a range del, the sequence numbers are required for the
   433  		// hint. Unconditionally descend, but conditionally update the estimates.
   434  		hintType := compactionHintFromKeys(s.Keys)
   435  		estimate, hintSeqNum, err := d.estimateReclaimedSizeBeneath(v, level, start, end, hintType)
   436  		if err != nil {
   437  			return nil, err
   438  		}
   439  		stats.RangeDeletionsBytesEstimate += estimate
   440  
    441  		// If any files were completely contained within the range,
   442  		// hintSeqNum is the smallest sequence number contained in any
   443  		// such file.
   444  		if hintSeqNum == math.MaxUint64 {
   445  			continue
   446  		}
   447  		hint := deleteCompactionHint{
   448  			hintType:                hintType,
   449  			start:                   make([]byte, len(start)),
   450  			end:                     make([]byte, len(end)),
   451  			tombstoneFile:           meta,
   452  			tombstoneLevel:          level,
   453  			tombstoneLargestSeqNum:  s.LargestSeqNum(),
   454  			tombstoneSmallestSeqNum: s.SmallestSeqNum(),
   455  			fileSmallestSeqNum:      hintSeqNum,
   456  		}
   457  		copy(hint.start, start)
   458  		copy(hint.end, end)
   459  		compactionHints = append(compactionHints, hint)
   460  	}
   461  	return compactionHints, err
   462  }
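
         // To make the bottommost-level check above concrete with hypothetical
         // sequence numbers: an sstable ingested into L6 assigns every key,
         // including its RANGEDELs, the same sequence number (say #10), so
         // maxRangeDeleteSeqNum equals meta.SmallestSeqNum and no in-file
         // usage is estimated. By contrast, a flushed bottommost file holding
         // keys at #3..#7 with a RANGEDEL at #7 kept alive by a snapshot has
         // SmallestSeqNum (#3) < maxRangeDeleteSeqNum (#7), so
         // EstimateDiskUsage over the tombstone's bounds is added to
         // RangeDeletionsBytesEstimate.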
   463  
   464  func (d *DB) estimateSizesBeneath(
   465  	v *version, level int, meta *fileMetadata, fileProps *sstable.CommonProperties,
   466  ) (avgValueLogicalSize, compressionRatio float64, err error) {
   467  	// Find all files in lower levels that overlap with meta,
   468  	// summing their value sizes and entry counts.
   469  	file := meta
   470  	var fileSum, keySum, valSum, entryCount uint64
   471  	// Include the file itself. This is important because in some instances, the
   472  	// computed compression ratio is applied to the tombstones contained within
   473  	// `meta` itself. If there are no files beneath `meta` in the LSM, we would
   474  	// calculate a compression ratio of 0 which is not accurate for the file's
   475  	// own tombstones.
   476  	fileSum += file.Size
   477  	entryCount += fileProps.NumEntries
   478  	keySum += fileProps.RawKeySize
   479  	valSum += fileProps.RawValueSize
   480  
   481  	addPhysicalTableStats := func(r *sstable.Reader) (err error) {
   482  		fileSum += file.Size
   483  		entryCount += r.Properties.NumEntries
   484  		keySum += r.Properties.RawKeySize
   485  		valSum += r.Properties.RawValueSize
   486  		return nil
   487  	}
   488  	addVirtualTableStats := func(v sstable.VirtualReader) (err error) {
   489  		fileSum += file.Size
   490  		entryCount += file.Stats.NumEntries
   491  		keySum += v.Properties.RawKeySize
   492  		valSum += v.Properties.RawValueSize
   493  		return nil
   494  	}
   495  
   496  	for l := level + 1; l < numLevels; l++ {
   497  		overlaps := v.Overlaps(l, d.cmp, meta.Smallest.UserKey,
   498  			meta.Largest.UserKey, meta.Largest.IsExclusiveSentinel())
   499  		iter := overlaps.Iter()
   500  		for file = iter.First(); file != nil; file = iter.Next() {
   501  			var err error
   502  			if file.Virtual {
   503  				err = d.tableCache.withVirtualReader(file.VirtualMeta(), addVirtualTableStats)
   504  			} else {
   505  				err = d.tableCache.withReader(file.PhysicalMeta(), addPhysicalTableStats)
   506  			}
   507  			if err != nil {
   508  				return 0, 0, err
   509  			}
   510  		}
   511  	}
   512  	if entryCount == 0 {
   513  		return 0, 0, nil
   514  	}
   515  	// RawKeySize and RawValueSize are uncompressed totals. We'll need to scale
   516  	// the value sum according to the data size to account for compression,
   517  	// index blocks and metadata overhead. Eg:
   518  	//
   519  	//    Compression rate        ×  Average uncompressed value size
   520  	//
   521  	//                            ↓
   522  	//
   523  	//         FileSize              RawValueSize
   524  	//   -----------------------  ×  ------------
   525  	//   RawKeySize+RawValueSize     NumEntries
   526  	//
   527  	// We return the average logical value size plus the compression ratio,
   528  	// leaving the scaling to the caller. This allows the caller to perform
   529  	// additional compression ratio scaling if necessary.
   530  	uncompressedSum := float64(keySum + valSum)
   531  	compressionRatio = float64(fileSum) / uncompressedSum
   532  	avgValueLogicalSize = (float64(valSum) / float64(entryCount))
   533  	return avgValueLogicalSize, compressionRatio, nil
   534  }
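
         // A worked example with hypothetical totals: if the file plus the
         // overlapping files beneath it sum to fileSum = 50,000,000 bytes on
         // disk, keySum = 20,000,000, valSum = 80,000,000 and entryCount =
         // 1,000,000, then compressionRatio = 50e6/(20e6+80e6) = 0.5 and
         // avgValueLogicalSize = 80e6/1e6 = 80 bytes. A caller can then scale
         // logical byte counts by 0.5 to translate them into on-disk bytes.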
   535  
   536  func (d *DB) estimateReclaimedSizeBeneath(
   537  	v *version, level int, start, end []byte, hintType deleteCompactionHintType,
   538  ) (estimate uint64, hintSeqNum uint64, err error) {
   539  	// Find all files in lower levels that overlap with the deleted range
   540  	// [start, end).
   541  	//
   542  	// An overlapping file might be completely contained by the range
   543  	// tombstone, in which case we can count the entire file size in
   544  	// our estimate without doing any additional I/O.
   545  	//
   546  	// Otherwise, estimating the range for the file requires
   547  	// additional I/O to read the file's index blocks.
   548  	hintSeqNum = math.MaxUint64
   549  	for l := level + 1; l < numLevels; l++ {
   550  		overlaps := v.Overlaps(l, d.cmp, start, end, true /* exclusiveEnd */)
   551  		iter := overlaps.Iter()
   552  		for file := iter.First(); file != nil; file = iter.Next() {
   553  			startCmp := d.cmp(start, file.Smallest.UserKey)
   554  			endCmp := d.cmp(file.Largest.UserKey, end)
   555  			if startCmp <= 0 && (endCmp < 0 || endCmp == 0 && file.Largest.IsExclusiveSentinel()) {
   556  				// The range fully contains the file, so skip looking it up in table
   557  				// cache/looking at its indexes and add the full file size. Whether the
   558  				// disk estimate and hint seqnums are updated depends on a) the type of
   559  				// hint that requested the estimate and b) the keys contained in this
   560  				// current file.
   561  				var updateEstimates, updateHints bool
   562  				switch hintType {
   563  				case deleteCompactionHintTypePointKeyOnly:
   564  					// The range deletion byte estimates should only be updated if this
   565  					// table contains point keys. This ends up being an overestimate in
   566  					// the case that table also has range keys, but such keys are expected
    567  					// the case that the table also has range keys, but such keys are expected
   568  					// relative to point keys.
   569  					if file.HasPointKeys {
   570  						updateEstimates = true
   571  					}
   572  					// As the initiating span contained only range dels, hints can only be
   573  					// updated if this table does _not_ contain range keys.
   574  					if !file.HasRangeKeys {
   575  						updateHints = true
   576  					}
   577  				case deleteCompactionHintTypeRangeKeyOnly:
   578  					// The initiating span contained only range key dels. The estimates
   579  					// apply only to point keys, and are therefore not updated.
   580  					updateEstimates = false
   581  					// As the initiating span contained only range key dels, hints can
   582  					// only be updated if this table does _not_ contain point keys.
   583  					if !file.HasPointKeys {
   584  						updateHints = true
   585  					}
   586  				case deleteCompactionHintTypePointAndRangeKey:
   587  					// Always update the estimates and hints, as this hint type can drop a
   588  					// file, irrespective of the mixture of keys. Similar to above, the
    589  					// range del bytes estimate is an overestimate.
   590  					updateEstimates, updateHints = true, true
   591  				default:
   592  					panic(fmt.Sprintf("pebble: unknown hint type %s", hintType))
   593  				}
   594  				if updateEstimates {
   595  					estimate += file.Size
   596  				}
   597  				if updateHints && hintSeqNum > file.SmallestSeqNum {
   598  					hintSeqNum = file.SmallestSeqNum
   599  				}
   600  			} else if d.cmp(file.Smallest.UserKey, end) <= 0 && d.cmp(start, file.Largest.UserKey) <= 0 {
   601  				// Partial overlap.
   602  				if hintType == deleteCompactionHintTypeRangeKeyOnly {
   603  					// If the hint that generated this overlap contains only range keys,
   604  					// there is no need to calculate disk usage, as the reclaimable space
   605  					// is expected to be minimal relative to point keys.
   606  					continue
   607  				}
   608  				var size uint64
   609  				var err error
   610  				if file.Virtual {
   611  					err = d.tableCache.withVirtualReader(
   612  						file.VirtualMeta(), func(r sstable.VirtualReader) (err error) {
   613  							size, err = r.EstimateDiskUsage(start, end)
   614  							return err
   615  						})
   616  				} else {
   617  					err = d.tableCache.withReader(
   618  						file.PhysicalMeta(), func(r *sstable.Reader) (err error) {
   619  							size, err = r.EstimateDiskUsage(start, end)
   620  							return err
   621  						})
   622  				}
   623  
   624  				if err != nil {
   625  					return 0, hintSeqNum, err
   626  				}
   627  				estimate += size
   628  			}
   629  		}
   630  	}
   631  	return estimate, hintSeqNum, nil
   632  }
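
         // Recap of the fully-contained case above, by hint type and the key
         // kinds present in the contained file:
         //
         //   hintType          file contains       updateEstimates  updateHints
         //   PointKeyOnly      point keys only     yes              yes
         //   PointKeyOnly      point + range keys  yes              no
         //   PointKeyOnly      range keys only     no               no
         //   RangeKeyOnly      range keys only     no               yes
         //   RangeKeyOnly      any point keys      no               no
         //   PointAndRangeKey  anything            yes              yes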
   633  
   634  func maybeSetStatsFromProperties(meta physicalMeta, props *sstable.Properties) bool {
   635  	// If a table contains range deletions or range key deletions, we defer the
   636  	// stats collection. There are two main reasons for this:
   637  	//
   638  	//  1. Estimating the potential for reclaimed space due to a range deletion
   639  	//     tombstone requires scanning the LSM - a potentially expensive operation
   640  	//     that should be deferred.
   641  	//  2. Range deletions and / or range key deletions present an opportunity to
   642  	//     compute "deletion hints", which also requires a scan of the LSM to
   643  	//     compute tables that would be eligible for deletion.
   644  	//
   645  	// These two tasks are deferred to the table stats collector goroutine.
   646  	if props.NumRangeDeletions != 0 || props.NumRangeKeyDels != 0 {
   647  		return false
   648  	}
   649  
   650  	// If a table is more than 10% point deletions without user-provided size
   651  	// estimates, don't calculate the PointDeletionsBytesEstimate statistic
   652  	// using our limited knowledge. The table stats collector can populate the
    653  	// stats and calculate an average value size across all the tables beneath
   654  	// the table in the LSM, which will be more accurate.
   655  	if unsizedDels := (props.NumDeletions - props.NumSizedDeletions); unsizedDels > props.NumEntries/10 {
   656  		return false
   657  	}
   658  
   659  	var pointEstimate uint64
   660  	if props.NumEntries > 0 {
   661  		// Use the file's own average key and value sizes as an estimate. This
   662  		// doesn't require any additional IO and since the number of point
   663  		// deletions in the file is low, the error introduced by this crude
   664  		// estimate is expected to be small.
   665  		commonProps := &props.CommonProperties
   666  		avgValSize, compressionRatio := estimatePhysicalSizes(meta.Size, commonProps)
   667  		pointEstimate = pointDeletionsBytesEstimate(meta.Size, commonProps, avgValSize, compressionRatio)
   668  	}
   669  
   670  	meta.Stats.NumEntries = props.NumEntries
   671  	meta.Stats.NumDeletions = props.NumDeletions
   672  	meta.Stats.NumRangeKeySets = props.NumRangeKeySets
   673  	meta.Stats.PointDeletionsBytesEstimate = pointEstimate
   674  	meta.Stats.RangeDeletionsBytesEstimate = 0
   675  	meta.Stats.ValueBlocksSize = props.ValueBlocksSize
   676  	meta.StatsMarkValid()
   677  	return true
   678  }
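
         // For example, with hypothetical counts NumEntries = 1000,
         // NumDeletions = 150 and NumSizedDeletions = 60, unsizedDels = 90 is
         // within the NumEntries/10 = 100 threshold and the estimate is
         // computed inline above; with NumSizedDeletions = 40, unsizedDels =
         // 110 exceeds the threshold and collection is deferred to the table
         // stats collector, which can average value sizes across the tables
         // beneath this one.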
   679  
   680  func pointDeletionsBytesEstimate(
   681  	fileSize uint64, props *sstable.CommonProperties, avgValLogicalSize, compressionRatio float64,
   682  ) (estimate uint64) {
   683  	if props.NumEntries == 0 {
   684  		return 0
   685  	}
   686  	numPointDels := props.NumPointDeletions()
   687  	if numPointDels == 0 {
   688  		return 0
   689  	}
   690  	// Estimate the potential space to reclaim using the table's own properties.
   691  	// There may or may not be keys covered by any individual point tombstone.
   692  	// If not, compacting the point tombstone into L6 will at least allow us to
   693  	// drop the point deletion key and will reclaim the tombstone's key bytes.
   694  	// If there are covered key(s), we also get to drop key and value bytes for
   695  	// each covered key.
   696  	//
   697  	// Some point tombstones (DELSIZEDs) carry a user-provided estimate of the
   698  	// uncompressed size of entries that will be elided by fully compacting the
    699  	// tombstone. For these tombstones, there's no guesswork: we use the
    700  	// RawPointTombstoneValueSize property, which is the sum of all these
   701  	// tombstones' encoded values.
   702  	//
   703  	// For un-sized point tombstones (DELs), we estimate assuming that each
   704  	// point tombstone on average covers 1 key and using average value sizes.
   705  	// This is almost certainly an overestimate, but that's probably okay
   706  	// because point tombstones can slow range iterations even when they don't
   707  	// cover a key.
   708  	//
   709  	// TODO(jackson): This logic doesn't directly incorporate fixed per-key
   710  	// overhead (8-byte trailer, plus at least 1 byte encoding the length of the
   711  	// key and 1 byte encoding the length of the value). This overhead is
   712  	// indirectly incorporated through the compression ratios, but that results
   713  	// in the overhead being smeared per key-byte and value-byte, rather than
   714  	// per-entry. This per-key fixed overhead can be nontrivial, especially for
   715  	// dense swaths of point tombstones. Give some thought as to whether we
   716  	// should directly include fixed per-key overhead in the calculations.
   717  
   718  	// Below, we calculate the tombstone contributions and the shadowed keys'
   719  	// contributions separately.
   720  	var tombstonesLogicalSize float64
   721  	var shadowedLogicalSize float64
   722  
   723  	// 1. Calculate the contribution of the tombstone keys themselves.
   724  	if props.RawPointTombstoneKeySize > 0 {
   725  		tombstonesLogicalSize += float64(props.RawPointTombstoneKeySize)
   726  	} else {
   727  		// This sstable predates the existence of the RawPointTombstoneKeySize
   728  		// property. We can use the average key size within the file itself and
   729  		// the count of point deletions to estimate the size.
   730  		tombstonesLogicalSize += float64(numPointDels * props.RawKeySize / props.NumEntries)
   731  	}
   732  
   733  	// 2. Calculate the contribution of the keys shadowed by tombstones.
   734  	//
    735  	// 2a. First account for keys shadowed by DELSIZED tombstones. The DELSIZED
   736  	// tombstones encode the size of both the key and value of the shadowed KV
   737  	// entries. These sizes are aggregated into a sstable property.
   738  	shadowedLogicalSize += float64(props.RawPointTombstoneValueSize)
   739  
   740  	// 2b. Calculate the contribution of the KV entries shadowed by ordinary DEL
   741  	// keys.
   742  	numUnsizedDels := numPointDels - props.NumSizedDeletions
   743  	{
   744  		// The shadowed keys have the same exact user keys as the tombstones
   745  		// themselves, so we can use the `tombstonesLogicalSize` we computed
   746  		// earlier as an estimate. There's a complication that
   747  		// `tombstonesLogicalSize` may include DELSIZED keys we already
    748  		// accounted for, so we scale by the fraction of un-sized deletions below.
   749  		shadowedLogicalSize += float64(tombstonesLogicalSize) / float64(numPointDels) * float64(numUnsizedDels)
   750  
   751  		// Calculate the contribution of the deleted values. The caller has
   752  		// already computed an average logical size (possibly computed across
   753  		// many sstables).
   754  		shadowedLogicalSize += float64(numUnsizedDels) * avgValLogicalSize
   755  	}
   756  
   757  	// Scale both tombstone and shadowed totals by logical:physical ratios to
   758  	// account for compression, metadata overhead, etc.
   759  	//
   760  	//      Physical             FileSize
   761  	//     -----------  = -----------------------
   762  	//      Logical       RawKeySize+RawValueSize
   763  	//
   764  	return uint64((tombstonesLogicalSize + shadowedLogicalSize) * compressionRatio)
   765  }
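
         // A worked example with hypothetical inputs: numPointDels = 100 of
         // which NumSizedDeletions = 40, RawPointTombstoneKeySize = 2,000,
         // RawPointTombstoneValueSize = 10,000 (the DELSIZED hints),
         // avgValLogicalSize = 80 and compressionRatio = 0.5. Then
         // tombstonesLogicalSize = 2,000; shadowedLogicalSize = 10,000 +
         // 2,000*(60/100) + 60*80 = 16,000; and the returned estimate is
         // (2,000 + 16,000) * 0.5 = 9,000 bytes.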
   766  
   767  func estimatePhysicalSizes(
   768  	fileSize uint64, props *sstable.CommonProperties,
   769  ) (avgValLogicalSize, compressionRatio float64) {
   770  	// RawKeySize and RawValueSize are uncompressed totals. Scale according to
   771  	// the data size to account for compression, index blocks and metadata
   772  	// overhead. Eg:
   773  	//
   774  	//    Compression rate        ×  Average uncompressed value size
   775  	//
   776  	//                            ↓
   777  	//
    778  //         FileSize              RawValueSize
    779  //   -----------------------  ×  ------------
   780  	//   RawKeySize+RawValueSize     NumEntries
   781  	//
   782  	uncompressedSum := props.RawKeySize + props.RawValueSize
   783  	compressionRatio = float64(fileSize) / float64(uncompressedSum)
   784  	avgValLogicalSize = (float64(props.RawValueSize) / float64(props.NumEntries))
   785  	return avgValLogicalSize, compressionRatio
   786  }
   787  
   788  // newCombinedDeletionKeyspanIter returns a keyspan.FragmentIterator that
   789  // returns "ranged deletion" spans for a single table, providing a combined view
   790  // of both range deletion and range key deletion spans. The
   791  // tableRangedDeletionIter is intended for use in the specific case of computing
   792  // the statistics and deleteCompactionHints for a single table.
   793  //
   794  // As an example, consider the following set of spans from the range deletion
   795  // and range key blocks of a table:
   796  //
   797  //		      |---------|     |---------|         |-------| RANGEKEYDELs
   798  //		|-----------|-------------|           |-----|       RANGEDELs
   799  //	  __________________________________________________________
   800  //		a b c d e f g h i j k l m n o p q r s t u v w x y z
   801  //
   802  // The tableRangedDeletionIter produces the following set of output spans, where
   803  // '1' indicates a span containing only range deletions, '2' is a span
   804  // containing only range key deletions, and '3' is a span containing a mixture
   805  // of both range deletions and range key deletions.
   806  //
   807  //		   1       3       1    3    2          1  3   2
   808  //		|-----|---------|-----|---|-----|     |---|-|-----|
   809  //	  __________________________________________________________
   810  //		a b c d e f g h i j k l m n o p q r s t u v w x y z
   811  //
   812  // Algorithm.
   813  //
   814  // The iterator first defragments the range deletion and range key blocks
   815  // separately. During this defragmentation, the range key block is also filtered
   816  // so that keys other than range key deletes are ignored. The range delete and
   817  // range key delete keyspaces are then merged.
   818  //
   819  // Note that the only fragmentation introduced by merging is from where a range
   820  // del span overlaps with a range key del span. Within the bounds of any overlap
   821  // there is guaranteed to be no further fragmentation, as the constituent spans
   822  // have already been defragmented. To the left and right of any overlap, the
   823  // same reasoning applies. For example,
   824  //
   825  //		         |--------|         |-------| RANGEKEYDEL
   826  //		|---------------------------|         RANGEDEL
   827  //		|----1---|----3---|----1----|---2---| Merged, fragmented spans.
   828  //	  __________________________________________________________
   829  //		a b c d e f g h i j k l m n o p q r s t u v w x y z
   830  //
   831  // Any fragmented abutting spans produced by the merging iter will be of
    832  // differing types (i.e. a transition from a span with homogeneous key kinds to a
   833  // heterogeneous span, or a transition from a span with exclusively range dels
   834  // to a span with exclusively range key dels). Therefore, further
   835  // defragmentation is not required.
   836  //
    837  // Each span returned by the tableRangedDeletionIter will have at most four keys,
   838  // corresponding to the largest and smallest sequence numbers encountered across
   839  // the range deletes and range keys deletes that comprised the merged spans.
   840  func newCombinedDeletionKeyspanIter(
   841  	comparer *base.Comparer, cr sstable.CommonReader, m *fileMetadata,
   842  ) (keyspan.FragmentIterator, error) {
   843  	// The range del iter and range key iter are each wrapped in their own
   844  	// defragmenting iter. For each iter, abutting spans can always be merged.
   845  	var equal = keyspan.DefragmentMethodFunc(func(_ base.Equal, a, b *keyspan.Span) bool { return true })
   846  	// Reduce keys by maintaining a slice of at most length two, corresponding to
   847  	// the largest and smallest keys in the defragmented span. This maintains the
   848  	// contract that the emitted slice is sorted by (SeqNum, Kind) descending.
   849  	reducer := func(current, incoming []keyspan.Key) []keyspan.Key {
   850  		if len(current) == 0 && len(incoming) == 0 {
   851  			// While this should never occur in practice, a defensive return is used
   852  			// here to preserve correctness.
   853  			return current
   854  		}
   855  		var largest, smallest keyspan.Key
   856  		var set bool
   857  		for _, keys := range [2][]keyspan.Key{current, incoming} {
   858  			if len(keys) == 0 {
   859  				continue
   860  			}
   861  			first, last := keys[0], keys[len(keys)-1]
   862  			if !set {
   863  				largest, smallest = first, last
   864  				set = true
   865  				continue
   866  			}
   867  			if first.Trailer > largest.Trailer {
   868  				largest = first
   869  			}
   870  			if last.Trailer < smallest.Trailer {
   871  				smallest = last
   872  			}
   873  		}
   874  		if largest.Equal(comparer.Equal, smallest) {
   875  			current = append(current[:0], largest)
   876  		} else {
   877  			current = append(current[:0], largest, smallest)
   878  		}
   879  		return current
   880  	}
   881  
   882  	// The separate iters for the range dels and range keys are wrapped in a
   883  	// merging iter to join the keyspaces into a single keyspace. The separate
   884  	// iters are only added if the particular key kind is present.
   885  	mIter := &keyspan.MergingIter{}
   886  	var transform = keyspan.TransformerFunc(func(cmp base.Compare, in keyspan.Span, out *keyspan.Span) error {
   887  		if in.KeysOrder != keyspan.ByTrailerDesc {
   888  			panic("pebble: combined deletion iter encountered keys in non-trailer descending order")
   889  		}
   890  		out.Start, out.End = in.Start, in.End
   891  		out.Keys = append(out.Keys[:0], in.Keys...)
   892  		out.KeysOrder = keyspan.ByTrailerDesc
   893  		// NB: The order of by-trailer descending may have been violated,
   894  		// because we've layered rangekey and rangedel iterators from the same
   895  		// sstable into the same keyspan.MergingIter. The MergingIter will
   896  		// return the keys in the order that the child iterators were provided.
   897  		// Sort the keys to ensure they're sorted by trailer descending.
   898  		keyspan.SortKeysByTrailer(&out.Keys)
   899  		return nil
   900  	})
   901  	mIter.Init(comparer.Compare, transform, new(keyspan.MergingBuffers))
   902  
   903  	iter, err := cr.NewRawRangeDelIter()
   904  	if err != nil {
   905  		return nil, err
   906  	}
   907  	if iter != nil {
   908  		dIter := &keyspan.DefragmentingIter{}
   909  		dIter.Init(comparer, iter, equal, reducer, new(keyspan.DefragmentingBuffers))
   910  		iter = dIter
   911  		// Truncate tombstones to the containing file's bounds if necessary.
   912  		// See docs/range_deletions.md for why this is necessary.
   913  		iter = keyspan.Truncate(
   914  			comparer.Compare, iter, m.Smallest.UserKey, m.Largest.UserKey,
   915  			nil, nil, false, /* panicOnUpperTruncate */
   916  		)
   917  		mIter.AddLevel(iter)
   918  	}
   919  
   920  	iter, err = cr.NewRawRangeKeyIter()
   921  	if err != nil {
   922  		return nil, err
   923  	}
   924  	if iter != nil {
   925  		// Wrap the range key iterator in a filter that elides keys other than range
   926  		// key deletions.
   927  		iter = keyspan.Filter(iter, func(in *keyspan.Span, out *keyspan.Span) (keep bool) {
   928  			out.Start, out.End = in.Start, in.End
   929  			out.Keys = out.Keys[:0]
   930  			for _, k := range in.Keys {
   931  				if k.Kind() != base.InternalKeyKindRangeKeyDelete {
   932  					continue
   933  				}
   934  				out.Keys = append(out.Keys, k)
   935  			}
   936  			return len(out.Keys) > 0
   937  		}, comparer.Compare)
   938  		dIter := &keyspan.DefragmentingIter{}
   939  		dIter.Init(comparer, iter, equal, reducer, new(keyspan.DefragmentingBuffers))
   940  		iter = dIter
   941  		mIter.AddLevel(iter)
   942  	}
   943  
   944  	return mIter, nil
   945  }
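
         // To illustrate the reducer above with hypothetical trailers:
         // defragmenting a span whose current keys are [#12 RANGEDEL, #5
         // RANGEDEL] with an incoming span whose keys are [#9 RANGEDEL, #7
         // RANGEDEL] keeps only the largest and smallest, yielding [#12, #5].
         // Each defragmented input span therefore carries at most two keys,
         // and a merged span (range dels plus range key dels) carries at most
         // four, as noted in the function comment.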
   946  
   947  // rangeKeySetsAnnotator implements manifest.Annotator, annotating B-Tree nodes
   948  // with the sum of the files' counts of range key fragments. Its annotation type
   949  // is a *uint64. The count of range key sets may change once a table's stats are
   950  // loaded asynchronously, so its values are marked as cacheable only if a file's
   951  // stats have been loaded.
   952  type rangeKeySetsAnnotator struct{}
   953  
   954  var _ manifest.Annotator = rangeKeySetsAnnotator{}
   955  
   956  func (a rangeKeySetsAnnotator) Zero(dst interface{}) interface{} {
   957  	if dst == nil {
   958  		return new(uint64)
   959  	}
   960  	v := dst.(*uint64)
   961  	*v = 0
   962  	return v
   963  }
   964  
   965  func (a rangeKeySetsAnnotator) Accumulate(
   966  	f *fileMetadata, dst interface{},
   967  ) (v interface{}, cacheOK bool) {
   968  	vptr := dst.(*uint64)
   969  	*vptr = *vptr + f.Stats.NumRangeKeySets
   970  	return vptr, f.StatsValid()
   971  }
   972  
   973  func (a rangeKeySetsAnnotator) Merge(src interface{}, dst interface{}) interface{} {
   974  	srcV := src.(*uint64)
   975  	dstV := dst.(*uint64)
   976  	*dstV = *dstV + *srcV
   977  	return dstV
   978  }
   979  
   980  // countRangeKeySetFragments counts the number of RANGEKEYSET keys across all
   981  // files of the LSM. It only counts keys in files for which table stats have
   982  // been loaded. It uses a b-tree annotator to cache intermediate values between
   983  // calculations when possible.
   984  func countRangeKeySetFragments(v *version) (count uint64) {
   985  	for l := 0; l < numLevels; l++ {
   986  		if v.RangeKeyLevels[l].Empty() {
   987  			continue
   988  		}
   989  		count += *v.RangeKeyLevels[l].Annotation(rangeKeySetsAnnotator{}).(*uint64)
   990  	}
   991  	return count
   992  }
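
         // The annotators below follow the same Zero/Accumulate/Merge pattern.
         // Because Accumulate reports f.StatsValid() as cacheOK, an annotation
         // is only cached once every file it covers has loaded stats; until
         // then it is recomputed on each call, picking up values as the
         // collector installs them.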
   993  
   994  // tombstonesAnnotator implements manifest.Annotator, annotating B-Tree nodes
    995  // with the sum of the files' counts of tombstones (DEL, SINGLEDEL and RANGEDEL
    996  // keys). Its annotation type is a *uint64. The count of tombstones may change
   997  // once a table's stats are loaded asynchronously, so its values are marked as
   998  // cacheable only if a file's stats have been loaded.
   999  type tombstonesAnnotator struct{}
  1000  
  1001  var _ manifest.Annotator = tombstonesAnnotator{}
  1002  
  1003  func (a tombstonesAnnotator) Zero(dst interface{}) interface{} {
  1004  	if dst == nil {
  1005  		return new(uint64)
  1006  	}
  1007  	v := dst.(*uint64)
  1008  	*v = 0
  1009  	return v
  1010  }
  1011  
  1012  func (a tombstonesAnnotator) Accumulate(
  1013  	f *fileMetadata, dst interface{},
  1014  ) (v interface{}, cacheOK bool) {
  1015  	vptr := dst.(*uint64)
  1016  	*vptr = *vptr + f.Stats.NumDeletions
  1017  	return vptr, f.StatsValid()
  1018  }
  1019  
  1020  func (a tombstonesAnnotator) Merge(src interface{}, dst interface{}) interface{} {
  1021  	srcV := src.(*uint64)
  1022  	dstV := dst.(*uint64)
  1023  	*dstV = *dstV + *srcV
  1024  	return dstV
  1025  }
  1026  
  1027  // countTombstones counts the number of tombstone (DEL, SINGLEDEL and RANGEDEL)
  1028  // internal keys across all files of the LSM. It only counts keys in files for
  1029  // which table stats have been loaded. It uses a b-tree annotator to cache
  1030  // intermediate values between calculations when possible.
  1031  func countTombstones(v *version) (count uint64) {
  1032  	for l := 0; l < numLevels; l++ {
  1033  		if v.Levels[l].Empty() {
  1034  			continue
  1035  		}
  1036  		count += *v.Levels[l].Annotation(tombstonesAnnotator{}).(*uint64)
  1037  	}
  1038  	return count
  1039  }
  1040  
  1041  // valueBlocksSizeAnnotator implements manifest.Annotator, annotating B-Tree
  1042  // nodes with the sum of the files' Properties.ValueBlocksSize. Its annotation
  1043  // type is a *uint64. The value block size may change once a table's stats are
  1044  // loaded asynchronously, so its values are marked as cacheable only if a
  1045  // file's stats have been loaded.
  1046  type valueBlocksSizeAnnotator struct{}
  1047  
  1048  var _ manifest.Annotator = valueBlocksSizeAnnotator{}
  1049  
  1050  func (a valueBlocksSizeAnnotator) Zero(dst interface{}) interface{} {
  1051  	if dst == nil {
  1052  		return new(uint64)
  1053  	}
  1054  	v := dst.(*uint64)
  1055  	*v = 0
  1056  	return v
  1057  }
  1058  
  1059  func (a valueBlocksSizeAnnotator) Accumulate(
  1060  	f *fileMetadata, dst interface{},
  1061  ) (v interface{}, cacheOK bool) {
  1062  	vptr := dst.(*uint64)
  1063  	*vptr = *vptr + f.Stats.ValueBlocksSize
  1064  	return vptr, f.StatsValid()
  1065  }
  1066  
  1067  func (a valueBlocksSizeAnnotator) Merge(src interface{}, dst interface{}) interface{} {
  1068  	srcV := src.(*uint64)
  1069  	dstV := dst.(*uint64)
  1070  	*dstV = *dstV + *srcV
  1071  	return dstV
  1072  }
  1073  
  1074  // valueBlocksSizeForLevel returns the Properties.ValueBlocksSize across all
  1075  // files for a level of the LSM. It only includes the size for files for which
  1076  // table stats have been loaded. It uses a b-tree annotator to cache
  1077  // intermediate values between calculations when possible. It must not be
  1078  // called concurrently.
  1079  //
   1080  // REQUIRES: 0 <= level < numLevels.
  1081  func valueBlocksSizeForLevel(v *version, level int) (count uint64) {
  1082  	if v.Levels[level].Empty() {
  1083  		return 0
  1084  	}
  1085  	return *v.Levels[level].Annotation(valueBlocksSizeAnnotator{}).(*uint64)
  1086  }