github.com/zuoyebang/bitalostable@v1.0.1-0.20240229032404-e3b99a834294/metrics.go (about)

     1  // Copyright 2019 The LevelDB-Go and Pebble and Bitalostored Authors. All rights reserved. Use
     2  // of this source code is governed by a BSD-style license that can be found in
     3  // the LICENSE file.
     4  
     5  package bitalostable
     6  
     7  import (
     8  	"fmt"
     9  
    10  	"github.com/HdrHistogram/hdrhistogram-go"
    11  	"github.com/cockroachdb/redact"
    12  	"github.com/zuoyebang/bitalostable/internal/base"
    13  	"github.com/zuoyebang/bitalostable/internal/cache"
    14  	"github.com/zuoyebang/bitalostable/internal/humanize"
    15  	"github.com/zuoyebang/bitalostable/sstable"
    16  )
    17  
    18  // CacheMetrics holds metrics for the block and table cache.
    19  type CacheMetrics = cache.Metrics
    20  
    21  // FilterMetrics holds metrics for the filter policy
    22  type FilterMetrics = sstable.FilterMetrics
    23  
    24  // ThroughputMetric is a cumulative throughput metric. See the detailed
    25  // comment in base.
    26  type ThroughputMetric = base.ThroughputMetric
    27  
// formatCacheMetrics writes a single row for a block or table cache to w:
// the entry count, total byte size, and hit rate. The trailing annotation
// documents that the "score" column of this row holds the hit rate.
func formatCacheMetrics(w redact.SafePrinter, m *CacheMetrics, name redact.SafeString) {
	w.Printf("%7s %9s %7s %6.1f%%  (score == hit-rate)\n",
		name,
		humanize.SI.Int64(m.Count),
		humanize.IEC.Int64(m.Size),
		redact.Safe(hitRate(m.Hits, m.Misses)))
}
    35  
    36  // LevelMetrics holds per-level metrics such as the number of files and total
    37  // size of the files, and compaction related metrics.
    38  type LevelMetrics struct {
    39  	// The number of sublevels within the level. The sublevel count corresponds
    40  	// to the read amplification for the level. An empty level will have a
    41  	// sublevel count of 0, implying no read amplification. Only L0 will have
    42  	// a sublevel count other than 0 or 1.
    43  	Sublevels int32
    44  	// The total number of files in the level.
    45  	NumFiles int64
    46  	// The total size in bytes of the files in the level.
    47  	Size int64
    48  	// The level's compaction score.
    49  	Score float64
    50  	// The number of incoming bytes from other levels read during
    51  	// compactions. This excludes bytes moved and bytes ingested. For L0 this is
    52  	// the bytes written to the WAL.
    53  	BytesIn uint64
    54  	// The number of bytes ingested. The sibling metric for tables is
    55  	// TablesIngested.
    56  	BytesIngested uint64
    57  	// The number of bytes moved into the level by a "move" compaction. The
    58  	// sibling metric for tables is TablesMoved.
    59  	BytesMoved uint64
    60  	// The number of bytes read for compactions at the level. This includes bytes
    61  	// read from other levels (BytesIn), as well as bytes read for the level.
    62  	BytesRead uint64
    63  	// The number of bytes written during compactions. The sibling
    64  	// metric for tables is TablesCompacted. This metric may be summed
    65  	// with BytesFlushed to compute the total bytes written for the level.
    66  	BytesCompacted uint64
    67  	// The number of bytes written during flushes. The sibling
    68  	// metrics for tables is TablesFlushed. This metric is always
    69  	// zero for all levels other than L0.
    70  	BytesFlushed uint64
    71  	// The number of sstables compacted to this level.
    72  	TablesCompacted uint64
    73  	// The number of sstables flushed to this level.
    74  	TablesFlushed uint64
    75  	// The number of sstables ingested into the level.
    76  	TablesIngested uint64
    77  	// The number of sstables moved to this level by a "move" compaction.
    78  	TablesMoved uint64
    79  }
    80  
    81  // Add updates the counter metrics for the level.
    82  func (m *LevelMetrics) Add(u *LevelMetrics) {
    83  	m.NumFiles += u.NumFiles
    84  	m.Size += u.Size
    85  	m.BytesIn += u.BytesIn
    86  	m.BytesIngested += u.BytesIngested
    87  	m.BytesMoved += u.BytesMoved
    88  	m.BytesRead += u.BytesRead
    89  	m.BytesCompacted += u.BytesCompacted
    90  	m.BytesFlushed += u.BytesFlushed
    91  	m.TablesCompacted += u.TablesCompacted
    92  	m.TablesFlushed += u.TablesFlushed
    93  	m.TablesIngested += u.TablesIngested
    94  	m.TablesMoved += u.TablesMoved
    95  }
    96  
    97  // WriteAmp computes the write amplification for compactions at this
    98  // level. Computed as (BytesFlushed + BytesCompacted) / BytesIn.
    99  func (m *LevelMetrics) WriteAmp() float64 {
   100  	if m.BytesIn == 0 {
   101  		return 0
   102  	}
   103  	return float64(m.BytesFlushed+m.BytesCompacted) / float64(m.BytesIn)
   104  }
   105  
   106  // format generates a string of the receiver's metrics, formatting it into the
   107  // supplied buffer.
   108  func (m *LevelMetrics) format(w redact.SafePrinter, score redact.SafeValue) {
   109  	w.Printf("%9d %7s %7s %7s %7s %7s %7s %7s %7s %7s %7s %7d %7.1f\n",
   110  		redact.Safe(m.NumFiles),
   111  		humanize.IEC.Int64(m.Size),
   112  		score,
   113  		humanize.IEC.Uint64(m.BytesIn),
   114  		humanize.IEC.Uint64(m.BytesIngested),
   115  		humanize.SI.Uint64(m.TablesIngested),
   116  		humanize.IEC.Uint64(m.BytesMoved),
   117  		humanize.SI.Uint64(m.TablesMoved),
   118  		humanize.IEC.Uint64(m.BytesFlushed+m.BytesCompacted),
   119  		humanize.SI.Uint64(m.TablesFlushed+m.TablesCompacted),
   120  		humanize.IEC.Uint64(m.BytesRead),
   121  		redact.Safe(m.Sublevels),
   122  		redact.Safe(m.WriteAmp()))
   123  }
   124  
// Metrics holds metrics for various subsystems of the DB such as the Cache,
// Compactions, WAL, and per-Level metrics.
//
// TODO(peter): The testing of these metrics is relatively weak. There should
// be testing that performs various operations on a DB and verifies that the
// metrics reflect those operations.
type Metrics struct {
	// BlockCache holds metrics for the block cache.
	BlockCache CacheMetrics

	Compact struct {
		// The total number of compactions, and per-compaction type counts.
		Count            int64
		DefaultCount     int64
		DeleteOnlyCount  int64
		ElisionOnlyCount int64
		MoveCount        int64
		ReadCount        int64
		RewriteCount     int64
		MultiLevelCount  int64
		// An estimate of the number of bytes that need to be compacted for the LSM
		// to reach a stable state.
		EstimatedDebt uint64
		// Number of bytes present in sstables being written by in-progress
		// compactions. This value will be zero if there are no in-progress
		// compactions.
		InProgressBytes int64
		// Number of compactions that are in-progress.
		NumInProgress int64
		// MarkedFiles is a count of files that are marked for
		// compaction. Such files are compacted in a rewrite compaction
		// when no other compactions are picked.
		MarkedFiles int
	}

	Flush struct {
		// The total number of flushes.
		Count int64
	}

	// Filter holds metrics for the filter policy (e.g. bloom filters).
	Filter FilterMetrics

	// Levels holds the per-level metrics, indexed by LSM level.
	Levels [numLevels]LevelMetrics

	MemTable struct {
		// The number of bytes allocated by memtables and large (flushable)
		// batches.
		Size uint64
		// The count of memtables.
		Count int64
		// The number of bytes present in zombie memtables which are no longer
		// referenced by the current DB state but are still in use by an iterator.
		ZombieSize uint64
		// The count of zombie memtables.
		ZombieCount int64
	}

	Keys struct {
		// The approximate count of internal range key set keys in the database.
		RangeKeySetsCount uint64
	}

	Snapshots struct {
		// The number of currently open snapshots.
		Count int
		// The sequence number of the earliest, currently open snapshot.
		EarliestSeqNum uint64
	}

	Table struct {
		// The number of bytes present in obsolete tables which are no longer
		// referenced by the current DB state or any open iterators.
		ObsoleteSize uint64
		// The count of obsolete tables.
		ObsoleteCount int64
		// The number of bytes present in zombie tables which are no longer
		// referenced by the current DB state but are still in use by an iterator.
		ZombieSize uint64
		// The count of zombie tables.
		ZombieCount int64
	}

	// TableCache holds metrics for the table cache.
	TableCache CacheMetrics

	// Count of the number of open sstable iterators.
	TableIters int64

	WAL struct {
		// Number of live WAL files.
		Files int64
		// Number of obsolete WAL files.
		ObsoleteFiles int64
		// Physical size of the obsolete WAL files.
		ObsoletePhysicalSize uint64
		// Size of the live data in the WAL files. Note that with WAL file
		// recycling this is less than the actual on-disk size of the WAL files.
		Size uint64
		// Physical size of the WAL files on-disk. With WAL file recycling,
		// this is greater than the live data in WAL files.
		PhysicalSize uint64
		// Number of logical bytes written to the WAL.
		BytesIn uint64
		// Number of bytes written to the WAL.
		BytesWritten uint64
	}

	// private holds file sizes that contribute to DiskSpaceUsage but are not
	// otherwise exported.
	private struct {
		// Size in bytes of the current OPTIONS file.
		optionsFileSize uint64
		// Size in bytes of the current MANIFEST file.
		manifestFileSize uint64
	}
}
   235  
   236  // DiskSpaceUsage returns the total disk space used by the database in bytes,
   237  // including live and obsolete files.
   238  func (m *Metrics) DiskSpaceUsage() uint64 {
   239  	var usageBytes uint64
   240  	usageBytes += m.WAL.PhysicalSize
   241  	usageBytes += m.WAL.ObsoletePhysicalSize
   242  	for _, lm := range m.Levels {
   243  		usageBytes += uint64(lm.Size)
   244  	}
   245  	usageBytes += m.Table.ObsoleteSize
   246  	usageBytes += m.Table.ZombieSize
   247  	usageBytes += m.private.optionsFileSize
   248  	usageBytes += m.private.manifestFileSize
   249  	usageBytes += uint64(m.Compact.InProgressBytes)
   250  	return usageBytes
   251  }
   252  
   253  func (m *Metrics) levelSizes() [numLevels]int64 {
   254  	var sizes [numLevels]int64
   255  	for i := 0; i < len(sizes); i++ {
   256  		sizes[i] = m.Levels[i].Size
   257  	}
   258  	return sizes
   259  }
   260  
   261  // ReadAmp returns the current read amplification of the database.
   262  // It's computed as the number of sublevels in L0 + the number of non-empty
   263  // levels below L0.
   264  func (m *Metrics) ReadAmp() int {
   265  	var ramp int32
   266  	for _, l := range m.Levels {
   267  		ramp += l.Sublevels
   268  	}
   269  	return int(ramp)
   270  }
   271  
   272  // Total returns the sum of the per-level metrics and WAL metrics.
   273  func (m *Metrics) Total() LevelMetrics {
   274  	var total LevelMetrics
   275  	for level := 0; level < numLevels; level++ {
   276  		l := &m.Levels[level]
   277  		total.Add(l)
   278  		total.Sublevels += l.Sublevels
   279  	}
   280  	// Compute total bytes-in as the bytes written to the WAL + bytes ingested.
   281  	total.BytesIn = m.WAL.BytesWritten + total.BytesIngested
   282  	// Add the total bytes-in to the total bytes-flushed. This is to account for
   283  	// the bytes written to the log and bytes written externally and then
   284  	// ingested.
   285  	total.BytesFlushed += total.BytesIn
   286  	return total
   287  }
   288  
   289  const notApplicable = redact.SafeString("-")
   290  
   291  func (m *Metrics) formatWAL(w redact.SafePrinter) {
   292  	var writeAmp float64
   293  	if m.WAL.BytesIn > 0 {
   294  		writeAmp = float64(m.WAL.BytesWritten) / float64(m.WAL.BytesIn)
   295  	}
   296  	w.Printf("    WAL %9d %7s %7s %7s %7s %7s %7s %7s %7s %7s %7s %7s %7.1f\n",
   297  		redact.Safe(m.WAL.Files),
   298  		humanize.Uint64(m.WAL.Size),
   299  		notApplicable,
   300  		humanize.Uint64(m.WAL.BytesIn),
   301  		notApplicable,
   302  		notApplicable,
   303  		notApplicable,
   304  		notApplicable,
   305  		humanize.Uint64(m.WAL.BytesWritten),
   306  		notApplicable,
   307  		notApplicable,
   308  		notApplicable,
   309  		redact.Safe(writeAmp))
   310  }
   311  
// String pretty-prints the metrics, showing a line for the WAL, a line per-level, and
// a total:
//
//	__level_____count____size___score______in__ingest(sz_cnt)____move(sz_cnt)___write(sz_cnt)____read___w-amp
//	    WAL         1    27 B       -    48 B       -       -       -       -   108 B       -       -     2.2
//	      0         2   1.6 K    0.50    81 B   825 B       1     0 B       0   2.4 K       3     0 B    30.6
//	      1         0     0 B    0.00     0 B     0 B       0     0 B       0     0 B       0     0 B     0.0
//	      2         0     0 B    0.00     0 B     0 B       0     0 B       0     0 B       0     0 B     0.0
//	      3         0     0 B    0.00     0 B     0 B       0     0 B       0     0 B       0     0 B     0.0
//	      4         0     0 B    0.00     0 B     0 B       0     0 B       0     0 B       0     0 B     0.0
//	      5         0     0 B    0.00     0 B     0 B       0     0 B       0     0 B       0     0 B     0.0
//	      6         1   825 B    0.00   1.6 K     0 B       0     0 B       0   825 B       1   1.6 K     0.5
//	  total         3   2.4 K       -   933 B   825 B       1     0 B       0   4.1 K       4   1.6 K     4.5
//	  flush         3
//	compact         1   1.6 K     0 B       1          (size == estimated-debt, score = in-progress-bytes, in = num-in-progress)
//	  ctype         0       0       0       0       0       0       0  (default, delete, elision, move, read, rewrite, multi-level)
//	 memtbl         1   4.0 M
//	zmemtbl         0     0 B
//	   ztbl         0     0 B
//	 bcache         4   752 B    7.7%  (score == hit-rate)
//	 tcache         0     0 B    0.0%  (score == hit-rate)
//	  snaps         0       -       0  (score == earliest seq num)
//	 titers         0
//	 filter         -       -    0.0%  (score == utility)
//
// NOTE(review): the example above is illustrative and may lag the exact
// column set emitted by SafeFormat (which also prints an r-amp column);
// confirm against SafeFormat when the table layout changes.
//
// The WAL "in" metric is the size of the batches written to the WAL. The WAL
// "write" metric is the size of the physical data written to the WAL which
// includes record fragment overhead. Write amplification is computed as
// bytes-written / bytes-in, except for the total row where bytes-in is
// replaced with WAL-bytes-written + bytes-ingested.
func (m *Metrics) String() string {
	return redact.StringWithoutMarkers(m)
}
   347  
   348  var _ redact.SafeFormatter = &Metrics{}
   349  
   350  // SafeFormat implements redact.SafeFormatter.
   351  func (m *Metrics) SafeFormat(w redact.SafePrinter, _ rune) {
   352  	// NB: Pebble does not make any assumptions as to which Go primitive types
   353  	// have been registered as safe with redact.RegisterSafeType and does not
   354  	// register any types itself. Some of the calls to `redact.Safe`, etc are
   355  	// superfluous in the context of CockroachDB, which registers all the Go
   356  	// numeric types as safe.
   357  
   358  	// TODO(jackson): There are a few places where we use redact.SafeValue
   359  	// instead of redact.RedactableString. This is necessary because of a bug
   360  	// whereby formatting a redact.RedactableString argument does not respect
   361  	// width specifiers. When the issue is fixed, we can convert these to
   362  	// RedactableStrings. https://github.com/cockroachdb/redact/issues/17
   363  
   364  	var total LevelMetrics
   365  	w.SafeString("__level_____count____size___score______in__ingest(sz_cnt)" +
   366  		"____move(sz_cnt)___write(sz_cnt)____read___r-amp___w-amp\n")
   367  	m.formatWAL(w)
   368  	for level := 0; level < numLevels; level++ {
   369  		l := &m.Levels[level]
   370  		w.Printf("%7d ", redact.Safe(level))
   371  
   372  		// Format the score.
   373  		var score redact.SafeValue = notApplicable
   374  		if level < numLevels-1 {
   375  			score = redact.Safe(fmt.Sprintf("%0.2f", l.Score))
   376  		}
   377  		l.format(w, score)
   378  		total.Add(l)
   379  		total.Sublevels += l.Sublevels
   380  	}
   381  	// Compute total bytes-in as the bytes written to the WAL + bytes ingested.
   382  	total.BytesIn = m.WAL.BytesWritten + total.BytesIngested
   383  	// Add the total bytes-in to the total bytes-flushed. This is to account for
   384  	// the bytes written to the log and bytes written externally and then
   385  	// ingested.
   386  	total.BytesFlushed += total.BytesIn
   387  	w.SafeString("  total ")
   388  	total.format(w, notApplicable)
   389  
   390  	w.Printf("  flush %9d\n", redact.Safe(m.Flush.Count))
   391  	w.Printf("compact %9d %7s %7s %7d %7s  (size == estimated-debt, score = in-progress-bytes, in = num-in-progress)\n",
   392  		redact.Safe(m.Compact.Count),
   393  		humanize.IEC.Uint64(m.Compact.EstimatedDebt),
   394  		humanize.IEC.Int64(m.Compact.InProgressBytes),
   395  		redact.Safe(m.Compact.NumInProgress),
   396  		redact.SafeString(""))
   397  	w.Printf("  ctype %9d %7d %7d %7d %7d %7d %7d  (default, delete, elision, move, read, rewrite, multi-level)\n",
   398  		redact.Safe(m.Compact.DefaultCount),
   399  		redact.Safe(m.Compact.DeleteOnlyCount),
   400  		redact.Safe(m.Compact.ElisionOnlyCount),
   401  		redact.Safe(m.Compact.MoveCount),
   402  		redact.Safe(m.Compact.ReadCount),
   403  		redact.Safe(m.Compact.RewriteCount),
   404  		redact.Safe(m.Compact.MultiLevelCount))
   405  	w.Printf(" memtbl %9d %7s\n",
   406  		redact.Safe(m.MemTable.Count),
   407  		humanize.IEC.Uint64(m.MemTable.Size))
   408  	w.Printf("zmemtbl %9d %7s\n",
   409  		redact.Safe(m.MemTable.ZombieCount),
   410  		humanize.IEC.Uint64(m.MemTable.ZombieSize))
   411  	w.Printf("   ztbl %9d %7s\n",
   412  		redact.Safe(m.Table.ZombieCount),
   413  		humanize.IEC.Uint64(m.Table.ZombieSize))
   414  	formatCacheMetrics(w, &m.BlockCache, "bcache")
   415  	formatCacheMetrics(w, &m.TableCache, "tcache")
   416  	w.Printf("  snaps %9d %7s %7d  (score == earliest seq num)\n",
   417  		redact.Safe(m.Snapshots.Count),
   418  		notApplicable,
   419  		redact.Safe(m.Snapshots.EarliestSeqNum))
   420  	w.Printf(" titers %9d\n", redact.Safe(m.TableIters))
   421  	w.Printf(" filter %9s %7s %6.1f%%  (score == utility)\n",
   422  		notApplicable,
   423  		notApplicable,
   424  		redact.Safe(hitRate(m.Filter.Hits, m.Filter.Misses)))
   425  }
   426  
   427  func hitRate(hits, misses int64) float64 {
   428  	sum := hits + misses
   429  	if sum == 0 {
   430  		return 0
   431  	}
   432  	return 100 * float64(hits) / float64(sum)
   433  }
   434  
// InternalIntervalMetrics exposes metrics about internal subsystems, that can
// be useful for deep observability purposes, and for higher-level admission
// control systems that are trying to estimate the capacity of the DB. These
// are experimental and subject to change, since they expose internal
// implementation details, so do not rely on these without discussion with the
// Pebble team.
// These represent the metrics over the interval of time from the last call to
// retrieve these metrics. These are not cumulative, unlike Metrics. The main
// challenge in making these cumulative is the hdrhistogram.Histogram, which
// does not have the ability to subtract a histogram from a preceding metric
// retrieval.
type InternalIntervalMetrics struct {
	// LogWriter metrics.
	LogWriter struct {
		// WriteThroughput is the WAL throughput.
		WriteThroughput ThroughputMetric
		// PendingBufferUtilization is the utilization of the WAL writer's
		// finite-sized pending blocks buffer. It provides an additional signal
		// regarding how close to "full" the WAL writer is. The value is in the
		// interval [0,1].
		PendingBufferUtilization float64
		// SyncQueueUtilization is the utilization of the WAL writer's
		// finite-sized queue of work that is waiting to sync. The value is in the
		// interval [0,1].
		SyncQueueUtilization float64
		// SyncLatencyMicros is a distribution of the fsync latency observed by
		// the WAL writer. It can be nil if there were no fsyncs.
		SyncLatencyMicros *hdrhistogram.Histogram
	}
	// Flush loop metrics.
	Flush struct {
		// WriteThroughput is the flushing throughput.
		WriteThroughput ThroughputMetric
	}
	// NB: the LogWriter throughput and the Flush throughput are not directly
	// comparable because the former does not compress, unlike the latter.
}