github.com/cockroachdb/pebble@v1.1.1-0.20240513155919-3622ade60459/sstable/options.go (about)

     1  // Copyright 2019 The LevelDB-Go and Pebble Authors. All rights reserved. Use
     2  // of this source code is governed by a BSD-style license that can be found in
     3  // the LICENSE file.
     4  
     5  package sstable
     6  
     7  import (
     8  	"github.com/cockroachdb/pebble/internal/base"
     9  	"github.com/cockroachdb/pebble/internal/cache"
    10  )
    11  
// Compression is the per-block compression algorithm to use.
type Compression int

// The available compression types.
const (
	// DefaultCompression is the zero value of Compression.
	// WriterOptions.ensureDefaults replaces it with SnappyCompression.
	DefaultCompression Compression = iota
	NoCompression
	SnappyCompression
	ZstdCompression
	// NCompression is one past the last named compression type. It is used
	// as an upper bound: WriterOptions.ensureDefaults treats any value >=
	// NCompression as out of range and substitutes SnappyCompression, and
	// Compression.String reports it as "Unknown".
	NCompression
)
    23  
// ignoredInternalProperties is the set of property names that
// ReaderOptions.ensureDefaults installs as DeniedUserProperties when the
// caller leaves that field nil. Only the keys are meaningful; the values are
// empty structs.
//
// The map itself (not a copy) is assigned by ensureDefaults, so every
// ReaderOptions defaulted that way shares this single instance.
// NOTE(review): it should therefore be treated as read-only.
var ignoredInternalProperties = map[string]struct{}{
	"rocksdb.column.family.id":             {},
	"rocksdb.fixed.key.length":             {},
	"rocksdb.index.key.is.user.key":        {},
	"rocksdb.index.value.is.delta.encoded": {},
	"rocksdb.oldest.key.time":              {},
	"rocksdb.creation.time":                {},
	"rocksdb.file.creation.time":           {},
	"rocksdb.format.version":               {},
}
    34  
    35  func (c Compression) String() string {
    36  	switch c {
    37  	case DefaultCompression:
    38  		return "Default"
    39  	case NoCompression:
    40  		return "NoCompression"
    41  	case SnappyCompression:
    42  		return "Snappy"
    43  	case ZstdCompression:
    44  		return "ZSTD"
    45  	default:
    46  		return "Unknown"
    47  	}
    48  }
    49  
// FilterType is an alias that re-exports base.FilterType from the internal
// base package.
type FilterType = base.FilterType

// Exported TableFilter constants.
const (
	// TableFilter re-exports base.TableFilter.
	TableFilter = base.TableFilter
)

// FilterWriter is an alias that re-exports base.FilterWriter from the
// internal base package.
type FilterWriter = base.FilterWriter

// FilterPolicy is an alias that re-exports base.FilterPolicy from the
// internal base package.
type FilterPolicy = base.FilterPolicy
    63  
// TablePropertyCollector provides a hook for collecting user-defined
// properties based on the keys and values stored in an sstable. A new
// TablePropertyCollector is created for an sstable when the sstable is being
// written.
type TablePropertyCollector interface {
	// Add is called with each new entry added to the sstable. While the sstable
	// is itself sorted by key, do not assume that the entries are added in any
	// order. In particular, the ordering of point entries and range tombstones
	// is unspecified.
	Add(key InternalKey, value []byte) error

	// Finish is called when all entries have been added to the sstable. The
	// collected properties (if any) should be added to the specified map. Note
	// that in case of an error during sstable construction, Finish may not be
	// called.
	Finish(userProps map[string]string) error

	// Name returns the name of the property collector.
	Name() string
}
    84  
// SuffixReplaceableTableCollector is an extension to the TablePropertyCollector
// interface that allows a table property collector to indicate that it supports
// being *updated* during suffix replacement, i.e. when an existing SST in which
// all keys have the same key suffix is updated to have a new suffix.
//
// The interface declares only UpdateKeySuffixes; it does not embed
// TablePropertyCollector.
//
// A collector which supports being updated in such cases must be able to derive
// its updated value from its old value and the change being made to the suffix,
// without needing to be passed each updated K/V.
//
// For example, a collector that only inspects values can simply copy its
// previously computed property as-is, since key-suffix replacement does not
// change values, while a collector that depends only on key suffixes, like one
// which collected mvcc-timestamp bounds from timestamp-suffixed keys, can just
// set its new bounds from the new suffix, as it is common to all keys, without
// needing to recompute it from every key.
type SuffixReplaceableTableCollector interface {
	// UpdateKeySuffixes is called when a table is updated to change the suffix
	// of all keys in the table. oldProps carries the table's previous
	// properties, from which the collector can read its own old value (if
	// any); oldSuffix and newSuffix are the suffix being replaced and its
	// replacement.
	UpdateKeySuffixes(oldProps map[string]string, oldSuffix, newSuffix []byte) error
}
   106  
// ReaderOptions holds the parameters needed for reading an sstable.
//
// Several fields have defaults that are applied by ensureDefaults when the
// field is left at its zero value; these are noted on the individual fields.
type ReaderOptions struct {
	// Cache is used to cache uncompressed blocks from sstables.
	//
	// The default cache size is a zero-size cache.
	Cache *cache.Cache

	// DeniedUserProperties is the set of user property names that will not be
	// added to sst.Properties.UserProperties.
	//
	// If nil, ensureDefaults sets it to ignoredInternalProperties.
	DeniedUserProperties map[string]struct{}

	// Comparer defines a total ordering over the space of []byte keys: a 'less
	// than' relationship. The same comparison algorithm must be used for reads
	// and writes over the lifetime of the DB.
	//
	// The default value uses the same ordering as bytes.Compare. If nil,
	// ensureDefaults sets it to base.DefaultComparer.
	Comparer *Comparer

	// Merge defines the Merge function in use for this keyspace.
	//
	// If nil, ensureDefaults sets it to base.DefaultMerger.Merge.
	Merge base.Merge

	// Filters is a map from filter policy name to filter policy. It is used for
	// debugging tools which may be used on multiple databases configured with
	// different filter policies. It is not necessary to populate this filters
	// map during normal usage of a DB.
	Filters map[string]FilterPolicy

	// MergerName is the name of the associative merge operation used for
	// merging values written with {Batch,DB}.Merge. The MergerName is checked
	// for consistency with the value stored in the sstable when it was written.
	//
	// If empty, ensureDefaults sets it to base.DefaultMerger.Name.
	MergerName string

	// LoggerAndTracer is an optional logger and tracer.
	//
	// If nil, ensureDefaults sets it to base.NoopLoggerAndTracer{}.
	LoggerAndTracer base.LoggerAndTracer
}
   141  
   142  func (o ReaderOptions) ensureDefaults() ReaderOptions {
   143  	if o.Comparer == nil {
   144  		o.Comparer = base.DefaultComparer
   145  	}
   146  	if o.Merge == nil {
   147  		o.Merge = base.DefaultMerger.Merge
   148  	}
   149  	if o.MergerName == "" {
   150  		o.MergerName = base.DefaultMerger.Name
   151  	}
   152  	if o.LoggerAndTracer == nil {
   153  		o.LoggerAndTracer = base.NoopLoggerAndTracer{}
   154  	}
   155  	if o.DeniedUserProperties == nil {
   156  		o.DeniedUserProperties = ignoredInternalProperties
   157  	}
   158  	return o
   159  }
   160  
// WriterOptions holds the parameters used to control building an sstable.
//
// Several fields have defaults that are applied by ensureDefaults when the
// field is unset or out of range; these are noted on the individual fields.
type WriterOptions struct {
	// BlockRestartInterval is the number of keys between restart points
	// for delta encoding of keys.
	//
	// The default value is 16. Values <= 0 are replaced by ensureDefaults with
	// base.DefaultBlockRestartInterval.
	BlockRestartInterval int

	// BlockSize is the target uncompressed size in bytes of each table block.
	//
	// The default value is 4096. Values <= 0 are replaced by ensureDefaults
	// with base.DefaultBlockSize.
	BlockSize int

	// BlockSizeThreshold finishes a block if the block size is larger than the
	// specified percentage of the target block size and adding the next entry
	// would cause the block to be larger than the target block size.
	//
	// The default value is 90. Values <= 0 are replaced by ensureDefaults with
	// base.DefaultBlockSizeThreshold.
	BlockSizeThreshold int

	// Cache is used to cache uncompressed blocks from sstables.
	//
	// The default is a nil cache.
	Cache *cache.Cache

	// Comparer defines a total ordering over the space of []byte keys: a 'less
	// than' relationship. The same comparison algorithm must be used for reads
	// and writes over the lifetime of the DB.
	//
	// The default value uses the same ordering as bytes.Compare. If nil,
	// ensureDefaults sets it to base.DefaultComparer.
	Comparer *Comparer

	// Compression defines the per-block compression to use.
	//
	// The default value (DefaultCompression) uses snappy compression.
	// ensureDefaults also replaces any value outside the open interval
	// (DefaultCompression, NCompression) with SnappyCompression.
	Compression Compression

	// FilterPolicy defines a filter algorithm (such as a Bloom filter) that can
	// reduce disk reads for Get calls.
	//
	// One such implementation is bloom.FilterPolicy(10) from the pebble/bloom
	// package.
	//
	// The default value means to use no filter.
	FilterPolicy FilterPolicy

	// FilterType defines whether an existing filter policy is applied at a
	// block-level or table-level. Block-level filters use less memory to create,
	// but are slower to access as a check for the key in the index must first be
	// performed to locate the filter block. A table-level filter will require
	// memory proportional to the number of keys in an sstable to create, but
	// avoids the index lookup when determining if a key is present. Table-level
	// filters should be preferred except under constrained memory situations.
	FilterType FilterType

	// IndexBlockSize is the target uncompressed size in bytes of each index
	// block. When the index block size is larger than this target, two-level
	// indexes are automatically enabled. Setting this option to a large value
	// (such as math.MaxInt32) disables the automatic creation of two-level
	// indexes.
	//
	// The default value is the value of BlockSize. Values <= 0 are replaced by
	// ensureDefaults with BlockSize, after BlockSize itself has been defaulted.
	IndexBlockSize int

	// MergerName is the name of the associative merge operation used for
	// merging values written with {Batch,DB}.Merge. The MergerName is checked
	// for consistency with the value stored in the sstable when it was written.
	//
	// If empty, ensureDefaults sets it to base.DefaultMerger.Name.
	MergerName string

	// TableFormat specifies the format version for writing sstables. The default
	// is TableFormatRocksDBv2 which creates RocksDB compatible sstables. Use
	// TableFormatLevelDB to create LevelDB compatible sstable which can be used
	// by a wider range of tools and libraries.
	//
	// If TableFormatUnspecified, ensureDefaults selects TableFormatRocksDBv2.
	TableFormat TableFormat

	// IsStrictObsolete is only relevant for >= TableFormatPebblev4. See comment
	// in format.go. Must be false if format < TableFormatPebblev4.
	//
	// TODO(bilal): set this when writing shared ssts.
	IsStrictObsolete bool

	// WritingToLowestLevel is only relevant for >= TableFormatPebblev4. It is
	// used to set the obsolete bit on DEL/DELSIZED/SINGLEDEL if they are the
	// youngest for a userkey.
	WritingToLowestLevel bool

	// TablePropertyCollectors is a list of TablePropertyCollector creation
	// functions. A new TablePropertyCollector is created for each sstable built
	// and lives for the lifetime of the table.
	TablePropertyCollectors []func() TablePropertyCollector

	// BlockPropertyCollectors is a list of BlockPropertyCollector creation
	// functions. A new BlockPropertyCollector is created for each sstable
	// built and lives for the lifetime of writing that table.
	BlockPropertyCollectors []func() BlockPropertyCollector

	// Checksum specifies which checksum to use.
	//
	// If ChecksumTypeNone, ensureDefaults replaces it with ChecksumTypeCRC32c.
	Checksum ChecksumType

	// Parallelism is used to indicate that the sstable Writer is allowed to
	// compress data blocks and write datablocks to disk in parallel with the
	// Writer client goroutine.
	Parallelism bool

	// ShortAttributeExtractor mirrors
	// Options.Experimental.ShortAttributeExtractor.
	ShortAttributeExtractor base.ShortAttributeExtractor

	// RequiredInPlaceValueBound mirrors
	// Options.Experimental.RequiredInPlaceValueBound.
	RequiredInPlaceValueBound UserKeyPrefixBound
}
   273  
   274  func (o WriterOptions) ensureDefaults() WriterOptions {
   275  	if o.BlockRestartInterval <= 0 {
   276  		o.BlockRestartInterval = base.DefaultBlockRestartInterval
   277  	}
   278  	if o.BlockSize <= 0 {
   279  		o.BlockSize = base.DefaultBlockSize
   280  	}
   281  	if o.BlockSizeThreshold <= 0 {
   282  		o.BlockSizeThreshold = base.DefaultBlockSizeThreshold
   283  	}
   284  	if o.Comparer == nil {
   285  		o.Comparer = base.DefaultComparer
   286  	}
   287  	if o.Compression <= DefaultCompression || o.Compression >= NCompression {
   288  		o.Compression = SnappyCompression
   289  	}
   290  	if o.IndexBlockSize <= 0 {
   291  		o.IndexBlockSize = o.BlockSize
   292  	}
   293  	if o.MergerName == "" {
   294  		o.MergerName = base.DefaultMerger.Name
   295  	}
   296  	if o.Checksum == ChecksumTypeNone {
   297  		o.Checksum = ChecksumTypeCRC32c
   298  	}
   299  	// By default, if the table format is not specified, fall back to using the
   300  	// most compatible format.
   301  	if o.TableFormat == TableFormatUnspecified {
   302  		o.TableFormat = TableFormatRocksDBv2
   303  	}
   304  	return o
   305  }