github.com/cockroachdb/pebble@v1.1.2/sstable/options.go (about)

     1  // Copyright 2019 The LevelDB-Go and Pebble Authors. All rights reserved. Use
     2  // of this source code is governed by a BSD-style license that can be found in
     3  // the LICENSE file.
     4  
     5  package sstable
     6  
     7  import (
     8  	"github.com/cockroachdb/fifo"
     9  	"github.com/cockroachdb/pebble/internal/base"
    10  	"github.com/cockroachdb/pebble/internal/cache"
    11  )
    12  
// Compression is the per-block compression algorithm to use. It is an integer
// enumeration whose valid values are the constants declared alongside it; the
// zero value is DefaultCompression.
type Compression int
    15  
// The available compression types.
const (
	// DefaultCompression is the zero value. WriterOptions.ensureDefaults
	// replaces it with SnappyCompression.
	DefaultCompression Compression = iota
	// NoCompression is reported as "NoCompression" by Compression.String.
	NoCompression
	// SnappyCompression is reported as "Snappy" by Compression.String.
	SnappyCompression
	// ZstdCompression is reported as "ZSTD" by Compression.String.
	ZstdCompression
	// NCompression is not a compression algorithm: it is one past the last
	// valid value and serves as the exclusive upper bound when validating a
	// Compression (see WriterOptions.ensureDefaults).
	NCompression
)
    24  
// ignoredInternalProperties is the set of property names used as the default
// value of ReaderOptions.DeniedUserProperties when the caller leaves that
// field nil (see ReaderOptions.ensureDefaults). The map is shared, not copied,
// so it must be treated as read-only.
var ignoredInternalProperties = map[string]struct{}{
	"rocksdb.column.family.id":             {},
	"rocksdb.fixed.key.length":             {},
	"rocksdb.index.key.is.user.key":        {},
	"rocksdb.index.value.is.delta.encoded": {},
	"rocksdb.oldest.key.time":              {},
	"rocksdb.creation.time":                {},
	"rocksdb.file.creation.time":           {},
	"rocksdb.format.version":               {},
}
    35  
    36  func (c Compression) String() string {
    37  	switch c {
    38  	case DefaultCompression:
    39  		return "Default"
    40  	case NoCompression:
    41  		return "NoCompression"
    42  	case SnappyCompression:
    43  		return "Snappy"
    44  	case ZstdCompression:
    45  		return "ZSTD"
    46  	default:
    47  		return "Unknown"
    48  	}
    49  }
    50  
// FilterType exports the base.FilterType type. It is a type alias, so the two
// names are interchangeable; it is the type of WriterOptions.FilterType.
type FilterType = base.FilterType
    53  
// Exported TableFilter constants.
const (
	// TableFilter re-exports base.TableFilter under this package's name.
	TableFilter = base.TableFilter
)
    58  
// FilterWriter exports the base.FilterWriter type. It is a type alias, so the
// two names are interchangeable.
type FilterWriter = base.FilterWriter
    61  
// FilterPolicy exports the base.FilterPolicy type. It is a type alias, so the
// two names are interchangeable; it is the type of WriterOptions.FilterPolicy
// and of the values in ReaderOptions.Filters.
type FilterPolicy = base.FilterPolicy
    64  
// TablePropertyCollector provides a hook for collecting user-defined
// properties based on the keys and values stored in an sstable. A new
// TablePropertyCollector is created for an sstable when the sstable is being
// written.
type TablePropertyCollector interface {
	// Add is called with each new entry added to the sstable. While the sstable
	// is itself sorted by key, do not assume that the entries are added in any
	// order. In particular, the ordering of point entries and range tombstones
	// is unspecified.
	Add(key InternalKey, value []byte) error

	// Finish is called when all entries have been added to the sstable. The
	// collected properties (if any) should be added to the specified map. Note
	// that in case of an error during sstable construction, Finish may not be
	// called.
	Finish(userProps map[string]string) error

	// Name returns the name of the property collector.
	Name() string
}
    85  
// SuffixReplaceableTableCollector is an extension to the TablePropertyCollector
// interface that allows a table property collector to indicate that it supports
// being *updated* during suffix replacement, i.e. when an existing SST in which
// all keys have the same key suffix is updated to have a new suffix.
//
// A collector which supports being updated in such cases must be able to derive
// its updated value from its old value and the change being made to the suffix,
// without needing to be passed each updated K/V.
//
// For example, a collector that only inspects values can simply copy its
// previously computed property as-is, since key-suffix replacement does not
// change values, while a collector that depends only on key suffixes, like one
// which collected mvcc-timestamp bounds from timestamp-suffixed keys, can just
// set its new bounds from the new suffix, as it is common to all keys, without
// needing to recompute it from every key.
type SuffixReplaceableTableCollector interface {
	// UpdateKeySuffixes is called when a table is updated to change the suffix
	// of all keys in the table. It is passed the table's previous property
	// values, if any (oldProps), together with the old suffix and the new
	// suffix that replaces it.
	UpdateKeySuffixes(oldProps map[string]string, oldSuffix, newSuffix []byte) error
}
   107  
// ReaderOptions holds the parameters needed for reading an sstable.
type ReaderOptions struct {
	// Cache is used to cache uncompressed blocks from sstables.
	//
	// The default cache size is a zero-size cache.
	Cache *cache.Cache

	// LoadBlockSema, if set, is used to limit the number of blocks that can be
	// loaded (i.e. read from the filesystem) in parallel. Each load acquires one
	// unit from the semaphore for the duration of the read.
	LoadBlockSema *fifo.Semaphore

	// DeniedUserProperties is the set of user property names that will not be
	// added to sst.Properties.UserProperties.
	//
	// If nil, it defaults to a built-in set of internal "rocksdb.*" property
	// names (ignoredInternalProperties).
	DeniedUserProperties map[string]struct{}

	// Comparer defines a total ordering over the space of []byte keys: a 'less
	// than' relationship. The same comparison algorithm must be used for reads
	// and writes over the lifetime of the DB.
	//
	// The default value uses the same ordering as bytes.Compare.
	Comparer *Comparer

	// Merge defines the Merge function in use for this keyspace.
	//
	// The default value is base.DefaultMerger.Merge.
	Merge base.Merge

	// Filters is a map from filter policy name to filter policy. It is used for
	// debugging tools which may be used on multiple databases configured with
	// different filter policies. It is not necessary to populate this filters
	// map during normal usage of a DB.
	Filters map[string]FilterPolicy

	// MergerName is the name of the Merger, i.e. the associative merge
	// operation used for merging values written with {Batch,DB}.Merge. The
	// MergerName is checked for consistency with the value stored in the
	// sstable when it was written.
	//
	// The default value is base.DefaultMerger.Name.
	MergerName string

	// LoggerAndTracer is an optional logger and tracer.
	//
	// The default value is base.NoopLoggerAndTracer{}.
	LoggerAndTracer base.LoggerAndTracer
}
   147  
   148  func (o ReaderOptions) ensureDefaults() ReaderOptions {
   149  	if o.Comparer == nil {
   150  		o.Comparer = base.DefaultComparer
   151  	}
   152  	if o.Merge == nil {
   153  		o.Merge = base.DefaultMerger.Merge
   154  	}
   155  	if o.MergerName == "" {
   156  		o.MergerName = base.DefaultMerger.Name
   157  	}
   158  	if o.LoggerAndTracer == nil {
   159  		o.LoggerAndTracer = base.NoopLoggerAndTracer{}
   160  	}
   161  	if o.DeniedUserProperties == nil {
   162  		o.DeniedUserProperties = ignoredInternalProperties
   163  	}
   164  	return o
   165  }
   166  
// WriterOptions holds the parameters used to control building an sstable.
type WriterOptions struct {
	// BlockRestartInterval is the number of keys between restart points
	// for delta encoding of keys.
	//
	// The default value is 16.
	BlockRestartInterval int

	// BlockSize is the target uncompressed size in bytes of each table block.
	//
	// The default value is 4096.
	BlockSize int

	// BlockSizeThreshold finishes a block if the block size is larger than the
	// specified percentage of the target block size and adding the next entry
	// would cause the block to be larger than the target block size.
	//
	// The default value is 90.
	BlockSizeThreshold int

	// Cache is used to cache uncompressed blocks from sstables.
	//
	// The default is a nil cache.
	Cache *cache.Cache

	// Comparer defines a total ordering over the space of []byte keys: a 'less
	// than' relationship. The same comparison algorithm must be used for reads
	// and writes over the lifetime of the DB.
	//
	// The default value uses the same ordering as bytes.Compare.
	Comparer *Comparer

	// Compression defines the per-block compression to use.
	//
	// The default value (DefaultCompression) uses snappy compression. Values
	// outside the valid range (negative, or >= NCompression) are also replaced
	// with SnappyCompression.
	Compression Compression

	// FilterPolicy defines a filter algorithm (such as a Bloom filter) that can
	// reduce disk reads for Get calls.
	//
	// One such implementation is bloom.FilterPolicy(10) from the pebble/bloom
	// package.
	//
	// The default value means to use no filter.
	FilterPolicy FilterPolicy

	// FilterType defines whether an existing filter policy is applied at a
	// block-level or table-level. Block-level filters use less memory to create,
	// but are slower to access as a check for the key in the index must first be
	// performed to locate the filter block. A table-level filter will require
	// memory proportional to the number of keys in an sstable to create, but
	// avoids the index lookup when determining if a key is present. Table-level
	// filters should be preferred except under constrained memory situations.
	FilterType FilterType

	// IndexBlockSize is the target uncompressed size in bytes of each index
	// block. When the index block size is larger than this target, two-level
	// indexes are automatically enabled. Setting this option to a large value
	// (such as math.MaxInt32) disables the automatic creation of two-level
	// indexes.
	//
	// The default value is the value of BlockSize.
	IndexBlockSize int

	// MergerName is the name of the Merger, i.e. the associative merge
	// operation used for merging values written with {Batch,DB}.Merge. The
	// MergerName is checked for consistency with the value stored in the
	// sstable when it was written.
	//
	// The default value is base.DefaultMerger.Name.
	MergerName string

	// TableFormat specifies the format version for writing sstables. The default
	// is TableFormatRocksDBv2 which creates RocksDB compatible sstables. Use
	// TableFormatLevelDB to create LevelDB compatible sstable which can be used
	// by a wider range of tools and libraries.
	TableFormat TableFormat

	// IsStrictObsolete is only relevant for >= TableFormatPebblev4. See comment
	// in format.go. Must be false if format < TableFormatPebblev4.
	//
	// TODO(bilal): set this when writing shared ssts.
	IsStrictObsolete bool

	// WritingToLowestLevel is only relevant for >= TableFormatPebblev4. It is
	// used to set the obsolete bit on DEL/DELSIZED/SINGLEDEL if they are the
	// youngest for a userkey.
	WritingToLowestLevel bool

	// TablePropertyCollectors is a list of TablePropertyCollector creation
	// functions. A new TablePropertyCollector is created for each sstable built
	// and lives for the lifetime of the table.
	TablePropertyCollectors []func() TablePropertyCollector

	// BlockPropertyCollectors is a list of BlockPropertyCollector creation
	// functions. A new BlockPropertyCollector is created for each sstable
	// built and lives for the lifetime of writing that table.
	BlockPropertyCollectors []func() BlockPropertyCollector

	// Checksum specifies which checksum to use.
	//
	// If left as ChecksumTypeNone, it defaults to ChecksumTypeCRC32c.
	Checksum ChecksumType

	// Parallelism is used to indicate that the sstable Writer is allowed to
	// compress data blocks and write datablocks to disk in parallel with the
	// Writer client goroutine.
	Parallelism bool

	// ShortAttributeExtractor mirrors
	// Options.Experimental.ShortAttributeExtractor.
	ShortAttributeExtractor base.ShortAttributeExtractor

	// RequiredInPlaceValueBound mirrors
	// Options.Experimental.RequiredInPlaceValueBound.
	RequiredInPlaceValueBound UserKeyPrefixBound
}
   279  
   280  func (o WriterOptions) ensureDefaults() WriterOptions {
   281  	if o.BlockRestartInterval <= 0 {
   282  		o.BlockRestartInterval = base.DefaultBlockRestartInterval
   283  	}
   284  	if o.BlockSize <= 0 {
   285  		o.BlockSize = base.DefaultBlockSize
   286  	}
   287  	if o.BlockSizeThreshold <= 0 {
   288  		o.BlockSizeThreshold = base.DefaultBlockSizeThreshold
   289  	}
   290  	if o.Comparer == nil {
   291  		o.Comparer = base.DefaultComparer
   292  	}
   293  	if o.Compression <= DefaultCompression || o.Compression >= NCompression {
   294  		o.Compression = SnappyCompression
   295  	}
   296  	if o.IndexBlockSize <= 0 {
   297  		o.IndexBlockSize = o.BlockSize
   298  	}
   299  	if o.MergerName == "" {
   300  		o.MergerName = base.DefaultMerger.Name
   301  	}
   302  	if o.Checksum == ChecksumTypeNone {
   303  		o.Checksum = ChecksumTypeCRC32c
   304  	}
   305  	// By default, if the table format is not specified, fall back to using the
   306  	// most compatible format.
   307  	if o.TableFormat == TableFormatUnspecified {
   308  		o.TableFormat = TableFormatRocksDBv2
   309  	}
   310  	return o
   311  }