github.com/cockroachdb/pebble@v1.1.2/sstable/options.go

// Copyright 2019 The LevelDB-Go and Pebble Authors. All rights reserved. Use
// of this source code is governed by a BSD-style license that can be found in
// the LICENSE file.

package sstable

import (
	"github.com/cockroachdb/fifo"
	"github.com/cockroachdb/pebble/internal/base"
	"github.com/cockroachdb/pebble/internal/cache"
)

// Compression is the per-block compression algorithm to use.
type Compression int

// The available compression types.
const (
	DefaultCompression Compression = iota
	NoCompression
	SnappyCompression
	ZstdCompression
	NCompression
)

var ignoredInternalProperties = map[string]struct{}{
	"rocksdb.column.family.id":             {},
	"rocksdb.fixed.key.length":             {},
	"rocksdb.index.key.is.user.key":        {},
	"rocksdb.index.value.is.delta.encoded": {},
	"rocksdb.oldest.key.time":              {},
	"rocksdb.creation.time":                {},
	"rocksdb.file.creation.time":           {},
	"rocksdb.format.version":               {},
}

func (c Compression) String() string {
	switch c {
	case DefaultCompression:
		return "Default"
	case NoCompression:
		return "NoCompression"
	case SnappyCompression:
		return "Snappy"
	case ZstdCompression:
		return "ZSTD"
	default:
		return "Unknown"
	}
}

// FilterType exports the base.FilterType type.
type FilterType = base.FilterType

// Exported TableFilter constants.
const (
	TableFilter = base.TableFilter
)

// FilterWriter exports the base.FilterWriter type.
type FilterWriter = base.FilterWriter

// FilterPolicy exports the base.FilterPolicy type.
type FilterPolicy = base.FilterPolicy

// TablePropertyCollector provides a hook for collecting user-defined
// properties based on the keys and values stored in an sstable. A new
// TablePropertyCollector is created for an sstable when the sstable is being
// written.
type TablePropertyCollector interface {
	// Add is called with each new entry added to the sstable. While the sstable
	// is itself sorted by key, do not assume that the entries are added in any
	// order. In particular, the ordering of point entries and range tombstones
	// is unspecified.
	Add(key InternalKey, value []byte) error

	// Finish is called when all entries have been added to the sstable. The
	// collected properties (if any) should be added to the specified map. Note
	// that in case of an error during sstable construction, Finish may not be
	// called.
	Finish(userProps map[string]string) error

	// The name of the property collector.
	Name() string
}
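// As a hedged illustration of the TablePropertyCollector contract above, the
// following sketch collects a single boolean property recording whether the
// table contains any MERGE entries. It is a hypothetical example, not part of
// the package API; the type and the property name "example.contains-merge"
// are invented for illustration.
type containsMergeCollector struct {
	seen bool
}

// Add inspects each entry's kind; per the interface contract, no assumption
// is made about the order in which entries arrive.
func (c *containsMergeCollector) Add(key InternalKey, value []byte) error {
	if key.Kind() == base.InternalKeyKindMerge {
		c.seen = true
	}
	return nil
}

// Finish publishes the collected property into the user-properties map.
func (c *containsMergeCollector) Finish(userProps map[string]string) error {
	if c.seen {
		userProps["example.contains-merge"] = "true"
	} else {
		userProps["example.contains-merge"] = "false"
	}
	return nil
}

// Name identifies the collector.
func (c *containsMergeCollector) Name() string { return "example.contains-merge" }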
// SuffixReplaceableTableCollector is an extension to the TablePropertyCollector
// interface that allows a table property collector to indicate that it supports
// being *updated* during suffix replacement, i.e. when an existing SST in which
// all keys have the same key suffix is updated to have a new suffix.
//
// A collector which supports being updated in such cases must be able to derive
// its updated value from its old value and the change being made to the suffix,
// without needing to be passed each updated K/V.
//
// For example, a collector that only inspects values can simply copy its
// previously computed property as-is, since key-suffix replacement does not
// change values, while a collector that depends only on key suffixes, like one
// which collected mvcc-timestamp bounds from timestamp-suffixed keys, can just
// set its new bounds from the new suffix, as it is common to all keys, without
// needing to recompute them from every key.
type SuffixReplaceableTableCollector interface {
	// UpdateKeySuffixes is called when a table is updated to change the suffix
	// of all keys in the table, and is passed the table's previously collected
	// properties, if any, as well as the old and new suffix.
	UpdateKeySuffixes(oldProps map[string]string, oldSuffix, newSuffix []byte) error
}

// ReaderOptions holds the parameters needed for reading an sstable.
type ReaderOptions struct {
	// Cache is used to cache uncompressed blocks from sstables.
	//
	// The default is a zero-size cache.
	Cache *cache.Cache

	// LoadBlockSema, if set, is used to limit the number of blocks that can be
	// loaded (i.e. read from the filesystem) in parallel. Each load acquires one
	// unit from the semaphore for the duration of the read.
	LoadBlockSema *fifo.Semaphore

	// DeniedUserProperties lists user properties that will not be added to
	// sst.Properties.UserProperties.
	DeniedUserProperties map[string]struct{}

	// Comparer defines a total ordering over the space of []byte keys: a 'less
	// than' relationship. The same comparison algorithm must be used for reads
	// and writes over the lifetime of the DB.
	//
	// The default value uses the same ordering as bytes.Compare.
	Comparer *Comparer

	// Merge defines the Merge function in use for this keyspace.
	Merge base.Merge

	// Filters is a map from filter policy name to filter policy. It is used for
	// debugging tools which may be used on multiple databases configured with
	// different filter policies. It is not necessary to populate this filters
	// map during normal usage of a DB.
	Filters map[string]FilterPolicy

	// MergerName defines the associative merge operation to use for merging
	// values written with {Batch,DB}.Merge. It is checked for consistency with
	// the value stored in the sstable when it was written.
	MergerName string

	// LoggerAndTracer is an optional logger and tracer.
	LoggerAndTracer base.LoggerAndTracer
}

func (o ReaderOptions) ensureDefaults() ReaderOptions {
	if o.Comparer == nil {
		o.Comparer = base.DefaultComparer
	}
	if o.Merge == nil {
		o.Merge = base.DefaultMerger.Merge
	}
	if o.MergerName == "" {
		o.MergerName = base.DefaultMerger.Name
	}
	if o.LoggerAndTracer == nil {
		o.LoggerAndTracer = base.NoopLoggerAndTracer{}
	}
	if o.DeniedUserProperties == nil {
		o.DeniedUserProperties = ignoredInternalProperties
	}
	return o
}
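// A minimal sketch (hypothetical, not part of the package API) of how
// ensureDefaults fills in a partially specified ReaderOptions. The 64 MB
// cache size is an arbitrary choice for illustration.
func exampleReaderOptions() ReaderOptions {
	opts := ReaderOptions{
		// A block cache; cache.New is the constructor in
		// github.com/cockroachdb/pebble/internal/cache.
		Cache: cache.New(64 << 20),
	}
	// ensureDefaults supplies the rest: base.DefaultComparer,
	// base.DefaultMerger, a no-op logger/tracer, and the ignored internal
	// properties as the denied user-property set.
	return opts.ensureDefaults()
}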
// WriterOptions holds the parameters used to control building an sstable.
type WriterOptions struct {
	// BlockRestartInterval is the number of keys between restart points
	// for delta encoding of keys.
	//
	// The default value is 16.
	BlockRestartInterval int

	// BlockSize is the target uncompressed size in bytes of each table block.
	//
	// The default value is 4096.
	BlockSize int

	// BlockSizeThreshold finishes a block once its size exceeds the specified
	// percentage of the target block size and adding the next entry would cause
	// the block to be larger than the target block size.
	//
	// The default value is 90.
	BlockSizeThreshold int

	// Cache is used to cache uncompressed blocks from sstables.
	//
	// The default is a nil cache.
	Cache *cache.Cache

	// Comparer defines a total ordering over the space of []byte keys: a 'less
	// than' relationship. The same comparison algorithm must be used for reads
	// and writes over the lifetime of the DB.
	//
	// The default value uses the same ordering as bytes.Compare.
	Comparer *Comparer

	// Compression defines the per-block compression to use.
	//
	// The default value (DefaultCompression) uses snappy compression.
	Compression Compression

	// FilterPolicy defines a filter algorithm (such as a Bloom filter) that can
	// reduce disk reads for Get calls.
	//
	// One such implementation is bloom.FilterPolicy(10) from the pebble/bloom
	// package.
	//
	// The default value (nil) means to use no filter.
	FilterPolicy FilterPolicy

	// FilterType defines whether an existing filter policy is applied at a
	// block-level or table-level. Block-level filters use less memory to create,
	// but are slower to access as a check for the key in the index must first be
	// performed to locate the filter block. A table-level filter will require
	// memory proportional to the number of keys in an sstable to create, but
	// avoids the index lookup when determining if a key is present. Table-level
	// filters should be preferred except under constrained memory situations.
	FilterType FilterType

	// IndexBlockSize is the target uncompressed size in bytes of each index
	// block. When the index block size is larger than this target, two-level
	// indexes are automatically enabled. Setting this option to a large value
	// (such as math.MaxInt32) disables the automatic creation of two-level
	// indexes.
	//
	// The default value is the value of BlockSize.
	IndexBlockSize int

	// MergerName defines the associative merge operation to use for merging
	// values written with {Batch,DB}.Merge. It is checked for consistency with
	// the value stored in the sstable when it was written.
	MergerName string

	// TableFormat specifies the format version for writing sstables. The
	// default is TableFormatRocksDBv2, which creates RocksDB-compatible
	// sstables. Use TableFormatLevelDB to create LevelDB-compatible sstables,
	// which can be used by a wider range of tools and libraries.
	TableFormat TableFormat

	// IsStrictObsolete is only relevant for >= TableFormatPebblev4. See comment
	// in format.go. Must be false if format < TableFormatPebblev4.
	//
	// TODO(bilal): set this when writing shared ssts.
	IsStrictObsolete bool

	// WritingToLowestLevel is only relevant for >= TableFormatPebblev4. It is
	// used to set the obsolete bit on DEL/DELSIZED/SINGLEDEL if they are the
	// youngest for a userkey.
	WritingToLowestLevel bool

	// TablePropertyCollectors is a list of TablePropertyCollector creation
	// functions. A new TablePropertyCollector is created for each sstable built
	// and lives for the lifetime of the table.
	TablePropertyCollectors []func() TablePropertyCollector

	// BlockPropertyCollectors is a list of BlockPropertyCollector creation
	// functions. A new BlockPropertyCollector is created for each sstable
	// built and lives for the lifetime of writing that table.
	BlockPropertyCollectors []func() BlockPropertyCollector

	// Checksum specifies which checksum to use.
	Checksum ChecksumType

	// Parallelism indicates that the sstable Writer is allowed to compress data
	// blocks and write them to disk in parallel with the Writer client
	// goroutine.
	Parallelism bool

	// ShortAttributeExtractor mirrors
	// Options.Experimental.ShortAttributeExtractor.
	ShortAttributeExtractor base.ShortAttributeExtractor

	// RequiredInPlaceValueBound mirrors
	// Options.Experimental.RequiredInPlaceValueBound.
	RequiredInPlaceValueBound UserKeyPrefixBound
}
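// A hedged sketch (hypothetical, not part of the package API) of a
// WriterOptions configuration that registers the collector sketched earlier.
// The block sizes are arbitrary illustrative choices, not recommendations.
func exampleWriterOptions() WriterOptions {
	opts := WriterOptions{
		BlockSize:      32 << 10,  // 32 KB data blocks instead of the 4 KB default
		IndexBlockSize: 256 << 10, // larger index blocks delay two-level indexes
		Compression:    ZstdCompression,
		TableFormat:    TableFormatPebblev3,
		TablePropertyCollectors: []func() TablePropertyCollector{
			func() TablePropertyCollector { return &containsMergeCollector{} },
		},
	}
	// ensureDefaults fills in the remaining zero-valued fields, e.g. the
	// restart interval (16), the merger name, and the CRC32c checksum.
	return opts.ensureDefaults()
}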
func (o WriterOptions) ensureDefaults() WriterOptions {
	if o.BlockRestartInterval <= 0 {
		o.BlockRestartInterval = base.DefaultBlockRestartInterval
	}
	if o.BlockSize <= 0 {
		o.BlockSize = base.DefaultBlockSize
	}
	if o.BlockSizeThreshold <= 0 {
		o.BlockSizeThreshold = base.DefaultBlockSizeThreshold
	}
	if o.Comparer == nil {
		o.Comparer = base.DefaultComparer
	}
	if o.Compression <= DefaultCompression || o.Compression >= NCompression {
		o.Compression = SnappyCompression
	}
	if o.IndexBlockSize <= 0 {
		o.IndexBlockSize = o.BlockSize
	}
	if o.MergerName == "" {
		o.MergerName = base.DefaultMerger.Name
	}
	if o.Checksum == ChecksumTypeNone {
		o.Checksum = ChecksumTypeCRC32c
	}
	// By default, if the table format is not specified, fall back to using the
	// most compatible format.
	if o.TableFormat == TableFormatUnspecified {
		o.TableFormat = TableFormatRocksDBv2
	}
	return o
}
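// To round out the SuffixReplaceableTableCollector contract described above,
// here is a hedged sketch of a collector that tracks a single key-suffix
// bound. Because suffix replacement gives every key in the table the same new
// suffix, the update can be derived from the suffix alone, without revisiting
// any K/V pairs. The type and the property name are hypothetical.
type suffixBoundCollector struct {
	suffix []byte
}

func (c *suffixBoundCollector) Add(key InternalKey, value []byte) error {
	// A real implementation would extract and compare the suffix of
	// key.UserKey here; elided in this sketch.
	return nil
}

func (c *suffixBoundCollector) Finish(userProps map[string]string) error {
	userProps["example.suffix-bound"] = string(c.suffix)
	return nil
}

func (c *suffixBoundCollector) Name() string { return "example.suffix-bound" }

// UpdateKeySuffixes implements SuffixReplaceableTableCollector: since the new
// suffix is common to all keys in the table, the bound is simply the new
// suffix, and neither oldProps nor oldSuffix is needed.
func (c *suffixBoundCollector) UpdateKeySuffixes(
	oldProps map[string]string, oldSuffix, newSuffix []byte,
) error {
	c.suffix = append(c.suffix[:0], newSuffix...)
	return nil
}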