github.com/pingcap/badger@v1.5.1-0.20230103063557-828f39b09b6d/options.go (about)

     1  /*
     2   * Copyright 2017 Dgraph Labs, Inc. and Contributors
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  package badger
    18  
    19  import (
    20  	"github.com/pingcap/badger/options"
    21  )
    22  
    23  // NOTE: Keep the comments in the following to 75 chars width, so they
    24  // format nicely in godoc.
    25  
    26  // Options are params for creating DB object.
    27  //
    28  // This package provides DefaultOptions which contains options that should
    29  // work for most applications. Consider using that as a starting point before
    30  // customizing it for your own needs.
    31  type Options struct {
    32  	// 1. Mandatory flags
    33  	// -------------------
    34  	// Directory to store the data in. Should exist and be writable.
    35  	Dir string
    36  	// Directory to store the value log in. Can be the same as Dir. Should
    37  	// exist and be writable.
    38  	ValueDir string
    39  
    40  	// 2. Frequently modified flags
    41  	// -----------------------------
    42  	// Sync all writes to disk. Setting this to true would slow down data
    43  	// loading significantly.
    44  	SyncWrites bool
    45  
    46  	// 3. Flags that user might want to review
    47  	// ----------------------------------------
    48  	// The following affect all levels of LSM tree.
    49  	MaxMemTableSize int64 // Each mem table is at most this size.
    50  	// If value size >= this threshold, only store value offsets in tree.
    51  	// If set to 0, all values are stored in SST.
    52  	ValueThreshold int
    53  	// Maximum number of tables to keep in memory, before stalling.
    54  	NumMemtables int
    55  	// The following affect how we handle LSM tree L0.
    56  	// Maximum number of Level 0 tables before we start compacting.
    57  	NumLevelZeroTables int
    58  
    59  	// If we hit this number of Level 0 tables, we will stall until L0 is
    60  	// compacted away.
    61  	NumLevelZeroTablesStall int
    62  
    63  	MaxBlockCacheSize int64
    64  	MaxIndexCacheSize int64
    65  
    66  	// Maximum total size for L1.
    67  	LevelOneSize int64
    68  
    69  	// Size of single value log file.
    70  	ValueLogFileSize int64
    71  
    72  	// Max number of entries a value log file can hold (approximately). A value log file would be
    73  	// determined by the smaller of its file size and max entries.
    74  	ValueLogMaxEntries uint32
    75  
    76  	// Max number of value log files to keep before safely remove.
    77  	ValueLogMaxNumFiles int
    78  
    79  	// Number of compaction workers to run concurrently.
    80  	NumCompactors int
    81  
    82  	// Transaction start and commit timestamps are managed by end-user.
    83  	// A managed transaction can only set values by SetEntry with a non-zero version key.
    84  	ManagedTxns bool
    85  
    86  	// 4. Flags for testing purposes
    87  	// ------------------------------
    88  	VolatileMode bool
    89  	DoNotCompact bool // Stops LSM tree from compactions.
    90  
    91  	maxBatchCount int64 // max entries in batch
    92  	maxBatchSize  int64 // max batch size in bytes
    93  
    94  	// Open the DB as read-only. With this set, multiple processes can
    95  	// open the same Badger DB. Note: if the DB being opened had crashed
    96  	// before and has vlog data to be replayed, ReadOnly will cause Open
    97  	// to fail with an appropriate message.
    98  	ReadOnly bool
    99  
   100  	// Truncate value log to delete corrupt data, if any. Would not truncate if ReadOnly is set.
   101  	Truncate bool
   102  
   103  	TableBuilderOptions options.TableBuilderOptions
   104  
   105  	ValueLogWriteOptions options.ValueLogWriterOptions
   106  
   107  	CompactionFilterFactory func(targetLevel int, smallest, biggest []byte) CompactionFilter
   108  
   109  	CompactL0WhenClose bool
   110  
   111  	RemoteCompactionAddr string
   112  }
   113  
   114  // CompactionFilter is an interface that user can implement to remove certain keys.
   115  type CompactionFilter interface {
   116  	// Filter is the method the compaction process invokes for kv that is being compacted. The returned decision
   117  	// indicates that the kv should be preserved, deleted or dropped in the output of this compaction run.
   118  	Filter(key, val, userMeta []byte) Decision
   119  
   120  	// Guards returns specifications that may splits the SST files
   121  	// A key is associated to a guard that has the longest matched Prefix.
   122  	Guards() []Guard
   123  }
   124  
   125  // Guard specifies when to finish a SST file during compaction. The rule is the following:
   126  // 1. The key must match the Prefix of the Guard, otherwise the SST should finish.
   127  // 2. If the key up to MatchLen is the different than the previous key and MinSize is reached, the SST should finish.
   128  type Guard struct {
   129  	Prefix   []byte
   130  	MatchLen int
   131  	MinSize  int64
   132  }
   133  
   134  // Decision is the type for compaction filter decision.
   135  type Decision int
   136  
   137  const (
   138  	// DecisionKeep indicates the entry should be reserved.
   139  	DecisionKeep Decision = 0
   140  	// DecisionMarkTombstone converts the entry to a delete tombstone.
   141  	DecisionMarkTombstone Decision = 1
   142  	// DecisionDrop simply drops the entry, doesn't leave a delete tombstone.
   143  	DecisionDrop Decision = 2
   144  )
   145  
   146  // DefaultOptions sets a list of recommended options for good performance.
   147  // Feel free to modify these to suit your needs.
   148  var DefaultOptions = Options{
   149  	DoNotCompact:            false,
   150  	LevelOneSize:            256 << 20,
   151  	MaxMemTableSize:         64 << 20,
   152  	NumCompactors:           3,
   153  	NumLevelZeroTables:      5,
   154  	NumLevelZeroTablesStall: 10,
   155  	NumMemtables:            5,
   156  	SyncWrites:              true,
   157  	ValueLogFileSize:        256 << 20,
   158  	ValueLogMaxEntries:      1000000,
   159  	ValueLogMaxNumFiles:     1,
   160  	ValueThreshold:          32,
   161  	Truncate:                false,
   162  	MaxBlockCacheSize:       1 << 30,
   163  	MaxIndexCacheSize:       1 << 30,
   164  	TableBuilderOptions: options.TableBuilderOptions{
   165  		MaxTableSize:        8 << 20,
   166  		SuRFStartLevel:      8,
   167  		HashUtilRatio:       0.75,
   168  		WriteBufferSize:     2 * 1024 * 1024,
   169  		BytesPerSecond:      -1,
   170  		MaxLevels:           7,
   171  		LevelSizeMultiplier: 10,
   172  		BlockSize:           64 * 1024,
   173  		// TODO: use lz4 instead of snappy for better (de)compress performance.
   174  		CompressionPerLevel: []options.CompressionType{options.None, options.None, options.Snappy, options.Snappy, options.Snappy, options.ZSTD, options.ZSTD},
   175  		LogicalBloomFPR:     0.01,
   176  		SuRFOptions: options.SuRFOptions{
   177  			HashSuffixLen:  8,
   178  			RealSuffixLen:  8,
   179  			BitsPerKeyHint: 40,
   180  		},
   181  	},
   182  	ValueLogWriteOptions: options.ValueLogWriterOptions{
   183  		WriteBufferSize: 2 * 1024 * 1024,
   184  	},
   185  	CompactL0WhenClose: true,
   186  }
   187  
   188  // LSMOnlyOptions follows from DefaultOptions, but sets a higher ValueThreshold so values would
   189  // be colocated with the LSM tree, with value log largely acting as a write-ahead log only. These
   190  // options would reduce the disk usage of value log, and make Badger act like a typical LSM tree.
   191  var LSMOnlyOptions = Options{}
   192  
   193  func init() {
   194  	LSMOnlyOptions = DefaultOptions
   195  
   196  	LSMOnlyOptions.ValueThreshold = 65500      // Max value length which fits in uint16.
   197  	LSMOnlyOptions.ValueLogFileSize = 64 << 20 // Allow easy space reclamation.
   198  }