github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/lsmkv/bucket_options.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package lsmkv 13 14 import ( 15 "time" 16 17 "github.com/pkg/errors" 18 ) 19 20 type BucketOption func(b *Bucket) error 21 22 func WithStrategy(strategy string) BucketOption { 23 return func(b *Bucket) error { 24 switch strategy { 25 case StrategyReplace, StrategyMapCollection, StrategySetCollection, 26 StrategyRoaringSet: 27 default: 28 return errors.Errorf("unrecognized strategy %q", strategy) 29 } 30 31 b.strategy = strategy 32 return nil 33 } 34 } 35 36 func WithMemtableThreshold(threshold uint64) BucketOption { 37 return func(b *Bucket) error { 38 b.memtableThreshold = threshold 39 return nil 40 } 41 } 42 43 func WithWalThreshold(threshold uint64) BucketOption { 44 return func(b *Bucket) error { 45 b.walThreshold = threshold 46 return nil 47 } 48 } 49 50 func WithDirtyThreshold(threshold time.Duration) BucketOption { 51 return func(b *Bucket) error { 52 b.flushDirtyAfter = threshold 53 return nil 54 } 55 } 56 57 func WithSecondaryIndices(count uint16) BucketOption { 58 return func(b *Bucket) error { 59 b.secondaryIndices = count 60 return nil 61 } 62 } 63 64 func WithLegacyMapSorting() BucketOption { 65 return func(b *Bucket) error { 66 b.legacyMapSortingBeforeCompaction = true 67 return nil 68 } 69 } 70 71 func WithPread(with bool) BucketOption { 72 return func(b *Bucket) error { 73 b.mmapContents = !with 74 return nil 75 } 76 } 77 78 func WithDynamicMemtableSizing( 79 initialMB, maxMB, minActiveSeconds, maxActiveSeconds int, 80 ) BucketOption { 81 return func(b *Bucket) error { 82 mb := 1024 * 1024 83 cfg := memtableSizeAdvisorCfg{ 84 initial: initialMB * mb, 85 stepSize: 10 * mb, 86 maxSize: maxMB * mb, 87 minDuration: time.Duration(minActiveSeconds) * time.Second, 88 maxDuration: time.Duration(maxActiveSeconds) * time.Second, 89 } 90 b.memtableResizer = newMemtableSizeAdvisor(cfg) 91 return nil 92 } 93 } 94 95 type secondaryIndexKeys [][]byte 96 97 type SecondaryKeyOption func(s secondaryIndexKeys) error 98 99 func WithSecondaryKey(pos int, key []byte) SecondaryKeyOption { 100 return func(s secondaryIndexKeys) error { 101 if pos > len(s) { 102 return errors.Errorf("set secondary index %d on an index of length %d", 103 pos, len(s)) 104 } 105 106 s[pos] = key 107 108 return nil 109 } 110 } 111 112 func WithMonitorCount() BucketOption { 113 return func(b *Bucket) error { 114 if b.strategy != StrategyReplace { 115 return errors.Errorf("count monitoring only supported on 'replace' buckets") 116 } 117 b.monitorCount = true 118 return nil 119 } 120 } 121 122 func WithKeepTombstones(keepTombstones bool) BucketOption { 123 return func(b *Bucket) error { 124 b.keepTombstones = keepTombstones 125 return nil 126 } 127 } 128 129 func WithUseBloomFilter(useBloomFilter bool) BucketOption { 130 return func(b *Bucket) error { 131 b.useBloomFilter = useBloomFilter 132 return nil 133 } 134 } 135 136 func WithCalcCountNetAdditions(calcCountNetAdditions bool) BucketOption { 137 return func(b *Bucket) error { 138 b.calcCountNetAdditions = calcCountNetAdditions 139 return nil 140 } 141 } 142 143 /* 144 Background for this option: 145 146 We use the LSM store in two places: 147 Our existing key/value and inverted buckets 148 As part of the new brute-force based index (to be built this week). 149 150 Brute-force index 151 This is a simple disk-index where we use a cursor to iterate over all objects. This is what we need the force-compaction for. The experimentation so far has shown that the cursor is much more performant on a single segment than it is on multiple segments. This is because with a single segment it’s essentially just one conitiguuous chunk of data on disk that we read through. But with multiple segments (and an unpredicatable order) it ends up being many tiny reads (inefficient). 152 Existing uses of the LSM store 153 For existing uses, e.g. the object store, we don’t want to force-compact. This is because they can grow massive. For example, you could have a 100GB segment, then a new write leads to a new segment that is just a few bytes. If we would force-compact those two we would write 100GB every time the user sends a few bytes to Weaviate. In this case, the existing tiered compaction strategy makes more sense. 154 Configurability of buckets 155 */ 156 func WithForceCompation(opt bool) BucketOption { 157 return func(b *Bucket) error { 158 b.forceCompaction = opt 159 return nil 160 } 161 }