github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/lsmkv/bucket_options.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package lsmkv
    13  
    14  import (
    15  	"time"
    16  
    17  	"github.com/pkg/errors"
    18  )
    19  
    20  type BucketOption func(b *Bucket) error
    21  
    22  func WithStrategy(strategy string) BucketOption {
    23  	return func(b *Bucket) error {
    24  		switch strategy {
    25  		case StrategyReplace, StrategyMapCollection, StrategySetCollection,
    26  			StrategyRoaringSet:
    27  		default:
    28  			return errors.Errorf("unrecognized strategy %q", strategy)
    29  		}
    30  
    31  		b.strategy = strategy
    32  		return nil
    33  	}
    34  }
    35  
    36  func WithMemtableThreshold(threshold uint64) BucketOption {
    37  	return func(b *Bucket) error {
    38  		b.memtableThreshold = threshold
    39  		return nil
    40  	}
    41  }
    42  
    43  func WithWalThreshold(threshold uint64) BucketOption {
    44  	return func(b *Bucket) error {
    45  		b.walThreshold = threshold
    46  		return nil
    47  	}
    48  }
    49  
    50  func WithDirtyThreshold(threshold time.Duration) BucketOption {
    51  	return func(b *Bucket) error {
    52  		b.flushDirtyAfter = threshold
    53  		return nil
    54  	}
    55  }
    56  
    57  func WithSecondaryIndices(count uint16) BucketOption {
    58  	return func(b *Bucket) error {
    59  		b.secondaryIndices = count
    60  		return nil
    61  	}
    62  }
    63  
    64  func WithLegacyMapSorting() BucketOption {
    65  	return func(b *Bucket) error {
    66  		b.legacyMapSortingBeforeCompaction = true
    67  		return nil
    68  	}
    69  }
    70  
    71  func WithPread(with bool) BucketOption {
    72  	return func(b *Bucket) error {
    73  		b.mmapContents = !with
    74  		return nil
    75  	}
    76  }
    77  
    78  func WithDynamicMemtableSizing(
    79  	initialMB, maxMB, minActiveSeconds, maxActiveSeconds int,
    80  ) BucketOption {
    81  	return func(b *Bucket) error {
    82  		mb := 1024 * 1024
    83  		cfg := memtableSizeAdvisorCfg{
    84  			initial:     initialMB * mb,
    85  			stepSize:    10 * mb,
    86  			maxSize:     maxMB * mb,
    87  			minDuration: time.Duration(minActiveSeconds) * time.Second,
    88  			maxDuration: time.Duration(maxActiveSeconds) * time.Second,
    89  		}
    90  		b.memtableResizer = newMemtableSizeAdvisor(cfg)
    91  		return nil
    92  	}
    93  }
    94  
    95  type secondaryIndexKeys [][]byte
    96  
    97  type SecondaryKeyOption func(s secondaryIndexKeys) error
    98  
    99  func WithSecondaryKey(pos int, key []byte) SecondaryKeyOption {
   100  	return func(s secondaryIndexKeys) error {
   101  		if pos > len(s) {
   102  			return errors.Errorf("set secondary index %d on an index of length %d",
   103  				pos, len(s))
   104  		}
   105  
   106  		s[pos] = key
   107  
   108  		return nil
   109  	}
   110  }
   111  
   112  func WithMonitorCount() BucketOption {
   113  	return func(b *Bucket) error {
   114  		if b.strategy != StrategyReplace {
   115  			return errors.Errorf("count monitoring only supported on 'replace' buckets")
   116  		}
   117  		b.monitorCount = true
   118  		return nil
   119  	}
   120  }
   121  
   122  func WithKeepTombstones(keepTombstones bool) BucketOption {
   123  	return func(b *Bucket) error {
   124  		b.keepTombstones = keepTombstones
   125  		return nil
   126  	}
   127  }
   128  
   129  func WithUseBloomFilter(useBloomFilter bool) BucketOption {
   130  	return func(b *Bucket) error {
   131  		b.useBloomFilter = useBloomFilter
   132  		return nil
   133  	}
   134  }
   135  
   136  func WithCalcCountNetAdditions(calcCountNetAdditions bool) BucketOption {
   137  	return func(b *Bucket) error {
   138  		b.calcCountNetAdditions = calcCountNetAdditions
   139  		return nil
   140  	}
   141  }
   142  
   143  /*
   144  Background for this option:
   145  
   146  We use the LSM store in two places:
   147  Our existing key/value and inverted buckets
   148  As part of the new brute-force based index (to be built this week).
   149  
   150  Brute-force index
   151  This is a simple disk-index where we use a cursor to iterate over all objects. This is what we need the force-compaction for. The experimentation so far has shown that the cursor is much more performant on a single segment than it is on multiple segments. This is because with a single segment it’s essentially just one conitiguuous chunk of data on disk that we read through. But with multiple segments (and an unpredicatable order) it ends up being many tiny reads (inefficient).
   152  Existing uses of the LSM store
   153  For existing uses, e.g. the object store, we don’t want to force-compact. This is because they can grow massive. For example, you could have a 100GB segment, then a new write leads to a new segment that is just a few bytes. If we would force-compact those two we would write 100GB every time the user sends a few bytes to Weaviate. In this case, the existing tiered compaction strategy makes more sense.
   154  Configurability of buckets
   155  */
   156  func WithForceCompation(opt bool) BucketOption {
   157  	return func(b *Bucket) error {
   158  		b.forceCompaction = opt
   159  		return nil
   160  	}
   161  }