github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/lsmkv/memtable.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package lsmkv
    13  
    14  import (
    15  	"path/filepath"
    16  	"sync"
    17  	"time"
    18  
    19  	"github.com/pkg/errors"
    20  	"github.com/weaviate/weaviate/adapters/repos/db/roaringset"
    21  	"github.com/weaviate/weaviate/entities/lsmkv"
    22  )
    23  
// Memtable is the in-memory write buffer of an LSM bucket. Which of the
// internal trees is actually used depends on the configured strategy. The
// embedded RWMutex guards all fields: read accessors take the read lock,
// mutating methods take the write lock.
type Memtable struct {
	sync.RWMutex
	// key holds the entries for the "replace" strategy (get/put/setTombstone).
	key                *binarySearchTree
	// keyMulti holds the entries written via append and read via getCollection.
	keyMulti           *binarySearchTreeMulti
	// keyMap holds the entries written via appendMapSorted and read via getMap.
	keyMap             *binarySearchTreeMap
	// primaryIndex is initialized in newMemtable; its use is not visible in
	// this file — NOTE(review): confirm whether it is still needed.
	primaryIndex       *binarySearchTree
	// roaringSet is initialized in newMemtable; presumably backs a roaring-set
	// strategy handled elsewhere — TODO confirm.
	roaringSet         *roaringset.BinarySearchTree
	// commitlog receives every write before the in-memory trees are updated.
	commitlog          *commitLogger
	// size is a running byte count of the data added to the memtable; it is
	// reported to the metrics after every write.
	size               uint64
	path               string
	strategy           string
	// secondaryIndices is the number of secondary indices (replace strategy).
	secondaryIndices   uint16
	// secondaryToPrimary has one map per secondary index position, resolving
	// a secondary key (as string) to the primary key it belongs to. It is nil
	// when secondaryIndices is 0.
	secondaryToPrimary []map[string][]byte
	// stores time memtable got dirty to determine when flush is needed
	dirtyAt   time.Time
	// createdAt is the construction time, used by ActiveDuration.
	createdAt time.Time
	metrics   *memtableMetrics
}
    42  
    43  func newMemtable(path string, strategy string,
    44  	secondaryIndices uint16, cl *commitLogger, metrics *Metrics,
    45  ) (*Memtable, error) {
    46  	m := &Memtable{
    47  		key:              &binarySearchTree{},
    48  		keyMulti:         &binarySearchTreeMulti{},
    49  		keyMap:           &binarySearchTreeMap{},
    50  		primaryIndex:     &binarySearchTree{}, // todo, sort upfront
    51  		roaringSet:       &roaringset.BinarySearchTree{},
    52  		commitlog:        cl,
    53  		path:             path,
    54  		strategy:         strategy,
    55  		secondaryIndices: secondaryIndices,
    56  		dirtyAt:          time.Time{},
    57  		createdAt:        time.Now(),
    58  		metrics:          newMemtableMetrics(metrics, filepath.Dir(path), strategy),
    59  	}
    60  
    61  	if m.secondaryIndices > 0 {
    62  		m.secondaryToPrimary = make([]map[string][]byte, m.secondaryIndices)
    63  		for i := range m.secondaryToPrimary {
    64  			m.secondaryToPrimary[i] = map[string][]byte{}
    65  		}
    66  	}
    67  
    68  	m.metrics.size(m.size)
    69  
    70  	return m, nil
    71  }
    72  
    73  func (m *Memtable) get(key []byte) ([]byte, error) {
    74  	start := time.Now()
    75  	defer m.metrics.get(start.UnixNano())
    76  
    77  	if m.strategy != StrategyReplace {
    78  		return nil, errors.Errorf("get only possible with strategy 'replace'")
    79  	}
    80  
    81  	m.RLock()
    82  	defer m.RUnlock()
    83  
    84  	v, err := m.key.get(key)
    85  	if err != nil {
    86  		return nil, err
    87  	}
    88  
    89  	return v, nil
    90  }
    91  
    92  func (m *Memtable) getBySecondary(pos int, key []byte) ([]byte, error) {
    93  	start := time.Now()
    94  	defer m.metrics.getBySecondary(start.UnixNano())
    95  
    96  	if m.strategy != StrategyReplace {
    97  		return nil, errors.Errorf("get only possible with strategy 'replace'")
    98  	}
    99  
   100  	m.RLock()
   101  	defer m.RUnlock()
   102  
   103  	primary := m.secondaryToPrimary[pos][string(key)]
   104  	if primary == nil {
   105  		return nil, lsmkv.NotFound
   106  	}
   107  
   108  	v, err := m.key.get(primary)
   109  	if err != nil {
   110  		return nil, err
   111  	}
   112  
   113  	return v, nil
   114  }
   115  
   116  func (m *Memtable) put(key, value []byte, opts ...SecondaryKeyOption) error {
   117  	start := time.Now()
   118  	defer m.metrics.put(start.UnixNano())
   119  
   120  	if m.strategy != StrategyReplace {
   121  		return errors.Errorf("put only possible with strategy 'replace'")
   122  	}
   123  
   124  	m.Lock()
   125  	defer m.Unlock()
   126  
   127  	var secondaryKeys [][]byte
   128  	if m.secondaryIndices > 0 {
   129  		secondaryKeys = make([][]byte, m.secondaryIndices)
   130  		for _, opt := range opts {
   131  			if err := opt(secondaryKeys); err != nil {
   132  				return err
   133  			}
   134  		}
   135  	}
   136  
   137  	if err := m.commitlog.put(segmentReplaceNode{
   138  		primaryKey:          key,
   139  		value:               value,
   140  		secondaryIndexCount: m.secondaryIndices,
   141  		secondaryKeys:       secondaryKeys,
   142  		tombstone:           false,
   143  	}); err != nil {
   144  		return errors.Wrap(err, "write into commit log")
   145  	}
   146  
   147  	netAdditions, previousKeys := m.key.insert(key, value, secondaryKeys)
   148  
   149  	for i, sec := range previousKeys {
   150  		m.secondaryToPrimary[i][string(sec)] = nil
   151  	}
   152  
   153  	for i, sec := range secondaryKeys {
   154  		m.secondaryToPrimary[i][string(sec)] = key
   155  	}
   156  
   157  	m.size += uint64(netAdditions)
   158  	m.metrics.size(m.size)
   159  	m.updateDirtyAt()
   160  
   161  	return nil
   162  }
   163  
   164  func (m *Memtable) setTombstone(key []byte, opts ...SecondaryKeyOption) error {
   165  	start := time.Now()
   166  	defer m.metrics.setTombstone(start.UnixNano())
   167  
   168  	if m.strategy != "replace" {
   169  		return errors.Errorf("setTombstone only possible with strategy 'replace'")
   170  	}
   171  
   172  	m.Lock()
   173  	defer m.Unlock()
   174  
   175  	var secondaryKeys [][]byte
   176  	if m.secondaryIndices > 0 {
   177  		secondaryKeys = make([][]byte, m.secondaryIndices)
   178  		for _, opt := range opts {
   179  			if err := opt(secondaryKeys); err != nil {
   180  				return err
   181  			}
   182  		}
   183  	}
   184  
   185  	if err := m.commitlog.put(segmentReplaceNode{
   186  		primaryKey:          key,
   187  		value:               nil,
   188  		secondaryIndexCount: m.secondaryIndices,
   189  		secondaryKeys:       secondaryKeys,
   190  		tombstone:           true,
   191  	}); err != nil {
   192  		return errors.Wrap(err, "write into commit log")
   193  	}
   194  
   195  	m.key.setTombstone(key, secondaryKeys)
   196  	m.size += uint64(len(key)) + 1 // 1 byte for tombstone
   197  	m.metrics.size(m.size)
   198  	m.updateDirtyAt()
   199  
   200  	return nil
   201  }
   202  
   203  func (m *Memtable) getCollection(key []byte) ([]value, error) {
   204  	start := time.Now()
   205  	defer m.metrics.getCollection(start.UnixNano())
   206  
   207  	if m.strategy != StrategySetCollection && m.strategy != StrategyMapCollection {
   208  		return nil, errors.Errorf("getCollection only possible with strategies %q, %q",
   209  			StrategySetCollection, StrategyMapCollection)
   210  	}
   211  
   212  	m.RLock()
   213  	defer m.RUnlock()
   214  
   215  	v, err := m.keyMulti.get(key)
   216  	if err != nil {
   217  		return nil, err
   218  	}
   219  
   220  	return v, nil
   221  }
   222  
   223  func (m *Memtable) getMap(key []byte) ([]MapPair, error) {
   224  	start := time.Now()
   225  	defer m.metrics.getMap(start.UnixNano())
   226  
   227  	if m.strategy != StrategyMapCollection {
   228  		return nil, errors.Errorf("getCollection only possible with strategy %q",
   229  			StrategyMapCollection)
   230  	}
   231  
   232  	m.RLock()
   233  	defer m.RUnlock()
   234  
   235  	v, err := m.keyMap.get(key)
   236  	if err != nil {
   237  		return nil, err
   238  	}
   239  
   240  	return v, nil
   241  }
   242  
   243  func (m *Memtable) append(key []byte, values []value) error {
   244  	start := time.Now()
   245  	defer m.metrics.append(start.UnixNano())
   246  
   247  	if m.strategy != StrategySetCollection && m.strategy != StrategyMapCollection {
   248  		return errors.Errorf("append only possible with strategies %q, %q",
   249  			StrategySetCollection, StrategyMapCollection)
   250  	}
   251  
   252  	m.Lock()
   253  	defer m.Unlock()
   254  	if err := m.commitlog.append(segmentCollectionNode{
   255  		primaryKey: key,
   256  		values:     values,
   257  	}); err != nil {
   258  		return errors.Wrap(err, "write into commit log")
   259  	}
   260  
   261  	m.keyMulti.insert(key, values)
   262  	m.size += uint64(len(key))
   263  	for _, value := range values {
   264  		m.size += uint64(len(value.value))
   265  	}
   266  	m.metrics.size(m.size)
   267  	m.updateDirtyAt()
   268  
   269  	return nil
   270  }
   271  
   272  func (m *Memtable) appendMapSorted(key []byte, pair MapPair) error {
   273  	start := time.Now()
   274  	defer m.metrics.appendMapSorted(start.UnixNano())
   275  
   276  	if m.strategy != StrategyMapCollection {
   277  		return errors.Errorf("append only possible with strategy %q",
   278  			StrategyMapCollection)
   279  	}
   280  
   281  	m.Lock()
   282  	defer m.Unlock()
   283  
   284  	valuesForCommitLog, err := pair.Bytes()
   285  	if err != nil {
   286  		return err
   287  	}
   288  
   289  	if err := m.commitlog.append(segmentCollectionNode{
   290  		primaryKey: key,
   291  		values: []value{
   292  			{
   293  				value:     valuesForCommitLog,
   294  				tombstone: pair.Tombstone,
   295  			},
   296  		},
   297  	}); err != nil {
   298  		return errors.Wrap(err, "write into commit log")
   299  	}
   300  
   301  	m.keyMap.insert(key, pair)
   302  	m.size += uint64(len(key) + len(valuesForCommitLog))
   303  	m.metrics.size(m.size)
   304  	m.updateDirtyAt()
   305  
   306  	return nil
   307  }
   308  
   309  func (m *Memtable) Size() uint64 {
   310  	m.RLock()
   311  	defer m.RUnlock()
   312  
   313  	return m.size
   314  }
   315  
   316  func (m *Memtable) ActiveDuration() time.Duration {
   317  	m.RLock()
   318  	defer m.RUnlock()
   319  
   320  	return time.Since(m.createdAt)
   321  }
   322  
   323  func (m *Memtable) updateDirtyAt() {
   324  	if m.dirtyAt.IsZero() {
   325  		m.dirtyAt = time.Now()
   326  	}
   327  }
   328  
   329  // returns time memtable got dirty (1st write occurred)
   330  // (0 if clean)
   331  func (m *Memtable) DirtyDuration() time.Duration {
   332  	m.RLock()
   333  	defer m.RUnlock()
   334  
   335  	if m.dirtyAt.IsZero() {
   336  		return 0
   337  	}
   338  	return time.Since(m.dirtyAt)
   339  }
   340  
   341  func (m *Memtable) countStats() *countStats {
   342  	m.RLock()
   343  	defer m.RUnlock()
   344  	return m.key.countStats()
   345  }
   346  
   347  // the WAL uses a buffer and isn't written until the buffer size is crossed or
   348  // this function explicitly called. This allows to safge unnecessary disk
   349  // writes in larger operations, such as batches. It is sufficient to call write
   350  // on the WAL just once. This does not make a batch atomic, but it guarantees
   351  // that the WAL is written before a successful response is returned to the
   352  // user.
   353  func (m *Memtable) writeWAL() error {
   354  	m.Lock()
   355  	defer m.Unlock()
   356  
   357  	return m.commitlog.flushBuffers()
   358  }