github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/lsmkv/memtable_flush.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package lsmkv
    13  
    14  import (
    15  	"bufio"
    16  	"fmt"
    17  	"io"
    18  	"os"
    19  
    20  	"github.com/pkg/errors"
    21  	"github.com/weaviate/weaviate/adapters/repos/db/lsmkv/segmentindex"
    22  )
    23  
    24  func (m *Memtable) flush() error {
    25  	// close the commit log first, this also forces it to be fsynced. If
    26  	// something fails there, don't proceed with flushing. The commit log will
    27  	// only be deleted at the very end, if the flush was successful
    28  	// (indicated by a successful close of the flush file - which indicates a
    29  	// successful fsync)
    30  
    31  	if err := m.commitlog.close(); err != nil {
    32  		return errors.Wrap(err, "close commit log file")
    33  	}
    34  
    35  	if m.Size() == 0 {
    36  		// this is an empty memtable, nothing to do
    37  		// however, we still have to cleanup the commit log, otherwise we will
    38  		// attempt to recover from it on the next cycle
    39  		if err := m.commitlog.delete(); err != nil {
    40  			return errors.Wrap(err, "delete commit log file")
    41  		}
    42  		return nil
    43  	}
    44  
    45  	f, err := os.OpenFile(m.path+".db", os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0o666)
    46  	if err != nil {
    47  		return err
    48  	}
    49  
    50  	w := bufio.NewWriter(f)
    51  
    52  	var keys []segmentindex.Key
    53  	switch m.strategy {
    54  	case StrategyReplace:
    55  		if keys, err = m.flushDataReplace(w); err != nil {
    56  			return err
    57  		}
    58  
    59  	case StrategySetCollection:
    60  		if keys, err = m.flushDataSet(w); err != nil {
    61  			return err
    62  		}
    63  
    64  	case StrategyRoaringSet:
    65  		if keys, err = m.flushDataRoaringSet(w); err != nil {
    66  			return err
    67  		}
    68  
    69  	case StrategyMapCollection:
    70  		if keys, err = m.flushDataMap(w); err != nil {
    71  			return err
    72  		}
    73  
    74  	default:
    75  		return fmt.Errorf("cannot flush strategy %s", m.strategy)
    76  	}
    77  
    78  	indices := &segmentindex.Indexes{
    79  		Keys:                keys,
    80  		SecondaryIndexCount: m.secondaryIndices,
    81  		ScratchSpacePath:    m.path + ".scratch.d",
    82  	}
    83  
    84  	if _, err := indices.WriteTo(w); err != nil {
    85  		return err
    86  	}
    87  
    88  	if err := w.Flush(); err != nil {
    89  		return err
    90  	}
    91  
    92  	if err := f.Sync(); err != nil {
    93  		return err
    94  	}
    95  
    96  	if err := f.Close(); err != nil {
    97  		return err
    98  	}
    99  
   100  	// only now that the file has been flushed is it safe to delete the commit log
   101  	// TODO: there might be an interest in keeping the commit logs around for
   102  	// longer as they might come in handy for replication
   103  	return m.commitlog.delete()
   104  }
   105  
   106  func (m *Memtable) flushDataReplace(f io.Writer) ([]segmentindex.Key, error) {
   107  	flat := m.key.flattenInOrder()
   108  
   109  	totalDataLength := totalKeyAndValueSize(flat)
   110  	perObjectAdditions := len(flat) * (1 + 8 + 4 + int(m.secondaryIndices)*4) // 1 byte for the tombstone, 8 bytes value length encoding, 4 bytes key length encoding, + 4 bytes key encoding for every secondary index
   111  	headerSize := segmentindex.HeaderSize
   112  	header := segmentindex.Header{
   113  		IndexStart:       uint64(totalDataLength + perObjectAdditions + headerSize),
   114  		Level:            0, // always level zero on a new one
   115  		Version:          0, // always version 0 for now
   116  		SecondaryIndices: m.secondaryIndices,
   117  		Strategy:         SegmentStrategyFromString(m.strategy),
   118  	}
   119  
   120  	n, err := header.WriteTo(f)
   121  	if err != nil {
   122  		return nil, err
   123  	}
   124  	headerSize = int(n)
   125  	keys := make([]segmentindex.Key, len(flat))
   126  
   127  	totalWritten := headerSize
   128  	for i, node := range flat {
   129  		segNode := &segmentReplaceNode{
   130  			offset:              totalWritten,
   131  			tombstone:           node.tombstone,
   132  			value:               node.value,
   133  			primaryKey:          node.key,
   134  			secondaryKeys:       node.secondaryKeys,
   135  			secondaryIndexCount: m.secondaryIndices,
   136  		}
   137  
   138  		ki, err := segNode.KeyIndexAndWriteTo(f)
   139  		if err != nil {
   140  			return nil, errors.Wrapf(err, "write node %d", i)
   141  		}
   142  
   143  		keys[i] = ki
   144  		totalWritten = ki.ValueEnd
   145  	}
   146  
   147  	return keys, nil
   148  }
   149  
   150  func (m *Memtable) flushDataSet(f io.Writer) ([]segmentindex.Key, error) {
   151  	flat := m.keyMulti.flattenInOrder()
   152  	return m.flushDataCollection(f, flat)
   153  }
   154  
   155  func (m *Memtable) flushDataMap(f io.Writer) ([]segmentindex.Key, error) {
   156  	m.RLock()
   157  	flat := m.keyMap.flattenInOrder()
   158  	m.RUnlock()
   159  
   160  	// by encoding each map pair we can force the same structure as for a
   161  	// collection, which means we can reuse the same flushing logic
   162  	asMulti := make([]*binarySearchNodeMulti, len(flat))
   163  	for i, mapNode := range flat {
   164  		asMulti[i] = &binarySearchNodeMulti{
   165  			key:    mapNode.key,
   166  			values: make([]value, len(mapNode.values)),
   167  		}
   168  
   169  		for j := range asMulti[i].values {
   170  			enc, err := mapNode.values[j].Bytes()
   171  			if err != nil {
   172  				return nil, err
   173  			}
   174  
   175  			asMulti[i].values[j] = value{
   176  				value:     enc,
   177  				tombstone: mapNode.values[j].Tombstone,
   178  			}
   179  		}
   180  
   181  	}
   182  	return m.flushDataCollection(f, asMulti)
   183  }
   184  
   185  func (m *Memtable) flushDataCollection(f io.Writer,
   186  	flat []*binarySearchNodeMulti,
   187  ) ([]segmentindex.Key, error) {
   188  	totalDataLength := totalValueSizeCollection(flat)
   189  	header := segmentindex.Header{
   190  		IndexStart:       uint64(totalDataLength + segmentindex.HeaderSize),
   191  		Level:            0, // always level zero on a new one
   192  		Version:          0, // always version 0 for now
   193  		SecondaryIndices: m.secondaryIndices,
   194  		Strategy:         SegmentStrategyFromString(m.strategy),
   195  	}
   196  
   197  	n, err := header.WriteTo(f)
   198  	if err != nil {
   199  		return nil, err
   200  	}
   201  	headerSize := int(n)
   202  	keys := make([]segmentindex.Key, len(flat))
   203  
   204  	totalWritten := headerSize
   205  	for i, node := range flat {
   206  		ki, err := (&segmentCollectionNode{
   207  			values:     node.values,
   208  			primaryKey: node.key,
   209  			offset:     totalWritten,
   210  		}).KeyIndexAndWriteTo(f)
   211  		if err != nil {
   212  			return nil, errors.Wrapf(err, "write node %d", i)
   213  		}
   214  
   215  		keys[i] = ki
   216  		totalWritten = ki.ValueEnd
   217  	}
   218  
   219  	return keys, nil
   220  }
   221  
   222  func totalKeyAndValueSize(in []*binarySearchNode) int {
   223  	var sum int
   224  	for _, n := range in {
   225  		sum += len(n.value)
   226  		sum += len(n.key)
   227  		for _, sec := range n.secondaryKeys {
   228  			sum += len(sec)
   229  		}
   230  	}
   231  
   232  	return sum
   233  }
   234  
   235  func totalValueSizeCollection(in []*binarySearchNodeMulti) int {
   236  	var sum int
   237  	for _, n := range in {
   238  		sum += 8 // uint64 to indicate array length
   239  		for _, v := range n.values {
   240  			sum += 1 // bool to indicate value tombstone
   241  			sum += 8 // uint64 to indicate value length
   242  			sum += len(v.value)
   243  		}
   244  
   245  		sum += 4 // uint32 to indicate key size
   246  		sum += len(n.key)
   247  	}
   248  
   249  	return sum
   250  }