github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/lsmkv/compactor_set.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package lsmkv
    13  
    14  import (
    15  	"bufio"
    16  	"bytes"
    17  	"io"
    18  
    19  	"github.com/pkg/errors"
    20  	"github.com/weaviate/weaviate/adapters/repos/db/lsmkv/segmentindex"
    21  )
    22  
// compactorSet merges two set-strategy segments into one. Unlike the replace
// strategy, values for identical keys are not overwritten: both segments'
// values are combined (c1's first, then c2's) and merged via the set decoder.
type compactorSet struct {
	// c1 is always the older segment, c2 the newer one. When both contain the
	// same key, their value lists are concatenated and merged in writeKeys.
	c1 *segmentCursorCollection
	c2 *segmentCursorCollection

	// the level matching those of the cursors
	currentLevel        uint16
	secondaryIndexCount uint16
	// Tells if tombstones or keys without corresponding values
	// can be removed from merged segment.
	// (left segment is root (1st) one, keepTombstones is off for bucket)
	cleanupTombstones bool

	// w is the destination segment file; it must support seeking so the
	// header can be rewritten at the end (see writeHeader).
	w    io.WriteSeeker
	// bufw buffers writes to w; it must be flushed before any seek on w.
	bufw *bufio.Writer

	scratchSpacePath string
}
    42  
    43  func newCompactorSetCollection(w io.WriteSeeker,
    44  	c1, c2 *segmentCursorCollection, level, secondaryIndexCount uint16,
    45  	scratchSpacePath string, cleanupTombstones bool,
    46  ) *compactorSet {
    47  	return &compactorSet{
    48  		c1:                  c1,
    49  		c2:                  c2,
    50  		w:                   w,
    51  		bufw:                bufio.NewWriterSize(w, 256*1024),
    52  		currentLevel:        level,
    53  		cleanupTombstones:   cleanupTombstones,
    54  		secondaryIndexCount: secondaryIndexCount,
    55  		scratchSpacePath:    scratchSpacePath,
    56  	}
    57  }
    58  
    59  func (c *compactorSet) do() error {
    60  	if err := c.init(); err != nil {
    61  		return errors.Wrap(err, "init")
    62  	}
    63  
    64  	kis, err := c.writeKeys()
    65  	if err != nil {
    66  		return errors.Wrap(err, "write keys")
    67  	}
    68  
    69  	if err := c.writeIndices(kis); err != nil {
    70  		return errors.Wrap(err, "write index")
    71  	}
    72  
    73  	// flush buffered, so we can safely seek on underlying writer
    74  	if err := c.bufw.Flush(); err != nil {
    75  		return errors.Wrap(err, "flush buffered")
    76  	}
    77  
    78  	var dataEnd uint64 = segmentindex.HeaderSize
    79  	if len(kis) > 0 {
    80  		dataEnd = uint64(kis[len(kis)-1].ValueEnd)
    81  	}
    82  
    83  	if err := c.writeHeader(c.currentLevel, 0, c.secondaryIndexCount,
    84  		dataEnd); err != nil {
    85  		return errors.Wrap(err, "write header")
    86  	}
    87  
    88  	return nil
    89  }
    90  
    91  func (c *compactorSet) init() error {
    92  	// write a dummy header, we don't know the contents of the actual header yet,
    93  	// we will seek to the beginning and overwrite the actual header at the very
    94  	// end
    95  
    96  	if _, err := c.bufw.Write(make([]byte, segmentindex.HeaderSize)); err != nil {
    97  		return errors.Wrap(err, "write empty header")
    98  	}
    99  
   100  	return nil
   101  }
   102  
   103  func (c *compactorSet) writeKeys() ([]segmentindex.Key, error) {
   104  	key1, value1, _ := c.c1.first()
   105  	key2, value2, _ := c.c2.first()
   106  
   107  	// the (dummy) header was already written, this is our initial offset
   108  	offset := segmentindex.HeaderSize
   109  
   110  	var kis []segmentindex.Key
   111  
   112  	for {
   113  		if key1 == nil && key2 == nil {
   114  			break
   115  		}
   116  		if bytes.Equal(key1, key2) {
   117  			values := append(value1, value2...)
   118  			valuesMerged := newSetDecoder().DoPartial(values)
   119  			if values, skip := c.cleanupValues(valuesMerged); !skip {
   120  				ki, err := c.writeIndividualNode(offset, key2, values)
   121  				if err != nil {
   122  					return nil, errors.Wrap(err, "write individual node (equal keys)")
   123  				}
   124  
   125  				offset = ki.ValueEnd
   126  				kis = append(kis, ki)
   127  			}
   128  			// advance both!
   129  			key1, value1, _ = c.c1.next()
   130  			key2, value2, _ = c.c2.next()
   131  			continue
   132  		}
   133  
   134  		if (key1 != nil && bytes.Compare(key1, key2) == -1) || key2 == nil {
   135  			// key 1 is smaller
   136  			if values, skip := c.cleanupValues(value1); !skip {
   137  				ki, err := c.writeIndividualNode(offset, key1, values)
   138  				if err != nil {
   139  					return nil, errors.Wrap(err, "write individual node (key1 smaller)")
   140  				}
   141  
   142  				offset = ki.ValueEnd
   143  				kis = append(kis, ki)
   144  			}
   145  			key1, value1, _ = c.c1.next()
   146  		} else {
   147  			// key 2 is smaller
   148  			if values, skip := c.cleanupValues(value2); !skip {
   149  				ki, err := c.writeIndividualNode(offset, key2, values)
   150  				if err != nil {
   151  					return nil, errors.Wrap(err, "write individual node (key2 smaller)")
   152  				}
   153  
   154  				offset = ki.ValueEnd
   155  				kis = append(kis, ki)
   156  			}
   157  			key2, value2, _ = c.c2.next()
   158  		}
   159  	}
   160  
   161  	return kis, nil
   162  }
   163  
   164  func (c *compactorSet) writeIndividualNode(offset int, key []byte,
   165  	values []value,
   166  ) (segmentindex.Key, error) {
   167  	return (&segmentCollectionNode{
   168  		values:     values,
   169  		primaryKey: key,
   170  		offset:     offset,
   171  	}).KeyIndexAndWriteTo(c.bufw)
   172  }
   173  
   174  func (c *compactorSet) writeIndices(keys []segmentindex.Key) error {
   175  	indices := &segmentindex.Indexes{
   176  		Keys:                keys,
   177  		SecondaryIndexCount: c.secondaryIndexCount,
   178  		ScratchSpacePath:    c.scratchSpacePath,
   179  	}
   180  
   181  	_, err := indices.WriteTo(c.bufw)
   182  	return err
   183  }
   184  
   185  // writeHeader assumes that everything has been written to the underlying
   186  // writer and it is now safe to seek to the beginning and override the initial
   187  // header
   188  func (c *compactorSet) writeHeader(level, version, secondaryIndices uint16,
   189  	startOfIndex uint64,
   190  ) error {
   191  	if _, err := c.w.Seek(0, io.SeekStart); err != nil {
   192  		return errors.Wrap(err, "seek to beginning to write header")
   193  	}
   194  
   195  	h := &segmentindex.Header{
   196  		Level:            level,
   197  		Version:          version,
   198  		SecondaryIndices: secondaryIndices,
   199  		Strategy:         segmentindex.StrategySetCollection,
   200  		IndexStart:       startOfIndex,
   201  	}
   202  
   203  	if _, err := h.WriteTo(c.w); err != nil {
   204  		return err
   205  	}
   206  
   207  	return nil
   208  }
   209  
   210  // Removes values with tombstone set from input slice. Output slice may be smaller than input one.
   211  // Returned skip of true means there are no values left (key can be omitted in segment)
   212  // WARN: method can alter input slice by swapping its elements and reducing length (not capacity)
   213  func (c *compactorSet) cleanupValues(values []value) (vals []value, skip bool) {
   214  	if !c.cleanupTombstones {
   215  		return values, false
   216  	}
   217  
   218  	// Reuse input slice not to allocate new memory
   219  	// Rearrange slice in a way that tombstoned values are moved to the end
   220  	// and reduce slice's length.
   221  	last := 0
   222  	for i := 0; i < len(values); i++ {
   223  		if !values[i].tombstone {
   224  			// Swap both elements instead overwritting `last` by `i`.
   225  			// Overwrite would result in `values[last].value` pointing to the same slice
   226  			// as `values[i].value`.
   227  			// If `values` slice is reused by multiple nodes (as it happens for map cursors
   228  			// `segmentCursorCollectionReusable` using `segmentCollectionNode` as buffer)
   229  			// populating values[i].value would overwrite values[last].value
   230  			// Swaps makes sure values[i].value and values[last].value point to different slices
   231  			values[last], values[i] = values[i], values[last]
   232  			last++
   233  		}
   234  	}
   235  
   236  	if last == 0 {
   237  		return nil, true
   238  	}
   239  	return values[:last], false
   240  }