github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/lsmkv/compactor_map.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package lsmkv
    13  
    14  import (
    15  	"bufio"
    16  	"bytes"
    17  	"io"
    18  	"sort"
    19  
    20  	"github.com/pkg/errors"
    21  	"github.com/weaviate/weaviate/adapters/repos/db/lsmkv/segmentindex"
    22  )
    23  
// compactorMap merges two map-strategy (StrategyMapCollection) segments into
// a single new segment. Both inputs are streamed through collection cursors
// and the merged output goes through a buffered writer on top of w.
type compactorMap struct {
	// c1 is always the older segment, so when there is a conflict c2 wins
	// (because of the replace strategy)
	c1 *segmentCursorCollectionReusable
	c2 *segmentCursorCollectionReusable

	// the level matching those of the cursors
	currentLevel uint16
	// number of secondary indexes the new segment declares in its header
	secondaryIndexCount uint16
	// Tells if tombstones or keys without corresponding values
	// can be removed from merged segment.
	// (left segment is root (1st) one, keepTombstones is off for bucket)
	cleanupTombstones bool

	// w is the raw destination; it must support seeking because the real
	// header is written at offset 0 only at the very end (see writeHeader).
	// bufw buffers all other writes on top of w.
	w    io.WriteSeeker
	bufw *bufio.Writer

	scratchSpacePath string

	// for backward-compatibility with states where the disk state for maps was
	// not guaranteed to be sorted yet
	requiresSorting bool
}
    47  
    48  func newCompactorMapCollection(w io.WriteSeeker,
    49  	c1, c2 *segmentCursorCollectionReusable, level, secondaryIndexCount uint16,
    50  	scratchSpacePath string, requiresSorting bool, cleanupTombstones bool,
    51  ) *compactorMap {
    52  	return &compactorMap{
    53  		c1:                  c1,
    54  		c2:                  c2,
    55  		w:                   w,
    56  		bufw:                bufio.NewWriterSize(w, 256*1024),
    57  		currentLevel:        level,
    58  		cleanupTombstones:   cleanupTombstones,
    59  		secondaryIndexCount: secondaryIndexCount,
    60  		scratchSpacePath:    scratchSpacePath,
    61  		requiresSorting:     requiresSorting,
    62  	}
    63  }
    64  
    65  func (c *compactorMap) do() error {
    66  	if err := c.init(); err != nil {
    67  		return errors.Wrap(err, "init")
    68  	}
    69  
    70  	kis, err := c.writeKeys()
    71  	if err != nil {
    72  		return errors.Wrap(err, "write keys")
    73  	}
    74  
    75  	if err := c.writeIndices(kis); err != nil {
    76  		return errors.Wrap(err, "write index")
    77  	}
    78  
    79  	// flush buffered, so we can safely seek on underlying writer
    80  	if err := c.bufw.Flush(); err != nil {
    81  		return errors.Wrap(err, "flush buffered")
    82  	}
    83  
    84  	var dataEnd uint64 = segmentindex.HeaderSize
    85  	if len(kis) > 0 {
    86  		dataEnd = uint64(kis[len(kis)-1].ValueEnd)
    87  	}
    88  
    89  	if err := c.writeHeader(c.currentLevel, 0, c.secondaryIndexCount,
    90  		dataEnd); err != nil {
    91  		return errors.Wrap(err, "write header")
    92  	}
    93  
    94  	return nil
    95  }
    96  
    97  func (c *compactorMap) init() error {
    98  	// write a dummy header, we don't know the contents of the actual header yet,
    99  	// we will seek to the beginning and overwrite the actual header at the very
   100  	// end
   101  
   102  	if _, err := c.bufw.Write(make([]byte, segmentindex.HeaderSize)); err != nil {
   103  		return errors.Wrap(err, "write empty header")
   104  	}
   105  
   106  	return nil
   107  }
   108  
// writeKeys advances both cursors in lockstep, writing one merged collection
// node per surviving key, in ascending key order, to the buffered writer. It
// returns the key index entries needed later to build the segment index.
//
// When both cursors sit on the same key, the two value lists are merged via
// the sorted-map merger (c2 is the newer segment and wins on conflicts, per
// the struct comment); otherwise the smaller key is written as-is and only
// that cursor advances.
func (c *compactorMap) writeKeys() ([]segmentindex.Key, error) {
	key1, value1, _ := c.c1.first()
	key2, value2, _ := c.c2.first()

	// the (dummy) header was already written, this is our initial offset
	offset := segmentindex.HeaderSize

	var kis []segmentindex.Key
	// reusable scratch structures so the hot loop below avoids per-key
	// allocations
	pairs := newReusableMapPairs()
	me := newMapEncoder()
	ssm := newSortedMapMerger()

	for {
		if key1 == nil && key2 == nil {
			// both cursors exhausted
			break
		}
		if bytes.Equal(key1, key2) {
			// same key in both segments: decode both value lists into the
			// reusable MapPair buffers, then merge
			pairs.ResizeLeft(len(value1))
			pairs.ResizeRight(len(value2))

			for i, v := range value1 {
				if err := pairs.left[i].FromBytes(v.value, false); err != nil {
					return nil, err
				}
				pairs.left[i].Tombstone = v.tombstone
			}

			for i, v := range value2 {
				if err := pairs.right[i].FromBytes(v.value, false); err != nil {
					return nil, err
				}
				pairs.right[i].Tombstone = v.tombstone
			}

			// legacy segments may contain unsorted map pairs; the merger
			// needs sorted input (see requiresSorting on the struct)
			if c.requiresSorting {
				sort.Slice(pairs.left, func(a, b int) bool {
					return bytes.Compare(pairs.left[a].Key, pairs.left[b].Key) < 0
				})
				sort.Slice(pairs.right, func(a, b int) bool {
					return bytes.Compare(pairs.right[a].Key, pairs.right[b].Key) < 0
				})
			}

			// tombstones are kept through the merge; cleanupValues below
			// decides whether they may be dropped from the output
			ssm.reset([][]MapPair{pairs.left, pairs.right})
			mergedPairs, err := ssm.
				doKeepTombstonesReusable()
			if err != nil {
				return nil, err
			}

			mergedEncoded, err := me.DoMultiReusable(mergedPairs)
			if err != nil {
				return nil, err
			}

			if values, skip := c.cleanupValues(mergedEncoded); !skip {
				ki, err := c.writeIndividualNode(offset, key2, values)
				if err != nil {
					return nil, errors.Wrap(err, "write individual node (equal keys)")
				}

				offset = ki.ValueEnd
				kis = append(kis, ki)
			}
			// advance both!
			key1, value1, _ = c.c1.next()
			key2, value2, _ = c.c2.next()
			continue
		}

		if (key1 != nil && bytes.Compare(key1, key2) == -1) || key2 == nil {
			// key 1 is smaller
			if values, skip := c.cleanupValues(value1); !skip {
				ki, err := c.writeIndividualNode(offset, key1, values)
				if err != nil {
					return nil, errors.Wrap(err, "write individual node (key1 smaller)")
				}

				offset = ki.ValueEnd
				kis = append(kis, ki)
			}
			key1, value1, _ = c.c1.next()
		} else {
			// key 2 is smaller
			if values, skip := c.cleanupValues(value2); !skip {
				ki, err := c.writeIndividualNode(offset, key2, values)
				if err != nil {
					return nil, errors.Wrap(err, "write individual node (key2 smaller)")
				}

				offset = ki.ValueEnd
				kis = append(kis, ki)
			}
			key2, value2, _ = c.c2.next()
		}
	}

	return kis, nil
}
   208  
   209  func (c *compactorMap) writeIndividualNode(offset int, key []byte,
   210  	values []value,
   211  ) (segmentindex.Key, error) {
   212  	// NOTE: There are no guarantees in the cursor logic that any memory is valid
   213  	// for more than a single iteration. Every time you call next() to advance
   214  	// the cursor, any memory might be reused.
   215  	//
   216  	// This includes the key buffer which was the cause of
   217  	// https://github.com/weaviate/weaviate/issues/3517
   218  	//
   219  	// A previous logic created a new assignment in each iteration, but thatwas
   220  	// not an explicit guarantee. A change in v1.21 (for pread/mmap) added a
   221  	// reusable buffer for the key which surfaced this bug.
   222  	keyCopy := make([]byte, len(key))
   223  	copy(keyCopy, key)
   224  
   225  	return segmentCollectionNode{
   226  		values:     values,
   227  		primaryKey: keyCopy,
   228  		offset:     offset,
   229  	}.KeyIndexAndWriteTo(c.bufw)
   230  }
   231  
   232  func (c *compactorMap) writeIndices(keys []segmentindex.Key) error {
   233  	indices := segmentindex.Indexes{
   234  		Keys:                keys,
   235  		SecondaryIndexCount: c.secondaryIndexCount,
   236  		ScratchSpacePath:    c.scratchSpacePath,
   237  	}
   238  
   239  	_, err := indices.WriteTo(c.bufw)
   240  	return err
   241  }
   242  
   243  // writeHeader assumes that everything has been written to the underlying
   244  // writer and it is now safe to seek to the beginning and override the initial
   245  // header
   246  func (c *compactorMap) writeHeader(level, version, secondaryIndices uint16,
   247  	startOfIndex uint64,
   248  ) error {
   249  	if _, err := c.w.Seek(0, io.SeekStart); err != nil {
   250  		return errors.Wrap(err, "seek to beginning to write header")
   251  	}
   252  
   253  	h := &segmentindex.Header{
   254  		Level:            level,
   255  		Version:          version,
   256  		SecondaryIndices: secondaryIndices,
   257  		Strategy:         segmentindex.StrategyMapCollection,
   258  		IndexStart:       startOfIndex,
   259  	}
   260  
   261  	if _, err := h.WriteTo(c.w); err != nil {
   262  		return err
   263  	}
   264  
   265  	return nil
   266  }
   267  
// Removes values with tombstone set from input slice. Output slice may be smaller than input one.
// Returned skip of true means there are no values left (key can be omitted in segment)
// WARN: method can alter input slice by swapping its elements and reducing length (not capacity)
func (c *compactorMap) cleanupValues(values []value) (vals []value, skip bool) {
	// tombstones must be preserved verbatim unless the bucket allows cleanup
	if !c.cleanupTombstones {
		return values, false
	}

	// Reuse input slice not to allocate new memory.
	// Rearrange slice in a way that tombstoned values are moved to the end
	// and reduce slice's length.
	last := 0
	for i := 0; i < len(values); i++ {
		if !values[i].tombstone {
			// Swap both elements instead of overwriting `last` with `i`.
			// Overwrite would result in `values[last].value` pointing to the same slice
			// as `values[i].value`.
			// If `values` slice is reused by multiple nodes (as it happens for map cursors
			// `segmentCursorCollectionReusable` using `segmentCollectionNode` as buffer)
			// populating slice `values[i].value` would overwrite slice `values[last].value`.
			// Swapping makes sure `values[i].value` and `values[last].value` point to
			// different slices.
			values[last], values[i] = values[i], values[last]
			last++
		}
	}

	// every value was a tombstone: signal the caller to omit the key entirely
	if last == 0 {
		return nil, true
	}
	return values[:last], false
}
   298  }