github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/roaringset/compactor.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package roaringset
    13  
    14  import (
    15  	"bufio"
    16  	"bytes"
    17  	"fmt"
    18  	"io"
    19  
    20  	"github.com/pkg/errors"
    21  	"github.com/weaviate/sroar"
    22  	"github.com/weaviate/weaviate/adapters/repos/db/lsmkv/segmentindex"
    23  )
    24  
    25  // Compactor takes in a left and a right segment and merges them into a single
    26  // segment. The input segments are represented by cursors without their
    27  // respective segmentindexes. A new segmentindex is built from the merged nodes
    28  // without taking the old indexes into account at all.
    29  //
    30  // The left segment must precede the right one in its creation time, as the
     31  // compactor applies latest-takes-precedence rules when there is a conflict.
    32  //
    33  // # Merging independent key/value pairs
    34  //
    35  // The new segment's nodes will be in sorted fashion (this is a requirement for
    36  // the segment index and segment cursors to function). To achieve a sorted end
    37  // result, the Compactor goes over both input cursors simultaneously and always
    38  // works on the smaller of the two keys. After a key/value pair has been added
    39  // to the output only the input cursor that provided the pair is advanced.
    40  //
    41  // # Merging key/value pairs with identical keys
    42  //
     43  // When both segments have a key/value pair with an overlapping key, the value
    44  // has to be merged. The merge logic is not part of the compactor itself.
    45  // Instead it makes use of [BitmapLayers.Merge].
    46  //
     47  // # Exit Criterion
    48  //
    49  // When both cursors no longer return values, all key/value pairs are
    50  // considered compacted. The compactor then deals with metadata.
    51  //
    52  // # Index and Header metadata
    53  //
    54  // Only once the key/value pairs have been compacted, will the compactor write
    55  // the primary index based on the new key/value payload. Finally, the input
     56  // writer is rewound to be able to write the header metadata at the beginning
    57  // of the file. Because of this, the input writer must be an [io.WriteSeeker],
    58  // such as [*os.File].
    59  //
    60  // The level of the resulting segment is the input level increased by one.
    61  // Levels help the "eligible for compaction" cycle to find suitable compaction
    62  // pairs.
    63  type Compactor struct {
    64  	left, right  *SegmentCursor
    65  	currentLevel uint16
    66  	// Tells if deletions or keys without corresponding values
    67  	// can be removed from merged segment.
    68  	// (left segment is root (1st) one, keepTombstones is off for bucket)
    69  	cleanupDeletions bool
    70  
    71  	w    io.WriteSeeker
    72  	bufw *bufio.Writer
    73  
    74  	scratchSpacePath string
    75  }
    76  
    77  // NewCompactor from left (older) and right (newer) seeker. See [Compactor] for
    78  // an explanation of what goes on under the hood, and why the input
    79  // requirements are the way they are.
    80  func NewCompactor(w io.WriteSeeker,
    81  	left, right *SegmentCursor, level uint16,
    82  	scratchSpacePath string, cleanupDeletions bool,
    83  ) *Compactor {
    84  	return &Compactor{
    85  		left:             left,
    86  		right:            right,
    87  		w:                w,
    88  		bufw:             bufio.NewWriterSize(w, 256*1024),
    89  		currentLevel:     level,
    90  		cleanupDeletions: cleanupDeletions,
    91  		scratchSpacePath: scratchSpacePath,
    92  	}
    93  }
    94  
    95  // Do starts a compaction. See [Compactor] for an explanation of this process.
    96  func (c *Compactor) Do() error {
    97  	if err := c.init(); err != nil {
    98  		return fmt.Errorf("init: %w", err)
    99  	}
   100  
   101  	kis, err := c.writeNodes()
   102  	if err != nil {
   103  		return fmt.Errorf("write keys: %w", err)
   104  	}
   105  
   106  	if err := c.writeIndexes(kis); err != nil {
   107  		return fmt.Errorf("write index: %w", err)
   108  	}
   109  
   110  	// flush buffered, so we can safely seek on underlying writer
   111  	if err := c.bufw.Flush(); err != nil {
   112  		return fmt.Errorf("flush buffered: %w", err)
   113  	}
   114  
   115  	var dataEnd uint64 = segmentindex.HeaderSize
   116  	if len(kis) > 0 {
   117  		dataEnd = uint64(kis[len(kis)-1].ValueEnd)
   118  	}
   119  
   120  	if err := c.writeHeader(c.currentLevel, 0, 0,
   121  		dataEnd); err != nil {
   122  		return fmt.Errorf("write header: %w", err)
   123  	}
   124  
   125  	return nil
   126  }
   127  
   128  func (c *Compactor) init() error {
   129  	// write a dummy header, we don't know the contents of the actual header yet,
   130  	// we will seek to the beginning and overwrite the actual header at the very
   131  	// end
   132  
   133  	if _, err := c.bufw.Write(make([]byte, segmentindex.HeaderSize)); err != nil {
   134  		return errors.Wrap(err, "write empty header")
   135  	}
   136  
   137  	return nil
   138  }
   139  
   140  // nodeCompactor is a helper type to improve the code structure of merging
   141  // nodes in a compaction
   142  type nodeCompactor struct {
   143  	left, right           *SegmentCursor
   144  	keyLeft, keyRight     []byte
   145  	valueLeft, valueRight BitmapLayer
   146  	output                []segmentindex.Key
   147  	offset                int
   148  	bufw                  *bufio.Writer
   149  
   150  	cleanupDeletions bool
   151  	emptyBitmap      *sroar.Bitmap
   152  }
   153  
   154  func (c *Compactor) writeNodes() ([]segmentindex.Key, error) {
   155  	nc := &nodeCompactor{
   156  		left:             c.left,
   157  		right:            c.right,
   158  		bufw:             c.bufw,
   159  		cleanupDeletions: c.cleanupDeletions,
   160  		emptyBitmap:      sroar.NewBitmap(),
   161  	}
   162  
   163  	nc.init()
   164  
   165  	if err := nc.loopThroughKeys(); err != nil {
   166  		return nil, err
   167  	}
   168  
   169  	return nc.output, nil
   170  }
   171  
   172  func (c *nodeCompactor) init() {
   173  	c.keyLeft, c.valueLeft, _ = c.left.First()
   174  	c.keyRight, c.valueRight, _ = c.right.First()
   175  
   176  	// the (dummy) header was already written, this is our initial offset
   177  	c.offset = segmentindex.HeaderSize
   178  }
   179  
   180  func (c *nodeCompactor) loopThroughKeys() error {
   181  	for {
   182  		if c.keyLeft == nil && c.keyRight == nil {
   183  			return nil
   184  		}
   185  
   186  		if c.keysEqual() {
   187  			if err := c.mergeIdenticalKeys(); err != nil {
   188  				return err
   189  			}
   190  		} else if c.leftKeySmallerOrRightNotSet() {
   191  			if err := c.takeLeftKey(); err != nil {
   192  				return err
   193  			}
   194  		} else {
   195  			if err := c.takeRightKey(); err != nil {
   196  				return err
   197  			}
   198  		}
   199  	}
   200  }
   201  
   202  func (c *nodeCompactor) keysEqual() bool {
   203  	return bytes.Equal(c.keyLeft, c.keyRight)
   204  }
   205  
   206  func (c *nodeCompactor) leftKeySmallerOrRightNotSet() bool {
   207  	return (c.keyLeft != nil && bytes.Compare(c.keyLeft, c.keyRight) == -1) || c.keyRight == nil
   208  }
   209  
   210  func (c *nodeCompactor) mergeIdenticalKeys() error {
   211  	layers := BitmapLayers{
   212  		{Additions: c.valueLeft.Additions, Deletions: c.valueLeft.Deletions},
   213  		{Additions: c.valueRight.Additions, Deletions: c.valueRight.Deletions},
   214  	}
   215  	merged, err := layers.Merge()
   216  	if err != nil {
   217  		return fmt.Errorf("merge bitmap layers for identical keys: %w", err)
   218  	}
   219  
   220  	if additions, deletions, skip := c.cleanupValues(merged.Additions, merged.Deletions); !skip {
   221  		sn, err := NewSegmentNode(c.keyRight, additions, deletions)
   222  		if err != nil {
   223  			return fmt.Errorf("new segment node for merged key: %w", err)
   224  		}
   225  
   226  		ki, err := sn.KeyIndexAndWriteTo(c.bufw, c.offset)
   227  		if err != nil {
   228  			return fmt.Errorf("write individual node (merged key): %w", err)
   229  		}
   230  
   231  		c.offset = ki.ValueEnd
   232  		c.output = append(c.output, ki)
   233  	}
   234  
   235  	// advance both!
   236  	c.keyLeft, c.valueLeft, _ = c.left.Next()
   237  	c.keyRight, c.valueRight, _ = c.right.Next()
   238  	return nil
   239  }
   240  
   241  func (c *nodeCompactor) takeLeftKey() error {
   242  	if additions, deletions, skip := c.cleanupValues(c.valueLeft.Additions, c.valueLeft.Deletions); !skip {
   243  		sn, err := NewSegmentNode(c.keyLeft, additions, deletions)
   244  		if err != nil {
   245  			return fmt.Errorf("new segment node for left key: %w", err)
   246  		}
   247  
   248  		ki, err := sn.KeyIndexAndWriteTo(c.bufw, c.offset)
   249  		if err != nil {
   250  			return fmt.Errorf("write individual node (left key): %w", err)
   251  		}
   252  
   253  		c.offset = ki.ValueEnd
   254  		c.output = append(c.output, ki)
   255  	}
   256  
   257  	c.keyLeft, c.valueLeft, _ = c.left.Next()
   258  	return nil
   259  }
   260  
   261  func (c *nodeCompactor) takeRightKey() error {
   262  	if additions, deletions, skip := c.cleanupValues(c.valueRight.Additions, c.valueRight.Deletions); !skip {
   263  		sn, err := NewSegmentNode(c.keyRight, additions, deletions)
   264  		if err != nil {
   265  			return fmt.Errorf("new segment node for right key: %w", err)
   266  		}
   267  
   268  		ki, err := sn.KeyIndexAndWriteTo(c.bufw, c.offset)
   269  		if err != nil {
   270  			return fmt.Errorf("write individual node (right key): %w", err)
   271  		}
   272  
   273  		c.offset = ki.ValueEnd
   274  		c.output = append(c.output, ki)
   275  	}
   276  
   277  	c.keyRight, c.valueRight, _ = c.right.Next()
   278  	return nil
   279  }
   280  
   281  func (c *nodeCompactor) cleanupValues(additions, deletions *sroar.Bitmap,
   282  ) (add, del *sroar.Bitmap, skip bool) {
   283  	if !c.cleanupDeletions {
   284  		return additions, deletions, false
   285  	}
   286  	if !additions.IsEmpty() {
   287  		return additions, c.emptyBitmap, false
   288  	}
   289  	return nil, nil, true
   290  }
   291  
   292  func (c *Compactor) writeIndexes(keys []segmentindex.Key) error {
   293  	indexes := &segmentindex.Indexes{
   294  		Keys:                keys,
   295  		SecondaryIndexCount: 0,
   296  		ScratchSpacePath:    c.scratchSpacePath,
   297  	}
   298  
   299  	_, err := indexes.WriteTo(c.bufw)
   300  	return err
   301  }
   302  
   303  // writeHeader assumes that everything has been written to the underlying
   304  // writer and it is now safe to seek to the beginning and override the initial
   305  // header
   306  func (c *Compactor) writeHeader(level, version, secondaryIndices uint16,
   307  	startOfIndex uint64,
   308  ) error {
   309  	if _, err := c.w.Seek(0, io.SeekStart); err != nil {
   310  		return errors.Wrap(err, "seek to beginning to write header")
   311  	}
   312  
   313  	h := &segmentindex.Header{
   314  		Level:            level,
   315  		Version:          version,
   316  		SecondaryIndices: secondaryIndices,
   317  		Strategy:         segmentindex.StrategyRoaringSet,
   318  		IndexStart:       startOfIndex,
   319  	}
   320  
   321  	if _, err := h.WriteTo(c.w); err != nil {
   322  		return err
   323  	}
   324  
   325  	return nil
   326  }