github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/roaringset/layers.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package roaringset
    13  
    14  import (
    15  	"fmt"
    16  
    17  	"github.com/weaviate/sroar"
    18  )
    19  
    20  // A BitmapLayer contains all the bitmap related delta-information stored for a
    21  // specific key in one layer. A layer typically corresponds to one disk segment
    22  // or a memtable layer
    23  //
    24  // A layer is essentially a snapshot in time and to get an accurate few of the
    25  // set in its entirety multiple layers need to be combined using
    26  // [BitmapLayers].
    27  //
    28  // The contents of Additions and Deletions must be mutually exclusive. A layer
    29  // cannot both add and delete an element. The only way to create new layers is
    30  // through inserting into a Memtable. The memtable must make sure that:
    31  //
    32  //   - When an element is added, any previous deletion of this element is
    33  //     removed
    34  //   - When an element is deleted, any previous addition of this element is
    35  //     removed.
    36  //
    37  // As a result, an element is either a net addition or a net deletion in a
    38  // layer, but it can never be both.
    39  type BitmapLayer struct {
    40  	Additions *sroar.Bitmap
    41  	Deletions *sroar.Bitmap
    42  }
    43  
    44  func (l *BitmapLayer) Clone() BitmapLayer {
    45  	clone := BitmapLayer{}
    46  	if l.Additions != nil {
    47  		clone.Additions = l.Additions.Clone()
    48  	}
    49  	if l.Deletions != nil {
    50  		clone.Deletions = l.Deletions.Clone()
    51  	}
    52  	return clone
    53  }
    54  
// BitmapLayers is a sequence of [BitmapLayer] values and serves as a helper
// type for operations that span multiple layers, such as
// [BitmapLayers.Flatten] or [BitmapLayers.Merge]. Both operations treat the
// element at index 0 as the earliest layer.
type BitmapLayers []BitmapLayer
    58  
    59  // Flatten reduces all snapshots into a single Bitmap. This bitmap no longer
    60  // contains separate additions and deletions, but a single set where all
    61  // additions and deletions have been applied in the correct order.
    62  //
    63  // If you do not wish to flatten all of history, but rather combine two layers,
    64  // such as would happen in a Compaction, use [BitmapLayers.Merge] instead.
    65  //
    66  // Flatten is typically used when serving a specific key to the user: It
    67  // flattens all disk segments, a currently flushing memtable if it exists, and
    68  // the active memtable into a single bitmap. The final bitmap is returned to
    69  // the user.
    70  //
    71  // # Flattening Logic
    72  //
    73  //   - The first layer is seen as chronologically first. Deletions in the
    74  //     first layers are ignored, as there is nothing to be deleted. As a
    75  //     result, the additions of the first segment become the root state in the
    76  //     first iteration.
    77  //   - Any subsequent layer is merged into the root layer in the following way:
    78  //     Deletions remove any existing additions, Additions are added.
    79  //   - This process happens one layer at a time. This way delete-and-readd
    80  //     cycles are reflected correctly. For example, if layer 2 deletes an element
    81  //     X and layer 3 adds element X, then it is a net addition overall, and X
    82  //     should be represented in the final bitmap. If the order is reversed and
    83  //     layer 2 adds X, whereas layer 3 removes X, it is should not be contained
    84  //     in the final map.
    85  func (bml BitmapLayers) Flatten() *sroar.Bitmap {
    86  	if len(bml) == 0 {
    87  		return sroar.NewBitmap()
    88  	}
    89  
    90  	cur := bml[0]
    91  	// TODO: is this copy really needed? aren't we already operating on copied
    92  	// bms?
    93  	merged := cur.Additions.Clone()
    94  
    95  	for i := 1; i < len(bml); i++ {
    96  		merged.AndNot(bml[i].Deletions)
    97  		merged.Or(bml[i].Additions)
    98  	}
    99  
   100  	return merged
   101  }
   102  
   103  // Merge turns two successive layers into one. It does not flatten the segment,
   104  // but keeps additions and deletions separate. This is because there are no
   105  // guarantees that the first segment was the root segment. A merge could run on
   106  // segments 3+4 and they could contain deletions of elements that were added in
   107  // segments 1 or 2.
   108  //
   109  // Merge is intended to be used as part of compactions.
   110  func (bml BitmapLayers) Merge() (BitmapLayer, error) {
   111  	out := BitmapLayer{}
   112  	if len(bml) != 2 {
   113  		return out, fmt.Errorf("merge requires exactly two input segments")
   114  	}
   115  
   116  	left, right := bml[0], bml[1]
   117  
   118  	additions := left.Additions.Clone()
   119  	additions.Or(right.Additions)
   120  	additions.AndNot(right.Deletions)
   121  
   122  	deletions := left.Deletions.Clone()
   123  	deletions.AndNot(right.Additions)
   124  	deletions.Or(right.Deletions)
   125  
   126  	out.Additions = Condense(additions)
   127  	out.Deletions = Condense(deletions)
   128  	return out, nil
   129  }