github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/roaringset/helpers_test.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package roaringset
    13  
    14  import (
    15  	"fmt"
    16  	"testing"
    17  
    18  	"github.com/sirupsen/logrus/hooks/test"
    19  
    20  	"github.com/stretchr/testify/assert"
    21  	"github.com/weaviate/sroar"
    22  )
    23  
    24  var logger, _ = test.NewNullLogger()
    25  
    26  func TestBitmap_Condense(t *testing.T) {
    27  	t.Run("And with itself (internal array)", func(t *testing.T) {
    28  		bm := NewBitmap(slice(0, 1000)...)
    29  		for i := 0; i < 10; i++ {
    30  			bm.And(bm)
    31  		}
    32  		bmLen := len(bm.ToBuffer())
    33  
    34  		condensed := Condense(bm)
    35  		condensedLen := len(condensed.ToBuffer())
    36  
    37  		assert.Greater(t, bmLen, condensedLen)
    38  		assert.ElementsMatch(t, bm.ToArray(), condensed.ToArray())
    39  	})
    40  
    41  	t.Run("And with itself (internal bitmap)", func(t *testing.T) {
    42  		bm := NewBitmap(slice(0, 3000)...)
    43  		for i := 0; i < 10; i++ {
    44  			bm.And(bm)
    45  		}
    46  		bmLen := len(bm.ToBuffer())
    47  
    48  		condensed := Condense(bm)
    49  		condensedLen := len(condensed.ToBuffer())
    50  
    51  		assert.Greater(t, bmLen, condensedLen)
    52  		assert.ElementsMatch(t, bm.ToArray(), condensed.ToArray())
    53  	})
    54  
    55  	t.Run("And (internal arrays)", func(t *testing.T) {
    56  		bm1 := NewBitmap(slice(0, 1000)...)
    57  		bm2 := NewBitmap(slice(500, 1500)...)
    58  		bm := bm1.Clone()
    59  		bm.And(bm2)
    60  		bmLen := len(bm.ToBuffer())
    61  
    62  		condensed := Condense(bm)
    63  		condensedLen := len(condensed.ToBuffer())
    64  
    65  		assert.Greater(t, bmLen, condensedLen)
    66  		assert.ElementsMatch(t, bm.ToArray(), condensed.ToArray())
    67  	})
    68  
    69  	t.Run("And (internal bitmaps)", func(t *testing.T) {
    70  		bm1 := NewBitmap(slice(0, 4000)...)
    71  		bm2 := NewBitmap(slice(1000, 5000)...)
    72  		bm := bm1.Clone()
    73  		bm.And(bm2)
    74  		bmLen := len(bm.ToBuffer())
    75  
    76  		condensed := Condense(bm)
    77  		condensedLen := len(condensed.ToBuffer())
    78  
    79  		assert.Greater(t, bmLen, condensedLen)
    80  		assert.ElementsMatch(t, bm.ToArray(), condensed.ToArray())
    81  	})
    82  
    83  	t.Run("And (internal bitmaps to bitmap with few elements)", func(t *testing.T) {
    84  		// this is not optimal. Internally elements will be stored in bitmap,
    85  		// though they would easily fit into array
    86  		bm1 := NewBitmap(slice(0, 4000)...)
    87  		bm2 := NewBitmap(slice(1000, 5000)...)
    88  		bm := bm1.Clone()
    89  		bm.And(bm2)
    90  		bmLen := len(bm.ToBuffer())
    91  
    92  		condensed := Condense(bm)
    93  		condensedLen := len(condensed.ToBuffer())
    94  
    95  		assert.Greater(t, bmLen, condensedLen)
    96  		assert.ElementsMatch(t, bm.ToArray(), condensed.ToArray())
    97  	})
    98  
    99  	t.Run("Remove (array)", func(t *testing.T) {
   100  		bm := NewBitmap(slice(0, 1000)...)
   101  		for i := uint64(2); i < 1000; i++ {
   102  			bm.Remove(i)
   103  		}
   104  		bmLen := len(bm.ToBuffer())
   105  
   106  		condensed := Condense(bm)
   107  		condensedLen := len(condensed.ToBuffer())
   108  
   109  		assert.Greater(t, bmLen, condensedLen)
   110  		assert.ElementsMatch(t, bm.ToArray(), condensed.ToArray())
   111  	})
   112  
   113  	t.Run("Remove (bitmap)", func(t *testing.T) {
   114  		bm := NewBitmap(slice(0, 100_000)...)
   115  		for i := uint64(10_000); i < 100_000; i++ {
   116  			bm.Remove(i)
   117  		}
   118  		bmLen := len(bm.ToBuffer())
   119  
   120  		condensed := Condense(bm)
   121  		condensedLen := len(condensed.ToBuffer())
   122  
   123  		assert.Greater(t, bmLen, condensedLen)
   124  		assert.ElementsMatch(t, bm.ToArray(), condensed.ToArray())
   125  	})
   126  }
   127  
   128  func TestBitmap_Prefill(t *testing.T) {
   129  	t.Run("sequential", func(t *testing.T) {
   130  		for _, maxVal := range []uint64{1_000, 10_000, 100_000, 1_000_000, uint64(prefillBufferSize)} {
   131  			t.Run(fmt.Sprint(maxVal), func(t *testing.T) {
   132  				bm := newBitmapPrefillSequential(maxVal)
   133  
   134  				// +1, due to 0 included
   135  				assert.Equal(t, int(maxVal)+1, bm.GetCardinality())
   136  
   137  				// remove all except maxVal
   138  				bm.RemoveRange(0, maxVal)
   139  
   140  				assert.Equal(t, 1, bm.GetCardinality())
   141  				assert.True(t, bm.Contains(maxVal))
   142  			})
   143  		}
   144  	})
   145  
   146  	t.Run("parallel", func(t *testing.T) {
   147  		for _, maxVal := range []uint64{1_000, 10_000, 100_000, 1_000_000, uint64(prefillBufferSize)} {
   148  			for _, routinesLimit := range []int{2, 3, 4, 5, 6, 7, 8} {
   149  				t.Run(fmt.Sprint(maxVal), func(t *testing.T) {
   150  					bm := newBitmapPrefillParallel(maxVal, routinesLimit, logger)
   151  
   152  					// +1, due to 0 included
   153  					assert.Equal(t, int(maxVal)+1, bm.GetCardinality())
   154  
   155  					// remove all except maxVal
   156  					bm.RemoveRange(0, maxVal)
   157  
   158  					assert.Equal(t, 1, bm.GetCardinality())
   159  					assert.True(t, bm.Contains(maxVal))
   160  				})
   161  			}
   162  		}
   163  	})
   164  
   165  	t.Run("conditional - sequential or parallel", func(t *testing.T) {
   166  		for _, maxVal := range []uint64{1_000, 10_000, 100_000, 1_000_000, uint64(prefillBufferSize)} {
   167  			t.Run(fmt.Sprint(maxVal), func(t *testing.T) {
   168  				bm := NewBitmapPrefill(maxVal, logger)
   169  
   170  				// +1, due to 0 included
   171  				assert.Equal(t, int(maxVal)+1, bm.GetCardinality())
   172  
   173  				// remove all except maxVal
   174  				bm.RemoveRange(0, maxVal)
   175  
   176  				assert.Equal(t, 1, bm.GetCardinality())
   177  				assert.True(t, bm.Contains(maxVal))
   178  			})
   179  		}
   180  	})
   181  }
   182  
   183  func TestBitmap_Inverted(t *testing.T) {
   184  	type test struct {
   185  		name          string
   186  		source        []uint64
   187  		maxVal        uint64
   188  		shouldContain []uint64
   189  	}
   190  
   191  	tests := []test{
   192  		{
   193  			name:          "just 0, no source",
   194  			source:        nil,
   195  			maxVal:        0,
   196  			shouldContain: []uint64{0},
   197  		},
   198  		{
   199  			name:          "no matches in source",
   200  			source:        nil,
   201  			maxVal:        7,
   202  			shouldContain: []uint64{0, 1, 2, 3, 4, 5, 6, 7},
   203  		},
   204  		{
   205  			name:          "some matches in source",
   206  			source:        []uint64{3, 4, 5},
   207  			maxVal:        7,
   208  			shouldContain: []uint64{0, 1, 2, 6, 7},
   209  		},
   210  		{
   211  			name:          "source has higher val than max val",
   212  			source:        []uint64{3, 4, 5, 8},
   213  			maxVal:        7,
   214  			shouldContain: []uint64{0, 1, 2, 6, 7},
   215  		},
   216  	}
   217  
   218  	for _, test := range tests {
   219  		t.Run(test.name, func(t *testing.T) {
   220  			source := sroar.NewBitmap()
   221  			source.SetMany(test.source)
   222  			out := NewInvertedBitmap(source, test.maxVal, logger)
   223  			outSlice := out.ToArray()
   224  			assert.Equal(t, test.shouldContain, outSlice)
   225  		})
   226  	}
   227  }
   228  
   229  func TestBitmapFactory(t *testing.T) {
   230  	maxVal := uint64(10)
   231  	maxValGetter := func() uint64 { return maxVal }
   232  	bmf := NewBitmapFactory(maxValGetter, logger)
   233  	t.Logf("card: %d", bmf.bitmap.GetCardinality())
   234  
   235  	currMax := bmf.currentMaxVal
   236  	t.Run("max val set correctly", func(t *testing.T) {
   237  		assert.Equal(t, maxVal+DefaultBufferIncrement, currMax)
   238  	})
   239  
   240  	t.Run("max val increased to threshold does not change cardinality", func(t *testing.T) {
   241  		maxVal += 100
   242  		assert.NotNil(t, bmf.GetBitmap())
   243  		assert.Equal(t, currMax, bmf.currentMaxVal)
   244  		assert.Equal(t, currMax+1, uint64(bmf.bitmap.GetCardinality()))
   245  		assert.Equal(t, maxVal, bmf.ActualMaxVal())
   246  	})
   247  
   248  	t.Run("max val surpasses threshold, cardinality increased", func(t *testing.T) {
   249  		maxVal += 1
   250  		assert.NotNil(t, bmf.GetBitmap())
   251  		currMax += 1 + DefaultBufferIncrement
   252  		assert.Equal(t, currMax, bmf.currentMaxVal)
   253  		assert.Equal(t, currMax+1, uint64(bmf.bitmap.GetCardinality()))
   254  		assert.Equal(t, maxVal, bmf.ActualMaxVal())
   255  	})
   256  }
   257  
   258  func slice(from, to uint64) []uint64 {
   259  	len := to - from
   260  	s := make([]uint64, len)
   261  	for i := uint64(0); i < len; i++ {
   262  		s[i] = from + i
   263  	}
   264  	return s
   265  }