github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/roaringset/helpers_test.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package roaringset 13 14 import ( 15 "fmt" 16 "testing" 17 18 "github.com/sirupsen/logrus/hooks/test" 19 20 "github.com/stretchr/testify/assert" 21 "github.com/weaviate/sroar" 22 ) 23 24 var logger, _ = test.NewNullLogger() 25 26 func TestBitmap_Condense(t *testing.T) { 27 t.Run("And with itself (internal array)", func(t *testing.T) { 28 bm := NewBitmap(slice(0, 1000)...) 29 for i := 0; i < 10; i++ { 30 bm.And(bm) 31 } 32 bmLen := len(bm.ToBuffer()) 33 34 condensed := Condense(bm) 35 condensedLen := len(condensed.ToBuffer()) 36 37 assert.Greater(t, bmLen, condensedLen) 38 assert.ElementsMatch(t, bm.ToArray(), condensed.ToArray()) 39 }) 40 41 t.Run("And with itself (internal bitmap)", func(t *testing.T) { 42 bm := NewBitmap(slice(0, 3000)...) 43 for i := 0; i < 10; i++ { 44 bm.And(bm) 45 } 46 bmLen := len(bm.ToBuffer()) 47 48 condensed := Condense(bm) 49 condensedLen := len(condensed.ToBuffer()) 50 51 assert.Greater(t, bmLen, condensedLen) 52 assert.ElementsMatch(t, bm.ToArray(), condensed.ToArray()) 53 }) 54 55 t.Run("And (internal arrays)", func(t *testing.T) { 56 bm1 := NewBitmap(slice(0, 1000)...) 57 bm2 := NewBitmap(slice(500, 1500)...) 58 bm := bm1.Clone() 59 bm.And(bm2) 60 bmLen := len(bm.ToBuffer()) 61 62 condensed := Condense(bm) 63 condensedLen := len(condensed.ToBuffer()) 64 65 assert.Greater(t, bmLen, condensedLen) 66 assert.ElementsMatch(t, bm.ToArray(), condensed.ToArray()) 67 }) 68 69 t.Run("And (internal bitmaps)", func(t *testing.T) { 70 bm1 := NewBitmap(slice(0, 4000)...) 71 bm2 := NewBitmap(slice(1000, 5000)...) 72 bm := bm1.Clone() 73 bm.And(bm2) 74 bmLen := len(bm.ToBuffer()) 75 76 condensed := Condense(bm) 77 condensedLen := len(condensed.ToBuffer()) 78 79 assert.Greater(t, bmLen, condensedLen) 80 assert.ElementsMatch(t, bm.ToArray(), condensed.ToArray()) 81 }) 82 83 t.Run("And (internal bitmaps to bitmap with few elements)", func(t *testing.T) { 84 // this is not optimal. Internally elements will be stored in bitmap, 85 // though they would easily fit into array 86 bm1 := NewBitmap(slice(0, 4000)...) 87 bm2 := NewBitmap(slice(1000, 5000)...) 88 bm := bm1.Clone() 89 bm.And(bm2) 90 bmLen := len(bm.ToBuffer()) 91 92 condensed := Condense(bm) 93 condensedLen := len(condensed.ToBuffer()) 94 95 assert.Greater(t, bmLen, condensedLen) 96 assert.ElementsMatch(t, bm.ToArray(), condensed.ToArray()) 97 }) 98 99 t.Run("Remove (array)", func(t *testing.T) { 100 bm := NewBitmap(slice(0, 1000)...) 101 for i := uint64(2); i < 1000; i++ { 102 bm.Remove(i) 103 } 104 bmLen := len(bm.ToBuffer()) 105 106 condensed := Condense(bm) 107 condensedLen := len(condensed.ToBuffer()) 108 109 assert.Greater(t, bmLen, condensedLen) 110 assert.ElementsMatch(t, bm.ToArray(), condensed.ToArray()) 111 }) 112 113 t.Run("Remove (bitmap)", func(t *testing.T) { 114 bm := NewBitmap(slice(0, 100_000)...) 115 for i := uint64(10_000); i < 100_000; i++ { 116 bm.Remove(i) 117 } 118 bmLen := len(bm.ToBuffer()) 119 120 condensed := Condense(bm) 121 condensedLen := len(condensed.ToBuffer()) 122 123 assert.Greater(t, bmLen, condensedLen) 124 assert.ElementsMatch(t, bm.ToArray(), condensed.ToArray()) 125 }) 126 } 127 128 func TestBitmap_Prefill(t *testing.T) { 129 t.Run("sequential", func(t *testing.T) { 130 for _, maxVal := range []uint64{1_000, 10_000, 100_000, 1_000_000, uint64(prefillBufferSize)} { 131 t.Run(fmt.Sprint(maxVal), func(t *testing.T) { 132 bm := newBitmapPrefillSequential(maxVal) 133 134 // +1, due to 0 included 135 assert.Equal(t, int(maxVal)+1, bm.GetCardinality()) 136 137 // remove all except maxVal 138 bm.RemoveRange(0, maxVal) 139 140 assert.Equal(t, 1, bm.GetCardinality()) 141 assert.True(t, bm.Contains(maxVal)) 142 }) 143 } 144 }) 145 146 t.Run("parallel", func(t *testing.T) { 147 for _, maxVal := range []uint64{1_000, 10_000, 100_000, 1_000_000, uint64(prefillBufferSize)} { 148 for _, routinesLimit := range []int{2, 3, 4, 5, 6, 7, 8} { 149 t.Run(fmt.Sprint(maxVal), func(t *testing.T) { 150 bm := newBitmapPrefillParallel(maxVal, routinesLimit, logger) 151 152 // +1, due to 0 included 153 assert.Equal(t, int(maxVal)+1, bm.GetCardinality()) 154 155 // remove all except maxVal 156 bm.RemoveRange(0, maxVal) 157 158 assert.Equal(t, 1, bm.GetCardinality()) 159 assert.True(t, bm.Contains(maxVal)) 160 }) 161 } 162 } 163 }) 164 165 t.Run("conditional - sequential or parallel", func(t *testing.T) { 166 for _, maxVal := range []uint64{1_000, 10_000, 100_000, 1_000_000, uint64(prefillBufferSize)} { 167 t.Run(fmt.Sprint(maxVal), func(t *testing.T) { 168 bm := NewBitmapPrefill(maxVal, logger) 169 170 // +1, due to 0 included 171 assert.Equal(t, int(maxVal)+1, bm.GetCardinality()) 172 173 // remove all except maxVal 174 bm.RemoveRange(0, maxVal) 175 176 assert.Equal(t, 1, bm.GetCardinality()) 177 assert.True(t, bm.Contains(maxVal)) 178 }) 179 } 180 }) 181 } 182 183 func TestBitmap_Inverted(t *testing.T) { 184 type test struct { 185 name string 186 source []uint64 187 maxVal uint64 188 shouldContain []uint64 189 } 190 191 tests := []test{ 192 { 193 name: "just 0, no source", 194 source: nil, 195 maxVal: 0, 196 shouldContain: []uint64{0}, 197 }, 198 { 199 name: "no matches in source", 200 source: nil, 201 maxVal: 7, 202 shouldContain: []uint64{0, 1, 2, 3, 4, 5, 6, 7}, 203 }, 204 { 205 name: "some matches in source", 206 source: []uint64{3, 4, 5}, 207 maxVal: 7, 208 shouldContain: []uint64{0, 1, 2, 6, 7}, 209 }, 210 { 211 name: "source has higher val than max val", 212 source: []uint64{3, 4, 5, 8}, 213 maxVal: 7, 214 shouldContain: []uint64{0, 1, 2, 6, 7}, 215 }, 216 } 217 218 for _, test := range tests { 219 t.Run(test.name, func(t *testing.T) { 220 source := sroar.NewBitmap() 221 source.SetMany(test.source) 222 out := NewInvertedBitmap(source, test.maxVal, logger) 223 outSlice := out.ToArray() 224 assert.Equal(t, test.shouldContain, outSlice) 225 }) 226 } 227 } 228 229 func TestBitmapFactory(t *testing.T) { 230 maxVal := uint64(10) 231 maxValGetter := func() uint64 { return maxVal } 232 bmf := NewBitmapFactory(maxValGetter, logger) 233 t.Logf("card: %d", bmf.bitmap.GetCardinality()) 234 235 currMax := bmf.currentMaxVal 236 t.Run("max val set correctly", func(t *testing.T) { 237 assert.Equal(t, maxVal+DefaultBufferIncrement, currMax) 238 }) 239 240 t.Run("max val increased to threshold does not change cardinality", func(t *testing.T) { 241 maxVal += 100 242 assert.NotNil(t, bmf.GetBitmap()) 243 assert.Equal(t, currMax, bmf.currentMaxVal) 244 assert.Equal(t, currMax+1, uint64(bmf.bitmap.GetCardinality())) 245 assert.Equal(t, maxVal, bmf.ActualMaxVal()) 246 }) 247 248 t.Run("max val surpasses threshold, cardinality increased", func(t *testing.T) { 249 maxVal += 1 250 assert.NotNil(t, bmf.GetBitmap()) 251 currMax += 1 + DefaultBufferIncrement 252 assert.Equal(t, currMax, bmf.currentMaxVal) 253 assert.Equal(t, currMax+1, uint64(bmf.bitmap.GetCardinality())) 254 assert.Equal(t, maxVal, bmf.ActualMaxVal()) 255 }) 256 } 257 258 func slice(from, to uint64) []uint64 { 259 len := to - from 260 s := make([]uint64, len) 261 for i := uint64(0); i < len; i++ { 262 s[i] = from + i 263 } 264 return s 265 }