github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/roaringset/compactor_test.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package roaringset 13 14 import ( 15 "io" 16 "os" 17 "path/filepath" 18 "testing" 19 20 "github.com/stretchr/testify/assert" 21 "github.com/stretchr/testify/require" 22 "github.com/weaviate/weaviate/adapters/repos/db/lsmkv/segmentindex" 23 ) 24 25 func Test_Compactor(t *testing.T) { 26 type test struct { 27 name string 28 left []byte 29 right []byte 30 expected []keyWithBML 31 expectedRoot []keyWithBML 32 } 33 34 tests := []test{ 35 { 36 name: "independent segments without overlap", 37 left: createSegmentsFromKeys(t, []keyWithBML{ 38 { 39 key: []byte("aaa"), 40 additions: []uint64{0}, 41 deletions: []uint64{1}, 42 }, 43 { 44 key: []byte("ccc"), 45 additions: []uint64{4}, 46 deletions: []uint64{5}, 47 }, 48 }), 49 right: createSegmentsFromKeys(t, []keyWithBML{ 50 { 51 key: []byte("bbb"), 52 additions: []uint64{2}, 53 deletions: []uint64{3}, 54 }, 55 { 56 key: []byte("ddd"), 57 additions: []uint64{6}, 58 deletions: []uint64{7}, 59 }, 60 }), 61 expected: []keyWithBML{ 62 { 63 key: []byte("aaa"), 64 additions: []uint64{0}, 65 deletions: []uint64{1}, 66 }, 67 { 68 key: []byte("bbb"), 69 additions: []uint64{2}, 70 deletions: []uint64{3}, 71 }, 72 { 73 key: []byte("ccc"), 74 additions: []uint64{4}, 75 deletions: []uint64{5}, 76 }, 77 { 78 key: []byte("ddd"), 79 additions: []uint64{6}, 80 deletions: []uint64{7}, 81 }, 82 }, 83 expectedRoot: []keyWithBML{ 84 { 85 key: []byte("aaa"), 86 additions: []uint64{0}, 87 }, 88 { 89 key: []byte("bbb"), 90 additions: []uint64{2}, 91 }, 92 { 93 key: []byte("ccc"), 94 additions: []uint64{4}, 95 }, 96 { 97 key: []byte("ddd"), 98 additions: []uint64{6}, 99 }, 100 }, 101 }, 102 { 103 name: "some segments overlap", 104 // note: there is no need to test every possible edge case for the 105 // overlapping segments in this place, as this logic is outsourced to 106 // BitmapLayer.Merge() which already has tests for edge cases 107 left: createSegmentsFromKeys(t, []keyWithBML{ 108 { 109 key: []byte("aaa"), 110 additions: []uint64{0}, 111 deletions: []uint64{1}, 112 }, 113 { 114 key: []byte("overlap"), 115 additions: []uint64{4, 5, 6}, 116 deletions: []uint64{1, 3, 7}, 117 }, 118 }), 119 right: createSegmentsFromKeys(t, []keyWithBML{ 120 { 121 key: []byte("overlap"), 122 additions: []uint64{3, 8}, 123 deletions: []uint64{5}, 124 }, 125 { 126 key: []byte("zzz"), 127 additions: []uint64{6}, 128 deletions: []uint64{7}, 129 }, 130 }), 131 expected: []keyWithBML{ 132 { 133 key: []byte("aaa"), 134 additions: []uint64{0}, 135 deletions: []uint64{1}, 136 }, 137 { 138 key: []byte("overlap"), 139 additions: []uint64{3, 4, 6, 8}, 140 deletions: []uint64{1, 5, 7}, 141 }, 142 { 143 key: []byte("zzz"), 144 additions: []uint64{6}, 145 deletions: []uint64{7}, 146 }, 147 }, 148 expectedRoot: []keyWithBML{ 149 { 150 key: []byte("aaa"), 151 additions: []uint64{0}, 152 }, 153 { 154 key: []byte("overlap"), 155 additions: []uint64{3, 4, 6, 8}, 156 }, 157 { 158 key: []byte("zzz"), 159 additions: []uint64{6}, 160 }, 161 }, 162 }, 163 { 164 name: "everything but one is deleted", 165 left: createSegmentsFromKeys(t, []keyWithBML{ 166 { 167 key: []byte("aaa"), 168 additions: []uint64{0}, 169 deletions: []uint64{}, 170 }, 171 { 172 key: []byte("bbb"), 173 additions: []uint64{4, 5, 6}, 174 deletions: []uint64{}, 175 }, 176 { 177 key: []byte("ddd"), 178 additions: []uint64{11, 12, 111}, 179 deletions: []uint64{}, 180 }, 181 }), 182 right: createSegmentsFromKeys(t, []keyWithBML{ 183 { 184 key: []byte("aaa"), 185 additions: []uint64{}, 186 deletions: []uint64{0}, 187 }, 188 { 189 key: []byte("bbb"), 190 additions: []uint64{}, 191 deletions: []uint64{4, 5, 6}, 192 }, 193 { 194 key: []byte("ccc"), 195 additions: []uint64{}, 196 deletions: []uint64{7, 8}, 197 }, 198 { 199 key: []byte("ddd"), 200 additions: []uint64{222}, 201 deletions: []uint64{11, 12, 13, 14}, 202 }, 203 }), 204 expected: []keyWithBML{ 205 { 206 key: []byte("aaa"), 207 additions: []uint64{}, 208 deletions: []uint64{0}, 209 }, 210 { 211 key: []byte("bbb"), 212 additions: []uint64{}, 213 deletions: []uint64{4, 5, 6}, 214 }, 215 { 216 key: []byte("ccc"), 217 additions: []uint64{}, 218 deletions: []uint64{7, 8}, 219 }, 220 { 221 key: []byte("ddd"), 222 additions: []uint64{111, 222}, 223 deletions: []uint64{11, 12, 13, 14}, 224 }, 225 }, 226 expectedRoot: []keyWithBML{ 227 { 228 key: []byte("ddd"), 229 additions: []uint64{111, 222}, 230 }, 231 }, 232 }, 233 234 // the key loop is essentially a state machine. The next tests try to cover 235 // all possible states: 236 // 237 // 1. only the left key is set -> take left key 238 // 2. both left key and right key are set, but left is smaller -> take left 239 // key 240 // 3. only the right key is set -> take right key 241 // 4. both right and left keys are set, but right key is smaller -> take 242 // the right key 243 // 5. both keys are identical -> merge them 244 // 245 // Note: There is also an implicit 6th case: both keys are not set, this is 246 // the exit condition which is part of every test. 247 { 248 name: "state 1 - only left key is set", 249 left: createSegmentsFromKeys(t, []keyWithBML{ 250 { 251 key: []byte("aaa"), 252 additions: []uint64{0}, 253 deletions: []uint64{1}, 254 }, 255 }), 256 right: createSegmentsFromKeys(t, []keyWithBML{}), 257 expected: []keyWithBML{ 258 { 259 key: []byte("aaa"), 260 additions: []uint64{0}, 261 deletions: []uint64{1}, 262 }, 263 }, 264 expectedRoot: []keyWithBML{ 265 { 266 key: []byte("aaa"), 267 additions: []uint64{0}, 268 }, 269 }, 270 }, 271 { 272 name: "state 2 - left+right, left is smaller", 273 left: createSegmentsFromKeys(t, []keyWithBML{ 274 { 275 key: []byte("aaa"), 276 additions: []uint64{0}, 277 deletions: []uint64{1}, 278 }, 279 }), 280 right: createSegmentsFromKeys(t, []keyWithBML{ 281 { 282 key: []byte("bbb"), 283 additions: []uint64{2}, 284 deletions: []uint64{3}, 285 }, 286 }), 287 expected: []keyWithBML{ 288 { 289 key: []byte("aaa"), 290 additions: []uint64{0}, 291 deletions: []uint64{1}, 292 }, 293 { 294 key: []byte("bbb"), 295 additions: []uint64{2}, 296 deletions: []uint64{3}, 297 }, 298 }, 299 expectedRoot: []keyWithBML{ 300 { 301 key: []byte("aaa"), 302 additions: []uint64{0}, 303 }, 304 { 305 key: []byte("bbb"), 306 additions: []uint64{2}, 307 }, 308 }, 309 }, 310 { 311 name: "state 3 - only the right key is set", 312 left: createSegmentsFromKeys(t, []keyWithBML{}), 313 right: createSegmentsFromKeys(t, []keyWithBML{ 314 { 315 key: []byte("bbb"), 316 additions: []uint64{2}, 317 deletions: []uint64{3}, 318 }, 319 }), 320 expected: []keyWithBML{ 321 { 322 key: []byte("bbb"), 323 additions: []uint64{2}, 324 deletions: []uint64{3}, 325 }, 326 }, 327 expectedRoot: []keyWithBML{ 328 { 329 key: []byte("bbb"), 330 additions: []uint64{2}, 331 }, 332 }, 333 }, 334 { 335 name: "state 4 - left+right, right is smaller", 336 left: createSegmentsFromKeys(t, []keyWithBML{ 337 { 338 key: []byte("ccc"), 339 additions: []uint64{0}, 340 deletions: []uint64{1}, 341 }, 342 }), 343 right: createSegmentsFromKeys(t, []keyWithBML{ 344 { 345 key: []byte("bbb"), 346 additions: []uint64{2}, 347 deletions: []uint64{3}, 348 }, 349 }), 350 expected: []keyWithBML{ 351 { 352 key: []byte("bbb"), 353 additions: []uint64{2}, 354 deletions: []uint64{3}, 355 }, 356 { 357 key: []byte("ccc"), 358 additions: []uint64{0}, 359 deletions: []uint64{1}, 360 }, 361 }, 362 expectedRoot: []keyWithBML{ 363 { 364 key: []byte("bbb"), 365 additions: []uint64{2}, 366 }, 367 { 368 key: []byte("ccc"), 369 additions: []uint64{0}, 370 }, 371 }, 372 }, 373 { 374 name: "state 5 - left+right are identical", 375 left: createSegmentsFromKeys(t, []keyWithBML{ 376 { 377 key: []byte("aaa"), 378 additions: []uint64{0}, 379 deletions: []uint64{1}, 380 }, 381 }), 382 right: createSegmentsFromKeys(t, []keyWithBML{ 383 { 384 key: []byte("aaa"), 385 additions: []uint64{2}, 386 deletions: []uint64{3}, 387 }, 388 }), 389 expected: []keyWithBML{ 390 { 391 key: []byte("aaa"), 392 additions: []uint64{0, 2}, 393 deletions: []uint64{1, 3}, 394 }, 395 }, 396 expectedRoot: []keyWithBML{ 397 { 398 key: []byte("aaa"), 399 additions: []uint64{0, 2}, 400 }, 401 }, 402 }, 403 } 404 405 for _, test := range tests { 406 t.Run("[keep]"+test.name, func(t *testing.T) { 407 dir := t.TempDir() 408 409 leftCursor := NewSegmentCursor(test.left, nil) 410 rightCursor := NewSegmentCursor(test.right, nil) 411 412 segmentFile := filepath.Join(dir, "result.db") 413 f, err := os.Create(segmentFile) 414 require.NoError(t, err) 415 416 c := NewCompactor(f, leftCursor, rightCursor, 5, dir+"/scratch", false) 417 require.NoError(t, c.Do()) 418 419 require.NoError(t, f.Close()) 420 421 f, err = os.Open(segmentFile) 422 require.NoError(t, err) 423 424 header, err := segmentindex.ParseHeader(f) 425 require.NoError(t, err) 426 427 segmentBytes, err := io.ReadAll(f) 428 require.NoError(t, err) 429 430 require.NoError(t, f.Close()) 431 432 cu := NewSegmentCursor(segmentBytes[:header.IndexStart-segmentindex.HeaderSize], nil) 433 434 i := 0 435 for k, v, _ := cu.First(); k != nil; k, v, _ = cu.Next() { 436 assert.Equal(t, test.expected[i].key, k) 437 assert.Equal(t, test.expected[i].additions, v.Additions.ToArray()) 438 assert.Equal(t, test.expected[i].deletions, v.Deletions.ToArray()) 439 i++ 440 } 441 442 assert.Equal(t, len(test.expected), i, "all expected keys must have been hit") 443 }) 444 } 445 446 for _, test := range tests { 447 t.Run("[cleanup] "+test.name, func(t *testing.T) { 448 dir := t.TempDir() 449 450 leftCursor := NewSegmentCursor(test.left, nil) 451 rightCursor := NewSegmentCursor(test.right, nil) 452 453 segmentFile := filepath.Join(dir, "result.db") 454 f, err := os.Create(segmentFile) 455 require.NoError(t, err) 456 457 c := NewCompactor(f, leftCursor, rightCursor, 5, dir+"/scratch", true) 458 require.NoError(t, c.Do()) 459 460 require.NoError(t, f.Close()) 461 462 f, err = os.Open(segmentFile) 463 require.NoError(t, err) 464 465 header, err := segmentindex.ParseHeader(f) 466 require.NoError(t, err) 467 468 segmentBytes, err := io.ReadAll(f) 469 require.NoError(t, err) 470 471 require.NoError(t, f.Close()) 472 473 cu := NewSegmentCursor(segmentBytes[:header.IndexStart-segmentindex.HeaderSize], nil) 474 475 i := 0 476 for k, v, _ := cu.First(); k != nil; k, v, _ = cu.Next() { 477 assert.Equal(t, test.expectedRoot[i].key, k) 478 assert.Equal(t, test.expectedRoot[i].additions, v.Additions.ToArray()) 479 assert.Empty(t, v.Deletions.ToArray()) 480 i++ 481 } 482 483 assert.Equal(t, len(test.expectedRoot), i, "all expected keys must have been hit") 484 }) 485 } 486 } 487 488 type keyWithBML struct { 489 key []byte 490 additions []uint64 491 deletions []uint64 492 } 493 494 func createSegmentsFromKeys(t *testing.T, keys []keyWithBML) []byte { 495 out := []byte{} 496 497 for _, k := range keys { 498 add := NewBitmap(k.additions...) 499 del := NewBitmap(k.deletions...) 500 sn, err := NewSegmentNode(k.key, add, del) 501 require.Nil(t, err) 502 out = append(out, sn.ToBuffer()...) 503 } 504 505 return out 506 }