// Copyright 2016 by the roaring authors.
// Licensed under the Apache License, Version 2.0.
// Full version of the license is here:
// https://github.com/RoaringBitmap/roaring/blob/master/LICENSE

// Set utilities of package sroar (setutil.go): helpers that merge, intersect
// and search []uint16 slices. The two-pointer merges and the searches below
// only produce meaningful results when their inputs are sorted in ascending
// order; the functions do not verify this.

// TODO: Add license from roaring bitmap library.

// min returns the smaller of a and b.
func min(a, b int) int {
	if a < b {
		return a
	}
	return b
}

// max returns the larger of a and b.
func max(a, b int) int {
	if a > b {
		return a
	}
	return b
}

// equal reports whether a and b have the same length and the same elements
// in the same order.
func equal(a, b []uint16) bool {
	if len(a) != len(b) {
		return false
	}
	for i := range a {
		if a[i] != b[i] {
			return false
		}
	}
	return true
}

// difference writes the elements of set1 that do not occur in set2 into
// buffer and returns how many elements were written.
//
// buffer is re-sliced up to its capacity; the function panics if the
// capacity is too small to hold the result.
func difference(set1 []uint16, set2 []uint16, buffer []uint16) int {
	if len(set2) == 0 {
		buffer = buffer[:len(set1)]
		copy(buffer, set1)
		return len(set1)
	}
	if len(set1) == 0 {
		return 0
	}
	pos, k1, k2 := 0, 0, 0
	buffer = buffer[:cap(buffer)]
	s1, s2 := set1[k1], set2[k2]
	for {
		switch {
		case s1 < s2:
			// s1 cannot appear later in set2, so it survives.
			buffer[pos] = s1
			pos++
			k1++
			if k1 >= len(set1) {
				return pos
			}
			s1 = set1[k1]
		case s1 == s2:
			// Present in both: drop it and advance both cursors.
			k1++
			k2++
			if k1 >= len(set1) {
				return pos
			}
			if k2 >= len(set2) {
				// set2 is exhausted; the rest of set1 survives. The bounded
				// destination keeps the panic on an undersized buffer.
				pos += copy(buffer[pos:pos+len(set1)-k1], set1[k1:])
				return pos
			}
			s1 = set1[k1]
			s2 = set2[k2]
		default: // s1 > s2
			k2++
			if k2 >= len(set2) {
				pos += copy(buffer[pos:pos+len(set1)-k1], set1[k1:])
				return pos
			}
			s2 = set2[k2]
		}
	}
}

// exclusiveUnion2by2 writes the symmetric difference of set1 and set2 (the
// elements found in exactly one of them) into buffer and returns how many
// elements were written.
//
// buffer is re-sliced up to its capacity; the function panics if the
// capacity is too small to hold the result.
func exclusiveUnion2by2(set1 []uint16, set2 []uint16, buffer []uint16) int {
	if len(set2) == 0 {
		buffer = buffer[:len(set1)]
		copy(buffer, set1)
		return len(set1)
	}
	if len(set1) == 0 {
		buffer = buffer[:len(set2)]
		copy(buffer, set2)
		return len(set2)
	}
	pos, k1, k2 := 0, 0, 0
	s1, s2 := set1[k1], set2[k2]
	buffer = buffer[:cap(buffer)]
	for {
		switch {
		case s1 < s2:
			buffer[pos] = s1
			pos++
			k1++
			if k1 >= len(set1) {
				pos += copy(buffer[pos:pos+len(set2)-k2], set2[k2:])
				return pos
			}
			s1 = set1[k1]
		case s1 == s2:
			// Present in both: emit nothing and advance both cursors.
			k1++
			k2++
			if k1 >= len(set1) {
				pos += copy(buffer[pos:pos+len(set2)-k2], set2[k2:])
				return pos
			}
			if k2 >= len(set2) {
				pos += copy(buffer[pos:pos+len(set1)-k1], set1[k1:])
				return pos
			}
			s1 = set1[k1]
			s2 = set2[k2]
		default: // s1 > s2
			buffer[pos] = s2
			pos++
			k2++
			if k2 >= len(set2) {
				pos += copy(buffer[pos:pos+len(set1)-k1], set1[k1:])
				return pos
			}
			s2 = set2[k2]
		}
	}
}

// union2by2Cardinality returns the number of distinct values in the union of
// set1 and set2 without materializing the union.
func union2by2Cardinality(set1 []uint16, set2 []uint16) int {
	if len(set2) == 0 {
		return len(set1)
	}
	if len(set1) == 0 {
		return len(set2)
	}
	pos, k1, k2 := 0, 0, 0
	s1, s2 := set1[k1], set2[k2]
	for {
		switch {
		case s1 < s2:
			pos++
			k1++
			if k1 >= len(set1) {
				return pos + len(set2) - k2
			}
			s1 = set1[k1]
		case s1 == s2:
			// A value shared by both sets is counted once.
			pos++
			k1++
			k2++
			if k1 >= len(set1) {
				return pos + len(set2) - k2
			}
			if k2 >= len(set2) {
				return pos + len(set1) - k1
			}
			s1 = set1[k1]
			s2 = set2[k2]
		default: // s1 > s2
			pos++
			k2++
			if k2 >= len(set2) {
				return pos + len(set1) - k1
			}
			s2 = set2[k2]
		}
	}
}

// intersection2by2 writes the values common to set1 and set2 into buffer and
// returns how many were written. When one set is more than 64 times longer
// than the other, the galloping variant is used with the shorter set as the
// driver; otherwise a plain two-pointer merge is used.
func intersection2by2(set1 []uint16, set2 []uint16, buffer []uint16) int {
	switch {
	case len(set1)*64 < len(set2):
		return onesidedgallopingintersect2by2(set1, set2, buffer)
	case len(set2)*64 < len(set1):
		return onesidedgallopingintersect2by2(set2, set1, buffer)
	default:
		return localintersect2by2(set1, set2, buffer)
	}
}

// intersection2by2Cardinality returns the number of values common to set1
// and set2, choosing between the galloping and the two-pointer strategy with
// the same 64x length-ratio rule as intersection2by2.
func intersection2by2Cardinality(set1 []uint16, set2 []uint16) int {
	switch {
	case len(set1)*64 < len(set2):
		return onesidedgallopingintersect2by2Cardinality(set1, set2)
	case len(set2)*64 < len(set1):
		return onesidedgallopingintersect2by2Cardinality(set2, set1)
	default:
		return localintersect2by2Cardinality(set1, set2)
	}
}

// intersects2by2 reports whether set1 and set2 share at least one value.
// It stops at the first common value.
func intersects2by2(set1 []uint16, set2 []uint16) bool {
	// could be optimized if one set is much larger than the other one
	if len(set1) == 0 || len(set2) == 0 {
		return false
	}
	k1, k2 := 0, 0
	s1, s2 := set1[k1], set2[k2]
	for {
		// Advance set2 until it reaches or passes s1.
		for s2 < s1 {
			k2++
			if k2 == len(set2) {
				return false
			}
			s2 = set2[k2]
		}
		// Advance set1 until it reaches or passes s2.
		for s1 < s2 {
			k1++
			if k1 == len(set1) {
				return false
			}
			s1 = set1[k1]
		}
		if s1 == s2 {
			return true
		}
	}
}

// localintersect2by2 writes the values common to set1 and set2 into buffer
// using a two-pointer merge and returns how many were written.
//
// buffer is re-sliced up to its capacity; the function panics if the
// capacity is too small to hold the result.
func localintersect2by2(set1 []uint16, set2 []uint16, buffer []uint16) int {
	if len(set1) == 0 || len(set2) == 0 {
		return 0
	}
	k1, k2, pos := 0, 0, 0
	buffer = buffer[:cap(buffer)]
	s1, s2 := set1[k1], set2[k2]
	for {
		// Advance set2 until it reaches or passes s1.
		for s2 < s1 {
			k2++
			if k2 == len(set2) {
				return pos
			}
			s2 = set2[k2]
		}
		// Advance set1 until it reaches or passes s2.
		for s1 < s2 {
			k1++
			if k1 == len(set1) {
				return pos
			}
			s1 = set1[k1]
		}
		if s1 != s2 {
			// set1 overshot s2; let set2 catch up again.
			continue
		}
		buffer[pos] = s1
		pos++
		k1++
		if k1 == len(set1) {
			return pos
		}
		s1 = set1[k1]
		k2++
		if k2 == len(set2) {
			return pos
		}
		s2 = set2[k2]
	}
}

// localintersect2by2Cardinality returns the number of values common to set1
// and set2 using the same two-pointer merge as localintersect2by2, without
// writing the values anywhere.
func localintersect2by2Cardinality(set1 []uint16, set2 []uint16) int {
	if len(set1) == 0 || len(set2) == 0 {
		return 0
	}
	k1, k2, pos := 0, 0, 0
	s1, s2 := set1[k1], set2[k2]
	for {
		// Advance set2 until it reaches or passes s1.
		for s2 < s1 {
			k2++
			if k2 == len(set2) {
				return pos
			}
			s2 = set2[k2]
		}
		// Advance set1 until it reaches or passes s2.
		for s1 < s2 {
			k1++
			if k1 == len(set1) {
				return pos
			}
			s1 = set1[k1]
		}
		if s1 != s2 {
			// set1 overshot s2; let set2 catch up again.
			continue
		}
		pos++
		k1++
		if k1 == len(set1) {
			return pos
		}
		s1 = set1[k1]
		k2++
		if k2 == len(set2) {
			return pos
		}
		s2 = set2[k2]
	}
}

// advanceUntil searches array[pos+1:length] for the first element that is
// >= min and returns its index. It returns length when every element in
// that range is smaller than min.
//
// The search first doubles a span starting at pos+1 until it overshoots,
// then binary-searches inside the last span. If pos+1 >= length the function
// returns pos+1 without touching the array.
func advanceUntil(
	array []uint16,
	pos int,
	length int,
	min uint16) int {
	lower := pos + 1

	if lower >= length || array[lower] >= min {
		return lower
	}

	// Exponential phase: grow the span while its far end is still < min.
	spansize := 1
	for lower+spansize < length && array[lower+spansize] < min {
		spansize *= 2
	}

	// Clamp the far end of the span to the last valid index.
	upper := length - 1
	if lower+spansize < length {
		upper = lower + spansize
	}

	if array[upper] == min {
		return upper
	}
	if array[upper] < min {
		// Even the last candidate is too small: nothing in range is >= min.
		return length
	}

	// We know that the next-smallest span was too small, so the answer lies
	// in (lower, upper] with array[lower] < min < array[upper].
	lower += spansize >> 1

	for lower+1 != upper {
		mid := (lower + upper) >> 1
		switch {
		case array[mid] == min:
			return mid
		case array[mid] < min:
			lower = mid
		default:
			upper = mid
		}
	}
	return upper
}

// onesidedgallopingintersect2by2 writes the values common to smallset and
// largeset into buffer and returns how many were written. It walks smallset
// one element at a time and skips through largeset with advanceUntil, which
// pays off when largeset is much longer than smallset.
//
// An empty smallset or largeset yields 0. buffer is re-sliced up to its
// capacity; the function panics if the capacity is too small to hold the
// result.
func onesidedgallopingintersect2by2(
	smallset []uint16,
	largeset []uint16,
	buffer []uint16) int {
	// Guard both inputs: the first elements of each are read right below.
	if len(smallset) == 0 || len(largeset) == 0 {
		return 0
	}
	buffer = buffer[:cap(buffer)]
	k1, k2, pos := 0, 0, 0
	s1, s2 := largeset[k1], smallset[k2]
	for {
		if s1 < s2 {
			k1 = advanceUntil(largeset, k1, len(largeset), s2)
			if k1 == len(largeset) {
				return pos
			}
			s1 = largeset[k1]
		}
		if s2 < s1 {
			// s2 is absent from largeset; move on to the next candidate.
			k2++
			if k2 == len(smallset) {
				return pos
			}
			s2 = smallset[k2]
			continue
		}
		// s1 == s2: record the match and advance both sides.
		buffer[pos] = s2
		pos++
		k2++
		if k2 == len(smallset) {
			return pos
		}
		s2 = smallset[k2]
		k1 = advanceUntil(largeset, k1, len(largeset), s2)
		if k1 == len(largeset) {
			return pos
		}
		s1 = largeset[k1]
	}
}

// onesidedgallopingintersect2by2Cardinality returns the number of values
// common to smallset and largeset using the same galloping walk as
// onesidedgallopingintersect2by2, without writing the values anywhere.
// An empty smallset or largeset yields 0.
func onesidedgallopingintersect2by2Cardinality(
	smallset []uint16,
	largeset []uint16) int {
	// Guard both inputs: the first elements of each are read right below.
	if len(smallset) == 0 || len(largeset) == 0 {
		return 0
	}
	k1, k2, pos := 0, 0, 0
	s1, s2 := largeset[k1], smallset[k2]
	for {
		if s1 < s2 {
			k1 = advanceUntil(largeset, k1, len(largeset), s2)
			if k1 == len(largeset) {
				return pos
			}
			s1 = largeset[k1]
		}
		if s2 < s1 {
			// s2 is absent from largeset; move on to the next candidate.
			k2++
			if k2 == len(smallset) {
				return pos
			}
			s2 = smallset[k2]
			continue
		}
		// s1 == s2: count the match and advance both sides.
		pos++
		k2++
		if k2 == len(smallset) {
			return pos
		}
		s2 = smallset[k2]
		k1 = advanceUntil(largeset, k1, len(largeset), s2)
		if k1 == len(largeset) {
			return pos
		}
		s1 = largeset[k1]
	}
}

// binarySearch looks for ikey in array. It returns the index of ikey when it
// is present; otherwise it returns -(i + 1), where i is the index at which
// ikey would have to be inserted to keep the array ordered.
//
// The range is halved until fewer than 17 candidates remain, and the
// remainder is scanned linearly.
func binarySearch(array []uint16, ikey uint16) int {
	low := 0
	high := len(array) - 1
	for low+16 <= high {
		middleIndex := int(uint32(low+high) >> 1)
		middleValue := array[middleIndex]
		switch {
		case middleValue < ikey:
			low = middleIndex + 1
		case middleValue > ikey:
			high = middleIndex - 1
		default:
			return middleIndex
		}
	}
	for ; low <= high; low++ {
		val := array[low]
		if val >= ikey {
			if val == ikey {
				return low
			}
			break
		}
	}
	return -(low + 1)
}

// union2by2 writes the union of set1 and set2 into buffer, emitting a value
// present in both only once, and returns the number of elements in the
// union.
//
// buffer is re-sliced up to its capacity and must be large enough to hold
// the whole result.
func union2by2(set1 []uint16, set2 []uint16, buffer []uint16) int {
	if len(set2) == 0 {
		buffer = buffer[:len(set1)]
		copy(buffer, set1)
		return len(set1)
	}
	if len(set1) == 0 {
		buffer = buffer[:len(set2)]
		copy(buffer, set2)
		return len(set2)
	}
	pos, k1, k2 := 0, 0, 0
	s1, s2 := set1[k1], set2[k2]
	buffer = buffer[:cap(buffer)]
	for {
		switch {
		case s1 < s2:
			buffer[pos] = s1
			pos++
			k1++
			if k1 >= len(set1) {
				copy(buffer[pos:], set2[k2:])
				return pos + len(set2) - k2
			}
			s1 = set1[k1]
		case s1 == s2:
			buffer[pos] = s1
			pos++
			k1++
			k2++
			if k1 >= len(set1) {
				copy(buffer[pos:], set2[k2:])
				return pos + len(set2) - k2
			}
			if k2 >= len(set2) {
				copy(buffer[pos:], set1[k1:])
				return pos + len(set1) - k1
			}
			s1 = set1[k1]
			s2 = set2[k2]
		default: // s1 > s2
			buffer[pos] = s2
			pos++
			k2++
			if k2 >= len(set2) {
				copy(buffer[pos:], set1[k1:])
				return pos + len(set1) - k1
			}
			s2 = set2[k2]
		}
	}
}