github.com/coyove/sdss@v0.0.0-20231129015646-c2ec58cca6a2/contrib/roaring/runcontainer.go (about) 1 package roaring 2 3 // 4 // Copyright (c) 2016 by the roaring authors. 5 // Licensed under the Apache License, Version 2.0. 6 // 7 // We derive a few lines of code from the sort.Search 8 // function in the golang standard library. That function 9 // is Copyright 2009 The Go Authors, and licensed 10 // under the following BSD-style license. 11 /* 12 Copyright (c) 2009 The Go Authors. All rights reserved. 13 14 Redistribution and use in source and binary forms, with or without 15 modification, are permitted provided that the following conditions are 16 met: 17 18 * Redistributions of source code must retain the above copyright 19 notice, this list of conditions and the following disclaimer. 20 * Redistributions in binary form must reproduce the above 21 copyright notice, this list of conditions and the following disclaimer 22 in the documentation and/or other materials provided with the 23 distribution. 24 * Neither the name of Google Inc. nor the names of its 25 contributors may be used to endorse or promote products derived from 26 this software without specific prior written permission. 27 28 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 29 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 30 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 31 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 32 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 33 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 34 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 35 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 36 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 37 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 38 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
39 */ 40 41 import ( 42 "fmt" 43 "sort" 44 "unsafe" 45 ) 46 47 // runContainer16 does run-length encoding of sets of 48 // uint16 integers. 49 type runContainer16 struct { 50 iv []interval16 51 } 52 53 // interval16 is the internal to runContainer16 54 // structure that maintains the individual [start, last] 55 // closed intervals. 56 type interval16 struct { 57 start uint16 58 length uint16 // length minus 1 59 } 60 61 func newInterval16Range(start, last uint16) interval16 { 62 if last < start { 63 panic(fmt.Sprintf("last (%d) cannot be smaller than start (%d)", last, start)) 64 } 65 66 return interval16{ 67 start, 68 last - start, 69 } 70 } 71 72 // runlen returns the count of integers in the interval. 73 func (iv interval16) runlen() int { 74 return int(iv.length) + 1 75 } 76 77 func (iv interval16) last() uint16 { 78 return iv.start + iv.length 79 } 80 81 // String produces a human viewable string of the contents. 82 func (iv interval16) String() string { 83 return fmt.Sprintf("[%d, %d]", iv.start, iv.length) 84 } 85 86 func ivalString16(iv []interval16) string { 87 var s string 88 var j int 89 var p interval16 90 for j, p = range iv { 91 s += fmt.Sprintf("%v:[%d, %d], ", j, p.start, p.last()) 92 } 93 return s 94 } 95 96 // String produces a human viewable string of the contents. 97 func (rc *runContainer16) String() string { 98 if len(rc.iv) == 0 { 99 return "runContainer16{}" 100 } 101 is := ivalString16(rc.iv) 102 return `runContainer16{` + is + `}` 103 } 104 105 // uint16Slice is a sort.Sort convenience method 106 type uint16Slice []uint16 107 108 // Len returns the length of p. 109 func (p uint16Slice) Len() int { return len(p) } 110 111 // Less returns p[i] < p[j] 112 func (p uint16Slice) Less(i, j int) bool { return p[i] < p[j] } 113 114 // Swap swaps elements i and j. 115 func (p uint16Slice) Swap(i, j int) { p[i], p[j] = p[j], p[i] } 116 117 // addHelper helps build a runContainer16. 
118 type addHelper16 struct { 119 runstart uint16 120 runlen uint16 121 actuallyAdded uint16 122 m []interval16 123 rc *runContainer16 124 } 125 126 func (ah *addHelper16) storeIval(runstart, runlen uint16) { 127 mi := interval16{start: runstart, length: runlen} 128 ah.m = append(ah.m, mi) 129 } 130 131 func (ah *addHelper16) add(cur, prev uint16, i int) { 132 if cur == prev+1 { 133 ah.runlen++ 134 ah.actuallyAdded++ 135 } else { 136 if cur < prev { 137 panic(fmt.Sprintf("newRunContainer16FromVals sees "+ 138 "unsorted vals; vals[%v]=cur=%v < prev=%v. Sort your vals"+ 139 " before calling us with alreadySorted == true.", i, cur, prev)) 140 } 141 if cur == prev { 142 // ignore duplicates 143 } else { 144 ah.actuallyAdded++ 145 ah.storeIval(ah.runstart, ah.runlen) 146 ah.runstart = cur 147 ah.runlen = 0 148 } 149 } 150 } 151 152 // newRunContainerRange makes a new container made of just the specified closed interval [rangestart,rangelast] 153 func newRunContainer16Range(rangestart uint16, rangelast uint16) *runContainer16 { 154 rc := &runContainer16{} 155 rc.iv = append(rc.iv, newInterval16Range(rangestart, rangelast)) 156 return rc 157 } 158 159 // newRunContainer16FromVals makes a new container from vals. 160 // 161 // For efficiency, vals should be sorted in ascending order. 162 // Ideally vals should not contain duplicates, but we detect and 163 // ignore them. If vals is already sorted in ascending order, then 164 // pass alreadySorted = true. Otherwise, for !alreadySorted, 165 // we will sort vals before creating a runContainer16 of them. 166 // We sort the original vals, so this will change what the 167 // caller sees in vals as a side effect. 
168 func newRunContainer16FromVals(alreadySorted bool, vals ...uint16) *runContainer16 { 169 // keep this in sync with newRunContainer16FromArray below 170 171 rc := &runContainer16{} 172 ah := addHelper16{rc: rc} 173 174 if !alreadySorted { 175 sort.Sort(uint16Slice(vals)) 176 } 177 n := len(vals) 178 var cur, prev uint16 179 switch { 180 case n == 0: 181 // nothing more 182 case n == 1: 183 ah.m = append(ah.m, newInterval16Range(vals[0], vals[0])) 184 ah.actuallyAdded++ 185 default: 186 ah.runstart = vals[0] 187 ah.actuallyAdded++ 188 for i := 1; i < n; i++ { 189 prev = vals[i-1] 190 cur = vals[i] 191 ah.add(cur, prev, i) 192 } 193 ah.storeIval(ah.runstart, ah.runlen) 194 } 195 rc.iv = ah.m 196 return rc 197 } 198 199 // newRunContainer16FromBitmapContainer makes a new run container from bc, 200 // somewhat efficiently. For reference, see the Java 201 // https://github.com/RoaringBitmap/RoaringBitmap/blob/master/src/main/java/org/roaringbitmap/RunContainer.java#L145-L192 202 func newRunContainer16FromBitmapContainer(bc *bitmapContainer) *runContainer16 { 203 204 rc := &runContainer16{} 205 nbrRuns := bc.numberOfRuns() 206 if nbrRuns == 0 { 207 return rc 208 } 209 rc.iv = make([]interval16, nbrRuns) 210 211 longCtr := 0 // index of current long in bitmap 212 curWord := bc.bitmap[0] // its value 213 runCount := 0 214 for { 215 // potentially multiword advance to first 1 bit 216 for curWord == 0 && longCtr < len(bc.bitmap)-1 { 217 longCtr++ 218 curWord = bc.bitmap[longCtr] 219 } 220 221 if curWord == 0 { 222 // wrap up, no more runs 223 return rc 224 } 225 localRunStart := countTrailingZeros(curWord) 226 runStart := localRunStart + 64*longCtr 227 // stuff 1s into number's LSBs 228 curWordWith1s := curWord | (curWord - 1) 229 230 // find the next 0, potentially in a later word 231 runEnd := 0 232 for curWordWith1s == maxWord && longCtr < len(bc.bitmap)-1 { 233 longCtr++ 234 curWordWith1s = bc.bitmap[longCtr] 235 } 236 237 if curWordWith1s == maxWord { 238 // a final 
unterminated run of 1s 239 runEnd = wordSizeInBits + longCtr*64 240 rc.iv[runCount].start = uint16(runStart) 241 rc.iv[runCount].length = uint16(runEnd) - uint16(runStart) - 1 242 return rc 243 } 244 localRunEnd := countTrailingZeros(^curWordWith1s) 245 runEnd = localRunEnd + longCtr*64 246 rc.iv[runCount].start = uint16(runStart) 247 rc.iv[runCount].length = uint16(runEnd) - 1 - uint16(runStart) 248 runCount++ 249 // now, zero out everything right of runEnd. 250 curWord = curWordWith1s & (curWordWith1s + 1) 251 // We've lathered and rinsed, so repeat... 252 } 253 254 } 255 256 // 257 // newRunContainer16FromArray populates a new 258 // runContainer16 from the contents of arr. 259 // 260 func newRunContainer16FromArray(arr *arrayContainer) *runContainer16 { 261 // keep this in sync with newRunContainer16FromVals above 262 263 rc := &runContainer16{} 264 ah := addHelper16{rc: rc} 265 266 n := arr.getCardinality() 267 var cur, prev uint16 268 switch { 269 case n == 0: 270 // nothing more 271 case n == 1: 272 ah.m = append(ah.m, newInterval16Range(arr.content[0], arr.content[0])) 273 ah.actuallyAdded++ 274 default: 275 ah.runstart = arr.content[0] 276 ah.actuallyAdded++ 277 for i := 1; i < n; i++ { 278 prev = arr.content[i-1] 279 cur = arr.content[i] 280 ah.add(cur, prev, i) 281 } 282 ah.storeIval(ah.runstart, ah.runlen) 283 } 284 rc.iv = ah.m 285 return rc 286 } 287 288 // set adds the integers in vals to the set. Vals 289 // must be sorted in increasing order; if not, you should set 290 // alreadySorted to false, and we will sort them in place for you. 291 // (Be aware of this side effect -- it will affect the callers 292 // view of vals). 293 // 294 // If you have a small number of additions to an already 295 // big runContainer16, calling Add() may be faster. 296 func (rc *runContainer16) set(alreadySorted bool, vals ...uint16) { 297 298 rc2 := newRunContainer16FromVals(alreadySorted, vals...) 
299 un := rc.union(rc2) 300 rc.iv = un.iv 301 } 302 303 // canMerge returns true iff the intervals 304 // a and b either overlap or they are 305 // contiguous and so can be merged into 306 // a single interval. 307 func canMerge16(a, b interval16) bool { 308 if int(a.last())+1 < int(b.start) { 309 return false 310 } 311 return int(b.last())+1 >= int(a.start) 312 } 313 314 // haveOverlap differs from canMerge in that 315 // it tells you if the intersection of a 316 // and b would contain an element (otherwise 317 // it would be the empty set, and we return 318 // false). 319 func haveOverlap16(a, b interval16) bool { 320 if int(a.last())+1 <= int(b.start) { 321 return false 322 } 323 return int(b.last())+1 > int(a.start) 324 } 325 326 // mergeInterval16s joins a and b into a 327 // new interval, and panics if it cannot. 328 func mergeInterval16s(a, b interval16) (res interval16) { 329 if !canMerge16(a, b) { 330 panic(fmt.Sprintf("cannot merge %#v and %#v", a, b)) 331 } 332 333 if b.start < a.start { 334 res.start = b.start 335 } else { 336 res.start = a.start 337 } 338 339 if b.last() > a.last() { 340 res.length = b.last() - res.start 341 } else { 342 res.length = a.last() - res.start 343 } 344 345 return 346 } 347 348 // intersectInterval16s returns the intersection 349 // of a and b. The isEmpty flag will be true if 350 // a and b were disjoint. 351 func intersectInterval16s(a, b interval16) (res interval16, isEmpty bool) { 352 if !haveOverlap16(a, b) { 353 isEmpty = true 354 return 355 } 356 if b.start > a.start { 357 res.start = b.start 358 } else { 359 res.start = a.start 360 } 361 362 bEnd := b.last() 363 aEnd := a.last() 364 var resEnd uint16 365 366 if bEnd < aEnd { 367 resEnd = bEnd 368 } else { 369 resEnd = aEnd 370 } 371 res.length = resEnd - res.start 372 return 373 } 374 375 // union merges two runContainer16s, producing 376 // a new runContainer16 with the union of rc and b. 
377 func (rc *runContainer16) union(b *runContainer16) *runContainer16 { 378 379 // rc is also known as 'a' here, but golint insisted we 380 // call it rc for consistency with the rest of the methods. 381 382 var m []interval16 383 384 alim := int(len(rc.iv)) 385 blim := int(len(b.iv)) 386 387 var na int // next from a 388 var nb int // next from b 389 390 // merged holds the current merge output, which might 391 // get additional merges before being appended to m. 392 var merged interval16 393 var mergedUsed bool // is merged being used at the moment? 394 395 var cura interval16 // currently considering this interval16 from a 396 var curb interval16 // currently considering this interval16 from b 397 398 pass := 0 399 for na < alim && nb < blim { 400 pass++ 401 cura = rc.iv[na] 402 curb = b.iv[nb] 403 404 if mergedUsed { 405 mergedUpdated := false 406 if canMerge16(cura, merged) { 407 merged = mergeInterval16s(cura, merged) 408 na = rc.indexOfIntervalAtOrAfter(int(merged.last())+1, na+1) 409 mergedUpdated = true 410 } 411 if canMerge16(curb, merged) { 412 merged = mergeInterval16s(curb, merged) 413 nb = b.indexOfIntervalAtOrAfter(int(merged.last())+1, nb+1) 414 mergedUpdated = true 415 } 416 if !mergedUpdated { 417 // we know that merged is disjoint from cura and curb 418 m = append(m, merged) 419 mergedUsed = false 420 } 421 continue 422 423 } else { 424 // !mergedUsed 425 if !canMerge16(cura, curb) { 426 if cura.start < curb.start { 427 m = append(m, cura) 428 na++ 429 } else { 430 m = append(m, curb) 431 nb++ 432 } 433 } else { 434 merged = mergeInterval16s(cura, curb) 435 mergedUsed = true 436 na = rc.indexOfIntervalAtOrAfter(int(merged.last())+1, na+1) 437 nb = b.indexOfIntervalAtOrAfter(int(merged.last())+1, nb+1) 438 } 439 } 440 } 441 var aDone, bDone bool 442 if na >= alim { 443 aDone = true 444 } 445 if nb >= blim { 446 bDone = true 447 } 448 // finish by merging anything remaining into merged we can: 449 if mergedUsed { 450 if !aDone { 451 aAdds: 452 for 
na < alim { 453 cura = rc.iv[na] 454 if canMerge16(cura, merged) { 455 merged = mergeInterval16s(cura, merged) 456 na = rc.indexOfIntervalAtOrAfter(int(merged.last())+1, na+1) 457 } else { 458 break aAdds 459 } 460 } 461 462 } 463 464 if !bDone { 465 bAdds: 466 for nb < blim { 467 curb = b.iv[nb] 468 if canMerge16(curb, merged) { 469 merged = mergeInterval16s(curb, merged) 470 nb = b.indexOfIntervalAtOrAfter(int(merged.last())+1, nb+1) 471 } else { 472 break bAdds 473 } 474 } 475 476 } 477 478 m = append(m, merged) 479 } 480 if na < alim { 481 m = append(m, rc.iv[na:]...) 482 } 483 if nb < blim { 484 m = append(m, b.iv[nb:]...) 485 } 486 487 res := &runContainer16{iv: m} 488 return res 489 } 490 491 // unionCardinality returns the cardinality of the merger of two runContainer16s, the union of rc and b. 492 func (rc *runContainer16) unionCardinality(b *runContainer16) uint { 493 494 // rc is also known as 'a' here, but golint insisted we 495 // call it rc for consistency with the rest of the methods. 496 answer := uint(0) 497 498 alim := int(len(rc.iv)) 499 blim := int(len(b.iv)) 500 501 var na int // next from a 502 var nb int // next from b 503 504 // merged holds the current merge output, which might 505 // get additional merges before being appended to m. 506 var merged interval16 507 var mergedUsed bool // is merged being used at the moment? 
508 509 var cura interval16 // currently considering this interval16 from a 510 var curb interval16 // currently considering this interval16 from b 511 512 pass := 0 513 for na < alim && nb < blim { 514 pass++ 515 cura = rc.iv[na] 516 curb = b.iv[nb] 517 518 if mergedUsed { 519 mergedUpdated := false 520 if canMerge16(cura, merged) { 521 merged = mergeInterval16s(cura, merged) 522 na = rc.indexOfIntervalAtOrAfter(int(merged.last())+1, na+1) 523 mergedUpdated = true 524 } 525 if canMerge16(curb, merged) { 526 merged = mergeInterval16s(curb, merged) 527 nb = b.indexOfIntervalAtOrAfter(int(merged.last())+1, nb+1) 528 mergedUpdated = true 529 } 530 if !mergedUpdated { 531 // we know that merged is disjoint from cura and curb 532 //m = append(m, merged) 533 answer += uint(merged.last()) - uint(merged.start) + 1 534 mergedUsed = false 535 } 536 continue 537 538 } else { 539 // !mergedUsed 540 if !canMerge16(cura, curb) { 541 if cura.start < curb.start { 542 answer += uint(cura.last()) - uint(cura.start) + 1 543 //m = append(m, cura) 544 na++ 545 } else { 546 answer += uint(curb.last()) - uint(curb.start) + 1 547 //m = append(m, curb) 548 nb++ 549 } 550 } else { 551 merged = mergeInterval16s(cura, curb) 552 mergedUsed = true 553 na = rc.indexOfIntervalAtOrAfter(int(merged.last())+1, na+1) 554 nb = b.indexOfIntervalAtOrAfter(int(merged.last())+1, nb+1) 555 } 556 } 557 } 558 var aDone, bDone bool 559 if na >= alim { 560 aDone = true 561 } 562 if nb >= blim { 563 bDone = true 564 } 565 // finish by merging anything remaining into merged we can: 566 if mergedUsed { 567 if !aDone { 568 aAdds: 569 for na < alim { 570 cura = rc.iv[na] 571 if canMerge16(cura, merged) { 572 merged = mergeInterval16s(cura, merged) 573 na = rc.indexOfIntervalAtOrAfter(int(merged.last())+1, na+1) 574 } else { 575 break aAdds 576 } 577 } 578 579 } 580 581 if !bDone { 582 bAdds: 583 for nb < blim { 584 curb = b.iv[nb] 585 if canMerge16(curb, merged) { 586 merged = mergeInterval16s(curb, merged) 587 nb 
= b.indexOfIntervalAtOrAfter(int(merged.last())+1, nb+1) 588 } else { 589 break bAdds 590 } 591 } 592 593 } 594 595 //m = append(m, merged) 596 answer += uint(merged.last()) - uint(merged.start) + 1 597 } 598 for _, r := range rc.iv[na:] { 599 answer += uint(r.last()) - uint(r.start) + 1 600 } 601 for _, r := range b.iv[nb:] { 602 answer += uint(r.last()) - uint(r.start) + 1 603 } 604 return answer 605 } 606 607 // indexOfIntervalAtOrAfter is a helper for union. 608 func (rc *runContainer16) indexOfIntervalAtOrAfter(key int, startIndex int) int { 609 w, already, _ := rc.searchRange(key, startIndex, 0) 610 if already { 611 return w 612 } 613 return w + 1 614 } 615 616 // intersect returns a new runContainer16 holding the 617 // intersection of rc (also known as 'a') and b. 618 func (rc *runContainer16) intersect(b *runContainer16) *runContainer16 { 619 620 a := rc 621 numa := int(len(a.iv)) 622 numb := int(len(b.iv)) 623 res := &runContainer16{} 624 if numa == 0 || numb == 0 { 625 return res 626 } 627 628 if numa == 1 && numb == 1 { 629 if !haveOverlap16(a.iv[0], b.iv[0]) { 630 return res 631 } 632 } 633 634 var output []interval16 635 636 var acuri int 637 var bcuri int 638 639 astart := int(a.iv[acuri].start) 640 bstart := int(b.iv[bcuri].start) 641 642 var intersection interval16 643 var leftoverstart int 644 var isOverlap, isLeftoverA, isLeftoverB bool 645 var done bool 646 toploop: 647 for acuri < numa && bcuri < numb { 648 649 isOverlap, isLeftoverA, isLeftoverB, leftoverstart, intersection = 650 intersectWithLeftover16(astart, int(a.iv[acuri].last()), bstart, int(b.iv[bcuri].last())) 651 652 if !isOverlap { 653 switch { 654 case astart < bstart: 655 acuri, done = a.findNextIntervalThatIntersectsStartingFrom(acuri+1, bstart) 656 if done { 657 break toploop 658 } 659 astart = int(a.iv[acuri].start) 660 661 case astart > bstart: 662 bcuri, done = b.findNextIntervalThatIntersectsStartingFrom(bcuri+1, astart) 663 if done { 664 break toploop 665 } 666 bstart = 
int(b.iv[bcuri].start) 667 } 668 669 } else { 670 // isOverlap 671 output = append(output, intersection) 672 switch { 673 case isLeftoverA: 674 // note that we change astart without advancing acuri, 675 // since we need to capture any 2ndary intersections with a.iv[acuri] 676 astart = leftoverstart 677 bcuri++ 678 if bcuri >= numb { 679 break toploop 680 } 681 bstart = int(b.iv[bcuri].start) 682 case isLeftoverB: 683 // note that we change bstart without advancing bcuri, 684 // since we need to capture any 2ndary intersections with b.iv[bcuri] 685 bstart = leftoverstart 686 acuri++ 687 if acuri >= numa { 688 break toploop 689 } 690 astart = int(a.iv[acuri].start) 691 default: 692 // neither had leftover, both completely consumed 693 694 // advance to next a interval 695 acuri++ 696 if acuri >= numa { 697 break toploop 698 } 699 astart = int(a.iv[acuri].start) 700 701 // advance to next b interval 702 bcuri++ 703 if bcuri >= numb { 704 break toploop 705 } 706 bstart = int(b.iv[bcuri].start) 707 } 708 } 709 } // end for toploop 710 711 if len(output) == 0 { 712 return res 713 } 714 715 res.iv = output 716 return res 717 } 718 719 // intersectCardinality returns the cardinality of the 720 // intersection of rc (also known as 'a') and b. 
721 func (rc *runContainer16) intersectCardinality(b *runContainer16) int { 722 answer := int(0) 723 724 a := rc 725 numa := int(len(a.iv)) 726 numb := int(len(b.iv)) 727 if numa == 0 || numb == 0 { 728 return 0 729 } 730 731 if numa == 1 && numb == 1 { 732 if !haveOverlap16(a.iv[0], b.iv[0]) { 733 return 0 734 } 735 } 736 737 var acuri int 738 var bcuri int 739 740 astart := int(a.iv[acuri].start) 741 bstart := int(b.iv[bcuri].start) 742 743 var intersection interval16 744 var leftoverstart int 745 var isOverlap, isLeftoverA, isLeftoverB bool 746 var done bool 747 pass := 0 748 toploop: 749 for acuri < numa && bcuri < numb { 750 pass++ 751 752 isOverlap, isLeftoverA, isLeftoverB, leftoverstart, intersection = 753 intersectWithLeftover16(astart, int(a.iv[acuri].last()), bstart, int(b.iv[bcuri].last())) 754 755 if !isOverlap { 756 switch { 757 case astart < bstart: 758 acuri, done = a.findNextIntervalThatIntersectsStartingFrom(acuri+1, bstart) 759 if done { 760 break toploop 761 } 762 astart = int(a.iv[acuri].start) 763 764 case astart > bstart: 765 bcuri, done = b.findNextIntervalThatIntersectsStartingFrom(bcuri+1, astart) 766 if done { 767 break toploop 768 } 769 bstart = int(b.iv[bcuri].start) 770 } 771 772 } else { 773 // isOverlap 774 answer += int(intersection.last()) - int(intersection.start) + 1 775 switch { 776 case isLeftoverA: 777 // note that we change astart without advancing acuri, 778 // since we need to capture any 2ndary intersections with a.iv[acuri] 779 astart = leftoverstart 780 bcuri++ 781 if bcuri >= numb { 782 break toploop 783 } 784 bstart = int(b.iv[bcuri].start) 785 case isLeftoverB: 786 // note that we change bstart without advancing bcuri, 787 // since we need to capture any 2ndary intersections with b.iv[bcuri] 788 bstart = leftoverstart 789 acuri++ 790 if acuri >= numa { 791 break toploop 792 } 793 astart = int(a.iv[acuri].start) 794 default: 795 // neither had leftover, both completely consumed 796 797 // advance to next a interval 798 
acuri++ 799 if acuri >= numa { 800 break toploop 801 } 802 astart = int(a.iv[acuri].start) 803 804 // advance to next b interval 805 bcuri++ 806 if bcuri >= numb { 807 break toploop 808 } 809 bstart = int(b.iv[bcuri].start) 810 } 811 } 812 } // end for toploop 813 814 return answer 815 } 816 817 // get returns true iff key is in the container. 818 func (rc *runContainer16) contains(key uint16) bool { 819 _, in, _ := rc.search(int(key)) 820 return in 821 } 822 823 // numIntervals returns the count of intervals in the container. 824 func (rc *runContainer16) numIntervals() int { 825 return len(rc.iv) 826 } 827 828 // searchRange returns alreadyPresent to indicate if the 829 // key is already in one of our interval16s. 830 // 831 // If key is alreadyPresent, then whichInterval16 tells 832 // you where. 833 // 834 // If key is not already present, then whichInterval16 is 835 // set as follows: 836 // 837 // a) whichInterval16 == len(rc.iv)-1 if key is beyond our 838 // last interval16 in rc.iv; 839 // 840 // b) whichInterval16 == -1 if key is before our first 841 // interval16 in rc.iv; 842 // 843 // c) whichInterval16 is set to the minimum index of rc.iv 844 // which comes strictly before the key; 845 // so rc.iv[whichInterval16].last < key, 846 // and if whichInterval16+1 exists, then key < rc.iv[whichInterval16+1].start 847 // (Note that whichInterval16+1 won't exist when 848 // whichInterval16 is the last interval.) 849 // 850 // runContainer16.search always returns whichInterval16 < len(rc.iv). 851 // 852 // The search space is from startIndex to endxIndex. If endxIndex is set to zero, then there 853 // no upper bound. 
854 // 855 func (rc *runContainer16) searchRange(key int, startIndex int, endxIndex int) (whichInterval16 int, alreadyPresent bool, numCompares int) { 856 n := int(len(rc.iv)) 857 if n == 0 { 858 return -1, false, 0 859 } 860 if endxIndex == 0 { 861 endxIndex = n 862 } 863 864 // sort.Search returns the smallest index i 865 // in [0, n) at which f(i) is true, assuming that on the range [0, n), 866 // f(i) == true implies f(i+1) == true. 867 // If there is no such index, Search returns n. 868 869 // For correctness, this began as verbatim snippet from 870 // sort.Search in the Go standard lib. 871 // We inline our comparison function for speed, and 872 // annotate with numCompares 873 // to observe and test that extra bounds are utilized. 874 i, j := startIndex, endxIndex 875 for i < j { 876 h := i + (j-i)/2 // avoid overflow when computing h as the bisector 877 // i <= h < j 878 numCompares++ 879 if !(key < int(rc.iv[h].start)) { 880 i = h + 1 881 } else { 882 j = h 883 } 884 } 885 below := i 886 // end std lib snippet. 887 888 // The above is a simple in-lining and annotation of: 889 /* below := sort.Search(n, 890 func(i int) bool { 891 return key < rc.iv[i].start 892 }) 893 */ 894 whichInterval16 = below - 1 895 896 if below == n { 897 // all falses => key is >= start of all interval16s 898 // ... so does it belong to the last interval16? 899 if key < int(rc.iv[n-1].last())+1 { 900 // yes, it belongs to the last interval16 901 alreadyPresent = true 902 return 903 } 904 // no, it is beyond the last interval16. 905 // leave alreadyPreset = false 906 return 907 } 908 909 // INVAR: key is below rc.iv[below] 910 if below == 0 { 911 // key is before the first first interval16. 912 // leave alreadyPresent = false 913 return 914 } 915 916 // INVAR: key is >= rc.iv[below-1].start and 917 // key is < rc.iv[below].start 918 919 // is key in below-1 interval16? 920 if key >= int(rc.iv[below-1].start) && key < int(rc.iv[below-1].last())+1 { 921 // yes, it is. 
key is in below-1 interval16. 922 alreadyPresent = true 923 return 924 } 925 926 // INVAR: key >= rc.iv[below-1].endx && key < rc.iv[below].start 927 // leave alreadyPresent = false 928 return 929 } 930 931 // search returns alreadyPresent to indicate if the 932 // key is already in one of our interval16s. 933 // 934 // If key is alreadyPresent, then whichInterval16 tells 935 // you where. 936 // 937 // If key is not already present, then whichInterval16 is 938 // set as follows: 939 // 940 // a) whichInterval16 == len(rc.iv)-1 if key is beyond our 941 // last interval16 in rc.iv; 942 // 943 // b) whichInterval16 == -1 if key is before our first 944 // interval16 in rc.iv; 945 // 946 // c) whichInterval16 is set to the minimum index of rc.iv 947 // which comes strictly before the key; 948 // so rc.iv[whichInterval16].last < key, 949 // and if whichInterval16+1 exists, then key < rc.iv[whichInterval16+1].start 950 // (Note that whichInterval16+1 won't exist when 951 // whichInterval16 is the last interval.) 952 // 953 // runContainer16.search always returns whichInterval16 < len(rc.iv). 954 // 955 func (rc *runContainer16) search(key int) (whichInterval16 int, alreadyPresent bool, numCompares int) { 956 return rc.searchRange(key, 0, 0) 957 } 958 959 // getCardinality returns the count of the integers stored in the 960 // runContainer16. The running complexity depends on the size 961 // of the container. 962 func (rc *runContainer16) getCardinality() int { 963 // have to compute it 964 n := 0 965 for _, p := range rc.iv { 966 n += p.runlen() 967 } 968 return n 969 } 970 971 // isEmpty returns true if the container is empty. 972 // It runs in constant time. 973 func (rc *runContainer16) isEmpty() bool { 974 return len(rc.iv) == 0 975 } 976 977 // AsSlice decompresses the contents into a []uint16 slice. 
978 func (rc *runContainer16) AsSlice() []uint16 { 979 s := make([]uint16, rc.getCardinality()) 980 j := 0 981 for _, p := range rc.iv { 982 for i := p.start; i <= p.last(); i++ { 983 s[j] = i 984 j++ 985 } 986 } 987 return s 988 } 989 990 // newRunContainer16 creates an empty run container. 991 func newRunContainer16() *runContainer16 { 992 return &runContainer16{} 993 } 994 995 // newRunContainer16CopyIv creates a run container, initializing 996 // with a copy of the supplied iv slice. 997 // 998 func newRunContainer16CopyIv(iv []interval16) *runContainer16 { 999 rc := &runContainer16{ 1000 iv: make([]interval16, len(iv)), 1001 } 1002 copy(rc.iv, iv) 1003 return rc 1004 } 1005 1006 func (rc *runContainer16) Clone() *runContainer16 { 1007 rc2 := newRunContainer16CopyIv(rc.iv) 1008 return rc2 1009 } 1010 1011 // newRunContainer16TakeOwnership returns a new runContainer16 1012 // backed by the provided iv slice, which we will 1013 // assume exclusive control over from now on. 1014 // 1015 func newRunContainer16TakeOwnership(iv []interval16) *runContainer16 { 1016 rc := &runContainer16{ 1017 iv: iv, 1018 } 1019 return rc 1020 } 1021 1022 const baseRc16Size = int(unsafe.Sizeof(runContainer16{})) 1023 const perIntervalRc16Size = int(unsafe.Sizeof(interval16{})) 1024 1025 const baseDiskRc16Size = int(unsafe.Sizeof(uint16(0))) 1026 1027 // see also runContainer16SerializedSizeInBytes(numRuns int) int 1028 1029 // getSizeInBytes returns the number of bytes of memory 1030 // required by this runContainer16. 1031 func (rc *runContainer16) getSizeInBytes() int { 1032 return perIntervalRc16Size*len(rc.iv) + baseRc16Size 1033 } 1034 1035 // runContainer16SerializedSizeInBytes returns the number of bytes of disk 1036 // required to hold numRuns in a runContainer16. 1037 func runContainer16SerializedSizeInBytes(numRuns int) int { 1038 return perIntervalRc16Size*numRuns + baseDiskRc16Size 1039 } 1040 1041 // Add adds a single value k to the set. 
1042 func (rc *runContainer16) Add(k uint16) (wasNew bool) { 1043 // TODO comment from runContainer16.java: 1044 // it might be better and simpler to do return 1045 // toBitmapOrArrayContainer(getCardinality()).add(k) 1046 // but note that some unit tests use this method to build up test 1047 // runcontainers without calling runOptimize 1048 1049 k64 := int(k) 1050 1051 index, present, _ := rc.search(k64) 1052 if present { 1053 return // already there 1054 } 1055 wasNew = true 1056 1057 n := int(len(rc.iv)) 1058 if index == -1 { 1059 // we may need to extend the first run 1060 if n > 0 { 1061 if rc.iv[0].start == k+1 { 1062 rc.iv[0].start = k 1063 rc.iv[0].length++ 1064 return 1065 } 1066 } 1067 // nope, k stands alone, starting the new first interval16. 1068 rc.iv = append([]interval16{newInterval16Range(k, k)}, rc.iv...) 1069 return 1070 } 1071 1072 // are we off the end? handle both index == n and index == n-1: 1073 if index >= n-1 { 1074 if int(rc.iv[n-1].last())+1 == k64 { 1075 rc.iv[n-1].length++ 1076 return 1077 } 1078 rc.iv = append(rc.iv, newInterval16Range(k, k)) 1079 return 1080 } 1081 1082 // INVAR: index and index+1 both exist, and k goes between them. 1083 // 1084 // Now: add k into the middle, 1085 // possibly fusing with index or index+1 interval16 1086 // and possibly resulting in fusing of two interval16s 1087 // that had a one integer gap. 1088 1089 left := index 1090 right := index + 1 1091 1092 // are we fusing left and right by adding k? 1093 if int(rc.iv[left].last())+1 == k64 && int(rc.iv[right].start) == k64+1 { 1094 // fuse into left 1095 rc.iv[left].length = rc.iv[right].last() - rc.iv[left].start 1096 // remove redundant right 1097 rc.iv = append(rc.iv[:left+1], rc.iv[right+1:]...) 1098 return 1099 } 1100 1101 // are we an addition to left? 1102 if int(rc.iv[left].last())+1 == k64 { 1103 // yes 1104 rc.iv[left].length++ 1105 return 1106 } 1107 1108 // are we an addition to right? 
1109 if int(rc.iv[right].start) == k64+1 { 1110 // yes 1111 rc.iv[right].start = k 1112 rc.iv[right].length++ 1113 return 1114 } 1115 1116 // k makes a standalone new interval16, inserted in the middle 1117 tail := append([]interval16{newInterval16Range(k, k)}, rc.iv[right:]...) 1118 rc.iv = append(rc.iv[:left+1], tail...) 1119 return 1120 } 1121 1122 // runIterator16 advice: you must call hasNext() 1123 // before calling next()/peekNext() to insure there are contents. 1124 type runIterator16 struct { 1125 rc *runContainer16 1126 curIndex int 1127 curPosInIndex uint16 1128 } 1129 1130 // newRunIterator16 returns a new empty run container. 1131 func (rc *runContainer16) newRunIterator16() *runIterator16 { 1132 return &runIterator16{rc: rc, curIndex: 0, curPosInIndex: 0} 1133 } 1134 1135 func (rc *runContainer16) iterate(cb func(x uint16) bool) bool { 1136 iterator := runIterator16{rc, 0, 0} 1137 1138 for iterator.hasNext() { 1139 if !cb(iterator.next()) { 1140 return false 1141 } 1142 } 1143 1144 return true 1145 } 1146 1147 // hasNext returns false if calling next will panic. It 1148 // returns true when there is at least one more value 1149 // available in the iteration sequence. 1150 func (ri *runIterator16) hasNext() bool { 1151 return int(len(ri.rc.iv)) > ri.curIndex+1 || 1152 (int(len(ri.rc.iv)) == ri.curIndex+1 && ri.rc.iv[ri.curIndex].length >= ri.curPosInIndex) 1153 } 1154 1155 // next returns the next value in the iteration sequence. 
1156 func (ri *runIterator16) next() uint16 { 1157 next := ri.rc.iv[ri.curIndex].start + ri.curPosInIndex 1158 1159 if ri.curPosInIndex == ri.rc.iv[ri.curIndex].length { 1160 ri.curPosInIndex = 0 1161 ri.curIndex++ 1162 } else { 1163 ri.curPosInIndex++ 1164 } 1165 1166 return next 1167 } 1168 1169 // peekNext returns the next value in the iteration sequence without advancing the iterator 1170 func (ri *runIterator16) peekNext() uint16 { 1171 return ri.rc.iv[ri.curIndex].start + ri.curPosInIndex 1172 } 1173 1174 // advanceIfNeeded advances as long as the next value is smaller than minval 1175 func (ri *runIterator16) advanceIfNeeded(minval uint16) { 1176 if !ri.hasNext() || ri.peekNext() >= minval { 1177 return 1178 } 1179 1180 // interval cannot be -1 because of minval > peekNext 1181 interval, isPresent, _ := ri.rc.searchRange(int(minval), ri.curIndex, int(len(ri.rc.iv))) 1182 1183 // if the minval is present, set the curPosIndex at the right position 1184 if isPresent { 1185 ri.curIndex = interval 1186 ri.curPosInIndex = minval - ri.rc.iv[ri.curIndex].start 1187 } else { 1188 // otherwise interval is set to to the minimum index of rc.iv 1189 // which comes strictly before the key, that's why we set the next interval 1190 ri.curIndex = interval + 1 1191 ri.curPosInIndex = 0 1192 } 1193 } 1194 1195 // runReverseIterator16 advice: you must call hasNext() 1196 // before calling next() to insure there are contents. 1197 type runReverseIterator16 struct { 1198 rc *runContainer16 1199 curIndex int // index into rc.iv 1200 curPosInIndex uint16 // offset in rc.iv[curIndex] 1201 } 1202 1203 // newRunReverseIterator16 returns a new empty run iterator. 
1204 func (rc *runContainer16) newRunReverseIterator16() *runReverseIterator16 { 1205 index := int(len(rc.iv)) - 1 1206 pos := uint16(0) 1207 1208 if index >= 0 { 1209 pos = rc.iv[index].length 1210 } 1211 1212 return &runReverseIterator16{ 1213 rc: rc, 1214 curIndex: index, 1215 curPosInIndex: pos, 1216 } 1217 } 1218 1219 // hasNext returns false if calling next will panic. It 1220 // returns true when there is at least one more value 1221 // available in the iteration sequence. 1222 func (ri *runReverseIterator16) hasNext() bool { 1223 return ri.curIndex > 0 || ri.curIndex == 0 && ri.curPosInIndex >= 0 1224 } 1225 1226 // next returns the next value in the iteration sequence. 1227 func (ri *runReverseIterator16) next() uint16 { 1228 next := ri.rc.iv[ri.curIndex].start + ri.curPosInIndex 1229 1230 if ri.curPosInIndex > 0 { 1231 ri.curPosInIndex-- 1232 } else { 1233 ri.curIndex-- 1234 1235 if ri.curIndex >= 0 { 1236 ri.curPosInIndex = ri.rc.iv[ri.curIndex].length 1237 } 1238 } 1239 1240 return next 1241 } 1242 1243 func (rc *runContainer16) newManyRunIterator16() *runIterator16 { 1244 return rc.newRunIterator16() 1245 } 1246 1247 // hs are the high bits to include to avoid needing to reiterate over the buffer in NextMany 1248 func (ri *runIterator16) nextMany(hs uint32, buf []uint32) int { 1249 n := 0 1250 1251 if !ri.hasNext() { 1252 return n 1253 } 1254 1255 // start and end are inclusive 1256 for n < len(buf) { 1257 moreVals := 0 1258 1259 if ri.rc.iv[ri.curIndex].length >= ri.curPosInIndex { 1260 // add as many as you can from this seq 1261 moreVals = minOfInt(int(ri.rc.iv[ri.curIndex].length-ri.curPosInIndex)+1, len(buf)-n) 1262 base := uint32(ri.rc.iv[ri.curIndex].start+ri.curPosInIndex) | hs 1263 1264 // allows BCE 1265 buf2 := buf[n : n+moreVals] 1266 for i := range buf2 { 1267 buf2[i] = base + uint32(i) 1268 } 1269 1270 // update values 1271 n += moreVals 1272 } 1273 1274 if moreVals+int(ri.curPosInIndex) > int(ri.rc.iv[ri.curIndex].length) { 1275 
ri.curPosInIndex = 0 1276 ri.curIndex++ 1277 1278 if ri.curIndex == int(len(ri.rc.iv)) { 1279 break 1280 } 1281 } else { 1282 ri.curPosInIndex += uint16(moreVals) //moreVals always fits in uint16 1283 } 1284 } 1285 1286 return n 1287 } 1288 1289 func (ri *runIterator16) nextMany64(hs uint64, buf []uint64) int { 1290 n := 0 1291 1292 if !ri.hasNext() { 1293 return n 1294 } 1295 1296 // start and end are inclusive 1297 for n < len(buf) { 1298 moreVals := 0 1299 1300 if ri.rc.iv[ri.curIndex].length >= ri.curPosInIndex { 1301 // add as many as you can from this seq 1302 moreVals = minOfInt(int(ri.rc.iv[ri.curIndex].length-ri.curPosInIndex)+1, len(buf)-n) 1303 base := uint64(ri.rc.iv[ri.curIndex].start+ri.curPosInIndex) | hs 1304 1305 // allows BCE 1306 buf2 := buf[n : n+moreVals] 1307 for i := range buf2 { 1308 buf2[i] = base + uint64(i) 1309 } 1310 1311 // update values 1312 n += moreVals 1313 } 1314 1315 if moreVals+int(ri.curPosInIndex) > int(ri.rc.iv[ri.curIndex].length) { 1316 ri.curPosInIndex = 0 1317 ri.curIndex++ 1318 1319 if ri.curIndex == int(len(ri.rc.iv)) { 1320 break 1321 } 1322 } else { 1323 ri.curPosInIndex += uint16(moreVals) //moreVals always fits in uint16 1324 } 1325 } 1326 1327 return n 1328 } 1329 1330 // remove removes key from the container. 1331 func (rc *runContainer16) removeKey(key uint16) (wasPresent bool) { 1332 1333 var index int 1334 index, wasPresent, _ = rc.search(int(key)) 1335 if !wasPresent { 1336 return // already removed, nothing to do. 1337 } 1338 pos := key - rc.iv[index].start 1339 rc.deleteAt(&index, &pos) 1340 return 1341 } 1342 1343 // internal helper functions 1344 1345 func (rc *runContainer16) deleteAt(curIndex *int, curPosInIndex *uint16) { 1346 ci := *curIndex 1347 pos := *curPosInIndex 1348 1349 // are we first, last, or in the middle of our interval16? 1350 switch { 1351 case pos == 0: 1352 if int(rc.iv[ci].length) == 0 { 1353 // our interval disappears 1354 rc.iv = append(rc.iv[:ci], rc.iv[ci+1:]...) 
1355 // curIndex stays the same, since the delete did 1356 // the advance for us. 1357 *curPosInIndex = 0 1358 } else { 1359 rc.iv[ci].start++ // no longer overflowable 1360 rc.iv[ci].length-- 1361 } 1362 case pos == rc.iv[ci].length: 1363 // length 1364 rc.iv[ci].length-- 1365 // our interval16 cannot disappear, else we would have been pos == 0, case first above. 1366 *curPosInIndex-- 1367 // if we leave *curIndex alone, then Next() will work properly even after the delete. 1368 default: 1369 //middle 1370 // split into two, adding an interval16 1371 new0 := newInterval16Range(rc.iv[ci].start, rc.iv[ci].start+*curPosInIndex-1) 1372 1373 new1start := int(rc.iv[ci].start+*curPosInIndex) + 1 1374 if new1start > int(MaxUint16) { 1375 panic("overflow?!?!") 1376 } 1377 new1 := newInterval16Range(uint16(new1start), rc.iv[ci].last()) 1378 tail := append([]interval16{new0, new1}, rc.iv[ci+1:]...) 1379 rc.iv = append(rc.iv[:ci], tail...) 1380 // update curIndex and curPosInIndex 1381 *curIndex++ 1382 *curPosInIndex = 0 1383 } 1384 1385 } 1386 1387 func have4Overlap16(astart, alast, bstart, blast int) bool { 1388 if alast+1 <= bstart { 1389 return false 1390 } 1391 return blast+1 > astart 1392 } 1393 1394 func intersectWithLeftover16(astart, alast, bstart, blast int) (isOverlap, isLeftoverA, isLeftoverB bool, leftoverstart int, intersection interval16) { 1395 if !have4Overlap16(astart, alast, bstart, blast) { 1396 return 1397 } 1398 isOverlap = true 1399 1400 // do the intersection: 1401 if bstart > astart { 1402 intersection.start = uint16(bstart) 1403 } else { 1404 intersection.start = uint16(astart) 1405 } 1406 1407 switch { 1408 case blast < alast: 1409 isLeftoverA = true 1410 leftoverstart = blast + 1 1411 intersection.length = uint16(blast) - intersection.start 1412 case alast < blast: 1413 isLeftoverB = true 1414 leftoverstart = alast + 1 1415 intersection.length = uint16(alast) - intersection.start 1416 default: 1417 // alast == blast 1418 intersection.length = 
uint16(alast) - intersection.start 1419 } 1420 1421 return 1422 } 1423 1424 func (rc *runContainer16) findNextIntervalThatIntersectsStartingFrom(startIndex int, key int) (index int, done bool) { 1425 w, _, _ := rc.searchRange(key, startIndex, 0) 1426 // rc.search always returns w < len(rc.iv) 1427 if w < startIndex { 1428 // not found and comes before lower bound startIndex, 1429 // so just use the lower bound. 1430 if startIndex == int(len(rc.iv)) { 1431 // also this bump up means that we are done 1432 return startIndex, true 1433 } 1434 return startIndex, false 1435 } 1436 1437 return w, false 1438 } 1439 1440 func sliceToString16(m []interval16) string { 1441 s := "" 1442 for i := range m { 1443 s += fmt.Sprintf("%v: %s, ", i, m[i]) 1444 } 1445 return s 1446 } 1447 1448 // helper for invert 1449 func (rc *runContainer16) invertlastInterval(origin uint16, lastIdx int) []interval16 { 1450 cur := rc.iv[lastIdx] 1451 if cur.last() == MaxUint16 { 1452 if cur.start == origin { 1453 return nil // empty container 1454 } 1455 return []interval16{newInterval16Range(origin, cur.start-1)} 1456 } 1457 if cur.start == origin { 1458 return []interval16{newInterval16Range(cur.last()+1, MaxUint16)} 1459 } 1460 // invert splits 1461 return []interval16{ 1462 newInterval16Range(origin, cur.start-1), 1463 newInterval16Range(cur.last()+1, MaxUint16), 1464 } 1465 } 1466 1467 // invert returns a new container (not inplace), that is 1468 // the inversion of rc. For each bit b in rc, the 1469 // returned value has !b 1470 func (rc *runContainer16) invert() *runContainer16 { 1471 ni := len(rc.iv) 1472 var m []interval16 1473 switch ni { 1474 case 0: 1475 return &runContainer16{iv: []interval16{newInterval16Range(0, MaxUint16)}} 1476 case 1: 1477 return &runContainer16{iv: rc.invertlastInterval(0, 0)} 1478 } 1479 var invstart int 1480 ult := ni - 1 1481 for i, cur := range rc.iv { 1482 if i == ult { 1483 // invertlastInteval will add both intervals (b) and (c) in 1484 // diagram below. 
1485 m = append(m, rc.invertlastInterval(uint16(invstart), i)...) 1486 break 1487 } 1488 // INVAR: i and cur are not the last interval, there is a next at i+1 1489 // 1490 // ........[cur.start, cur.last] ...... [next.start, next.last].... 1491 // ^ ^ ^ 1492 // (a) (b) (c) 1493 // 1494 // Now: we add interval (a); but if (a) is empty, for cur.start==0, we skip it. 1495 if cur.start > 0 { 1496 m = append(m, newInterval16Range(uint16(invstart), cur.start-1)) 1497 } 1498 invstart = int(cur.last() + 1) 1499 } 1500 return &runContainer16{iv: m} 1501 } 1502 1503 func (iv interval16) equal(b interval16) bool { 1504 return iv.start == b.start && iv.length == b.length 1505 } 1506 1507 func (iv interval16) isSuperSetOf(b interval16) bool { 1508 return iv.start <= b.start && b.last() <= iv.last() 1509 } 1510 1511 func (iv interval16) subtractInterval(del interval16) (left []interval16, delcount int) { 1512 isect, isEmpty := intersectInterval16s(iv, del) 1513 1514 if isEmpty { 1515 return nil, 0 1516 } 1517 if del.isSuperSetOf(iv) { 1518 return nil, iv.runlen() 1519 } 1520 1521 switch { 1522 case isect.start > iv.start && isect.last() < iv.last(): 1523 new0 := newInterval16Range(iv.start, isect.start-1) 1524 new1 := newInterval16Range(isect.last()+1, iv.last()) 1525 return []interval16{new0, new1}, isect.runlen() 1526 case isect.start == iv.start: 1527 return []interval16{newInterval16Range(isect.last()+1, iv.last())}, isect.runlen() 1528 default: 1529 return []interval16{newInterval16Range(iv.start, isect.start-1)}, isect.runlen() 1530 } 1531 } 1532 1533 func (rc *runContainer16) isubtract(del interval16) { 1534 origiv := make([]interval16, len(rc.iv)) 1535 copy(origiv, rc.iv) 1536 n := int(len(rc.iv)) 1537 if n == 0 { 1538 return // already done. 
1539 } 1540 1541 _, isEmpty := intersectInterval16s(newInterval16Range(rc.iv[0].start, rc.iv[n-1].last()), del) 1542 if isEmpty { 1543 return // done 1544 } 1545 1546 // INVAR there is some intersection between rc and del 1547 istart, startAlready, _ := rc.search(int(del.start)) 1548 ilast, lastAlready, _ := rc.search(int(del.last())) 1549 if istart == -1 { 1550 if ilast == n-1 && !lastAlready { 1551 rc.iv = nil 1552 return 1553 } 1554 } 1555 // some intervals will remain 1556 switch { 1557 case startAlready && lastAlready: 1558 res0, _ := rc.iv[istart].subtractInterval(del) 1559 1560 // would overwrite values in iv b/c res0 can have len 2. so 1561 // write to origiv instead. 1562 lost := 1 + ilast - istart 1563 changeSize := int(len(res0)) - lost 1564 newSize := int(len(rc.iv)) + changeSize 1565 1566 // rc.iv = append(pre, caboose...) 1567 // return 1568 1569 if ilast != istart { 1570 res1, _ := rc.iv[ilast].subtractInterval(del) 1571 res0 = append(res0, res1...) 1572 changeSize = int(len(res0)) - lost 1573 newSize = int(len(rc.iv)) + changeSize 1574 } 1575 switch { 1576 case changeSize < 0: 1577 // shrink 1578 copy(rc.iv[istart+int(len(res0)):], rc.iv[ilast+1:]) 1579 copy(rc.iv[istart:istart+int(len(res0))], res0) 1580 rc.iv = rc.iv[:newSize] 1581 return 1582 case changeSize == 0: 1583 // stay the same 1584 copy(rc.iv[istart:istart+int(len(res0))], res0) 1585 return 1586 default: 1587 // changeSize > 0 is only possible when ilast == istart. 
1588 // Hence we now know: changeSize == 1 and len(res0) == 2 1589 rc.iv = append(rc.iv, interval16{}) 1590 // len(rc.iv) is correct now, no need to rc.iv = rc.iv[:newSize] 1591 1592 // copy the tail into place 1593 copy(rc.iv[ilast+2:], rc.iv[ilast+1:]) 1594 // copy the new item(s) into place 1595 copy(rc.iv[istart:istart+2], res0) 1596 return 1597 } 1598 1599 case !startAlready && !lastAlready: 1600 // we get to discard whole intervals 1601 1602 // from the search() definition: 1603 1604 // if del.start is not present, then istart is 1605 // set as follows: 1606 // 1607 // a) istart == n-1 if del.start is beyond our 1608 // last interval16 in rc.iv; 1609 // 1610 // b) istart == -1 if del.start is before our first 1611 // interval16 in rc.iv; 1612 // 1613 // c) istart is set to the minimum index of rc.iv 1614 // which comes strictly before the del.start; 1615 // so del.start > rc.iv[istart].last, 1616 // and if istart+1 exists, then del.start < rc.iv[istart+1].startx 1617 1618 // if del.last is not present, then ilast is 1619 // set as follows: 1620 // 1621 // a) ilast == n-1 if del.last is beyond our 1622 // last interval16 in rc.iv; 1623 // 1624 // b) ilast == -1 if del.last is before our first 1625 // interval16 in rc.iv; 1626 // 1627 // c) ilast is set to the minimum index of rc.iv 1628 // which comes strictly before the del.last; 1629 // so del.last > rc.iv[ilast].last, 1630 // and if ilast+1 exists, then del.last < rc.iv[ilast+1].start 1631 1632 // INVAR: istart >= 0 1633 pre := rc.iv[:istart+1] 1634 if ilast == n-1 { 1635 rc.iv = pre 1636 return 1637 } 1638 // INVAR: ilast < n-1 1639 lost := ilast - istart 1640 changeSize := -lost 1641 newSize := int(len(rc.iv)) + changeSize 1642 if changeSize != 0 { 1643 copy(rc.iv[ilast+1+changeSize:], rc.iv[ilast+1:]) 1644 } 1645 rc.iv = rc.iv[:newSize] 1646 return 1647 1648 case startAlready && !lastAlready: 1649 // we can only shrink or stay the same size 1650 // i.e. 
we either eliminate the whole interval, 1651 // or just cut off the right side. 1652 res0, _ := rc.iv[istart].subtractInterval(del) 1653 if len(res0) > 0 { 1654 // len(res) must be 1 1655 rc.iv[istart] = res0[0] 1656 } 1657 lost := 1 + (ilast - istart) 1658 changeSize := int(len(res0)) - lost 1659 newSize := int(len(rc.iv)) + changeSize 1660 if changeSize != 0 { 1661 copy(rc.iv[ilast+1+changeSize:], rc.iv[ilast+1:]) 1662 } 1663 rc.iv = rc.iv[:newSize] 1664 return 1665 1666 case !startAlready && lastAlready: 1667 // we can only shrink or stay the same size 1668 res1, _ := rc.iv[ilast].subtractInterval(del) 1669 lost := ilast - istart 1670 changeSize := int(len(res1)) - lost 1671 newSize := int(len(rc.iv)) + changeSize 1672 if changeSize != 0 { 1673 // move the tail first to make room for res1 1674 copy(rc.iv[ilast+1+changeSize:], rc.iv[ilast+1:]) 1675 } 1676 copy(rc.iv[istart+1:], res1) 1677 rc.iv = rc.iv[:newSize] 1678 return 1679 } 1680 } 1681 1682 // compute rc minus b, and return the result as a new value (not inplace). 1683 // port of run_container_andnot from CRoaring... 
1684 // https://github.com/RoaringBitmap/CRoaring/blob/master/src/containers/run.c#L435-L496 1685 func (rc *runContainer16) AndNotRunContainer16(b *runContainer16) *runContainer16 { 1686 1687 if len(b.iv) == 0 || len(rc.iv) == 0 { 1688 return rc 1689 } 1690 1691 dst := newRunContainer16() 1692 apos := 0 1693 bpos := 0 1694 1695 a := rc 1696 1697 astart := a.iv[apos].start 1698 alast := a.iv[apos].last() 1699 bstart := b.iv[bpos].start 1700 blast := b.iv[bpos].last() 1701 1702 alen := len(a.iv) 1703 blen := len(b.iv) 1704 1705 for apos < alen && bpos < blen { 1706 switch { 1707 case alast < bstart: 1708 // output the first run 1709 dst.iv = append(dst.iv, newInterval16Range(astart, alast)) 1710 apos++ 1711 if apos < alen { 1712 astart = a.iv[apos].start 1713 alast = a.iv[apos].last() 1714 } 1715 case blast < astart: 1716 // exit the second run 1717 bpos++ 1718 if bpos < blen { 1719 bstart = b.iv[bpos].start 1720 blast = b.iv[bpos].last() 1721 } 1722 default: 1723 // a: [ ] 1724 // b: [ ] 1725 // alast >= bstart 1726 // blast >= astart 1727 if astart < bstart { 1728 dst.iv = append(dst.iv, newInterval16Range(astart, bstart-1)) 1729 } 1730 if alast > blast { 1731 astart = blast + 1 1732 } else { 1733 apos++ 1734 if apos < alen { 1735 astart = a.iv[apos].start 1736 alast = a.iv[apos].last() 1737 } 1738 } 1739 } 1740 } 1741 if apos < alen { 1742 dst.iv = append(dst.iv, newInterval16Range(astart, alast)) 1743 apos++ 1744 if apos < alen { 1745 dst.iv = append(dst.iv, a.iv[apos:]...) 1746 } 1747 } 1748 1749 return dst 1750 } 1751 1752 func (rc *runContainer16) numberOfRuns() (nr int) { 1753 return len(rc.iv) 1754 } 1755 1756 func (rc *runContainer16) containerType() contype { 1757 return run16Contype 1758 } 1759 1760 func (rc *runContainer16) equals16(srb *runContainer16) bool { 1761 // Check if the containers are the same object. 
1762 if rc == srb { 1763 return true 1764 } 1765 1766 if len(srb.iv) != len(rc.iv) { 1767 return false 1768 } 1769 1770 for i, v := range rc.iv { 1771 if v != srb.iv[i] { 1772 return false 1773 } 1774 } 1775 return true 1776 } 1777 1778 // compile time verify we meet interface requirements 1779 var _ container = &runContainer16{} 1780 1781 func (rc *runContainer16) clone() container { 1782 return newRunContainer16CopyIv(rc.iv) 1783 } 1784 1785 func (rc *runContainer16) minimum() uint16 { 1786 return rc.iv[0].start // assume not empty 1787 } 1788 1789 func (rc *runContainer16) maximum() uint16 { 1790 return rc.iv[len(rc.iv)-1].last() // assume not empty 1791 } 1792 1793 func (rc *runContainer16) isFull() bool { 1794 return (len(rc.iv) == 1) && ((rc.iv[0].start == 0) && (rc.iv[0].last() == MaxUint16)) 1795 } 1796 1797 func (rc *runContainer16) and(a container) container { 1798 if rc.isFull() { 1799 return a.clone() 1800 } 1801 switch c := a.(type) { 1802 case *runContainer16: 1803 return rc.intersect(c) 1804 case *arrayContainer: 1805 return rc.andArray(c) 1806 case *bitmapContainer: 1807 return rc.andBitmapContainer(c) 1808 } 1809 panic("unsupported container type") 1810 } 1811 1812 func (rc *runContainer16) andCardinality(a container) int { 1813 switch c := a.(type) { 1814 case *runContainer16: 1815 return int(rc.intersectCardinality(c)) 1816 case *arrayContainer: 1817 return rc.andArrayCardinality(c) 1818 case *bitmapContainer: 1819 return rc.andBitmapContainerCardinality(c) 1820 } 1821 panic("unsupported container type") 1822 } 1823 1824 // andBitmapContainer finds the intersection of rc and b. 
1825 func (rc *runContainer16) andBitmapContainer(bc *bitmapContainer) container { 1826 bc2 := newBitmapContainerFromRun(rc) 1827 return bc2.andBitmap(bc) 1828 } 1829 1830 func (rc *runContainer16) andArrayCardinality(ac *arrayContainer) int { 1831 pos := 0 1832 answer := 0 1833 maxpos := ac.getCardinality() 1834 if maxpos == 0 { 1835 return 0 // won't happen in actual code 1836 } 1837 v := ac.content[pos] 1838 mainloop: 1839 for _, p := range rc.iv { 1840 for v < p.start { 1841 pos++ 1842 if pos == maxpos { 1843 break mainloop 1844 } 1845 v = ac.content[pos] 1846 } 1847 for v <= p.last() { 1848 answer++ 1849 pos++ 1850 if pos == maxpos { 1851 break mainloop 1852 } 1853 v = ac.content[pos] 1854 } 1855 } 1856 return answer 1857 } 1858 1859 func (rc *runContainer16) iand(a container) container { 1860 if rc.isFull() { 1861 return a.clone() 1862 } 1863 switch c := a.(type) { 1864 case *runContainer16: 1865 return rc.inplaceIntersect(c) 1866 case *arrayContainer: 1867 return rc.andArray(c) 1868 case *bitmapContainer: 1869 return rc.iandBitmapContainer(c) 1870 } 1871 panic("unsupported container type") 1872 } 1873 1874 func (rc *runContainer16) inplaceIntersect(rc2 *runContainer16) container { 1875 sect := rc.intersect(rc2) 1876 *rc = *sect 1877 return rc 1878 } 1879 1880 func (rc *runContainer16) iandBitmapContainer(bc *bitmapContainer) container { 1881 isect := rc.andBitmapContainer(bc) 1882 *rc = *newRunContainer16FromContainer(isect) 1883 return rc 1884 } 1885 1886 func (rc *runContainer16) andArray(ac *arrayContainer) container { 1887 if len(rc.iv) == 0 { 1888 return newArrayContainer() 1889 } 1890 1891 acCardinality := ac.getCardinality() 1892 c := newArrayContainerCapacity(acCardinality) 1893 1894 for rlePos, arrayPos := 0, 0; arrayPos < acCardinality; { 1895 iv := rc.iv[rlePos] 1896 arrayVal := ac.content[arrayPos] 1897 1898 for iv.last() < arrayVal { 1899 rlePos++ 1900 if rlePos == len(rc.iv) { 1901 return c 1902 } 1903 iv = rc.iv[rlePos] 1904 } 1905 1906 if 
iv.start > arrayVal { 1907 arrayPos = advanceUntil(ac.content, arrayPos, len(ac.content), iv.start) 1908 } else { 1909 c.content = append(c.content, arrayVal) 1910 arrayPos++ 1911 } 1912 } 1913 return c 1914 } 1915 1916 func (rc *runContainer16) andNot(a container) container { 1917 switch c := a.(type) { 1918 case *arrayContainer: 1919 return rc.andNotArray(c) 1920 case *bitmapContainer: 1921 return rc.andNotBitmap(c) 1922 case *runContainer16: 1923 return rc.andNotRunContainer16(c) 1924 } 1925 panic("unsupported container type") 1926 } 1927 1928 func (rc *runContainer16) fillLeastSignificant16bits(x []uint32, i int, mask uint32) int { 1929 k := i 1930 var val int 1931 for _, p := range rc.iv { 1932 n := p.runlen() 1933 for j := int(0); j < n; j++ { 1934 val = int(p.start) + j 1935 x[k] = uint32(val) | mask 1936 k++ 1937 } 1938 } 1939 return k 1940 } 1941 1942 func (rc *runContainer16) getShortIterator() shortPeekable { 1943 return rc.newRunIterator16() 1944 } 1945 1946 func (rc *runContainer16) getReverseIterator() shortIterable { 1947 return rc.newRunReverseIterator16() 1948 } 1949 1950 func (rc *runContainer16) getManyIterator() manyIterable { 1951 return rc.newManyRunIterator16() 1952 } 1953 1954 // add the values in the range [firstOfRange, endx). endx 1955 // is still abe to express 2^16 because it is an int not an uint16. 
1956 func (rc *runContainer16) iaddRange(firstOfRange, endx int) container { 1957 1958 if firstOfRange > endx { 1959 panic(fmt.Sprintf("invalid %v = endx > firstOfRange", endx)) 1960 } 1961 if firstOfRange == endx { 1962 return rc 1963 } 1964 addme := newRunContainer16TakeOwnership([]interval16{ 1965 { 1966 start: uint16(firstOfRange), 1967 length: uint16(endx - 1 - firstOfRange), 1968 }, 1969 }) 1970 *rc = *rc.union(addme) 1971 return rc 1972 } 1973 1974 // remove the values in the range [firstOfRange,endx) 1975 func (rc *runContainer16) iremoveRange(firstOfRange, endx int) container { 1976 if firstOfRange > endx { 1977 panic(fmt.Sprintf("request to iremove empty set [%v, %v),"+ 1978 " nothing to do.", firstOfRange, endx)) 1979 } 1980 // empty removal 1981 if firstOfRange == endx { 1982 return rc 1983 } 1984 x := newInterval16Range(uint16(firstOfRange), uint16(endx-1)) 1985 rc.isubtract(x) 1986 return rc 1987 } 1988 1989 // not flip the values in the range [firstOfRange,endx) 1990 func (rc *runContainer16) not(firstOfRange, endx int) container { 1991 if firstOfRange > endx { 1992 panic(fmt.Sprintf("invalid %v = endx > firstOfRange = %v", endx, firstOfRange)) 1993 } 1994 1995 return rc.Not(firstOfRange, endx) 1996 } 1997 1998 // Not flips the values in the range [firstOfRange,endx). 1999 // This is not inplace. Only the returned value has the flipped bits. 2000 // 2001 // Currently implemented as (!A intersect B) union (A minus B), 2002 // where A is rc, and B is the supplied [firstOfRange, endx) interval. 2003 // 2004 // TODO(time optimization): convert this to a single pass 2005 // algorithm by copying AndNotRunContainer16() and modifying it. 2006 // Current routine is correct but 2007 // makes 2 more passes through the arrays than should be 2008 // strictly necessary. Measure both ways though--this may not matter. 
2009 // 2010 func (rc *runContainer16) Not(firstOfRange, endx int) *runContainer16 { 2011 2012 if firstOfRange > endx { 2013 panic(fmt.Sprintf("invalid %v = endx > firstOfRange == %v", endx, firstOfRange)) 2014 } 2015 2016 if firstOfRange >= endx { 2017 return rc.Clone() 2018 } 2019 2020 a := rc 2021 // algo: 2022 // (!A intersect B) union (A minus B) 2023 2024 nota := a.invert() 2025 2026 bs := []interval16{newInterval16Range(uint16(firstOfRange), uint16(endx-1))} 2027 b := newRunContainer16TakeOwnership(bs) 2028 2029 notAintersectB := nota.intersect(b) 2030 2031 aMinusB := a.AndNotRunContainer16(b) 2032 2033 rc2 := notAintersectB.union(aMinusB) 2034 return rc2 2035 } 2036 2037 // equals is now logical equals; it does not require the 2038 // same underlying container type. 2039 func (rc *runContainer16) equals(o container) bool { 2040 srb, ok := o.(*runContainer16) 2041 2042 if !ok { 2043 // maybe value instead of pointer 2044 val, valok := o.(*runContainer16) 2045 if valok { 2046 srb = val 2047 ok = true 2048 } 2049 } 2050 if ok { 2051 // Check if the containers are the same object. 
2052 if rc == srb { 2053 return true 2054 } 2055 2056 if len(srb.iv) != len(rc.iv) { 2057 return false 2058 } 2059 2060 for i, v := range rc.iv { 2061 if v != srb.iv[i] { 2062 return false 2063 } 2064 } 2065 return true 2066 } 2067 2068 // use generic comparison 2069 if o.getCardinality() != rc.getCardinality() { 2070 return false 2071 } 2072 rit := rc.getShortIterator() 2073 bit := o.getShortIterator() 2074 2075 //k := 0 2076 for rit.hasNext() { 2077 if bit.next() != rit.next() { 2078 return false 2079 } 2080 //k++ 2081 } 2082 return true 2083 } 2084 2085 func (rc *runContainer16) iaddReturnMinimized(x uint16) container { 2086 rc.Add(x) 2087 return rc 2088 } 2089 2090 func (rc *runContainer16) iadd(x uint16) (wasNew bool) { 2091 return rc.Add(x) 2092 } 2093 2094 func (rc *runContainer16) iremoveReturnMinimized(x uint16) container { 2095 rc.removeKey(x) 2096 return rc 2097 } 2098 2099 func (rc *runContainer16) iremove(x uint16) bool { 2100 return rc.removeKey(x) 2101 } 2102 2103 func (rc *runContainer16) or(a container) container { 2104 if rc.isFull() { 2105 return rc.clone() 2106 } 2107 switch c := a.(type) { 2108 case *runContainer16: 2109 return rc.union(c) 2110 case *arrayContainer: 2111 return rc.orArray(c) 2112 case *bitmapContainer: 2113 return rc.orBitmapContainer(c) 2114 } 2115 panic("unsupported container type") 2116 } 2117 2118 func (rc *runContainer16) orCardinality(a container) int { 2119 switch c := a.(type) { 2120 case *runContainer16: 2121 return int(rc.unionCardinality(c)) 2122 case *arrayContainer: 2123 return rc.orArrayCardinality(c) 2124 case *bitmapContainer: 2125 return rc.orBitmapContainerCardinality(c) 2126 } 2127 panic("unsupported container type") 2128 } 2129 2130 // orBitmapContainer finds the union of rc and bc. 
2131 func (rc *runContainer16) orBitmapContainer(bc *bitmapContainer) container { 2132 bc2 := newBitmapContainerFromRun(rc) 2133 return bc2.iorBitmap(bc) 2134 } 2135 2136 func (rc *runContainer16) andBitmapContainerCardinality(bc *bitmapContainer) int { 2137 answer := 0 2138 for i := range rc.iv { 2139 answer += bc.getCardinalityInRange(uint(rc.iv[i].start), uint(rc.iv[i].last())+1) 2140 } 2141 //bc.computeCardinality() 2142 return answer 2143 } 2144 2145 func (rc *runContainer16) orBitmapContainerCardinality(bc *bitmapContainer) int { 2146 return rc.getCardinality() + bc.getCardinality() - rc.andBitmapContainerCardinality(bc) 2147 } 2148 2149 // orArray finds the union of rc and ac. 2150 func (rc *runContainer16) orArray(ac *arrayContainer) container { 2151 if ac.isEmpty() { 2152 return rc.clone() 2153 } 2154 if rc.isEmpty() { 2155 return ac.clone() 2156 } 2157 intervals, cardMinusOne := runArrayUnionToRuns(rc, ac) 2158 result := newRunContainer16TakeOwnership(intervals) 2159 if len(intervals) >= 2048 && cardMinusOne >= arrayDefaultMaxSize { 2160 return newBitmapContainerFromRun(result) 2161 } 2162 if len(intervals)*2 > 1+int(cardMinusOne) { 2163 return result.toArrayContainer() 2164 } 2165 return result 2166 } 2167 2168 // orArray finds the union of rc and ac. 
2169 func (rc *runContainer16) orArrayCardinality(ac *arrayContainer) int { 2170 return ac.getCardinality() + rc.getCardinality() - rc.andArrayCardinality(ac) 2171 } 2172 2173 func (rc *runContainer16) ior(a container) container { 2174 if rc.isFull() { 2175 return rc 2176 } 2177 switch c := a.(type) { 2178 case *runContainer16: 2179 return rc.inplaceUnion(c) 2180 case *arrayContainer: 2181 return rc.iorArray(c) 2182 case *bitmapContainer: 2183 return rc.iorBitmapContainer(c) 2184 } 2185 panic("unsupported container type") 2186 } 2187 2188 func (rc *runContainer16) inplaceUnion(rc2 *runContainer16) container { 2189 for _, p := range rc2.iv { 2190 last := int(p.last()) 2191 for i := int(p.start); i <= last; i++ { 2192 rc.Add(uint16(i)) 2193 } 2194 } 2195 return rc 2196 } 2197 2198 func (rc *runContainer16) iorBitmapContainer(bc *bitmapContainer) container { 2199 2200 it := bc.getShortIterator() 2201 for it.hasNext() { 2202 rc.Add(it.next()) 2203 } 2204 return rc 2205 } 2206 2207 func (rc *runContainer16) iorArray(ac *arrayContainer) container { 2208 if rc.isEmpty() { 2209 return ac.clone() 2210 } 2211 if ac.isEmpty() { 2212 return rc 2213 } 2214 var cardMinusOne uint16 2215 //TODO: perform the union algorithm in-place using rc.iv 2216 // this can be done with methods like the in-place array container union 2217 // but maybe lazily moving the remaining elements back. 2218 rc.iv, cardMinusOne = runArrayUnionToRuns(rc, ac) 2219 if len(rc.iv) >= 2048 && cardMinusOne >= arrayDefaultMaxSize { 2220 return newBitmapContainerFromRun(rc) 2221 } 2222 if len(rc.iv)*2 > 1+int(cardMinusOne) { 2223 return rc.toArrayContainer() 2224 } 2225 return rc 2226 } 2227 2228 func runArrayUnionToRuns(rc *runContainer16, ac *arrayContainer) ([]interval16, uint16) { 2229 pos1 := 0 2230 pos2 := 0 2231 length1 := len(ac.content) 2232 length2 := len(rc.iv) 2233 target := make([]interval16, 0, len(rc.iv)) 2234 // have to find the first range 2235 // options are 2236 // 1. 
from array container 2237 // 2. from run container 2238 var previousInterval interval16 2239 var cardMinusOne uint16 2240 if ac.content[0] < rc.iv[0].start { 2241 previousInterval.start = ac.content[0] 2242 previousInterval.length = 0 2243 pos1++ 2244 } else { 2245 previousInterval.start = rc.iv[0].start 2246 previousInterval.length = rc.iv[0].length 2247 pos2++ 2248 } 2249 2250 for pos1 < length1 || pos2 < length2 { 2251 if pos1 < length1 { 2252 s1 := ac.content[pos1] 2253 if s1 <= previousInterval.start+previousInterval.length { 2254 pos1++ 2255 continue 2256 } 2257 if previousInterval.last() < MaxUint16 && previousInterval.last()+1 == s1 { 2258 previousInterval.length++ 2259 pos1++ 2260 continue 2261 } 2262 } 2263 if pos2 < length2 { 2264 range2 := rc.iv[pos2] 2265 if range2.start <= previousInterval.last() || range2.start > 0 && range2.start-1 == previousInterval.last() { 2266 pos2++ 2267 if previousInterval.last() < range2.last() { 2268 previousInterval.length = range2.last() - previousInterval.start 2269 } 2270 continue 2271 } 2272 } 2273 cardMinusOne += previousInterval.length + 1 2274 target = append(target, previousInterval) 2275 if pos2 == length2 || pos1 < length1 && ac.content[pos1] < rc.iv[pos2].start { 2276 previousInterval.start = ac.content[pos1] 2277 previousInterval.length = 0 2278 pos1++ 2279 } else { 2280 previousInterval = rc.iv[pos2] 2281 pos2++ 2282 } 2283 } 2284 cardMinusOne += previousInterval.length 2285 target = append(target, previousInterval) 2286 2287 return target, cardMinusOne 2288 } 2289 2290 // lazyIOR is described (not yet implemented) in 2291 // this nice note from @lemire on 2292 // https://github.com/RoaringBitmap/roaring/pull/70#issuecomment-263613737 2293 // 2294 // Description of lazyOR and lazyIOR from @lemire: 2295 // 2296 // Lazy functions are optional and can be simply 2297 // wrapper around non-lazy functions. 2298 // 2299 // The idea of "laziness" is as follows. 
// It is inspired by the concept of lazy evaluation
// you might be familiar with (functional programming
// and all that). So a roaring bitmap is
// such that all its containers are, in some
// sense, chosen to use as little memory as
// possible. This is nice. Also, all bitsets
// are "cardinality aware" so that you can do
// fast rank/select queries, or query the
// cardinality of the whole bitmap... very fast,
// without latency.
//
// However, imagine that you are aggregating 100
// bitmaps together. So you OR the first two, then OR
// that with the third one and so forth. Clearly,
// intermediate bitmaps don't need to be as
// compressed as possible, right? They can be
// in a "dirty state". You only need the end
// result to be in a nice state... which you
// can achieve by calling repairAfterLazy at the end.
//
// The Java/C code does something special for
// the in-place lazy OR runs. The idea is that
// instead of taking two run containers and
// generating a new one, we actually try to
// do the computation in-place through a
// technique invented by @gssiyankai (pinging him!).
// What you do is you check whether the host
// run container has lots of extra capacity.
// If it does, you move its data at the end of
// the backing array, and then you write
// the answer at the beginning. What this
// trick does is minimize memory allocations.
//
// In this implementation lazyIOR performs no lazy work: it simply
// forwards to ior and returns its result.
func (rc *runContainer16) lazyIOR(a container) container {
	// not lazy at the moment
	return rc.ior(a)
}

// lazyOR is described above in lazyIOR.
2339 func (rc *runContainer16) lazyOR(a container) container { 2340 // not lazy at the moment 2341 return rc.or(a) 2342 } 2343 2344 func (rc *runContainer16) intersects(a container) bool { 2345 // TODO: optimize by doing inplace/less allocation 2346 isect := rc.and(a) 2347 return !isect.isEmpty() 2348 } 2349 2350 func (rc *runContainer16) xor(a container) container { 2351 switch c := a.(type) { 2352 case *arrayContainer: 2353 return rc.xorArray(c) 2354 case *bitmapContainer: 2355 return rc.xorBitmap(c) 2356 case *runContainer16: 2357 return rc.xorRunContainer16(c) 2358 } 2359 panic("unsupported container type") 2360 } 2361 2362 func (rc *runContainer16) iandNot(a container) container { 2363 switch c := a.(type) { 2364 case *arrayContainer: 2365 return rc.iandNotArray(c) 2366 case *bitmapContainer: 2367 return rc.iandNotBitmap(c) 2368 case *runContainer16: 2369 return rc.iandNotRunContainer16(c) 2370 } 2371 panic("unsupported container type") 2372 } 2373 2374 // flip the values in the range [firstOfRange,endx) 2375 func (rc *runContainer16) inot(firstOfRange, endx int) container { 2376 if firstOfRange > endx { 2377 panic(fmt.Sprintf("invalid %v = endx > firstOfRange = %v", endx, firstOfRange)) 2378 } 2379 if firstOfRange > endx { 2380 return rc 2381 } 2382 // TODO: minimize copies, do it all inplace; not() makes a copy. 
2383 rc = rc.Not(firstOfRange, endx) 2384 return rc 2385 } 2386 2387 func (rc *runContainer16) rank(x uint16) int { 2388 n := int(len(rc.iv)) 2389 xx := int(x) 2390 w, already, _ := rc.search(xx) 2391 if w < 0 { 2392 return 0 2393 } 2394 if !already && w == n-1 { 2395 return rc.getCardinality() 2396 } 2397 var rnk int 2398 if !already { 2399 for i := int(0); i <= w; i++ { 2400 rnk += rc.iv[i].runlen() 2401 } 2402 return int(rnk) 2403 } 2404 for i := int(0); i < w; i++ { 2405 rnk += rc.iv[i].runlen() 2406 } 2407 rnk += int(x-rc.iv[w].start) + 1 2408 return int(rnk) 2409 } 2410 2411 func (rc *runContainer16) selectInt(x uint16) int { 2412 var offset int 2413 for k := range rc.iv { 2414 nextOffset := offset + rc.iv[k].runlen() 2415 if nextOffset > int(x) { 2416 return int(int(rc.iv[k].start) + (int(x) - offset)) 2417 } 2418 offset = nextOffset 2419 } 2420 panic("cannot select x") 2421 } 2422 2423 func (rc *runContainer16) andNotRunContainer16(b *runContainer16) container { 2424 return rc.AndNotRunContainer16(b) 2425 } 2426 2427 func (rc *runContainer16) andNotArray(ac *arrayContainer) container { 2428 rcb := rc.toBitmapContainer() 2429 acb := ac.toBitmapContainer() 2430 return rcb.andNotBitmap(acb) 2431 } 2432 2433 func (rc *runContainer16) andNotBitmap(bc *bitmapContainer) container { 2434 rcb := rc.toBitmapContainer() 2435 return rcb.andNotBitmap(bc) 2436 } 2437 2438 func (rc *runContainer16) toBitmapContainer() *bitmapContainer { 2439 bc := newBitmapContainer() 2440 for i := range rc.iv { 2441 bc.iaddRange(int(rc.iv[i].start), int(rc.iv[i].last())+1) 2442 } 2443 bc.computeCardinality() 2444 return bc 2445 } 2446 2447 func (rc *runContainer16) iandNotRunContainer16(x2 *runContainer16) container { 2448 rcb := rc.toBitmapContainer() 2449 x2b := x2.toBitmapContainer() 2450 rcb.iandNotBitmapSurely(x2b) 2451 // TODO: check size and optimize the return value 2452 // TODO: is inplace modification really required? If not, elide the copy. 
2453 rc2 := newRunContainer16FromBitmapContainer(rcb) 2454 *rc = *rc2 2455 return rc 2456 } 2457 2458 func (rc *runContainer16) iandNotArray(ac *arrayContainer) container { 2459 rcb := rc.toBitmapContainer() 2460 acb := ac.toBitmapContainer() 2461 rcb.iandNotBitmapSurely(acb) 2462 // TODO: check size and optimize the return value 2463 // TODO: is inplace modification really required? If not, elide the copy. 2464 rc2 := newRunContainer16FromBitmapContainer(rcb) 2465 *rc = *rc2 2466 return rc 2467 } 2468 2469 func (rc *runContainer16) iandNotBitmap(bc *bitmapContainer) container { 2470 rcb := rc.toBitmapContainer() 2471 rcb.iandNotBitmapSurely(bc) 2472 // TODO: check size and optimize the return value 2473 // TODO: is inplace modification really required? If not, elide the copy. 2474 rc2 := newRunContainer16FromBitmapContainer(rcb) 2475 *rc = *rc2 2476 return rc 2477 } 2478 2479 func (rc *runContainer16) xorRunContainer16(x2 *runContainer16) container { 2480 rcb := rc.toBitmapContainer() 2481 x2b := x2.toBitmapContainer() 2482 return rcb.xorBitmap(x2b) 2483 } 2484 2485 func (rc *runContainer16) xorArray(ac *arrayContainer) container { 2486 rcb := rc.toBitmapContainer() 2487 acb := ac.toBitmapContainer() 2488 return rcb.xorBitmap(acb) 2489 } 2490 2491 func (rc *runContainer16) xorBitmap(bc *bitmapContainer) container { 2492 rcb := rc.toBitmapContainer() 2493 return rcb.xorBitmap(bc) 2494 } 2495 2496 // convert to bitmap or array *if needed* 2497 func (rc *runContainer16) toEfficientContainer() container { 2498 sizeAsRunContainer := rc.getSizeInBytes() 2499 sizeAsBitmapContainer := bitmapContainerSizeInBytes() 2500 card := rc.getCardinality() 2501 sizeAsArrayContainer := arrayContainerSizeInBytes(card) 2502 if sizeAsRunContainer <= minOfInt(sizeAsBitmapContainer, sizeAsArrayContainer) { 2503 return rc 2504 } 2505 if card <= arrayDefaultMaxSize { 2506 return rc.toArrayContainer() 2507 } 2508 bc := newBitmapContainerFromRun(rc) 2509 return bc 2510 } 2511 2512 func (rc 
*runContainer16) toArrayContainer() *arrayContainer { 2513 ac := newArrayContainer() 2514 for i := range rc.iv { 2515 ac.iaddRange(int(rc.iv[i].start), int(rc.iv[i].last())+1) 2516 } 2517 return ac 2518 } 2519 2520 func newRunContainer16FromContainer(c container) *runContainer16 { 2521 2522 switch x := c.(type) { 2523 case *runContainer16: 2524 return x.Clone() 2525 case *arrayContainer: 2526 return newRunContainer16FromArray(x) 2527 case *bitmapContainer: 2528 return newRunContainer16FromBitmapContainer(x) 2529 } 2530 panic("unsupported container type") 2531 } 2532 2533 // And finds the intersection of rc and b. 2534 func (rc *runContainer16) And(b *Bitmap) *Bitmap { 2535 out := NewBitmap() 2536 for _, p := range rc.iv { 2537 plast := p.last() 2538 for i := p.start; i <= plast; i++ { 2539 if b.Contains(uint32(i)) { 2540 out.Add(uint32(i)) 2541 } 2542 } 2543 } 2544 return out 2545 } 2546 2547 // Xor returns the exclusive-or of rc and b. 2548 func (rc *runContainer16) Xor(b *Bitmap) *Bitmap { 2549 out := b.Clone() 2550 for _, p := range rc.iv { 2551 plast := p.last() 2552 for v := p.start; v <= plast; v++ { 2553 w := uint32(v) 2554 if out.Contains(w) { 2555 out.RemoveRange(uint64(w), uint64(w+1)) 2556 } else { 2557 out.Add(w) 2558 } 2559 } 2560 } 2561 return out 2562 } 2563 2564 // Or returns the union of rc and b. 2565 func (rc *runContainer16) Or(b *Bitmap) *Bitmap { 2566 out := b.Clone() 2567 for _, p := range rc.iv { 2568 plast := p.last() 2569 for v := p.start; v <= plast; v++ { 2570 out.Add(uint32(v)) 2571 } 2572 } 2573 return out 2574 } 2575 2576 // serializedSizeInBytes returns the number of bytes of memory 2577 // required by this runContainer16. 
This is for the 2578 // Roaring format, as specified https://github.com/RoaringBitmap/RoaringFormatSpec/ 2579 func (rc *runContainer16) serializedSizeInBytes() int { 2580 // number of runs in one uint16, then each run 2581 // needs two more uint16 2582 return 2 + len(rc.iv)*4 2583 } 2584 2585 func (rc *runContainer16) addOffset(x uint16) (container, container) { 2586 var low, high *runContainer16 2587 2588 if len(rc.iv) == 0 { 2589 return nil, nil 2590 } 2591 2592 first := uint32(rc.iv[0].start) + uint32(x) 2593 if highbits(first) == 0 { 2594 // Some elements will fall into low part, allocate a container. 2595 // Checking the first one is enough because they are ordered. 2596 low = newRunContainer16() 2597 } 2598 last := uint32(rc.iv[len(rc.iv)-1].start) 2599 last += uint32(rc.iv[len(rc.iv)-1].length) 2600 last += uint32(x) 2601 if highbits(last) > 0 { 2602 // Some elements will fall into high part, allocate a container. 2603 // Checking the last one is enough because they are ordered. 2604 high = newRunContainer16() 2605 } 2606 2607 for _, iv := range rc.iv { 2608 val := int(iv.start) + int(x) 2609 finalVal := int(val) + int(iv.length) 2610 if val <= 0xffff { 2611 if finalVal <= 0xffff { 2612 low.iv = append(low.iv, interval16{uint16(val), iv.length}) 2613 } else { 2614 low.iv = append(low.iv, interval16{uint16(val), uint16(0xffff - val)}) 2615 high.iv = append(high.iv, interval16{uint16(0), uint16(finalVal & 0xffff)}) 2616 } 2617 } else { 2618 high.iv = append(high.iv, interval16{uint16(val & 0xffff), iv.length}) 2619 } 2620 } 2621 2622 // Ensure proper nil interface. 2623 if low == nil { 2624 return nil, high 2625 } 2626 if high == nil { 2627 return low, nil 2628 } 2629 2630 return low, high 2631 }