github.com/coyove/sdss@v0.0.0-20231129015646-c2ec58cca6a2/contrib/roaring/roaring.go (about) 1 // Package roaring is an implementation of Roaring Bitmaps in Go. 2 // They provide fast compressed bitmap data structures (also called bitset). 3 // They are ideally suited to represent sets of integers over 4 // relatively small ranges. 5 // See http://roaringbitmap.org for details. 6 package roaring 7 8 import ( 9 "bytes" 10 "encoding/base64" 11 "fmt" 12 "io" 13 "sort" 14 "strconv" 15 16 "github.com/coyove/sdss/contrib/roaring/internal" 17 ) 18 19 // Bitmap represents a compressed bitmap where you can add integers. 20 type Bitmap struct { 21 highlowcontainer roaringArray 22 } 23 24 // ToBase64 serializes a bitmap as Base64 25 func (rb *Bitmap) ToBase64() (string, error) { 26 buf := new(bytes.Buffer) 27 _, err := rb.WriteTo(buf) 28 return base64.StdEncoding.EncodeToString(buf.Bytes()), err 29 30 } 31 32 // FromBase64 deserializes a bitmap from Base64 33 func (rb *Bitmap) FromBase64(str string) (int64, error) { 34 data, err := base64.StdEncoding.DecodeString(str) 35 if err != nil { 36 return 0, err 37 } 38 buf := bytes.NewBuffer(data) 39 40 return rb.ReadFrom(buf) 41 } 42 43 // WriteTo writes a serialized version of this bitmap to stream. 44 // The format is compatible with other RoaringBitmap 45 // implementations (Java, C) and is documented here: 46 // https://github.com/RoaringBitmap/RoaringFormatSpec 47 func (rb *Bitmap) WriteTo(stream io.Writer) (int64, error) { 48 return rb.highlowcontainer.writeTo(stream) 49 } 50 51 // ToBytes returns an array of bytes corresponding to what is written 52 // when calling WriteTo 53 func (rb *Bitmap) ToBytes() ([]byte, error) { 54 return rb.highlowcontainer.toBytes() 55 } 56 57 // Checksum computes a hash (currently FNV-1a) for a bitmap that is suitable for 58 // using bitmaps as elements in hash sets or as keys in hash maps, as well as 59 // generally quicker comparisons. 60 // The implementation is biased towards efficiency in little endian machines, so 61 // expect some extra CPU cycles and memory to be used if your machine is big endian. 62 // Likewise, don't use this to verify integrity unless you're certain you'll load 63 // the bitmap on a machine with the same endianess used to create it. 64 func (rb *Bitmap) Checksum() uint64 { 65 const ( 66 offset = 14695981039346656037 67 prime = 1099511628211 68 ) 69 70 var bytes []byte 71 72 hash := uint64(offset) 73 74 bytes = uint16SliceAsByteSlice(rb.highlowcontainer.keys) 75 76 for _, b := range bytes { 77 hash ^= uint64(b) 78 hash *= prime 79 } 80 81 for _, c := range rb.highlowcontainer.containers { 82 // 0 separator 83 hash ^= 0 84 hash *= prime 85 86 switch c := c.(type) { 87 case *bitmapContainer: 88 bytes = uint64SliceAsByteSlice(c.bitmap) 89 case *arrayContainer: 90 bytes = uint16SliceAsByteSlice(c.content) 91 case *runContainer16: 92 bytes = interval16SliceAsByteSlice(c.iv) 93 default: 94 panic("invalid container type") 95 } 96 97 if len(bytes) == 0 { 98 panic("empty containers are not supported") 99 } 100 101 for _, b := range bytes { 102 hash ^= uint64(b) 103 hash *= prime 104 } 105 } 106 107 return hash 108 } 109 110 // ReadFrom reads a serialized version of this bitmap from stream. 111 // The format is compatible with other RoaringBitmap 112 // implementations (Java, C) and is documented here: 113 // https://github.com/RoaringBitmap/RoaringFormatSpec 114 // Since io.Reader is regarded as a stream and cannot be read twice. 115 // So add cookieHeader to accept the 4-byte data that has been read in roaring64.ReadFrom. 116 // It is not necessary to pass cookieHeader when call roaring.ReadFrom to read the roaring32 data directly. 117 func (rb *Bitmap) ReadFrom(reader io.Reader, cookieHeader ...byte) (p int64, err error) { 118 stream := internal.ByteInputAdapterPool.Get().(*internal.ByteInputAdapter) 119 stream.Reset(reader) 120 121 p, err = rb.highlowcontainer.readFrom(stream, cookieHeader...) 122 internal.ByteInputAdapterPool.Put(stream) 123 124 return 125 } 126 127 // FromBuffer creates a bitmap from its serialized version stored in buffer 128 // 129 // The format specification is available here: 130 // https://github.com/RoaringBitmap/RoaringFormatSpec 131 // 132 // The provided byte array (buf) is expected to be a constant. 133 // The function makes the best effort attempt not to copy data. 134 // You should take care not to modify buff as it will 135 // likely result in unexpected program behavior. 136 // 137 // Resulting bitmaps are effectively immutable in the following sense: 138 // a copy-on-write marker is used so that when you modify the resulting 139 // bitmap, copies of selected data (containers) are made. 140 // You should *not* change the copy-on-write status of the resulting 141 // bitmaps (SetCopyOnWrite). 142 // 143 // If buf becomes unavailable, then a bitmap created with 144 // FromBuffer would be effectively broken. Furthermore, any 145 // bitmap derived from this bitmap (e.g., via Or, And) might 146 // also be broken. Thus, before making buf unavailable, you should 147 // call CloneCopyOnWriteContainers on all such bitmaps. 148 // 149 func (rb *Bitmap) FromBuffer(buf []byte) (p int64, err error) { 150 stream := internal.ByteBufferPool.Get().(*internal.ByteBuffer) 151 stream.Reset(buf) 152 153 p, err = rb.highlowcontainer.readFrom(stream) 154 internal.ByteBufferPool.Put(stream) 155 156 return 157 } 158 159 // RunOptimize attempts to further compress the runs of consecutive values found in the bitmap 160 func (rb *Bitmap) RunOptimize() { 161 rb.highlowcontainer.runOptimize() 162 } 163 164 // HasRunCompression returns true if the bitmap benefits from run compression 165 func (rb *Bitmap) HasRunCompression() bool { 166 return rb.highlowcontainer.hasRunCompression() 167 } 168 169 // MarshalBinary implements the encoding.BinaryMarshaler interface for the bitmap 170 // (same as ToBytes) 171 func (rb *Bitmap) MarshalBinary() ([]byte, error) { 172 return rb.ToBytes() 173 } 174 175 // UnmarshalBinary implements the encoding.BinaryUnmarshaler interface for the bitmap 176 func (rb *Bitmap) UnmarshalBinary(data []byte) error { 177 r := bytes.NewReader(data) 178 _, err := rb.ReadFrom(r) 179 return err 180 } 181 182 // NewBitmap creates a new empty Bitmap (see also New) 183 func NewBitmap() *Bitmap { 184 return &Bitmap{} 185 } 186 187 // New creates a new empty Bitmap (same as NewBitmap) 188 func New() *Bitmap { 189 return &Bitmap{} 190 } 191 192 // Clear resets the Bitmap to be logically empty, but may retain 193 // some memory allocations that may speed up future operations 194 func (rb *Bitmap) Clear() { 195 rb.highlowcontainer.clear() 196 } 197 198 // ToArray creates a new slice containing all of the integers stored in the Bitmap in sorted order 199 func (rb *Bitmap) ToArray() []uint32 { 200 array := make([]uint32, rb.GetCardinality()) 201 pos := 0 202 pos2 := 0 203 204 for pos < rb.highlowcontainer.size() { 205 hs := uint32(rb.highlowcontainer.getKeyAtIndex(pos)) << 16 206 c := rb.highlowcontainer.getContainerAtIndex(pos) 207 pos++ 208 pos2 = c.fillLeastSignificant16bits(array, pos2, hs) 209 } 210 return array 211 } 212 213 // GetSizeInBytes estimates the memory usage of the Bitmap. Note that this 214 // might differ slightly from the amount of bytes required for persistent storage 215 func (rb *Bitmap) GetSizeInBytes() uint64 { 216 size := uint64(8) 217 for _, c := range rb.highlowcontainer.containers { 218 size += uint64(2) + uint64(c.getSizeInBytes()) 219 } 220 return size 221 } 222 223 // GetSerializedSizeInBytes computes the serialized size in bytes 224 // of the Bitmap. It should correspond to the 225 // number of bytes written when invoking WriteTo. You can expect 226 // that this function is much cheaper computationally than WriteTo. 227 func (rb *Bitmap) GetSerializedSizeInBytes() uint64 { 228 return rb.highlowcontainer.serializedSizeInBytes() 229 } 230 231 // BoundSerializedSizeInBytes returns an upper bound on the serialized size in bytes 232 // assuming that one wants to store "cardinality" integers in [0, universe_size) 233 func BoundSerializedSizeInBytes(cardinality uint64, universeSize uint64) uint64 { 234 contnbr := (universeSize + uint64(65535)) / uint64(65536) 235 if contnbr > cardinality { 236 contnbr = cardinality 237 // we can't have more containers than we have values 238 } 239 headermax := 8*contnbr + 4 240 if 4 > (contnbr+7)/8 { 241 headermax += 4 242 } else { 243 headermax += (contnbr + 7) / 8 244 } 245 valsarray := uint64(arrayContainerSizeInBytes(int(cardinality))) 246 valsbitmap := contnbr * uint64(bitmapContainerSizeInBytes()) 247 valsbest := valsarray 248 if valsbest > valsbitmap { 249 valsbest = valsbitmap 250 } 251 return valsbest + headermax 252 } 253 254 // IntIterable allows you to iterate over the values in a Bitmap 255 type IntIterable interface { 256 HasNext() bool 257 Next() uint32 258 } 259 260 // IntPeekable allows you to look at the next value without advancing and 261 // advance as long as the next value is smaller than minval 262 type IntPeekable interface { 263 IntIterable 264 // PeekNext peeks the next value without advancing the iterator 265 PeekNext() uint32 266 // AdvanceIfNeeded advances as long as the next value is smaller than minval 267 AdvanceIfNeeded(minval uint32) 268 } 269 270 type intIterator struct { 271 pos int 272 hs uint32 273 iter shortPeekable 274 highlowcontainer *roaringArray 275 276 // These embedded iterators per container type help reduce load in the GC. 277 // This way, instead of making up-to 64k allocations per full iteration 278 // we get a single allocation and simply reinitialize the appropriate 279 // iterator and point to it in the generic `iter` member on each key bound. 280 shortIter shortIterator 281 runIter runIterator16 282 bitmapIter bitmapContainerShortIterator 283 } 284 285 // HasNext returns true if there are more integers to iterate over 286 func (ii *intIterator) HasNext() bool { 287 return ii.pos < ii.highlowcontainer.size() 288 } 289 290 func (ii *intIterator) init() { 291 if ii.highlowcontainer.size() > ii.pos { 292 ii.hs = uint32(ii.highlowcontainer.getKeyAtIndex(ii.pos)) << 16 293 c := ii.highlowcontainer.getContainerAtIndex(ii.pos) 294 switch t := c.(type) { 295 case *arrayContainer: 296 ii.shortIter = shortIterator{t.content, 0} 297 ii.iter = &ii.shortIter 298 case *runContainer16: 299 ii.runIter = runIterator16{rc: t, curIndex: 0, curPosInIndex: 0} 300 ii.iter = &ii.runIter 301 case *bitmapContainer: 302 ii.bitmapIter = bitmapContainerShortIterator{t, t.NextSetBit(0)} 303 ii.iter = &ii.bitmapIter 304 } 305 } 306 } 307 308 // Next returns the next integer 309 func (ii *intIterator) Next() uint32 { 310 x := uint32(ii.iter.next()) | ii.hs 311 if !ii.iter.hasNext() { 312 ii.pos = ii.pos + 1 313 ii.init() 314 } 315 return x 316 } 317 318 // PeekNext peeks the next value without advancing the iterator 319 func (ii *intIterator) PeekNext() uint32 { 320 return uint32(ii.iter.peekNext()&maxLowBit) | ii.hs 321 } 322 323 func (ii *intIterator) Seek(minval uint32) { 324 to := minval & 0xffff0000 325 326 ii.pos = sort.Search(ii.highlowcontainer.size(), func(i int) bool { 327 return uint32(ii.highlowcontainer.getKeyAtIndex(i))<<16 >= to 328 }) 329 ii.init() 330 331 if ii.HasNext() && ii.hs == to { 332 ii.iter.advanceIfNeeded(lowbits(minval)) 333 334 if !ii.iter.hasNext() { 335 ii.pos++ 336 ii.init() 337 } 338 } 339 } 340 341 // AdvanceIfNeeded advances as long as the next value is smaller than minval 342 func (ii *intIterator) AdvanceIfNeeded(minval uint32) { 343 to := minval & 0xffff0000 344 345 for ii.HasNext() && ii.hs < to { 346 ii.pos++ 347 ii.init() 348 } 349 350 if ii.HasNext() && ii.hs == to { 351 ii.iter.advanceIfNeeded(lowbits(minval)) 352 353 if !ii.iter.hasNext() { 354 ii.pos++ 355 ii.init() 356 } 357 } 358 } 359 360 // IntIterator is meant to allow you to iterate through the values of a bitmap, see Initialize(a *Bitmap) 361 type IntIterator = intIterator 362 363 // Initialize configures the existing iterator so that it can iterate through the values of 364 // the provided bitmap. 365 // The iteration results are undefined if the bitmap is modified (e.g., with Add or Remove). 366 func (p *intIterator) Initialize(a *Bitmap) { 367 p.pos = 0 368 p.highlowcontainer = &a.highlowcontainer 369 p.init() 370 } 371 372 type intReverseIterator struct { 373 pos int 374 hs uint32 375 iter shortIterable 376 highlowcontainer *roaringArray 377 378 shortIter reverseIterator 379 runIter runReverseIterator16 380 bitmapIter reverseBitmapContainerShortIterator 381 } 382 383 // HasNext returns true if there are more integers to iterate over 384 func (ii *intReverseIterator) HasNext() bool { 385 return ii.pos >= 0 386 } 387 388 func (ii *intReverseIterator) init() { 389 if ii.pos >= 0 { 390 ii.hs = uint32(ii.highlowcontainer.getKeyAtIndex(ii.pos)) << 16 391 c := ii.highlowcontainer.getContainerAtIndex(ii.pos) 392 switch t := c.(type) { 393 case *arrayContainer: 394 ii.shortIter = reverseIterator{t.content, len(t.content) - 1} 395 ii.iter = &ii.shortIter 396 case *runContainer16: 397 index := int(len(t.iv)) - 1 398 pos := uint16(0) 399 400 if index >= 0 { 401 pos = t.iv[index].length 402 } 403 404 ii.runIter = runReverseIterator16{rc: t, curIndex: index, curPosInIndex: pos} 405 ii.iter = &ii.runIter 406 case *bitmapContainer: 407 pos := -1 408 if t.cardinality > 0 { 409 pos = int(t.maximum()) 410 } 411 ii.bitmapIter = reverseBitmapContainerShortIterator{t, pos} 412 ii.iter = &ii.bitmapIter 413 } 414 } else { 415 ii.iter = nil 416 } 417 } 418 419 // Next returns the next integer 420 func (ii *intReverseIterator) Next() uint32 { 421 x := uint32(ii.iter.next()) | ii.hs 422 if !ii.iter.hasNext() { 423 ii.pos = ii.pos - 1 424 ii.init() 425 } 426 return x 427 } 428 429 // IntReverseIterator is meant to allow you to iterate through the values of a bitmap, see Initialize(a *Bitmap) 430 type IntReverseIterator = intReverseIterator 431 432 // Initialize configures the existing iterator so that it can iterate through the values of 433 // the provided bitmap. 434 // The iteration results are undefined if the bitmap is modified (e.g., with Add or Remove). 435 func (p *intReverseIterator) Initialize(a *Bitmap) { 436 p.highlowcontainer = &a.highlowcontainer 437 p.pos = a.highlowcontainer.size() - 1 438 p.init() 439 } 440 441 // ManyIntIterable allows you to iterate over the values in a Bitmap 442 type ManyIntIterable interface { 443 // NextMany fills buf up with values, returns how many values were returned 444 NextMany(buf []uint32) int 445 // NextMany64 fills up buf with 64 bit values, uses hs as a mask (OR), returns how many values were returned 446 NextMany64(hs uint64, buf []uint64) int 447 } 448 449 type manyIntIterator struct { 450 pos int 451 hs uint32 452 iter manyIterable 453 highlowcontainer *roaringArray 454 455 shortIter shortIterator 456 runIter runIterator16 457 bitmapIter bitmapContainerManyIterator 458 } 459 460 func (ii *manyIntIterator) init() { 461 if ii.highlowcontainer.size() > ii.pos { 462 ii.hs = uint32(ii.highlowcontainer.getKeyAtIndex(ii.pos)) << 16 463 c := ii.highlowcontainer.getContainerAtIndex(ii.pos) 464 switch t := c.(type) { 465 case *arrayContainer: 466 ii.shortIter = shortIterator{t.content, 0} 467 ii.iter = &ii.shortIter 468 case *runContainer16: 469 ii.runIter = runIterator16{rc: t, curIndex: 0, curPosInIndex: 0} 470 ii.iter = &ii.runIter 471 case *bitmapContainer: 472 ii.bitmapIter = bitmapContainerManyIterator{t, -1, 0} 473 ii.iter = &ii.bitmapIter 474 } 475 } else { 476 ii.iter = nil 477 } 478 } 479 480 func (ii *manyIntIterator) NextMany(buf []uint32) int { 481 n := 0 482 for n < len(buf) { 483 if ii.iter == nil { 484 break 485 } 486 moreN := ii.iter.nextMany(ii.hs, buf[n:]) 487 n += moreN 488 if moreN == 0 { 489 ii.pos = ii.pos + 1 490 ii.init() 491 } 492 } 493 494 return n 495 } 496 497 func (ii *manyIntIterator) NextMany64(hs64 uint64, buf []uint64) int { 498 n := 0 499 for n < len(buf) { 500 if ii.iter == nil { 501 break 502 } 503 504 hs := uint64(ii.hs) | hs64 505 moreN := ii.iter.nextMany64(hs, buf[n:]) 506 n += moreN 507 if moreN == 0 { 508 ii.pos = ii.pos + 1 509 ii.init() 510 } 511 } 512 513 return n 514 } 515 516 // ManyIntIterator is meant to allow you to iterate through the values of a bitmap, see Initialize(a *Bitmap) 517 type ManyIntIterator = manyIntIterator 518 519 // Initialize configures the existing iterator so that it can iterate through the values of 520 // the provided bitmap. 521 // The iteration results are undefined if the bitmap is modified (e.g., with Add or Remove). 522 func (p *manyIntIterator) Initialize(a *Bitmap) { 523 p.pos = 0 524 p.highlowcontainer = &a.highlowcontainer 525 p.init() 526 } 527 528 // String creates a string representation of the Bitmap 529 func (rb *Bitmap) String() string { 530 // inspired by https://github.com/fzandona/goroar/ 531 var buffer bytes.Buffer 532 start := []byte("{") 533 buffer.Write(start) 534 i := rb.Iterator() 535 counter := 0 536 if i.HasNext() { 537 counter = counter + 1 538 buffer.WriteString(strconv.FormatInt(int64(i.Next()), 10)) 539 } 540 for i.HasNext() { 541 buffer.WriteString(",") 542 counter = counter + 1 543 // to avoid exhausting the memory 544 if counter > 0x40000 { 545 buffer.WriteString("...") 546 break 547 } 548 buffer.WriteString(strconv.FormatInt(int64(i.Next()), 10)) 549 } 550 buffer.WriteString("}") 551 return buffer.String() 552 } 553 554 // Iterate iterates over the bitmap, calling the given callback with each value in the bitmap. If the callback returns 555 // false, the iteration is halted. 556 // The iteration results are undefined if the bitmap is modified (e.g., with Add or Remove). 557 // There is no guarantee as to what order the values will be iterated. 558 func (rb *Bitmap) Iterate(cb func(x uint32) bool) { 559 for i := 0; i < rb.highlowcontainer.size(); i++ { 560 hs := uint32(rb.highlowcontainer.getKeyAtIndex(i)) << 16 561 c := rb.highlowcontainer.getContainerAtIndex(i) 562 563 var shouldContinue bool 564 // This is hacky but it avoids allocations from invoking an interface method with a closure 565 switch t := c.(type) { 566 case *arrayContainer: 567 shouldContinue = t.iterate(func(x uint16) bool { 568 return cb(uint32(x) | hs) 569 }) 570 case *runContainer16: 571 shouldContinue = t.iterate(func(x uint16) bool { 572 return cb(uint32(x) | hs) 573 }) 574 case *bitmapContainer: 575 shouldContinue = t.iterate(func(x uint16) bool { 576 return cb(uint32(x) | hs) 577 }) 578 } 579 580 if !shouldContinue { 581 break 582 } 583 } 584 } 585 586 // Iterator creates a new IntPeekable to iterate over the integers contained in the bitmap, in sorted order; 587 // the iterator becomes invalid if the bitmap is modified (e.g., with Add or Remove). 588 func (rb *Bitmap) Iterator() IntPeekable { 589 p := new(intIterator) 590 p.Initialize(rb) 591 return p 592 } 593 594 // ReverseIterator creates a new IntIterable to iterate over the integers contained in the bitmap, in sorted order; 595 // the iterator becomes invalid if the bitmap is modified (e.g., with Add or Remove). 596 func (rb *Bitmap) ReverseIterator() IntIterable { 597 p := new(intReverseIterator) 598 p.Initialize(rb) 599 return p 600 } 601 602 // ManyIterator creates a new ManyIntIterable to iterate over the integers contained in the bitmap, in sorted order; 603 // the iterator becomes invalid if the bitmap is modified (e.g., with Add or Remove). 604 func (rb *Bitmap) ManyIterator() ManyIntIterable { 605 p := new(manyIntIterator) 606 p.Initialize(rb) 607 return p 608 } 609 610 // Clone creates a copy of the Bitmap 611 func (rb *Bitmap) Clone() *Bitmap { 612 ptr := new(Bitmap) 613 ptr.highlowcontainer = *rb.highlowcontainer.clone() 614 return ptr 615 } 616 617 // Minimum get the smallest value stored in this roaring bitmap, assumes that it is not empty 618 func (rb *Bitmap) Minimum() uint32 { 619 if len(rb.highlowcontainer.containers) == 0 { 620 panic("Empty bitmap") 621 } 622 return uint32(rb.highlowcontainer.containers[0].minimum()) | (uint32(rb.highlowcontainer.keys[0]) << 16) 623 } 624 625 // Maximum get the largest value stored in this roaring bitmap, assumes that it is not empty 626 func (rb *Bitmap) Maximum() uint32 { 627 if len(rb.highlowcontainer.containers) == 0 { 628 panic("Empty bitmap") 629 } 630 lastindex := len(rb.highlowcontainer.containers) - 1 631 return uint32(rb.highlowcontainer.containers[lastindex].maximum()) | (uint32(rb.highlowcontainer.keys[lastindex]) << 16) 632 } 633 634 // Contains returns true if the integer is contained in the bitmap 635 func (rb *Bitmap) Contains(x uint32) bool { 636 hb := highbits(x) 637 c := rb.highlowcontainer.getContainer(hb) 638 return c != nil && c.contains(lowbits(x)) 639 } 640 641 // ContainsInt returns true if the integer is contained in the bitmap (this is a convenience method, the parameter is casted to uint32 and Contains is called) 642 func (rb *Bitmap) ContainsInt(x int) bool { 643 return rb.Contains(uint32(x)) 644 } 645 646 // Equals returns true if the two bitmaps contain the same integers 647 func (rb *Bitmap) Equals(o interface{}) bool { 648 srb, ok := o.(*Bitmap) 649 if ok { 650 return srb.highlowcontainer.equals(rb.highlowcontainer) 651 } 652 return false 653 } 654 655 // AddOffset adds the value 'offset' to each and every value in a bitmap, generating a new bitmap in the process 656 func AddOffset(x *Bitmap, offset uint32) (answer *Bitmap) { 657 return AddOffset64(x, int64(offset)) 658 } 659 660 // AddOffset64 adds the value 'offset' to each and every value in a bitmap, generating a new bitmap in the process 661 // If offset + element is outside of the range [0,2^32), that the element will be dropped 662 func AddOffset64(x *Bitmap, offset int64) (answer *Bitmap) { 663 // we need "offset" to be a long because we want to support values 664 // between -0xFFFFFFFF up to +-0xFFFFFFFF 665 var containerOffset64 int64 666 667 if offset < 0 { 668 containerOffset64 = (offset - (1 << 16) + 1) / (1 << 16) 669 } else { 670 containerOffset64 = offset >> 16 671 } 672 673 answer = New() 674 675 if containerOffset64 >= (1<<16) || containerOffset64 < -(1<<16) { 676 return answer 677 } 678 679 containerOffset := int32(containerOffset64) 680 inOffset := (uint16)(offset - containerOffset64*(1<<16)) 681 682 if inOffset == 0 { 683 for pos := 0; pos < x.highlowcontainer.size(); pos++ { 684 key := int32(x.highlowcontainer.getKeyAtIndex(pos)) 685 key += containerOffset 686 687 if key >= 0 && key <= MaxUint16 { 688 c := x.highlowcontainer.getContainerAtIndex(pos).clone() 689 answer.highlowcontainer.appendContainer(uint16(key), c, false) 690 } 691 } 692 } else { 693 for pos := 0; pos < x.highlowcontainer.size(); pos++ { 694 key := int32(x.highlowcontainer.getKeyAtIndex(pos)) 695 key += containerOffset 696 697 if key+1 < 0 || key > MaxUint16 { 698 continue 699 } 700 701 c := x.highlowcontainer.getContainerAtIndex(pos) 702 lo, hi := c.addOffset(inOffset) 703 704 if lo != nil && key >= 0 { 705 curSize := answer.highlowcontainer.size() 706 lastkey := int32(0) 707 708 if curSize > 0 { 709 lastkey = int32(answer.highlowcontainer.getKeyAtIndex(curSize - 1)) 710 } 711 712 if curSize > 0 && lastkey == key { 713 prev := answer.highlowcontainer.getContainerAtIndex(curSize - 1) 714 orresult := prev.ior(lo) 715 answer.highlowcontainer.setContainerAtIndex(curSize-1, orresult) 716 } else { 717 answer.highlowcontainer.appendContainer(uint16(key), lo, false) 718 } 719 } 720 721 if hi != nil && key+1 <= MaxUint16 { 722 answer.highlowcontainer.appendContainer(uint16(key+1), hi, false) 723 } 724 } 725 } 726 727 return answer 728 } 729 730 // Add the integer x to the bitmap 731 func (rb *Bitmap) Add(x uint32) { 732 hb := highbits(x) 733 ra := &rb.highlowcontainer 734 i := ra.getIndex(hb) 735 if i >= 0 { 736 var c container 737 c = ra.getWritableContainerAtIndex(i).iaddReturnMinimized(lowbits(x)) 738 rb.highlowcontainer.setContainerAtIndex(i, c) 739 } else { 740 newac := newArrayContainer() 741 rb.highlowcontainer.insertNewKeyValueAt(-i-1, hb, newac.iaddReturnMinimized(lowbits(x))) 742 } 743 } 744 745 // add the integer x to the bitmap, return the container and its index 746 func (rb *Bitmap) addwithptr(x uint32) (int, container) { 747 hb := highbits(x) 748 ra := &rb.highlowcontainer 749 i := ra.getIndex(hb) 750 var c container 751 if i >= 0 { 752 c = ra.getWritableContainerAtIndex(i).iaddReturnMinimized(lowbits(x)) 753 rb.highlowcontainer.setContainerAtIndex(i, c) 754 return i, c 755 } 756 newac := newArrayContainer() 757 c = newac.iaddReturnMinimized(lowbits(x)) 758 rb.highlowcontainer.insertNewKeyValueAt(-i-1, hb, c) 759 return -i - 1, c 760 } 761 762 // CheckedAdd adds the integer x to the bitmap and return true if it was added (false if the integer was already present) 763 func (rb *Bitmap) CheckedAdd(x uint32) bool { 764 // TODO: add unit tests for this method 765 hb := highbits(x) 766 i := rb.highlowcontainer.getIndex(hb) 767 if i >= 0 { 768 C := rb.highlowcontainer.getWritableContainerAtIndex(i) 769 oldcard := C.getCardinality() 770 C = C.iaddReturnMinimized(lowbits(x)) 771 rb.highlowcontainer.setContainerAtIndex(i, C) 772 return C.getCardinality() > oldcard 773 } 774 newac := newArrayContainer() 775 rb.highlowcontainer.insertNewKeyValueAt(-i-1, hb, newac.iaddReturnMinimized(lowbits(x))) 776 return true 777 778 } 779 780 // AddInt adds the integer x to the bitmap (convenience method: the parameter is casted to uint32 and we call Add) 781 func (rb *Bitmap) AddInt(x int) { 782 rb.Add(uint32(x)) 783 } 784 785 // Remove the integer x from the bitmap 786 func (rb *Bitmap) Remove(x uint32) { 787 hb := highbits(x) 788 i := rb.highlowcontainer.getIndex(hb) 789 if i >= 0 { 790 c := rb.highlowcontainer.getWritableContainerAtIndex(i).iremoveReturnMinimized(lowbits(x)) 791 rb.highlowcontainer.setContainerAtIndex(i, c) 792 if rb.highlowcontainer.getContainerAtIndex(i).isEmpty() { 793 rb.highlowcontainer.removeAtIndex(i) 794 } 795 } 796 } 797 798 // CheckedRemove removes the integer x from the bitmap and return true if the integer was effectively removed (and false if the integer was not present) 799 func (rb *Bitmap) CheckedRemove(x uint32) bool { 800 // TODO: add unit tests for this method 801 hb := highbits(x) 802 i := rb.highlowcontainer.getIndex(hb) 803 if i >= 0 { 804 C := rb.highlowcontainer.getWritableContainerAtIndex(i) 805 oldcard := C.getCardinality() 806 C = C.iremoveReturnMinimized(lowbits(x)) 807 rb.highlowcontainer.setContainerAtIndex(i, C) 808 if rb.highlowcontainer.getContainerAtIndex(i).isEmpty() { 809 rb.highlowcontainer.removeAtIndex(i) 810 return true 811 } 812 return C.getCardinality() < oldcard 813 } 814 return false 815 816 } 817 818 // IsEmpty returns true if the Bitmap is empty (it is faster than doing (GetCardinality() == 0)) 819 func (rb *Bitmap) IsEmpty() bool { 820 return rb.highlowcontainer.size() == 0 821 } 822 823 // GetCardinality returns the number of integers contained in the bitmap 824 func (rb *Bitmap) GetCardinality() uint64 { 825 size := uint64(0) 826 for _, c := range rb.highlowcontainer.containers { 827 size += uint64(c.getCardinality()) 828 } 829 return size 830 } 831 832 // Rank returns the number of integers that are smaller or equal to x (Rank(infinity) would be GetCardinality()). 833 // If you pass the smallest value, you get the value 1. If you pass a value that is smaller than the smallest 834 // value, you get 0. Note that this function differs in convention from the Select function since it 835 // return 1 and not 0 on the smallest value. 836 func (rb *Bitmap) Rank(x uint32) uint64 { 837 size := uint64(0) 838 for i := 0; i < rb.highlowcontainer.size(); i++ { 839 key := rb.highlowcontainer.getKeyAtIndex(i) 840 if key > highbits(x) { 841 return size 842 } 843 if key < highbits(x) { 844 size += uint64(rb.highlowcontainer.getContainerAtIndex(i).getCardinality()) 845 } else { 846 return size + uint64(rb.highlowcontainer.getContainerAtIndex(i).rank(lowbits(x))) 847 } 848 } 849 return size 850 } 851 852 // Select returns the xth integer in the bitmap. If you pass 0, you get 853 // the smallest element. Note that this function differs in convention from 854 // the Rank function which returns 1 on the smallest value. 855 func (rb *Bitmap) Select(x uint32) (uint32, error) { 856 remaining := x 857 for i := 0; i < rb.highlowcontainer.size(); i++ { 858 c := rb.highlowcontainer.getContainerAtIndex(i) 859 card := uint32(c.getCardinality()) 860 if remaining >= card { 861 remaining -= card 862 } else { 863 key := rb.highlowcontainer.getKeyAtIndex(i) 864 return uint32(key)<<16 + uint32(c.selectInt(uint16(remaining))), nil 865 } 866 } 867 return 0, fmt.Errorf("can't find %dth integer in a bitmap with only %d items", x, rb.GetCardinality()) 868 } 869 870 // And computes the intersection between two bitmaps and stores the result in the current bitmap 871 func (rb *Bitmap) And(x2 *Bitmap) { 872 pos1 := 0 873 pos2 := 0 874 intersectionsize := 0 875 length1 := rb.highlowcontainer.size() 876 length2 := x2.highlowcontainer.size() 877 878 main: 879 for { 880 if pos1 < length1 && pos2 < length2 { 881 s1 := rb.highlowcontainer.getKeyAtIndex(pos1) 882 s2 := x2.highlowcontainer.getKeyAtIndex(pos2) 883 for { 884 if s1 == s2 { 885 c1 := rb.highlowcontainer.getWritableContainerAtIndex(pos1) 886 c2 := x2.highlowcontainer.getContainerAtIndex(pos2) 887 diff := c1.iand(c2) 888 if !diff.isEmpty() { 889 rb.highlowcontainer.replaceKeyAndContainerAtIndex(intersectionsize, s1, diff, false) 890 intersectionsize++ 891 } 892 pos1++ 893 pos2++ 894 if (pos1 == length1) || (pos2 == length2) { 895 break main 896 } 897 s1 = rb.highlowcontainer.getKeyAtIndex(pos1) 898 s2 = x2.highlowcontainer.getKeyAtIndex(pos2) 899 } else if s1 < s2 { 900 pos1 = rb.highlowcontainer.advanceUntil(s2, pos1) 901 if pos1 == length1 { 902 break main 903 } 904 s1 = rb.highlowcontainer.getKeyAtIndex(pos1) 905 } else { //s1 > s2 906 pos2 = x2.highlowcontainer.advanceUntil(s1, pos2) 907 if pos2 == length2 { 908 break main 909 } 910 s2 = x2.highlowcontainer.getKeyAtIndex(pos2) 911 } 912 } 913 } else { 914 break 915 } 916 } 917 rb.highlowcontainer.resize(intersectionsize) 918 } 919 920 // OrCardinality returns the cardinality of the union between two bitmaps, bitmaps are not modified 921 func (rb *Bitmap) OrCardinality(x2 *Bitmap) uint64 { 922 pos1 := 0 923 pos2 := 0 924 length1 := rb.highlowcontainer.size() 925 length2 := x2.highlowcontainer.size() 926 answer := uint64(0) 927 main: 928 for { 929 if (pos1 < length1) && (pos2 < length2) { 930 s1 := rb.highlowcontainer.getKeyAtIndex(pos1) 931 s2 := x2.highlowcontainer.getKeyAtIndex(pos2) 932 933 for { 934 if s1 < s2 { 935 answer += uint64(rb.highlowcontainer.getContainerAtIndex(pos1).getCardinality()) 936 pos1++ 937 if pos1 == length1 { 938 break main 939 } 940 s1 = rb.highlowcontainer.getKeyAtIndex(pos1) 941 } else if s1 > s2 { 942 answer += uint64(x2.highlowcontainer.getContainerAtIndex(pos2).getCardinality()) 943 pos2++ 944 if pos2 == length2 { 945 break main 946 } 947 s2 = x2.highlowcontainer.getKeyAtIndex(pos2) 948 } else { 949 // TODO: could be faster if we did not have to materialize the container 950 answer += uint64(rb.highlowcontainer.getContainerAtIndex(pos1).or(x2.highlowcontainer.getContainerAtIndex(pos2)).getCardinality()) 951 pos1++ 952 pos2++ 953 if (pos1 == length1) || (pos2 == length2) { 954 break main 955 } 956 s1 = rb.highlowcontainer.getKeyAtIndex(pos1) 957 s2 = x2.highlowcontainer.getKeyAtIndex(pos2) 958 } 959 } 960 } else { 961 break 962 } 963 } 964 for ; pos1 < length1; pos1++ { 965 answer += uint64(rb.highlowcontainer.getContainerAtIndex(pos1).getCardinality()) 966 } 967 for ; pos2 < length2; pos2++ { 968 answer += uint64(x2.highlowcontainer.getContainerAtIndex(pos2).getCardinality()) 969 } 970 return answer 971 } 972 973 // AndCardinality returns the cardinality of the intersection between two bitmaps, bitmaps are not modified 974 func (rb *Bitmap) AndCardinality(x2 *Bitmap) uint64 { 975 pos1 := 0 976 pos2 := 0 977 answer := uint64(0) 978 length1 := rb.highlowcontainer.size() 979 length2 := x2.highlowcontainer.size() 980 981 main: 982 for { 983 if pos1 < length1 && pos2 < length2 { 984 s1 := rb.highlowcontainer.getKeyAtIndex(pos1) 985 s2 := x2.highlowcontainer.getKeyAtIndex(pos2) 986 for { 987 if s1 == s2 { 988 c1 := rb.highlowcontainer.getContainerAtIndex(pos1) 989 c2 := x2.highlowcontainer.getContainerAtIndex(pos2) 990 answer += uint64(c1.andCardinality(c2)) 991 pos1++ 992 pos2++ 993 if (pos1 == length1) || (pos2 == length2) { 994 break main 995 } 996 s1 = rb.highlowcontainer.getKeyAtIndex(pos1) 997 s2 = x2.highlowcontainer.getKeyAtIndex(pos2) 998 } else if s1 < s2 { 999 pos1 = rb.highlowcontainer.advanceUntil(s2, pos1) 1000 if pos1 == length1 { 1001 break main 1002 } 1003 s1 = rb.highlowcontainer.getKeyAtIndex(pos1) 1004 } else { //s1 > s2 1005 pos2 = x2.highlowcontainer.advanceUntil(s1, pos2) 1006 if pos2 == length2 { 1007 break main 1008 } 1009 s2 = x2.highlowcontainer.getKeyAtIndex(pos2) 1010 } 1011 } 1012 } else { 1013 break 1014 } 1015 } 1016 return answer 1017 } 1018 1019 // IntersectsWithInterval checks whether a bitmap 'rb' and an open interval '[x,y)' intersect. 1020 func (rb *Bitmap) IntersectsWithInterval(x, y uint64) bool { 1021 if x >= y { 1022 return false 1023 } 1024 if x > MaxUint32 { 1025 return false 1026 } 1027 1028 it := intIterator{} 1029 it.Initialize(rb) 1030 it.AdvanceIfNeeded(uint32(x)) 1031 if !it.HasNext() { 1032 return false 1033 } 1034 if uint64(it.Next()) >= y { 1035 return false 1036 } 1037 1038 return true 1039 } 1040 1041 // Intersects checks whether two bitmap intersects, bitmaps are not modified 1042 func (rb *Bitmap) Intersects(x2 *Bitmap) bool { 1043 pos1 := 0 1044 pos2 := 0 1045 length1 := rb.highlowcontainer.size() 1046 length2 := x2.highlowcontainer.size() 1047 1048 main: 1049 for { 1050 if pos1 < length1 && pos2 < length2 { 1051 s1 := rb.highlowcontainer.getKeyAtIndex(pos1) 1052 s2 := x2.highlowcontainer.getKeyAtIndex(pos2) 1053 for { 1054 if s1 == s2 { 1055 c1 := rb.highlowcontainer.getContainerAtIndex(pos1) 1056 c2 := x2.highlowcontainer.getContainerAtIndex(pos2) 1057 if c1.intersects(c2) { 1058 return true 1059 } 1060 pos1++ 1061 pos2++ 1062 if (pos1 == length1) || (pos2 == length2) { 1063 break main 1064 } 1065 s1 = rb.highlowcontainer.getKeyAtIndex(pos1) 1066 s2 = x2.highlowcontainer.getKeyAtIndex(pos2) 1067 } else if s1 < s2 { 1068 pos1 = rb.highlowcontainer.advanceUntil(s2, pos1) 1069 if pos1 == length1 { 1070 break main 1071 } 1072 s1 = rb.highlowcontainer.getKeyAtIndex(pos1) 1073 } else { //s1 > s2 1074 pos2 = x2.highlowcontainer.advanceUntil(s1, pos2) 1075 if pos2 == length2 { 1076 break main 1077 } 1078 s2 = x2.highlowcontainer.getKeyAtIndex(pos2) 1079 } 1080 } 1081 } else { 1082 break 1083 } 1084 } 1085 return false 1086 } 1087 1088 // Xor computes the symmetric difference between two bitmaps and stores the result in the current bitmap 1089 func (rb *Bitmap) Xor(x2 *Bitmap) { 1090 pos1 := 0 1091 pos2 := 0 1092 length1 := rb.highlowcontainer.size() 1093 length2 := x2.highlowcontainer.size() 1094 for { 1095 if (pos1 < length1) && (pos2 < length2) { 1096 s1 := rb.highlowcontainer.getKeyAtIndex(pos1) 1097 s2 := x2.highlowcontainer.getKeyAtIndex(pos2) 1098 if s1 < s2 { 1099 pos1 = rb.highlowcontainer.advanceUntil(s2, pos1) 1100 if pos1 == length1 { 1101 break 1102 } 1103 } else if s1 > s2 { 1104 c := x2.highlowcontainer.getWritableContainerAtIndex(pos2) 1105 rb.highlowcontainer.insertNewKeyValueAt(pos1, x2.highlowcontainer.getKeyAtIndex(pos2), c) 1106 length1++ 1107 pos1++ 1108 pos2++ 1109 } else { 1110 // TODO: couple be computed in-place for reduced memory usage 1111 c := rb.highlowcontainer.getContainerAtIndex(pos1).xor(x2.highlowcontainer.getContainerAtIndex(pos2)) 1112 if !c.isEmpty() { 1113 rb.highlowcontainer.setContainerAtIndex(pos1, c) 1114 pos1++ 1115 } else { 1116 rb.highlowcontainer.removeAtIndex(pos1) 1117 length1-- 1118 } 1119 pos2++ 1120 } 1121 } else { 1122 break 1123 } 1124 } 1125 if pos1 == length1 { 1126 rb.highlowcontainer.appendCopyMany(x2.highlowcontainer, pos2, length2) 1127 } 1128 } 1129 1130 // Or computes the union between two bitmaps and stores the result in the current bitmap 1131 func (rb *Bitmap) Or(x2 *Bitmap) { 1132 pos1 := 0 1133 pos2 := 0 1134 length1 := rb.highlowcontainer.size() 1135 length2 := x2.highlowcontainer.size() 1136 main: 1137 for (pos1 < length1) && (pos2 < length2) { 1138 s1 := rb.highlowcontainer.getKeyAtIndex(pos1) 1139 s2 := x2.highlowcontainer.getKeyAtIndex(pos2) 1140 1141 for { 1142 if s1 < s2 { 1143 pos1++ 1144 if pos1 == length1 { 1145 break main 1146 } 1147 s1 = rb.highlowcontainer.getKeyAtIndex(pos1) 1148 } else if s1 > s2 { 1149 rb.highlowcontainer.insertNewKeyValueAt(pos1, s2, x2.highlowcontainer.getContainerAtIndex(pos2).clone()) 1150 pos1++ 1151 length1++ 1152 pos2++ 1153 if pos2 == length2 { 1154 break main 1155 } 1156 s2 = x2.highlowcontainer.getKeyAtIndex(pos2) 1157 } else { 1158 rb.highlowcontainer.replaceKeyAndContainerAtIndex(pos1, s1, rb.highlowcontainer.getUnionedWritableContainer(pos1, x2.highlowcontainer.getContainerAtIndex(pos2)), false) 1159 pos1++ 1160 pos2++ 1161 if (pos1 == length1) || (pos2 == length2) { 1162 break main 1163 } 1164 s1 = rb.highlowcontainer.getKeyAtIndex(pos1) 1165 s2 = x2.highlowcontainer.getKeyAtIndex(pos2) 1166 } 1167 } 1168 } 1169 if pos1 == length1 { 1170 rb.highlowcontainer.appendCopyMany(x2.highlowcontainer, pos2, length2) 1171 } 1172 } 1173 1174 // AndNot computes the difference between two bitmaps and stores the result in the current bitmap 1175 func (rb *Bitmap) AndNot(x2 *Bitmap) { 1176 pos1 := 0 1177 pos2 := 0 1178 intersectionsize := 0 1179 length1 := rb.highlowcontainer.size() 1180 length2 := x2.highlowcontainer.size() 1181 1182 main: 1183 for { 1184 if pos1 < length1 && pos2 < length2 { 1185 s1 := rb.highlowcontainer.getKeyAtIndex(pos1) 1186 s2 := x2.highlowcontainer.getKeyAtIndex(pos2) 1187 for { 1188 if s1 == s2 { 1189 c1 := rb.highlowcontainer.getWritableContainerAtIndex(pos1) 1190 c2 := x2.highlowcontainer.getContainerAtIndex(pos2) 1191 diff := c1.iandNot(c2) 1192 if !diff.isEmpty() { 1193 rb.highlowcontainer.replaceKeyAndContainerAtIndex(intersectionsize, s1, diff, false) 1194 intersectionsize++ 1195 } 1196 pos1++ 1197 pos2++ 1198 if (pos1 == length1) || (pos2 == length2) { 1199 break main 1200 } 1201 s1 = rb.highlowcontainer.getKeyAtIndex(pos1) 1202 s2 = x2.highlowcontainer.getKeyAtIndex(pos2) 1203 } else if s1 < s2 { 1204 c1 := rb.highlowcontainer.getContainerAtIndex(pos1) 1205 mustCopyOnWrite := rb.highlowcontainer.needsCopyOnWrite(pos1) 1206 rb.highlowcontainer.replaceKeyAndContainerAtIndex(intersectionsize, s1, c1, mustCopyOnWrite) 1207 intersectionsize++ 1208 pos1++ 1209 if pos1 == length1 { 1210 break main 1211 } 1212 s1 = rb.highlowcontainer.getKeyAtIndex(pos1) 1213 } else { //s1 > s2 1214 pos2 = x2.highlowcontainer.advanceUntil(s1, pos2) 1215 if pos2 == length2 { 1216 break main 1217 } 1218 s2 = x2.highlowcontainer.getKeyAtIndex(pos2) 1219 } 1220 } 1221 } else { 1222 break 1223 } 1224 } 1225 // TODO:implement as a copy 1226 for pos1 < length1 { 1227 c1 := rb.highlowcontainer.getContainerAtIndex(pos1) 1228 s1 := rb.highlowcontainer.getKeyAtIndex(pos1) 1229 mustCopyOnWrite := rb.highlowcontainer.needsCopyOnWrite(pos1) 1230 rb.highlowcontainer.replaceKeyAndContainerAtIndex(intersectionsize, s1, c1, mustCopyOnWrite) 1231 intersectionsize++ 1232 pos1++ 1233 } 1234 rb.highlowcontainer.resize(intersectionsize) 1235 } 1236 1237 // Or computes the union between two bitmaps and returns the result 1238 func Or(x1, x2 *Bitmap) *Bitmap { 1239 answer := NewBitmap() 1240 pos1 := 0 1241 pos2 := 0 1242 length1 := x1.highlowcontainer.size() 1243 length2 := x2.highlowcontainer.size() 1244 main: 1245 for (pos1 < length1) && (pos2 < length2) { 1246 s1 := x1.highlowcontainer.getKeyAtIndex(pos1) 1247 s2 := x2.highlowcontainer.getKeyAtIndex(pos2) 1248 1249 for { 1250 if s1 < s2 { 1251 answer.highlowcontainer.appendCopy(x1.highlowcontainer, pos1) 1252 pos1++ 1253 if pos1 == length1 { 1254 break main 1255 } 1256 s1 = x1.highlowcontainer.getKeyAtIndex(pos1) 1257 } else if s1 > s2 { 1258 answer.highlowcontainer.appendCopy(x2.highlowcontainer, pos2) 1259 pos2++ 1260 if pos2 == length2 { 1261 break main 1262 } 1263 s2 = x2.highlowcontainer.getKeyAtIndex(pos2) 1264 } else { 1265 1266 answer.highlowcontainer.appendContainer(s1, x1.highlowcontainer.getContainerAtIndex(pos1).or(x2.highlowcontainer.getContainerAtIndex(pos2)), false) 1267 pos1++ 1268 pos2++ 1269 if (pos1 == length1) || (pos2 == length2) { 1270 break main 1271 } 1272 s1 = x1.highlowcontainer.getKeyAtIndex(pos1) 1273 s2 = x2.highlowcontainer.getKeyAtIndex(pos2) 1274 } 1275 } 1276 } 1277 if pos1 == length1 { 1278 answer.highlowcontainer.appendCopyMany(x2.highlowcontainer, pos2, length2) 1279 } else if pos2 == length2 { 1280 answer.highlowcontainer.appendCopyMany(x1.highlowcontainer, pos1, length1) 1281 } 1282 return answer 1283 } 1284 1285 // And computes the intersection between two bitmaps and returns the result 1286 func And(x1, x2 *Bitmap) *Bitmap { 1287 answer := NewBitmap() 1288 pos1 := 0 1289 pos2 := 0 1290 length1 := x1.highlowcontainer.size() 1291 length2 := x2.highlowcontainer.size() 1292 main: 1293 for pos1 < length1 && pos2 < length2 { 1294 s1 := x1.highlowcontainer.getKeyAtIndex(pos1) 1295 s2 := x2.highlowcontainer.getKeyAtIndex(pos2) 1296 for { 1297 if s1 == s2 { 1298 C := x1.highlowcontainer.getContainerAtIndex(pos1) 1299 C = C.and(x2.highlowcontainer.getContainerAtIndex(pos2)) 1300 1301 if !C.isEmpty() { 1302 answer.highlowcontainer.appendContainer(s1, C, false) 1303 } 1304 pos1++ 1305 pos2++ 1306 if (pos1 == length1) || (pos2 == length2) { 1307 break main 1308 } 1309 s1 = x1.highlowcontainer.getKeyAtIndex(pos1) 1310 s2 = x2.highlowcontainer.getKeyAtIndex(pos2) 1311 } else if s1 < s2 { 1312 pos1 = x1.highlowcontainer.advanceUntil(s2, pos1) 1313 if pos1 == length1 { 1314 break main 1315 } 1316 s1 = x1.highlowcontainer.getKeyAtIndex(pos1) 1317 } else { // s1 > s2 1318 pos2 = x2.highlowcontainer.advanceUntil(s1, pos2) 1319 if pos2 == length2 { 1320 break main 1321 } 1322 s2 = x2.highlowcontainer.getKeyAtIndex(pos2) 1323 } 1324 } 1325 } 1326 return answer 1327 } 1328 1329 // Xor computes the symmetric difference between two bitmaps and returns the result 1330 func Xor(x1, x2 *Bitmap) *Bitmap { 1331 answer := NewBitmap() 1332 pos1 := 0 1333 pos2 := 0 1334 length1 := x1.highlowcontainer.size() 1335 length2 := x2.highlowcontainer.size() 1336 for { 1337 if (pos1 < length1) && (pos2 < length2) { 1338 s1 := x1.highlowcontainer.getKeyAtIndex(pos1) 1339 s2 := x2.highlowcontainer.getKeyAtIndex(pos2) 1340 if s1 < s2 { 1341 answer.highlowcontainer.appendCopy(x1.highlowcontainer, pos1) 1342 pos1++ 1343 } else if s1 > s2 { 1344 answer.highlowcontainer.appendCopy(x2.highlowcontainer, pos2) 1345 pos2++ 1346 } else { 1347 c := x1.highlowcontainer.getContainerAtIndex(pos1).xor(x2.highlowcontainer.getContainerAtIndex(pos2)) 1348 if !c.isEmpty() { 1349 answer.highlowcontainer.appendContainer(s1, c, false) 1350 } 1351 pos1++ 1352 pos2++ 1353 } 1354 } else { 1355 break 1356 } 1357 } 1358 if pos1 == length1 { 1359 answer.highlowcontainer.appendCopyMany(x2.highlowcontainer, pos2, length2) 1360 } else if pos2 == length2 { 1361 answer.highlowcontainer.appendCopyMany(x1.highlowcontainer, pos1, length1) 1362 } 1363 return answer 1364 } 1365 1366 // AndNot computes the difference between two bitmaps and returns the result 1367 func AndNot(x1, x2 *Bitmap) *Bitmap { 1368 answer := NewBitmap() 1369 pos1 := 0 1370 pos2 := 0 1371 length1 := x1.highlowcontainer.size() 1372 length2 := x2.highlowcontainer.size() 1373 1374 main: 1375 for { 1376 if pos1 < length1 && pos2 < length2 { 1377 s1 := x1.highlowcontainer.getKeyAtIndex(pos1) 1378 s2 := x2.highlowcontainer.getKeyAtIndex(pos2) 1379 for { 1380 if s1 < s2 { 1381 answer.highlowcontainer.appendCopy(x1.highlowcontainer, pos1) 1382 pos1++ 1383 if pos1 == length1 { 1384 break main 1385 } 1386 s1 = x1.highlowcontainer.getKeyAtIndex(pos1) 1387 } else if s1 == s2 { 1388 c1 := x1.highlowcontainer.getContainerAtIndex(pos1) 1389 c2 := x2.highlowcontainer.getContainerAtIndex(pos2) 1390 diff := c1.andNot(c2) 1391 if !diff.isEmpty() { 1392 answer.highlowcontainer.appendContainer(s1, diff, false) 1393 } 1394 pos1++ 1395 pos2++ 1396 if (pos1 == length1) || (pos2 == length2) { 1397 break main 1398 } 1399 s1 = x1.highlowcontainer.getKeyAtIndex(pos1) 1400 s2 = x2.highlowcontainer.getKeyAtIndex(pos2) 1401 } else { //s1 > s2 1402 pos2 = x2.highlowcontainer.advanceUntil(s1, pos2) 1403 if pos2 == length2 { 1404 break main 1405 } 1406 s2 = x2.highlowcontainer.getKeyAtIndex(pos2) 1407 } 1408 } 1409 } else { 1410 break 1411 } 1412 } 1413 if pos2 == length2 { 1414 answer.highlowcontainer.appendCopyMany(x1.highlowcontainer, pos1, length1) 1415 } 1416 return answer 1417 } 1418 1419 // AddMany add all of the values in dat 1420 func (rb *Bitmap) AddMany(dat []uint32) { 1421 if len(dat) == 0 { 1422 return 1423 } 1424 prev := dat[0] 1425 idx, c := rb.addwithptr(prev) 1426 for _, i := range dat[1:] { 1427 if highbits(prev) == highbits(i) { 1428 c = c.iaddReturnMinimized(lowbits(i)) 1429 rb.highlowcontainer.setContainerAtIndex(idx, c) 1430 } else { 1431 idx, c = rb.addwithptr(i) 1432 } 1433 prev = i 1434 } 1435 } 1436 1437 // BitmapOf generates a new bitmap filled with the specified integers 1438 func BitmapOf(dat ...uint32) *Bitmap { 1439 ans := NewBitmap() 1440 ans.AddMany(dat) 1441 return ans 1442 } 1443 1444 // Flip negates the bits in the given range (i.e., [rangeStart,rangeEnd)), any integer present in this range and in the bitmap is removed, 1445 // and any integer present in the range and not in the bitmap is added. 1446 // The function uses 64-bit parameters even though a Bitmap stores 32-bit values because it is allowed and meaningful to use [0,uint64(0x100000000)) as a range 1447 // while uint64(0x100000000) cannot be represented as a 32-bit value. 1448 func (rb *Bitmap) Flip(rangeStart, rangeEnd uint64) { 1449 1450 if rangeEnd > MaxUint32+1 { 1451 panic("rangeEnd > MaxUint32+1") 1452 } 1453 if rangeStart > MaxUint32+1 { 1454 panic("rangeStart > MaxUint32+1") 1455 } 1456 1457 if rangeStart >= rangeEnd { 1458 return 1459 } 1460 1461 hbStart := uint32(highbits(uint32(rangeStart))) 1462 lbStart := uint32(lowbits(uint32(rangeStart))) 1463 hbLast := uint32(highbits(uint32(rangeEnd - 1))) 1464 lbLast := uint32(lowbits(uint32(rangeEnd - 1))) 1465 1466 var max uint32 = maxLowBit 1467 for hb := hbStart; hb <= hbLast; hb++ { 1468 var containerStart uint32 1469 if hb == hbStart { 1470 containerStart = uint32(lbStart) 1471 } 1472 containerLast := max 1473 if hb == hbLast { 1474 containerLast = uint32(lbLast) 1475 } 1476 1477 i := rb.highlowcontainer.getIndex(uint16(hb)) 1478 1479 if i >= 0 { 1480 c := rb.highlowcontainer.getWritableContainerAtIndex(i).inot(int(containerStart), int(containerLast)+1) 1481 if !c.isEmpty() { 1482 rb.highlowcontainer.setContainerAtIndex(i, c) 1483 } else { 1484 rb.highlowcontainer.removeAtIndex(i) 1485 } 1486 } else { // *think* the range of ones must never be 1487 // empty. 1488 rb.highlowcontainer.insertNewKeyValueAt(-i-1, uint16(hb), rangeOfOnes(int(containerStart), int(containerLast))) 1489 } 1490 } 1491 } 1492 1493 // FlipInt calls Flip after casting the parameters (convenience method) 1494 func (rb *Bitmap) FlipInt(rangeStart, rangeEnd int) { 1495 rb.Flip(uint64(rangeStart), uint64(rangeEnd)) 1496 } 1497 1498 // AddRange adds the integers in [rangeStart, rangeEnd) to the bitmap. 1499 // The function uses 64-bit parameters even though a Bitmap stores 32-bit values because it is allowed and meaningful to use [0,uint64(0x100000000)) as a range 1500 // while uint64(0x100000000) cannot be represented as a 32-bit value. 1501 func (rb *Bitmap) AddRange(rangeStart, rangeEnd uint64) { 1502 if rangeStart >= rangeEnd { 1503 return 1504 } 1505 if rangeEnd-1 > MaxUint32 { 1506 panic("rangeEnd-1 > MaxUint32") 1507 } 1508 hbStart := uint32(highbits(uint32(rangeStart))) 1509 lbStart := uint32(lowbits(uint32(rangeStart))) 1510 hbLast := uint32(highbits(uint32(rangeEnd - 1))) 1511 lbLast := uint32(lowbits(uint32(rangeEnd - 1))) 1512 1513 var max uint32 = maxLowBit 1514 for hb := hbStart; hb <= hbLast; hb++ { 1515 containerStart := uint32(0) 1516 if hb == hbStart { 1517 containerStart = lbStart 1518 } 1519 containerLast := max 1520 if hb == hbLast { 1521 containerLast = lbLast 1522 } 1523 1524 i := rb.highlowcontainer.getIndex(uint16(hb)) 1525 1526 if i >= 0 { 1527 c := rb.highlowcontainer.getWritableContainerAtIndex(i).iaddRange(int(containerStart), int(containerLast)+1) 1528 rb.highlowcontainer.setContainerAtIndex(i, c) 1529 } else { // *think* the range of ones must never be 1530 // empty. 1531 rb.highlowcontainer.insertNewKeyValueAt(-i-1, uint16(hb), rangeOfOnes(int(containerStart), int(containerLast))) 1532 } 1533 } 1534 } 1535 1536 // RemoveRange removes the integers in [rangeStart, rangeEnd) from the bitmap. 1537 // The function uses 64-bit parameters even though a Bitmap stores 32-bit values because it is allowed and meaningful to use [0,uint64(0x100000000)) as a range 1538 // while uint64(0x100000000) cannot be represented as a 32-bit value. 1539 func (rb *Bitmap) RemoveRange(rangeStart, rangeEnd uint64) { 1540 if rangeStart >= rangeEnd { 1541 return 1542 } 1543 if rangeEnd-1 > MaxUint32 { 1544 // logically, we should assume that the user wants to 1545 // remove all values from rangeStart to infinity 1546 // see https://github.com/RoaringBitmap/roaring/issues/141 1547 rangeEnd = uint64(0x100000000) 1548 } 1549 hbStart := uint32(highbits(uint32(rangeStart))) 1550 lbStart := uint32(lowbits(uint32(rangeStart))) 1551 hbLast := uint32(highbits(uint32(rangeEnd - 1))) 1552 lbLast := uint32(lowbits(uint32(rangeEnd - 1))) 1553 1554 var max uint32 = maxLowBit 1555 1556 if hbStart == hbLast { 1557 i := rb.highlowcontainer.getIndex(uint16(hbStart)) 1558 if i < 0 { 1559 return 1560 } 1561 c := rb.highlowcontainer.getWritableContainerAtIndex(i).iremoveRange(int(lbStart), int(lbLast+1)) 1562 if !c.isEmpty() { 1563 rb.highlowcontainer.setContainerAtIndex(i, c) 1564 } else { 1565 rb.highlowcontainer.removeAtIndex(i) 1566 } 1567 return 1568 } 1569 ifirst := rb.highlowcontainer.getIndex(uint16(hbStart)) 1570 ilast := rb.highlowcontainer.getIndex(uint16(hbLast)) 1571 1572 if ifirst >= 0 { 1573 if lbStart != 0 { 1574 c := rb.highlowcontainer.getWritableContainerAtIndex(ifirst).iremoveRange(int(lbStart), int(max+1)) 1575 if !c.isEmpty() { 1576 rb.highlowcontainer.setContainerAtIndex(ifirst, c) 1577 ifirst++ 1578 } 1579 } 1580 } else { 1581 ifirst = -ifirst - 1 1582 } 1583 if ilast >= 0 { 1584 if lbLast != max { 1585 c := rb.highlowcontainer.getWritableContainerAtIndex(ilast).iremoveRange(int(0), int(lbLast+1)) 1586 if !c.isEmpty() { 1587 rb.highlowcontainer.setContainerAtIndex(ilast, c) 1588 } else { 1589 ilast++ 1590 } 1591 } else { 1592 ilast++ 1593 } 1594 } else { 1595 ilast = -ilast - 1 1596 } 1597 rb.highlowcontainer.removeIndexRange(ifirst, ilast) 1598 } 1599 1600 // Flip negates the bits in the given range (i.e., [rangeStart,rangeEnd)), any integer present in this range and in the bitmap is removed, 1601 // and any integer present in the range and not in the bitmap is added, a new bitmap is returned leaving 1602 // the current bitmap unchanged. 1603 // The function uses 64-bit parameters even though a Bitmap stores 32-bit values because it is allowed and meaningful to use [0,uint64(0x100000000)) as a range 1604 // while uint64(0x100000000) cannot be represented as a 32-bit value. 1605 func Flip(bm *Bitmap, rangeStart, rangeEnd uint64) *Bitmap { 1606 if rangeStart >= rangeEnd { 1607 return bm.Clone() 1608 } 1609 1610 if rangeStart > MaxUint32 { 1611 panic("rangeStart > MaxUint32") 1612 } 1613 if rangeEnd-1 > MaxUint32 { 1614 panic("rangeEnd-1 > MaxUint32") 1615 } 1616 1617 answer := NewBitmap() 1618 hbStart := uint32(highbits(uint32(rangeStart))) 1619 lbStart := uint32(lowbits(uint32(rangeStart))) 1620 hbLast := uint32(highbits(uint32(rangeEnd - 1))) 1621 lbLast := uint32(lowbits(uint32(rangeEnd - 1))) 1622 1623 // copy the containers before the active area 1624 answer.highlowcontainer.appendCopiesUntil(bm.highlowcontainer, uint16(hbStart)) 1625 1626 var max uint32 = maxLowBit 1627 for hb := hbStart; hb <= hbLast; hb++ { 1628 var containerStart uint32 1629 if hb == hbStart { 1630 containerStart = uint32(lbStart) 1631 } 1632 containerLast := max 1633 if hb == hbLast { 1634 containerLast = uint32(lbLast) 1635 } 1636 1637 i := bm.highlowcontainer.getIndex(uint16(hb)) 1638 j := answer.highlowcontainer.getIndex(uint16(hb)) 1639 1640 if i >= 0 { 1641 c := bm.highlowcontainer.getContainerAtIndex(i).not(int(containerStart), int(containerLast)+1) 1642 if !c.isEmpty() { 1643 answer.highlowcontainer.insertNewKeyValueAt(-j-1, uint16(hb), c) 1644 } 1645 1646 } else { // *think* the range of ones must never be 1647 // empty. 1648 answer.highlowcontainer.insertNewKeyValueAt(-j-1, uint16(hb), 1649 rangeOfOnes(int(containerStart), int(containerLast))) 1650 } 1651 } 1652 // copy the containers after the active area. 1653 answer.highlowcontainer.appendCopiesAfter(bm.highlowcontainer, uint16(hbLast)) 1654 1655 return answer 1656 } 1657 1658 // SetCopyOnWrite sets this bitmap to use copy-on-write so that copies are fast and memory conscious 1659 // if the parameter is true, otherwise we leave the default where hard copies are made 1660 // (copy-on-write requires extra care in a threaded context). 1661 // Calling SetCopyOnWrite(true) on a bitmap created with FromBuffer is unsafe. 1662 func (rb *Bitmap) SetCopyOnWrite(val bool) { 1663 rb.highlowcontainer.copyOnWrite = val 1664 } 1665 1666 // GetCopyOnWrite gets this bitmap's copy-on-write property 1667 func (rb *Bitmap) GetCopyOnWrite() (val bool) { 1668 return rb.highlowcontainer.copyOnWrite 1669 } 1670 1671 // CloneCopyOnWriteContainers clones all containers which have 1672 // needCopyOnWrite set to true. 1673 // This can be used to make sure it is safe to munmap a []byte 1674 // that the roaring array may still have a reference to, after 1675 // calling FromBuffer. 1676 // More generally this function is useful if you call FromBuffer 1677 // to construct a bitmap with a backing array buf 1678 // and then later discard the buf array. Note that you should call 1679 // CloneCopyOnWriteContainers on all bitmaps that were derived 1680 // from the 'FromBuffer' bitmap since they map have dependencies 1681 // on the buf array as well. 1682 func (rb *Bitmap) CloneCopyOnWriteContainers() { 1683 rb.highlowcontainer.cloneCopyOnWriteContainers() 1684 } 1685 1686 // FlipInt calls Flip after casting the parameters (convenience method) 1687 func FlipInt(bm *Bitmap, rangeStart, rangeEnd int) *Bitmap { 1688 return Flip(bm, uint64(rangeStart), uint64(rangeEnd)) 1689 } 1690 1691 // Statistics provides details on the container types in use. 1692 type Statistics struct { 1693 Cardinality uint64 1694 Containers uint64 1695 1696 ArrayContainers uint64 1697 ArrayContainerBytes uint64 1698 ArrayContainerValues uint64 1699 1700 BitmapContainers uint64 1701 BitmapContainerBytes uint64 1702 BitmapContainerValues uint64 1703 1704 RunContainers uint64 1705 RunContainerBytes uint64 1706 RunContainerValues uint64 1707 } 1708 1709 // Stats returns details on container type usage in a Statistics struct. 1710 func (rb *Bitmap) Stats() Statistics { 1711 stats := Statistics{} 1712 stats.Containers = uint64(len(rb.highlowcontainer.containers)) 1713 for _, c := range rb.highlowcontainer.containers { 1714 stats.Cardinality += uint64(c.getCardinality()) 1715 1716 switch c.(type) { 1717 case *arrayContainer: 1718 stats.ArrayContainers++ 1719 stats.ArrayContainerBytes += uint64(c.getSizeInBytes()) 1720 stats.ArrayContainerValues += uint64(c.getCardinality()) 1721 case *bitmapContainer: 1722 stats.BitmapContainers++ 1723 stats.BitmapContainerBytes += uint64(c.getSizeInBytes()) 1724 stats.BitmapContainerValues += uint64(c.getCardinality()) 1725 case *runContainer16: 1726 stats.RunContainers++ 1727 stats.RunContainerBytes += uint64(c.getSizeInBytes()) 1728 stats.RunContainerValues += uint64(c.getCardinality()) 1729 } 1730 } 1731 return stats 1732 }