github.com/balzaczyy/golucene@v0.0.0-20151210033525-d0be9ee89713/core/util/packed/packed.go (about) 1 package packed 2 3 import ( 4 "errors" 5 "fmt" 6 "github.com/balzaczyy/golucene/core/codec" 7 "github.com/balzaczyy/golucene/core/util" 8 "math" 9 ) 10 11 // util/packed/PackedInts.java 12 13 type DataInput interface { 14 ReadByte() (byte, error) 15 ReadBytes(buf []byte) error 16 ReadShort() (int16, error) 17 ReadInt() (int32, error) 18 ReadVInt() (int32, error) 19 ReadLong() (int64, error) 20 ReadString() (string, error) 21 } 22 23 type DataOutput interface { 24 WriteBytes(buf []byte) error 25 WriteInt(int32) error 26 WriteVInt(int32) error 27 WriteString(string) error 28 } 29 30 /* 31 Simplistic compression for arrays of unsinged int64 values. Each value 32 is >= 0 and <= a specified maximum value. The vlues are stored as 33 packed ints, with each value consuming a fixed number of bits. 34 */ 35 var PackedInts = struct { 36 FASTEST float32 // At most 700% memory overhead, always select a direct implementation. 37 FAST float32 // At most 50% memory overhead, always elect a reasonable fast implementation. 38 DEFAULT float32 // At most 25% memory overhead. 39 COMPACT float32 // No memory overhead at all, but hte returned implementation may be slow. 40 }{7, 0.5, 0.25, 0} 41 42 const ( 43 /* Default amount of memory to use for bulk operations. */ 44 DEFAULT_BUFFER_SIZE = 1024 // 1K 45 46 PACKED_CODEC_NAME = "PackedInts" 47 PACKED_VERSION_START = 0 48 PACKED_VERSION_BYTE_ALIGNED = 1 49 VERSION_MONOTONIC_WITHOUT_ZIGZAG = 2 50 VERSION_CURRENT = VERSION_MONOTONIC_WITHOUT_ZIGZAG 51 ) 52 53 // Ceck the validity of a version number 54 func CheckVersion(version int32) { 55 if version < PACKED_VERSION_START { 56 panic(fmt.Sprintf("Version is too old, should be at least %v (got %v)", PACKED_VERSION_START, version)) 57 } else if version > VERSION_CURRENT { 58 panic(fmt.Sprintf("Version is too new, should be at most %v (got %v)", VERSION_CURRENT, version)) 59 } 60 } 61 62 /** 63 * A format to write packed ints. 64 * 65 * @lucene.internal 66 */ 67 type PackedFormat int 68 69 const ( 70 PACKED = 0 71 PACKED_SINGLE_BLOCK = 1 72 ) 73 74 func (f PackedFormat) Id() int { 75 return int(f) 76 } 77 78 /** 79 * Computes how many byte blocks are needed to store <code>values</code> 80 * values of size <code>bitsPerValue</code>. 81 */ 82 func (f PackedFormat) ByteCount(packedIntsVersion, valueCount int32, bitsPerValue uint32) int64 { 83 switch int(f) { 84 case PACKED: 85 if packedIntsVersion < PACKED_VERSION_BYTE_ALIGNED { 86 return 8 * int64(math.Ceil(float64(valueCount)*float64(bitsPerValue)/64)) 87 } 88 return int64(math.Ceil(float64(valueCount) * float64(bitsPerValue) / 8)) 89 } 90 // assert bitsPerValue >= 0 && bitsPerValue <= 64 91 // assume long-aligned 92 return 8 * int64(f.longCount(packedIntsVersion, valueCount, bitsPerValue)) 93 } 94 95 /** 96 * Computes how many long blocks are needed to store <code>values</code> 97 * values of size <code>bitsPerValue</code>. 98 */ 99 func (f PackedFormat) longCount(packedIntsVersion, valueCount int32, bitsPerValue uint32) int { 100 switch int(f) { 101 case PACKED_SINGLE_BLOCK: 102 valuesPerBlock := 64 / bitsPerValue 103 return int(math.Ceil(float64(valueCount) / float64(valuesPerBlock))) 104 } 105 // assert bitsPerValue >= 0 && bitsPerValue <= 64 106 ans := f.ByteCount(packedIntsVersion, valueCount, bitsPerValue) 107 // assert ans < 8 * math.MaxInt32() 108 if ans%8 == 0 { 109 return int(ans / 8) 110 } 111 return int(ans/8) + 1 112 } 113 114 /** 115 * Tests whether the provided number of bits per value is supported by the 116 * format. 117 */ 118 func (f PackedFormat) IsSupported(bitsPerValue int) bool { 119 switch int(f) { 120 case PACKED_SINGLE_BLOCK: 121 return is64Supported(bitsPerValue) 122 } 123 return bitsPerValue >= 1 && bitsPerValue <= 64 124 } 125 126 /* Returns the overhead per value, in bits. */ 127 func (f PackedFormat) OverheadPerValue(bitsPerValue int) float32 { 128 switch int(f) { 129 case PACKED_SINGLE_BLOCK: 130 assert(f.IsSupported(bitsPerValue)) 131 valuesPerBlock := 64 / bitsPerValue 132 overhead := 64 % bitsPerValue 133 return float32(overhead) / float32(valuesPerBlock) 134 } 135 return 0 136 } 137 138 /* Simple class that holds a format and a number of bits per value. */ 139 type FormatAndBits struct { 140 Format PackedFormat 141 BitsPerValue int 142 } 143 144 func (v FormatAndBits) String() string { 145 return fmt.Sprintf("FormatAndBits(format=%v bitsPerValue=%v)", v.Format, v.BitsPerValue) 146 } 147 148 /* 149 Try to find the Format and number of bits per value that would 150 restore from disk the fastest reader whose overhead is less than 151 acceptableOverheadRatio. 152 153 The acceptableOverheadRatio parameter makes sense for random-access 154 Readers. In case you only plan to perform sequential access on this 155 stream later on, you should probably use COMPACT. 156 157 If you don't know how many values you are going to write, use 158 valueCount = -1. 159 */ 160 func FastestFormatAndBits(valueCount, bitsPerValue int, 161 acceptableOverheadRatio float32) FormatAndBits { 162 if valueCount == -1 { 163 valueCount = int(math.MaxInt32) 164 } 165 166 if acceptableOverheadRatio < PackedInts.COMPACT { 167 acceptableOverheadRatio = PackedInts.COMPACT 168 } 169 if acceptableOverheadRatio > PackedInts.FASTEST { 170 acceptableOverheadRatio = PackedInts.FASTEST 171 } 172 acceptableOverheadRatioValue := acceptableOverheadRatio * float32(bitsPerValue) // in bits 173 174 maxBitsPerValue := bitsPerValue + int(acceptableOverheadRatioValue) 175 176 actualBitsPerValue := -1 177 format := PACKED 178 179 if bitsPerValue <= 8 && maxBitsPerValue >= 8 { 180 actualBitsPerValue = 8 181 } else if bitsPerValue <= 16 && maxBitsPerValue >= 16 { 182 actualBitsPerValue = 16 183 } else if bitsPerValue <= 32 && maxBitsPerValue >= 32 { 184 actualBitsPerValue = 32 185 } else if bitsPerValue <= 64 && maxBitsPerValue >= 64 { 186 actualBitsPerValue = 64 187 } else if valueCount <= int(PACKED8_THREE_BLOCKS_MAX_SIZE) && bitsPerValue <= 24 && maxBitsPerValue >= 24 { 188 actualBitsPerValue = 24 189 } else if valueCount <= int(PACKED16_THREE_BLOCKS_MAX_SIZE) && bitsPerValue <= 48 && maxBitsPerValue >= 48 { 190 actualBitsPerValue = 48 191 } else { 192 for bpv := bitsPerValue; bpv <= maxBitsPerValue; bpv++ { 193 if PackedFormat(PACKED_SINGLE_BLOCK).IsSupported(bpv) { 194 overhead := PackedFormat(PACKED_SINGLE_BLOCK).OverheadPerValue(bpv) 195 acceptableOverhead := acceptableOverheadRatioValue + float32(bitsPerValue-bpv) 196 if overhead <= acceptableOverhead { 197 actualBitsPerValue = bpv 198 format = PACKED_SINGLE_BLOCK 199 break 200 } 201 } 202 } 203 if actualBitsPerValue < 0 { 204 actualBitsPerValue = bitsPerValue 205 } 206 } 207 208 return FormatAndBits{PackedFormat(format), actualBitsPerValue} 209 } 210 211 type PackedIntsEncoder interface { 212 ByteValueCount() int 213 ByteBlockCount() int 214 // Read iterations * valueCount() values from values, encode them 215 // and write iterations * blockCount() blocks into blocks. 216 // encodeLongToLong(values, blocks []int64, iterations int) 217 // Read iterations * valueCount() values from values, encode them 218 // and write 8 * iterations * blockCount() blocks into blocks. 219 encodeLongToByte(values []int64, blocks []byte, iterations int) 220 EncodeIntToByte(values []int, blocks []byte, iterations int) 221 } 222 223 type PackedIntsDecoder interface { 224 // The minum numer of byte blocks to encode in a single iteration, when using byte encoding 225 ByteBlockCount() int 226 // The number of values that can be stored in byteBlockCount() byte blocks 227 ByteValueCount() int 228 decodeLongToLong(blocks, values []int64, iterations int) 229 // Read 8 * iterations * blockCount() blocks from blocks, decodethem and write 230 // iterations * valueCount() values inot values. 231 decodeByteToLong(blocks []byte, values []int64, iterations int) 232 } 233 234 func GetPackedIntsEncoder(format PackedFormat, version int32, bitsPerValue uint32) PackedIntsEncoder { 235 CheckVersion(version) 236 return newBulkOperation(format, bitsPerValue) 237 } 238 239 func GetPackedIntsDecoder(format PackedFormat, version int32, bitsPerValue uint32) PackedIntsDecoder { 240 // log.Printf("Obtaining PackedIntsDecoder(%v, %v), version=%v", format, bitsPerValue, version) 241 CheckVersion(version) 242 return newBulkOperation(format, bitsPerValue) 243 } 244 245 /* A read-only random access array of positive integers. */ 246 type PackedIntsReader interface { 247 util.Accountable 248 Get(index int) int64 // NumericDocValue 249 getBulk(int, []int64) int 250 Size() int 251 } 252 253 type abstractReaderSPI interface { 254 Get(index int) int64 255 Size() int 256 } 257 258 type abstractReader struct { 259 spi abstractReaderSPI 260 } 261 262 func newReader(spi abstractReaderSPI) *abstractReader { 263 return &abstractReader{ 264 spi: spi, 265 } 266 } 267 268 func (r *abstractReader) getBulk(index int, arr []int64) int { 269 length := len(arr) 270 assert2(length > 0, "len must be > 0 (got %v)", length) 271 assert(index >= 0 && index < r.spi.Size()) 272 273 gets := r.spi.Size() - index 274 if length < gets { 275 gets = length 276 } 277 for i, _ := range arr { 278 arr[i] = r.spi.Get(index + i) 279 } 280 return gets 281 } 282 283 // Run-once iterator interface, to decode previously saved PackedInts 284 type ReaderIterator interface { 285 Next() (v int64, err error) // next value 286 nextN(n int) (vs []int64, err error) // at least 1 and at most n next values, the returned ref MUST NOT be modified 287 bitsPerValue() int // number of bits per value 288 size() int // number of values 289 ord() int // the current position 290 } 291 292 type nextNAction interface { 293 nextN(n int) (vs []int64, err error) 294 } 295 296 type ReaderIteratorImpl struct { 297 nextNAction 298 in DataInput 299 _bitsPerValue int 300 valueCount int 301 } 302 303 func newReaderIteratorImpl(sub nextNAction, valueCount, bitsPerValue int, in DataInput) *ReaderIteratorImpl { 304 return &ReaderIteratorImpl{sub, in, bitsPerValue, valueCount} 305 } 306 307 /* 308 Lucene(Java) manipulates underlying LongsRef to advance the pointer, which 309 can not be implemented using Go's slice. Here I have to assume nextN() method 310 would automatically increment the pointer without next(). 311 */ 312 func (it *ReaderIteratorImpl) Next() (v int64, err error) { 313 nextValues, err := it.nextN(1) 314 if err != nil { 315 return 0, err 316 } 317 assert(len(nextValues) > 0) 318 return nextValues[0], nil 319 } 320 321 func (it *ReaderIteratorImpl) bitsPerValue() int { 322 return it._bitsPerValue 323 } 324 325 func (it *ReaderIteratorImpl) size() int { 326 return it.valueCount 327 } 328 329 func assert(ok bool) { 330 assert2(ok, "assert fail") 331 } 332 333 /* A packed integer array that can be modified. */ 334 type Mutable interface { 335 PackedIntsReader 336 // Returns the number of bits used to store any given value. Note: 337 // this does not imply that memory usage is bpv * values() as 338 // implementations are free to use non-space-optimal packing of 339 // bits. 340 BitsPerValue() int 341 // Set the value at the given index in the array. 342 Set(index int, value int64) 343 setBulk(int, []int64) int 344 Clear() 345 // Save this mutable into out. Instantiating a reader from the 346 // generated data will return a reader with the same number of bits 347 // per value. 348 Save(out util.DataOutput) error 349 } 350 351 type abstractMutableSPI interface { 352 Get(index int) int64 353 Set(index int, value int64) 354 Size() int 355 } 356 357 type abstractMutable struct { 358 *abstractReader 359 spi abstractMutableSPI 360 } 361 362 func newMutable(spi abstractMutableSPI) *abstractMutable { 363 return &abstractMutable{ 364 abstractReader: newReader(spi), 365 spi: spi, 366 } 367 } 368 369 func (m *abstractMutable) setBulk(index int, arr []int64) int { 370 length := len(arr) 371 assert2(length > 0, "len must be > 0 (got %v)", length) 372 assert(index >= 0 && index < m.spi.Size()) 373 374 for i, v := range arr { 375 m.spi.Set(index+i, v) 376 } 377 return length 378 } 379 380 /* Fill the mutable [from,to) with val. */ 381 func (m *abstractMutable) fill(from, to int, val int64) { 382 panic("niy") 383 // assert(val < MaxValue(m.BitsPerValue())) 384 // assert(from <= to) 385 // for i := from; i < to; i++ { 386 // m.spi.Set(i, val) 387 // } 388 } 389 390 /* Sets all values to 0 */ 391 func (m *abstractMutable) Clear() { 392 panic("niy") 393 // m.fill(0, int(m.Size()), 0) 394 } 395 396 func (m *abstractMutable) Save(out util.DataOutput) error { 397 panic("niy") 398 // writer := WriterNoHeader(out, m.format, int(m.valueCount), m.bitsPerValue, DEFAULT_BUFFER_SIZE) 399 // err := writer.writeHeader() 400 // if err != nil { 401 // return err 402 // } 403 // for i := 0; i < int(m.valueCount); i++ { 404 // err = writer.Add(m.spi.Get(i)) 405 // if err != nil { 406 // return err 407 // } 408 // } 409 // return writer.Finish() 410 } 411 412 func (m *abstractMutable) Format() PackedFormat { 413 return PackedFormat(PACKED) 414 } 415 416 type ReaderImpl struct { 417 valueCount int 418 } 419 420 func newReaderImpl(valueCount int) *ReaderImpl { 421 return &ReaderImpl{valueCount} 422 } 423 424 func (p *ReaderImpl) Size() int { 425 return p.valueCount 426 } 427 428 type MutableImpl struct { 429 *abstractMutable 430 valueCount int 431 bitsPerValue int 432 } 433 434 func newMutableImpl(spi abstractMutableSPI, 435 valueCount, bitsPerValue int) *MutableImpl { 436 437 assert2(bitsPerValue > 0 && bitsPerValue <= 64, "%v", bitsPerValue) 438 return &MutableImpl{ 439 abstractMutable: newMutable(spi), 440 valueCount: valueCount, 441 bitsPerValue: bitsPerValue, 442 } 443 } 444 445 func (m *MutableImpl) BitsPerValue() int { 446 return m.bitsPerValue 447 } 448 449 func (m *MutableImpl) Size() int { 450 return m.valueCount 451 } 452 453 func ReaderNoHeader(in DataInput, format PackedFormat, version, valueCount int32, 454 bitsPerValue uint32) (r PackedIntsReader, err error) { 455 456 CheckVersion(version) 457 switch format { 458 case PACKED_SINGLE_BLOCK: 459 return newPacked64SingleBlockFromInput(in, valueCount, bitsPerValue) 460 case PACKED: 461 switch bitsPerValue { 462 /* [[[gocog 463 package main 464 465 import ( 466 "fmt" 467 "os" 468 ) 469 470 const HEADER = `// This file has been automatically generated, DO NOT EDIT 471 472 package packed 473 474 import ( 475 "github.com/balzaczyy/golucene/core/util" 476 ) 477 478 ` 479 480 var ( 481 TYPES = map[int]string{8: "byte", 16: "int16", 32: "int32", 64: "int64"} 482 NAMES = map[int]string{8: "Byte", 16: "Short", 32: "Int", 64: "Long"} 483 MASKS = map[int]string{8: " & 0xFF", 16: " & 0xFFFF", 32: " & 0xFFFFFFFF", 64: ""} 484 CASTS = map[int]string{8: "byte(", 16: "int16(", 32: "int32(", 64: "("} 485 ) 486 487 func main() { 488 w := fmt.Fprintf 489 for bpv, typ := range TYPES { 490 f, err := os.Create(fmt.Sprintf("direct%d.go", bpv)) 491 if err != nil { 492 panic(err) 493 } 494 defer f.Close() 495 496 w(f, HEADER) 497 w(f, "// Direct wrapping of %d-bits values to a backing array.\n", bpv) 498 w(f, "type Direct%d struct {\n", bpv) 499 w(f, " *MutableImpl\n") 500 w(f, " values []%v\n", typ) 501 w(f, "}\n\n") 502 503 w(f, "func newDirect%d(valueCount int) *Direct%d {\n", bpv, bpv) 504 w(f, " ans := &Direct%d{\n", bpv) 505 w(f, " values: make([]%v, valueCount),\n", typ) 506 w(f, " }\n") 507 w(f, " ans.MutableImpl = newMutableImpl(ans, valueCount, %v)\n", bpv) 508 w(f, " return ans\n") 509 w(f, "}\n\n") 510 511 w(f, "func newDirect%dFromInput(version int32, in DataInput, valueCount int) (r PackedIntsReader, err error) {\n", bpv) 512 w(f, " ans := newDirect%v(valueCount)\n", bpv) 513 if bpv == 8 { 514 w(f, " if err = in.ReadBytes(ans.values[:valueCount]); err == nil {\n") 515 } else { 516 w(f, " for i, _ := range ans.values {\n") 517 w(f, " if ans.values[i], err = in.Read%v(); err != nil {\n", NAMES[bpv]) 518 w(f, " break\n") 519 w(f, " }\n") 520 w(f, " }\n") 521 w(f, " if err == nil {\n") 522 } 523 if bpv != 64 { 524 w(f, " // because packed ints have not always been byte-aligned\n") 525 w(f, " remaining := PackedFormat(PACKED).ByteCount(version, int32(valueCount), %v) - %v*int64(valueCount)\n", bpv, bpv/8) 526 w(f, " for i := int64(0); i < remaining; i++ {\n") 527 w(f, " if _, err = in.ReadByte(); err != nil {\n") 528 w(f, " break\n") 529 w(f, " }\n") 530 w(f, " }\n") 531 } 532 w(f, " }\n") 533 w(f, " return ans, err\n") 534 w(f, "}\n\n") 535 536 w(f, "func (d *Direct%v) Get(index int) int64 {\n", bpv) 537 w(f, " return int64(d.values[index])%s\n", MASKS[bpv]) 538 w(f, "}\n\n") 539 540 w(f, "func (d *Direct%v) Set(index int, value int64) {\n", bpv) 541 w(f, " d.values[index] = %v(value)\n", typ) 542 w(f, "}\n\n") 543 544 w(f, "func (d *Direct%v) RamBytesUsed() int64 {\n", bpv) 545 w(f, " return util.AlignObjectSize(\n") 546 w(f, " util.NUM_BYTES_OBJECT_HEADER +\n") 547 w(f, " 2*util.NUM_BYTES_INT +\n") 548 w(f, " util.NUM_BYTES_OBJECT_REF +\n") 549 w(f, " util.SizeOf(d.values))\n") 550 w(f, "}\n") 551 552 w(f, ` 553 func (d *Direct%v) Clear() { 554 for i, _ := range d.values { 555 d.values[i] = 0 556 } 557 } 558 559 func (d *Direct%v) getBulk(index int, arr []int64) int { 560 assert2(len(arr) > 0, "len must be > 0 (got %%v)", len(arr)) 561 assert(index >= 0 && index < d.valueCount) 562 563 gets := d.valueCount - index 564 if len(arr) < gets { 565 gets = len(arr) 566 } 567 for i, _ := range arr[:gets] { 568 arr[i] = int64(d.values[index+i])%v 569 } 570 return gets 571 } 572 573 func (d *Direct%v) setBulk(index int, arr []int64) int { 574 assert2(len(arr) > 0, "len must be > 0 (got %%v)", len(arr)) 575 assert(index >= 0 && index < d.valueCount) 576 577 sets := d.valueCount - index 578 if len(arr) < sets { 579 sets = len(arr) 580 } 581 for i, _ := range arr { 582 d.values[index+i] = %varr[i]) 583 } 584 return sets 585 } 586 587 func (d *Direct%v) fill(from, to int, val int64) { 588 assert(val == val%v) 589 for i := from; i < to; i ++ { 590 d.values[i] = %vval) 591 } 592 } 593 `, bpv, bpv, MASKS[bpv], bpv, CASTS[bpv], bpv, MASKS[bpv], CASTS[bpv]) 594 595 fmt.Printf(" case %v:\n", bpv) 596 fmt.Printf(" return newDirect%vFromInput(version, in, int(valueCount))\n", bpv) 597 } 598 } 599 gocog]]] */ 600 case 16: 601 return newDirect16FromInput(version, in, int(valueCount)) 602 case 32: 603 return newDirect32FromInput(version, in, int(valueCount)) 604 case 64: 605 return newDirect64FromInput(version, in, int(valueCount)) 606 case 8: 607 return newDirect8FromInput(version, in, int(valueCount)) 608 // [[[end]]] 609 case 24: 610 if valueCount <= PACKED8_THREE_BLOCKS_MAX_SIZE { 611 return newPacked8ThreeBlocksFromInput(version, in, valueCount) 612 } 613 case 48: 614 if valueCount <= PACKED16_THREE_BLOCKS_MAX_SIZE { 615 return newPacked16ThreeBlocksFromInput(version, in, valueCount) 616 } 617 } 618 return newPacked64FromInput(version, in, valueCount, bitsPerValue) 619 default: 620 panic(fmt.Sprintf("Unknown Writer format: %v", format)) 621 } 622 } 623 624 func asUint32(n int32, err error) (n2 uint32, err2 error) { 625 return uint32(n), err 626 } 627 628 func NewPackedReader(in DataInput) (r PackedIntsReader, err error) { 629 if version, err := codec.CheckHeader(in, PACKED_CODEC_NAME, PACKED_VERSION_START, VERSION_CURRENT); err == nil { 630 if bitsPerValue, err := asUint32(in.ReadVInt()); err == nil { 631 // assert bitsPerValue > 0 && bitsPerValue <= 64 632 if valueCount, err := in.ReadVInt(); err == nil { 633 if id, err := in.ReadVInt(); err == nil { 634 format := PackedFormat(id) 635 return ReaderNoHeader(in, format, version, valueCount, bitsPerValue) 636 } 637 } 638 } 639 } 640 return 641 } 642 643 /* 644 Expert: Restore a ReaderIterator from a stream without reading metadata at the 645 beginning of the stream. This method is useful to restore data from streams 646 which have been created using WriterNoHeader(). 647 */ 648 func ReaderIteratorNoHeader(in DataInput, format PackedFormat, version, 649 valueCount, bitsPerValue, mem int) ReaderIterator { 650 CheckVersion(int32(version)) 651 return newPackedReaderIterator(format, version, valueCount, bitsPerValue, in, mem) 652 } 653 654 /* 655 Create a packed integer slice with the given amount of values 656 initialized to 0. The valueCount and the bitsPerValue cannot be 657 changed after creation. All Mutables known by this factory are kept 658 fully in RAM. 659 660 Positive values of acceptableOverheadRatio will trade space for speed 661 by selecting a faster but potentially less memory-efficient 662 implementation. An acceptableOverheadRatio of COMPACT will make sure 663 that the most memory-efficient implementation is selected whereas 664 FASTEST will make sure that the fastest implementation is selected. 665 */ 666 func MutableFor(valueCount, bitsPerValue int, acceptableOverheadRatio float32) Mutable { 667 formatAndBits := FastestFormatAndBits(valueCount, bitsPerValue, acceptableOverheadRatio) 668 return MutableForFormat(valueCount, formatAndBits.BitsPerValue, formatAndBits.Format) 669 } 670 671 /* Same as MutableFor() with a pre-computed number of bits per value and format. */ 672 func MutableForFormat(vc, bpv int, format PackedFormat) Mutable { 673 valueCount := int32(vc) 674 bitsPerValue := uint32(bpv) 675 assert(valueCount >= 0) 676 switch int(format) { 677 case PACKED_SINGLE_BLOCK: 678 return newPacked64SingleBlockBy(valueCount, bitsPerValue) 679 case PACKED: 680 switch bitsPerValue { 681 case 8: 682 return newDirect8(vc) 683 case 16: 684 return newDirect16(vc) 685 case 32: 686 return newDirect32(vc) 687 case 64: 688 return newDirect64(vc) 689 case 24: 690 if valueCount <= PACKED8_THREE_BLOCKS_MAX_SIZE { 691 return newPacked8ThreeBlocks(valueCount) 692 } 693 case 48: 694 if valueCount <= PACKED16_THREE_BLOCKS_MAX_SIZE { 695 return newPacked16ThreeBlocks(valueCount) 696 } 697 } 698 return newPacked64(valueCount, bitsPerValue) 699 } 700 panic(fmt.Sprintf("Invalid format: %v", format)) 701 } 702 703 /* 704 Expert: Create a packed integer array writer for the given output, 705 format, value count, and number of bits per value. 706 707 The resulting stream will be long-aligned. This means that depending 708 on the format which is used, up to 63 bits will be wasted. An easy 709 way to make sure tha tno space is lost is to always use a valueCount 710 that is a multiple of 64. 711 712 This method does not write any metadata to the stream, meaning that 713 it is your responsibility to store it somewhere else in order to be 714 able to recover data from the stream later on: 715 716 - format (using Format.Id()), 717 - valueCount, 718 - bitsPerValue, 719 - VERSION_CURRENT. 720 721 It is possible to start writing values without knowing how many of 722 them you are actually going to write. To do this, just pass -1 as 723 valueCount. On the other hand, for any positive value of valueCount, 724 the returned writer will make sure that you don't write more values 725 than expected and pad the end of stream with zeros in case you have 726 writen less than valueCount when calling Writer.Finish(). 727 728 The mem parameter lets your control how much memory can be used to 729 buffer changes in memory before finishing to disk. High values of mem 730 are likely to improve throughput. On the other hand, if speed is not 731 that important to you, a value of 0 will use as little memory as 732 possible and should already offer reasonble throughput. 733 */ 734 func WriterNoHeader(out DataOutput, format PackedFormat, 735 valueCount, bitsPerValue, mem int) Writer { 736 return newPackedWriter(format, out, valueCount, bitsPerValue, mem) 737 } 738 739 /* 740 Returns how many bits are required to hold values up to and including maxValue 741 NOTE: This method returns at least 1. 742 */ 743 func BitsRequired(maxValue int64) int { 744 assert2(maxValue >= 0, fmt.Sprintf("maxValue must be non-negative (got: %v)", maxValue)) 745 return UnsignedBitsRequired(maxValue) 746 } 747 748 /* 749 Returns how many bits are required to store bits, interpreted as an 750 unsigned value. 751 NOTE: This method returns at least 1. 752 */ 753 func UnsignedBitsRequired(bits int64) int { 754 if bits == 0 { 755 return 1 756 } 757 n := uint64(bits) 758 ans := 0 759 for n != 0 { 760 n >>= 1 761 ans++ 762 } 763 return ans 764 } 765 766 func assert2(ok bool, msg string, args ...interface{}) { 767 if !ok { 768 panic(fmt.Sprintf(msg, args...)) 769 } 770 } 771 772 // Calculate the maximum unsigned long that can be expressed with the given number of bits 773 func MaxValue(bitsPerValue int) int64 { 774 if bitsPerValue == 64 { 775 return math.MaxInt64 776 } 777 return (1 << uint64(bitsPerValue)) - 1 778 } 779 780 /* Copy src[srcPos:srcPos+len] into dest[destPos:destPos+len] using at most mem bytes. */ 781 func Copy(src PackedIntsReader, srcPos int, dest Mutable, destPos, length, mem int) { 782 assert(srcPos+length <= int(src.Size())) 783 assert(destPos+length <= int(dest.Size())) 784 capacity := int(uint(mem) >> 3) 785 if capacity == 0 { 786 panic("niy") 787 } else if length > 0 { 788 // use bulk operations 789 if length < capacity { 790 capacity = length 791 } 792 buf := make([]int64, capacity) 793 copyWith(src, srcPos, dest, destPos, length, buf) 794 } 795 } 796 797 func copyWith(src PackedIntsReader, srcPos int, dest Mutable, destPos, length int, buf []int64) { 798 assert(len(buf) > 0) 799 remaining := 0 800 for length > 0 { 801 limit := remaining + length 802 if limit > len(buf) { 803 limit = len(buf) 804 } 805 read := src.getBulk(srcPos, buf[remaining:limit]) 806 assert(read > 0) 807 srcPos += read 808 length -= read 809 remaining += read 810 written := dest.setBulk(destPos, buf[:remaining]) 811 assert(written > 0) 812 destPos += written 813 if written < remaining { 814 copy(buf, buf[written:remaining]) 815 } 816 remaining -= written 817 } 818 for remaining > 0 { 819 written := dest.setBulk(destPos, buf[:remaining]) 820 destPos += written 821 remaining -= written 822 copy(buf, buf[written:written+remaining]) 823 } 824 } 825 826 var TrailingZeros = func() map[int]int { 827 ans := make(map[int]int) 828 var n = 1 829 for i := 0; i < 32; i++ { 830 ans[n] = i 831 n <<= 1 832 } 833 return ans 834 }() 835 836 /* Check that the block size is a power of 2, in the right bounds, and return its log in base 2. */ 837 func checkBlockSize(blockSize, minBlockSize, maxBlockSize int) int { 838 assert2(blockSize >= minBlockSize && blockSize <= maxBlockSize, 839 "blockSize must be >= %v and <= %v, got %v", 840 minBlockSize, maxBlockSize, blockSize) 841 assert2((blockSize&(blockSize-1)) == 0, 842 "blockSIze must be a power of 2, got %v", blockSize) 843 return TrailingZeros[blockSize] 844 } 845 846 /* Return the number of blocks required to store size values on blockSize. */ 847 func numBlocks(size int64, blockSize int) int { 848 numBlocks := int(size / int64(blockSize)) 849 if size%int64(blockSize) != 0 { 850 numBlocks++ 851 } 852 assert2(int64(numBlocks)*int64(blockSize) >= size, "size is too large for this block size") 853 return numBlocks 854 } 855 856 // util/packed/PackedReaderIterator.java 857 858 type PackedReaderIterator struct { 859 *ReaderIteratorImpl 860 packedIntsVersion int 861 format PackedFormat 862 bulkOperation BulkOperation 863 nextBlocks []byte 864 nextValues []int64 865 nextValuesOrig []int64 866 _iterations int 867 position int 868 } 869 870 func newPackedReaderIterator(format PackedFormat, packedIntsVersion, valueCount, bitsPerValue int, in DataInput, mem int) *PackedReaderIterator { 871 it := &PackedReaderIterator{ 872 format: format, 873 packedIntsVersion: packedIntsVersion, 874 bulkOperation: newBulkOperation(format, uint32(bitsPerValue)), 875 position: -1, 876 } 877 assert(it.bulkOperation != nil) 878 it.ReaderIteratorImpl = newReaderIteratorImpl(it, valueCount, bitsPerValue, in) 879 it._iterations = it.iterations(mem) 880 assert(valueCount == 0 || it._iterations > 0) 881 it.nextBlocks = make([]byte, it._iterations*it.bulkOperation.ByteBlockCount()) 882 it.nextValuesOrig = make([]int64, it._iterations*it.bulkOperation.ByteValueCount()) 883 it.nextValues = nil 884 return it 885 } 886 887 func (it *PackedReaderIterator) iterations(mem int) int { 888 iterations := it.bulkOperation.computeIterations(it.valueCount, mem) 889 if it.packedIntsVersion < PACKED_VERSION_BYTE_ALIGNED { 890 // make sure iterations is a multiple of 8 891 iterations = int((int64(iterations) + 7) & 0xFFFFFFF8) 892 } 893 return iterations 894 } 895 896 /* 897 Go slice is used to mimic Lucene(Java)'s LongsRef and I have to keep the 898 original slice to avoid re-allocation. 899 */ 900 func (it *PackedReaderIterator) nextN(count int) (vs []int64, err error) { 901 assert(len(it.nextValues) >= 0) 902 assert(count > 0) 903 904 remaining := it.valueCount - it.position - 1 905 if remaining <= 0 { 906 return nil, errors.New("EOF") 907 } 908 if remaining < count { 909 count = remaining 910 } 911 912 if len(it.nextValues) == 0 { 913 remainingBlocks := it.format.ByteCount(int32(it.packedIntsVersion), int32(remaining), uint32(it._bitsPerValue)) 914 blocksToRead := len(it.nextBlocks) 915 if remainingBlocks < int64(blocksToRead) { 916 blocksToRead = int(remainingBlocks) 917 } 918 err = it.in.ReadBytes(it.nextBlocks[0:blocksToRead]) 919 if err != nil { 920 return nil, err 921 } 922 if blocksToRead < len(it.nextBlocks) { 923 for i := blocksToRead; i < len(it.nextBlocks); i++ { 924 it.nextBlocks[i] = 0 925 } 926 } 927 928 it.nextValues = it.nextValuesOrig // restore 929 it.bulkOperation.decodeByteToLong(it.nextBlocks, it.nextValues, it._iterations) 930 } 931 932 if len(it.nextValues) < count { 933 count = len(it.nextValues) 934 } 935 it.position += count 936 values := it.nextValues[0:count] 937 it.nextValues = it.nextValues[count:] 938 return values, nil 939 } 940 941 func (it *PackedReaderIterator) ord() int { 942 return it.position 943 }