github.com/balzaczyy/golucene@v0.0.0-20151210033525-d0be9ee89713/core/codec/lucene41/forUtil.go (about)

     1  package lucene41
     2  
     3  import (
     4  	"fmt"
     5  	"github.com/balzaczyy/golucene/core/util"
     6  	"github.com/balzaczyy/golucene/core/util/packed"
     7  	"math"
     8  )
     9  
    10  // codecs/lucene41/ForUtil.java
    11  
    12  const (
    13  	/**
    14  	 * Special number of bits per value used whenever all values to encode are equal.
    15  	 */
    16  	ALL_VALUES_EQUAL = 0
    17  	/**
    18  	 * Upper limit of the number of bytes that might be required to stored
    19  	 * <code>BLOCK_SIZE</code> encoded values.
    20  	 */
    21  	MAX_ENCODED_SIZE = LUCENE41_BLOCK_SIZE * 4
    22  )
    23  
    24  /**
    25   * Upper limit of the number of values that might be decoded in a single call to
    26   * {@link #readBlock(IndexInput, byte[], int[])}. Although values after
    27   * <code>BLOCK_SIZE</code> are garbage, it is necessary to allocate value buffers
    28   * whose size is >= MAX_DATA_SIZE to avoid {@link ArrayIndexOutOfBoundsException}s.
    29   */
    30  var MAX_DATA_SIZE int = computeMaxDataSize()
    31  
    32  func computeMaxDataSize() int {
    33  	maxDataSize := 0
    34  	// for each version
    35  	for version := packed.PACKED_VERSION_START; version <= packed.VERSION_CURRENT; version++ {
    36  		// for each packed format
    37  		for format := packed.PACKED; format <= packed.PACKED_SINGLE_BLOCK; format++ {
    38  			// for each bit-per-value
    39  			for bpv := 1; bpv <= 32; bpv++ {
    40  				if !packed.PackedFormat(format).IsSupported(bpv) {
    41  					continue
    42  				}
    43  				decoder := packed.GetPackedIntsDecoder(packed.PackedFormat(format), int32(version), uint32(bpv))
    44  				iterations := int(computeIterations(decoder))
    45  				if n := iterations * decoder.ByteValueCount(); n > maxDataSize {
    46  					maxDataSize = n
    47  				}
    48  			}
    49  		}
    50  	}
    51  	return maxDataSize
    52  }
    53  
    54  /**
    55   * Encode all values in normal area with fixed bit width,
    56   * which is determined by the max value in this block.
    57   */
    58  type ForUtil struct {
    59  	encodedSizes []int32
    60  	encoders     []packed.PackedIntsEncoder
    61  	decoders     []packed.PackedIntsDecoder
    62  	iterations   []int32
    63  }
    64  
    65  /* Create a new ForUtil instance and save state into out. */
    66  func NewForUtilInto(accetableOverheadRatio float32, out util.DataOutput) (*ForUtil, error) {
    67  	ans, err := &ForUtil{}, out.WriteVInt(packed.VERSION_CURRENT)
    68  	if err != nil {
    69  		return ans, err
    70  	}
    71  	ans.encodedSizes = make([]int32, 33)
    72  	ans.encoders = make([]packed.PackedIntsEncoder, 33)
    73  	ans.decoders = make([]packed.PackedIntsDecoder, 33)
    74  	ans.iterations = make([]int32, 33)
    75  
    76  	packedIntsVersion := int32(packed.VERSION_CURRENT)
    77  	for bpv := 1; bpv <= 32; bpv++ {
    78  		formatAndBits := packed.FastestFormatAndBits(
    79  			LUCENE41_BLOCK_SIZE, bpv, accetableOverheadRatio)
    80  		format := formatAndBits.Format
    81  		bitsPerValue := formatAndBits.BitsPerValue
    82  		assert(format.IsSupported(bitsPerValue))
    83  		assert(bitsPerValue <= 32)
    84  		ans.encodedSizes[bpv] = encodedSize(format, packedIntsVersion, uint32(bitsPerValue))
    85  		ans.encoders[bpv] = packed.GetPackedIntsEncoder(format, packedIntsVersion, uint32(bitsPerValue))
    86  		ans.decoders[bpv] = packed.GetPackedIntsDecoder(format, packedIntsVersion, uint32(bitsPerValue))
    87  		ans.iterations[bpv] = computeIterations(ans.decoders[bpv])
    88  
    89  		err = out.WriteVInt(int32(format.Id()<<5 | (bitsPerValue - 1)))
    90  		if err != nil {
    91  			return ans, err
    92  		}
    93  	}
    94  	return ans, err
    95  }
    96  
    97  func assert(ok bool) {
    98  	if !ok {
    99  		panic("assert fail")
   100  	}
   101  }
   102  
   103  func assert2(ok bool, msg string, args ...interface{}) {
   104  	if !ok {
   105  		panic(fmt.Sprintf(msg, args...))
   106  	}
   107  }
   108  
   109  type DataInput interface {
   110  	ReadVInt() (n int32, err error)
   111  }
   112  
   113  /* Restore a ForUtil from a DataInput. */
   114  func NewForUtilFrom(in DataInput) (fu *ForUtil, err error) {
   115  	self := &ForUtil{}
   116  	packedIntsVersion, err := in.ReadVInt()
   117  	if err != nil {
   118  		return self, err
   119  	}
   120  	packed.CheckVersion(packedIntsVersion)
   121  	self.encodedSizes = make([]int32, 33)
   122  	self.encoders = make([]packed.PackedIntsEncoder, 33)
   123  	self.decoders = make([]packed.PackedIntsDecoder, 33)
   124  	self.iterations = make([]int32, 33)
   125  
   126  	for bpv := 1; bpv <= 32; bpv++ {
   127  		code, err := in.ReadVInt()
   128  		if err != nil {
   129  			return self, err
   130  		}
   131  		formatId := uint32(code) >> 5
   132  		bitsPerValue := (uint32(code) & 31) + 1
   133  
   134  		format := packed.PackedFormat(formatId)
   135  		// assert format.isSupported(bitsPerValue)
   136  		self.encodedSizes[bpv] = encodedSize(format, packedIntsVersion, bitsPerValue)
   137  		self.encoders[bpv] = packed.GetPackedIntsEncoder(format, packedIntsVersion, bitsPerValue)
   138  		self.decoders[bpv] = packed.GetPackedIntsDecoder(format, packedIntsVersion, bitsPerValue)
   139  		self.iterations[bpv] = computeIterations(self.decoders[bpv])
   140  	}
   141  	return self, nil
   142  }
   143  
   144  type IndexOutput interface {
   145  	WriteByte(byte) error
   146  	WriteBytes([]byte) error
   147  	WriteVInt(int32) error
   148  }
   149  
   150  func (u *ForUtil) writeBlock(data []int, encoded []byte, out IndexOutput) error {
   151  	if isAllEqual(data) {
   152  		var err error
   153  		if err = out.WriteByte(byte(ALL_VALUES_EQUAL)); err == nil {
   154  			err = out.WriteVInt(int32(data[0]))
   155  		}
   156  		return err
   157  	}
   158  
   159  	numBits := bitsRequired(data)
   160  	assert2(numBits > 0 && numBits <= 32, "%v", numBits)
   161  	encoder := u.encoders[numBits]
   162  	iters := int(u.iterations[numBits])
   163  	assert(iters*encoder.ByteValueCount() >= LUCENE41_BLOCK_SIZE)
   164  	encodedSize := int(u.encodedSizes[numBits])
   165  	assert(iters*encoder.ByteBlockCount() >= encodedSize)
   166  
   167  	if err := out.WriteByte(byte(numBits)); err != nil {
   168  		return err
   169  	}
   170  
   171  	encoder.EncodeIntToByte(data, encoded, iters)
   172  	return out.WriteBytes(encoded[:encodedSize])
   173  }
   174  
   175  func encodedSize(format packed.PackedFormat, packedIntsVersion int32, bitsPerValue uint32) int32 {
   176  	byteCount := format.ByteCount(packedIntsVersion, LUCENE41_BLOCK_SIZE, bitsPerValue)
   177  	// assert byteCount >= 0 && byteCount <= math.MaxInt32()
   178  	return int32(byteCount)
   179  }
   180  
   181  func computeIterations(decoder packed.PackedIntsDecoder) int32 {
   182  	return int32(math.Ceil(float64(LUCENE41_BLOCK_SIZE) / float64(decoder.ByteValueCount())))
   183  }
   184  
   185  func isAllEqual(data []int) bool {
   186  	first := data[0]
   187  	for _, v := range data[1:LUCENE41_BLOCK_SIZE] {
   188  		if v != first {
   189  			return false
   190  		}
   191  	}
   192  	return true
   193  }
   194  
   195  func bitsRequired(data []int) int {
   196  	or := int64(0)
   197  	for _, v := range data[:LUCENE41_BLOCK_SIZE] {
   198  		assert(v >= 0)
   199  		or |= int64(v)
   200  	}
   201  	return packed.BitsRequired(or)
   202  }