github.com/balzaczyy/golucene@v0.0.0-20151210033525-d0be9ee89713/core/codec/lucene40/bitVector.go (about)

     1  package lucene40
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  	"github.com/balzaczyy/golucene/core/codec"
     7  	"github.com/balzaczyy/golucene/core/store"
     8  	"github.com/balzaczyy/golucene/core/util"
     9  	"reflect"
    10  )
    11  
const (
	// CODEC is the codec name passed to codec.WriteHeader when a vector is
	// written.
	CODEC = "BitVector"

	/* Change DGaps to encode gaps between cleared bits, not set: */
	BV_VERSION_DGAPS_CLEARED = 1

	// NOTE(review): presumably the format version that introduced the
	// checksum footer written by codec.WriteFooter — confirm.
	BV_VERSION_CHECKSUM = 2

	/* Increment version to change it: */
	BV_VERSION_CURRENT = BV_VERSION_CHECKSUM
)
    23  
// BitVector is a fixed-size vector of bits packed eight to a byte.
type BitVector struct {
	bits  []byte // packed storage: bit i is bit (i & 7) of bits[i>>3]
	size  int    // number of addressable bits
	count int    // cached number of set bits; -1 means it must be recomputed
}
    29  
    30  func NewBitVector(n int) *BitVector {
    31  	return &BitVector{
    32  		size: n,
    33  		bits: make([]byte, numBytes(n)),
    34  	}
    35  }
    36  
    37  func numBytes(size int) int {
    38  	bytesLength := int(uint(size) >> 3)
    39  	if (size & 7) != 0 {
    40  		bytesLength++
    41  	}
    42  	return bytesLength
    43  }
    44  
    45  func (bv *BitVector) Clear(bit int) {
    46  	assert2(bit >= 0 && bit < bv.size, "bit %v is out of bounds 0..%v", bit, bv.size-1)
    47  	bv.bits[bit>>3] &= ^(1 << (uint(bit) & 7))
    48  	bv.count = -1
    49  }
    50  
    51  func (bv *BitVector) At(bit int) bool {
    52  	assert2(bit >= 0 && bit < bv.size, "bit %v is out of bounds 0..%v", bit, bv.size-1)
    53  	return (bv.bits[bit>>3] & (1 << (uint(bit) & 7))) != 0
    54  }
    55  
    56  func assert(ok bool) {
    57  	assert2(ok, "assert fail")
    58  }
    59  
    60  func assert2(ok bool, msg string, args ...interface{}) {
    61  	if !ok {
    62  		panic(fmt.Sprintf(msg, args...))
    63  	}
    64  }
    65  
// Length returns the number of bits in this vector (its size), regardless
// of how many of them are set.
func (bv *BitVector) Length() int {
	return bv.size
}
    69  
    70  /*
    71  Returns the total number of bits in this vector. This is efficiently
    72  computed and cached, so that, if the vector is not changed, no
    73  recomputation is done for repeated calls.
    74  */
    75  func (bv *BitVector) Count() int {
    76  	// if the vector has been modified
    77  	if bv.count == -1 {
    78  		c := 0
    79  		for _, v := range bv.bits {
    80  			c += util.BitCount(v) // sum bits per byte
    81  		}
    82  		bv.count = c
    83  	}
    84  	assert2(bv.count <= bv.size, "count=%v size=%v", bv.count, bv.size)
    85  	return bv.count
    86  }
    87  
    88  /*
    89  Writes this vector to the file name in Directory d, in a format that
    90  can be read by the constructor BitVector(Directory, String, IOContext)
    91  */
    92  func (bv *BitVector) Write(d store.Directory, name string, ctx store.IOContext) (err error) {
    93  	assert(reflect.TypeOf(d).Name() != "CompoundFileDirectory")
    94  	var output store.IndexOutput
    95  	if output, err = d.CreateOutput(name, ctx); err != nil {
    96  		return err
    97  	}
    98  	defer func() {
    99  		err = mergeError(err, output.Close())
   100  	}()
   101  
   102  	if err = output.WriteInt(-2); err != nil {
   103  		return err
   104  	}
   105  	if err = codec.WriteHeader(output, CODEC, BV_VERSION_CURRENT); err != nil {
   106  		return err
   107  	}
   108  	if bv.isSparse() {
   109  		// sparse bit-set more efficiently saved as d-gaps.
   110  		err = bv.writeClearedDgaps(output)
   111  	} else {
   112  		err = bv.writeBits(output)
   113  	}
   114  	if err != nil {
   115  		return err
   116  	}
   117  	if err = codec.WriteFooter(output); err != nil {
   118  		return err
   119  	}
   120  	bv.assertCount()
   121  	return nil
   122  }
   123  
   124  func mergeError(err, err2 error) error {
   125  	if err == nil {
   126  		return err2
   127  	} else {
   128  		return errors.New(fmt.Sprintf("%v\n  %v", err, err2))
   129  	}
   130  }
   131  
   132  /* Invert all bits */
   133  func (bv *BitVector) InvertAll() {
   134  	if bv.count != -1 {
   135  		bv.count = bv.size - bv.count
   136  	}
   137  	if len(bv.bits) > 0 {
   138  		for idx, v := range bv.bits {
   139  			bv.bits[idx] = byte(^v)
   140  		}
   141  	}
   142  }
   143  
   144  /* Write as a bit set */
   145  func (bv *BitVector) writeBits(output store.IndexOutput) error {
   146  	return store.Stream(output).
   147  		WriteInt(int32(bv.size)).
   148  		WriteInt(int32(bv.Count())).
   149  		WriteBytes(bv.bits).
   150  		Close()
   151  }
   152  
   153  /* Write as a d-gaps list */
   154  func (bv *BitVector) writeClearedDgaps(output store.IndexOutput) error {
   155  	err := store.Stream(output).
   156  		WriteInt(-1). // mark using d-gaps
   157  		WriteInt(int32(bv.size)).
   158  		WriteInt(int32(bv.Count())).
   159  		Close()
   160  	if err != nil {
   161  		return err
   162  	}
   163  	last, numCleared := 0, bv.size-bv.Count()
   164  	for i, v := range bv.bits {
   165  		if v == byte(0xff) {
   166  			continue
   167  		}
   168  		err = output.WriteVInt(int32(i - last))
   169  		if err == nil {
   170  			err = output.WriteByte(v)
   171  		}
   172  		if err != nil {
   173  			return err
   174  		}
   175  		last = i
   176  		numCleared -= (8 - util.BitCount(v))
   177  		assert(numCleared >= 0 ||
   178  			i == len(bv.bits)-1 && numCleared == -(8-(bv.size&7)))
   179  		if numCleared <= 0 {
   180  			break
   181  		}
   182  	}
   183  	return nil
   184  }
   185  
/*
Indicates if the bit vector is sparse and should be saved as a d-gaps
list, or dense, and should be saved as a bit set.

NOTE: not implemented yet. This method always panics, and because Write
calls it, Write panics as well once the header has been written.
*/
func (bv *BitVector) isSparse() bool {
	panic("not implemented yet")
}
   193  
   194  func (bv *BitVector) assertCount() {
   195  	assert(bv.count != -1)
   196  	countSav := bv.count
   197  	bv.count = -1
   198  	assert2(countSav == bv.Count(), "saved count was %v but recomputed count is %v", countSav, bv.count)
   199  }