github.com/balzaczyy/golucene@v0.0.0-20151210033525-d0be9ee89713/core/codec/lucene40/bitVector.go (about) 1 package lucene40 2 3 import ( 4 "errors" 5 "fmt" 6 "github.com/balzaczyy/golucene/core/codec" 7 "github.com/balzaczyy/golucene/core/store" 8 "github.com/balzaczyy/golucene/core/util" 9 "reflect" 10 ) 11 12 const ( 13 CODEC = "BitVector" 14 15 /* Change DGaps to encode gaps between cleared bits, not set: */ 16 BV_VERSION_DGAPS_CLEARED = 1 17 18 BV_VERSION_CHECKSUM = 2 19 20 /* Imcrement version to change it: */ 21 BV_VERSION_CURRENT = BV_VERSION_CHECKSUM 22 ) 23 24 type BitVector struct { 25 bits []byte 26 size int 27 count int 28 } 29 30 func NewBitVector(n int) *BitVector { 31 return &BitVector{ 32 size: n, 33 bits: make([]byte, numBytes(n)), 34 } 35 } 36 37 func numBytes(size int) int { 38 bytesLength := int(uint(size) >> 3) 39 if (size & 7) != 0 { 40 bytesLength++ 41 } 42 return bytesLength 43 } 44 45 func (bv *BitVector) Clear(bit int) { 46 assert2(bit >= 0 && bit < bv.size, "bit %v is out of bounds 0..%v", bit, bv.size-1) 47 bv.bits[bit>>3] &= ^(1 << (uint(bit) & 7)) 48 bv.count = -1 49 } 50 51 func (bv *BitVector) At(bit int) bool { 52 assert2(bit >= 0 && bit < bv.size, "bit %v is out of bounds 0..%v", bit, bv.size-1) 53 return (bv.bits[bit>>3] & (1 << (uint(bit) & 7))) != 0 54 } 55 56 func assert(ok bool) { 57 assert2(ok, "assert fail") 58 } 59 60 func assert2(ok bool, msg string, args ...interface{}) { 61 if !ok { 62 panic(fmt.Sprintf(msg, args...)) 63 } 64 } 65 66 func (bv *BitVector) Length() int { 67 return bv.size 68 } 69 70 /* 71 Returns the total number of bits in this vector. This is efficiently 72 computed and cached, so that, if the vector is not changed, no 73 recomputation is done for repeated calls. 74 */ 75 func (bv *BitVector) Count() int { 76 // if the vector has been modified 77 if bv.count == -1 { 78 c := 0 79 for _, v := range bv.bits { 80 c += util.BitCount(v) // sum bits per byte 81 } 82 bv.count = c 83 } 84 assert2(bv.count <= bv.size, "count=%v size=%v", bv.count, bv.size) 85 return bv.count 86 } 87 88 /* 89 Writes this vector to the file name in Directory d, in a format that 90 can be read by the constructor BitVector(Directory, String, IOContext) 91 */ 92 func (bv *BitVector) Write(d store.Directory, name string, ctx store.IOContext) (err error) { 93 assert(reflect.TypeOf(d).Name() != "CompoundFileDirectory") 94 var output store.IndexOutput 95 if output, err = d.CreateOutput(name, ctx); err != nil { 96 return err 97 } 98 defer func() { 99 err = mergeError(err, output.Close()) 100 }() 101 102 if err = output.WriteInt(-2); err != nil { 103 return err 104 } 105 if err = codec.WriteHeader(output, CODEC, BV_VERSION_CURRENT); err != nil { 106 return err 107 } 108 if bv.isSparse() { 109 // sparse bit-set more efficiently saved as d-gaps. 110 err = bv.writeClearedDgaps(output) 111 } else { 112 err = bv.writeBits(output) 113 } 114 if err != nil { 115 return err 116 } 117 if err = codec.WriteFooter(output); err != nil { 118 return err 119 } 120 bv.assertCount() 121 return nil 122 } 123 124 func mergeError(err, err2 error) error { 125 if err == nil { 126 return err2 127 } else { 128 return errors.New(fmt.Sprintf("%v\n %v", err, err2)) 129 } 130 } 131 132 /* Invert all bits */ 133 func (bv *BitVector) InvertAll() { 134 if bv.count != -1 { 135 bv.count = bv.size - bv.count 136 } 137 if len(bv.bits) > 0 { 138 for idx, v := range bv.bits { 139 bv.bits[idx] = byte(^v) 140 } 141 } 142 } 143 144 /* Write as a bit set */ 145 func (bv *BitVector) writeBits(output store.IndexOutput) error { 146 return store.Stream(output). 147 WriteInt(int32(bv.size)). 148 WriteInt(int32(bv.Count())). 149 WriteBytes(bv.bits). 150 Close() 151 } 152 153 /* Write as a d-gaps list */ 154 func (bv *BitVector) writeClearedDgaps(output store.IndexOutput) error { 155 err := store.Stream(output). 156 WriteInt(-1). // mark using d-gaps 157 WriteInt(int32(bv.size)). 158 WriteInt(int32(bv.Count())). 159 Close() 160 if err != nil { 161 return err 162 } 163 last, numCleared := 0, bv.size-bv.Count() 164 for i, v := range bv.bits { 165 if v == byte(0xff) { 166 continue 167 } 168 err = output.WriteVInt(int32(i - last)) 169 if err == nil { 170 err = output.WriteByte(v) 171 } 172 if err != nil { 173 return err 174 } 175 last = i 176 numCleared -= (8 - util.BitCount(v)) 177 assert(numCleared >= 0 || 178 i == len(bv.bits)-1 && numCleared == -(8-(bv.size&7))) 179 if numCleared <= 0 { 180 break 181 } 182 } 183 return nil 184 } 185 186 /* 187 Indicates if the bit vector is sparse and should be saved as a d-gaps 188 list, or dense, and should be saved as a bit set. 189 */ 190 func (bv *BitVector) isSparse() bool { 191 panic("not implemented yet") 192 } 193 194 func (bv *BitVector) assertCount() { 195 assert(bv.count != -1) 196 countSav := bv.count 197 bv.count = -1 198 assert2(countSav == bv.Count(), "saved count was %v but recomputed count is %v", countSav, bv.count) 199 }