github.com/balzaczyy/golucene@v0.0.0-20151210033525-d0be9ee89713/core/util/fst/bytes.go (about) 1 package fst 2 3 import ( 4 "fmt" 5 "github.com/balzaczyy/golucene/core/util" 6 ) 7 8 type BytesStore struct { 9 *util.DataOutputImpl 10 blocks [][]byte 11 blockSize uint32 12 blockBits uint32 13 blockMask uint32 14 current []byte 15 nextWrite uint32 16 } 17 18 func newBytesStore() *BytesStore { 19 bs := &BytesStore{} 20 bs.DataOutputImpl = util.NewDataOutput(bs) 21 return bs 22 } 23 24 func newBytesStoreFromBits(blockBits uint32) *BytesStore { 25 blockSize := uint32(1) << blockBits 26 self := newBytesStore() 27 self.blockBits = blockBits 28 self.blockSize = blockSize 29 self.blockMask = blockSize - 1 30 self.nextWrite = blockSize 31 return self 32 } 33 34 func newBytesStoreFromInput(in util.DataInput, numBytes int64, maxBlockSize uint32) (bs *BytesStore, err error) { 35 var blockSize uint32 = 2 36 var blockBits uint32 = 1 37 for int64(blockSize) < numBytes && blockSize < maxBlockSize { 38 blockSize *= 2 39 blockBits++ 40 } 41 self := newBytesStore() 42 self.blockBits = blockBits 43 self.blockSize = blockSize 44 self.blockMask = blockSize - 1 45 left := numBytes 46 for left > 0 { 47 chunk := blockSize 48 if left < int64(chunk) { 49 chunk = uint32(left) 50 } 51 block := make([]byte, chunk) 52 err = in.ReadBytes(block) 53 if err != nil { 54 return nil, err 55 } 56 self.blocks = append(self.blocks, block) 57 left -= int64(chunk) 58 } 59 // So .getPosition still works 60 self.nextWrite = uint32(len(self.blocks[len(self.blocks)-1])) 61 return self, nil 62 } 63 64 func (bs *BytesStore) WriteByte(b byte) error { 65 if bs.nextWrite == bs.blockSize { 66 bs.current = make([]byte, bs.blockSize) 67 bs.blocks = append(bs.blocks, bs.current) 68 bs.nextWrite = 0 69 } 70 bs.current[bs.nextWrite] = b 71 bs.nextWrite++ 72 return nil 73 } 74 75 func (bs *BytesStore) WriteBytes(buf []byte) error { 76 var offset uint32 = 0 77 length := uint32(len(buf)) 78 for length > 0 { 79 chunk := bs.blockSize - bs.nextWrite 80 if length <= chunk { 81 copy(bs.current[bs.nextWrite:], buf[offset:offset+length]) 82 bs.nextWrite += length 83 break 84 } else { 85 if chunk > 0 { 86 copy(bs.current[bs.nextWrite:], buf[offset:offset+chunk]) 87 offset += chunk 88 length -= chunk 89 } 90 bs.current = make([]byte, bs.blockSize) 91 bs.blocks = append(bs.blocks, bs.current) 92 bs.nextWrite = 0 93 } 94 } 95 return nil 96 } 97 98 func (s *BytesStore) writeBytesAt(dest int64, b []byte) { 99 length := len(b) 100 assert2(dest+int64(length) <= s.position(), 101 "dest=%v pos=%v len=%v", dest, s.position(), length) 102 103 end := dest + int64(length) 104 blockIndex := int(end >> s.blockBits) 105 downTo := int(end & int64(s.blockMask)) 106 if downTo == 0 { 107 blockIndex-- 108 downTo = int(s.blockSize) 109 } 110 block := s.blocks[blockIndex] 111 112 for length > 0 { 113 if length <= downTo { 114 copy(block[downTo-length:], b[:length]) 115 break 116 } 117 length -= downTo 118 copy(block, b[length:length+downTo]) 119 blockIndex-- 120 block = s.blocks[blockIndex] 121 downTo = int(s.blockSize) 122 } 123 } 124 125 func (s *BytesStore) copyBytesInside(src, dest int64, length int) { 126 assert(src < dest) 127 128 end := src + int64(length) 129 130 blockIndex := int(end >> s.blockBits) 131 downTo := int(end & int64(s.blockMask)) 132 if downTo == 0 { 133 blockIndex-- 134 downTo = int(s.blockSize) 135 } 136 block := s.blocks[blockIndex] 137 138 for length > 0 { 139 if length <= downTo { 140 s.writeBytesAt(dest, block[downTo-length:downTo]) 141 break 142 } 143 length -= downTo 144 s.writeBytesAt(dest+int64(length), block[:downTo]) 145 blockIndex-- 146 block = s.blocks[blockIndex] 147 downTo = int(s.blockSize) 148 } 149 } 150 151 /* Reverse from srcPos, inclusive, to destPos, inclusive. */ 152 func (s *BytesStore) reverse(srcPos, destPos int64) { 153 assert(srcPos < destPos) 154 assert(destPos < s.position()) 155 // fmt.Printf("reverse src=%v dest=%v\n", srcPos, destPos) 156 157 srcBlockIndex := int(srcPos >> s.blockBits) 158 src := int(srcPos & int64(s.blockMask)) 159 srcBlock := s.blocks[srcBlockIndex] 160 161 destBlockIndex := int(destPos >> s.blockBits) 162 dest := int(destPos & int64(s.blockMask)) 163 destBlock := s.blocks[destBlockIndex] 164 165 // fmt.Printf(" srcBlock=%v destBlock=%v\n", srcBlockIndex, destBlockIndex) 166 167 limit := int((destPos - srcPos + 1) / 2) 168 for i := 0; i < limit; i++ { 169 // fmt.Printf(" cycle src=%v dest=%v\n", src, dest) 170 srcBlock[src], destBlock[dest] = destBlock[dest], srcBlock[src] 171 if src++; src == int(s.blockSize) { 172 srcBlockIndex++ 173 srcBlock = s.blocks[srcBlockIndex] 174 fmt.Printf(" set destBlock=%v srcBlock=%v\n", destBlock, srcBlock) 175 src = 0 176 } 177 178 if dest--; dest == -1 { 179 destBlockIndex-- 180 destBlock = s.blocks[destBlockIndex] 181 fmt.Printf(" set destBlock=%v srcBlock=%v\n", destBlock, srcBlock) 182 dest = int(s.blockSize - 1) 183 } 184 } 185 } 186 187 func (s *BytesStore) skipBytes(length int) { 188 for length > 0 { 189 chunk := int(s.blockSize) - int(s.nextWrite) 190 if length <= chunk { 191 s.nextWrite += uint32(length) 192 break 193 } 194 length -= chunk 195 s.current = make([]byte, s.blockSize) 196 s.blocks = append(s.blocks, s.current) 197 s.nextWrite = 0 198 } 199 } 200 201 func (s *BytesStore) position() int64 { 202 return int64(len(s.blocks)-1)*int64(s.blockSize) + int64(s.nextWrite) 203 } 204 205 func (s *BytesStore) finish() { 206 if s.current != nil { 207 lastBuffer := make([]byte, s.nextWrite) 208 copy(lastBuffer, s.current[:s.nextWrite]) 209 s.blocks[len(s.blocks)-1] = lastBuffer 210 s.current = nil 211 } 212 } 213 214 /* Writes all of our bytes to the target DataOutput. */ 215 func (s *BytesStore) writeTo(out util.DataOutput) error { 216 for _, block := range s.blocks { 217 err := out.WriteBytes(block) 218 if err != nil { 219 return err 220 } 221 } 222 return nil 223 } 224 225 func (s *BytesStore) String() string { 226 return fmt.Sprintf("%v-bits x%v bytes store", s.blockBits, len(s.blocks)) 227 } 228 229 type BytesStoreForwardReader struct { 230 *util.DataInputImpl 231 owner *BytesStore 232 current []byte 233 nextBuffer uint32 234 nextRead uint32 235 } 236 237 func (r *BytesStoreForwardReader) ReadByte() (b byte, err error) { 238 if r.nextRead == r.owner.blockSize { 239 r.current = r.owner.blocks[r.nextBuffer] 240 r.nextBuffer++ 241 r.nextRead = 0 242 } 243 b = r.current[r.nextRead] 244 r.nextRead++ 245 return b, nil 246 } 247 248 func (r *BytesStoreForwardReader) ReadBytes(buf []byte) error { 249 var offset uint32 = 0 250 length := uint32(len(buf)) 251 for length > 0 { 252 chunkLeft := r.owner.blockSize - r.nextRead 253 if length <= chunkLeft { 254 copy(buf[offset:], r.current[r.nextRead:r.nextRead+length]) 255 r.nextRead += length 256 break 257 } else { 258 if chunkLeft > 0 { 259 copy(buf[offset:], r.current[r.nextRead:r.nextRead+chunkLeft]) 260 offset += chunkLeft 261 length -= chunkLeft 262 } 263 r.current = r.owner.blocks[r.nextBuffer] 264 r.nextBuffer++ 265 r.nextRead = 0 266 } 267 } 268 return nil 269 } 270 271 func (r *BytesStoreForwardReader) skipBytes(count int64) { 272 r.setPosition(r.getPosition() + count) 273 } 274 275 func (r *BytesStoreForwardReader) getPosition() int64 { 276 return (int64(r.nextBuffer)-1)*int64(r.owner.blockSize) + int64(r.nextRead) 277 } 278 279 func (r *BytesStoreForwardReader) setPosition(pos int64) { 280 bufferIndex := pos >> r.owner.blockBits 281 r.nextBuffer = uint32(bufferIndex + 1) 282 r.current = r.owner.blocks[bufferIndex] 283 r.nextRead = uint32(pos) & r.owner.blockMask 284 // assert self.getPosition() == pos 285 } 286 287 func (r *BytesStoreForwardReader) reversed() bool { 288 return false 289 } 290 291 func (bs *BytesStore) forwardReader() BytesReader { 292 if len(bs.blocks) == 1 { 293 return newForwardBytesReader(bs.blocks[0]) 294 } 295 ans := &BytesStoreForwardReader{owner: bs, nextRead: bs.blockSize} 296 ans.DataInputImpl = util.NewDataInput(ans) 297 return ans 298 } 299 300 func (bs *BytesStore) reverseReader() BytesReader { 301 return bs.reverseReaderAllowSingle(true) 302 } 303 304 type BytesStoreReverseReader struct { 305 *util.DataInputImpl 306 owner *BytesStore 307 current []byte 308 nextBuffer int32 309 nextRead int32 310 } 311 312 func newBytesStoreReverseReader(owner *BytesStore, current []byte) *BytesStoreReverseReader { 313 ans := &BytesStoreReverseReader{owner: owner, current: current, nextBuffer: -1} 314 ans.DataInputImpl = util.NewDataInput(ans) 315 return ans 316 } 317 318 func (r *BytesStoreReverseReader) ReadByte() (b byte, err error) { 319 if r.nextRead == -1 { 320 r.current = r.owner.blocks[r.nextBuffer] 321 r.nextBuffer-- 322 r.nextRead = int32(r.owner.blockSize - 1) 323 } 324 r.nextRead-- 325 return r.current[r.nextRead+1], nil 326 } 327 328 func (r *BytesStoreReverseReader) ReadBytes(buf []byte) error { 329 var err error 330 for i, _ := range buf { 331 buf[i], err = r.ReadByte() 332 if err != nil { 333 return err 334 } 335 } 336 return err 337 } 338 339 func (r *BytesStoreReverseReader) skipBytes(count int64) { 340 r.setPosition(r.getPosition() - count) 341 } 342 343 func (r *BytesStoreReverseReader) getPosition() int64 { 344 return (int64(r.nextBuffer)+1)*int64(r.owner.blockSize) + int64(r.nextRead) 345 } 346 347 func (r *BytesStoreReverseReader) setPosition(pos int64) { 348 // NOTE: a little weird because if you 349 // setPosition(0), the next byte you read is 350 // bytes[0] ... but I would expect bytes[-1] (ie, 351 // EOF)...? 352 bufferIndex := int32(pos >> r.owner.blockSize) 353 r.nextBuffer = bufferIndex - 1 354 r.current = r.owner.blocks[bufferIndex] 355 r.nextRead = int32(uint32(pos) & r.owner.blockMask) 356 // assert getPosition() == pos 357 } 358 359 func (r *BytesStoreReverseReader) reversed() bool { 360 return true 361 } 362 363 func (bs *BytesStore) reverseReaderAllowSingle(allowSingle bool) BytesReader { 364 if allowSingle && len(bs.blocks) == 1 { 365 return newReverseBytesReader(bs.blocks[0]) 366 } 367 var current []byte = nil 368 if len(bs.blocks) > 0 { 369 current = bs.blocks[0] 370 } 371 return newBytesStoreReverseReader(bs, current) 372 } 373 374 type ForwardBytesReader struct { 375 *util.DataInputImpl 376 bytes []byte 377 pos int 378 } 379 380 func (r *ForwardBytesReader) ReadByte() (b byte, err error) { 381 r.pos++ 382 return r.bytes[r.pos-1], nil 383 } 384 385 func (r *ForwardBytesReader) ReadBytes(buf []byte) error { 386 copy(buf, r.bytes[r.pos:r.pos+len(buf)]) 387 r.pos += len(buf) 388 return nil 389 } 390 391 func (r *ForwardBytesReader) skipBytes(count int64) { 392 r.pos += int(count) 393 } 394 395 func (r *ForwardBytesReader) getPosition() int64 { 396 return int64(r.pos) 397 } 398 399 func (r *ForwardBytesReader) setPosition(pos int64) { 400 r.pos = int(pos) 401 } 402 403 func (r *ForwardBytesReader) reversed() bool { 404 return false 405 } 406 407 func newForwardBytesReader(bytes []byte) BytesReader { 408 ans := &ForwardBytesReader{bytes: bytes} 409 ans.DataInputImpl = util.NewDataInput(ans) 410 return ans 411 } 412 413 type ReverseBytesReader struct { 414 *util.DataInputImpl 415 bytes []byte 416 pos int 417 } 418 419 func (r *ReverseBytesReader) ReadByte() (b byte, err error) { 420 r.pos-- 421 return r.bytes[r.pos+1], nil 422 } 423 424 func (r *ReverseBytesReader) ReadBytes(buf []byte) error { 425 for i, _ := range buf { 426 buf[i] = r.bytes[r.pos] 427 r.pos-- 428 } 429 return nil 430 } 431 432 func newReverseBytesReader(bytes []byte) BytesReader { 433 ans := &ReverseBytesReader{bytes: bytes} 434 ans.DataInputImpl = util.NewDataInput(ans) 435 return ans 436 } 437 438 func (r *ReverseBytesReader) skipBytes(count int64) { 439 r.pos -= int(count) 440 } 441 442 func (r *ReverseBytesReader) getPosition() int64 { 443 return int64(r.pos) 444 } 445 446 func (r *ReverseBytesReader) setPosition(pos int64) { 447 r.pos = int(pos) 448 } 449 450 func (r *ReverseBytesReader) reversed() bool { 451 return true 452 } 453 454 func (r *ReverseBytesReader) String() string { 455 return fmt.Sprintf("BytesReader(reversed, [%v,%v])", r.pos, len(r.bytes)) 456 }