github.com/balzaczyy/golucene@v0.0.0-20151210033525-d0be9ee89713/core/util/fst/bytes.go (about)

     1  package fst
     2  
     3  import (
     4  	"fmt"
     5  	"github.com/balzaczyy/golucene/core/util"
     6  )
     7  
     8  type BytesStore struct {
     9  	*util.DataOutputImpl
    10  	blocks    [][]byte
    11  	blockSize uint32
    12  	blockBits uint32
    13  	blockMask uint32
    14  	current   []byte
    15  	nextWrite uint32
    16  }
    17  
    18  func newBytesStore() *BytesStore {
    19  	bs := &BytesStore{}
    20  	bs.DataOutputImpl = util.NewDataOutput(bs)
    21  	return bs
    22  }
    23  
    24  func newBytesStoreFromBits(blockBits uint32) *BytesStore {
    25  	blockSize := uint32(1) << blockBits
    26  	self := newBytesStore()
    27  	self.blockBits = blockBits
    28  	self.blockSize = blockSize
    29  	self.blockMask = blockSize - 1
    30  	self.nextWrite = blockSize
    31  	return self
    32  }
    33  
    34  func newBytesStoreFromInput(in util.DataInput, numBytes int64, maxBlockSize uint32) (bs *BytesStore, err error) {
    35  	var blockSize uint32 = 2
    36  	var blockBits uint32 = 1
    37  	for int64(blockSize) < numBytes && blockSize < maxBlockSize {
    38  		blockSize *= 2
    39  		blockBits++
    40  	}
    41  	self := newBytesStore()
    42  	self.blockBits = blockBits
    43  	self.blockSize = blockSize
    44  	self.blockMask = blockSize - 1
    45  	left := numBytes
    46  	for left > 0 {
    47  		chunk := blockSize
    48  		if left < int64(chunk) {
    49  			chunk = uint32(left)
    50  		}
    51  		block := make([]byte, chunk)
    52  		err = in.ReadBytes(block)
    53  		if err != nil {
    54  			return nil, err
    55  		}
    56  		self.blocks = append(self.blocks, block)
    57  		left -= int64(chunk)
    58  	}
    59  	// So .getPosition still works
    60  	self.nextWrite = uint32(len(self.blocks[len(self.blocks)-1]))
    61  	return self, nil
    62  }
    63  
    64  func (bs *BytesStore) WriteByte(b byte) error {
    65  	if bs.nextWrite == bs.blockSize {
    66  		bs.current = make([]byte, bs.blockSize)
    67  		bs.blocks = append(bs.blocks, bs.current)
    68  		bs.nextWrite = 0
    69  	}
    70  	bs.current[bs.nextWrite] = b
    71  	bs.nextWrite++
    72  	return nil
    73  }
    74  
    75  func (bs *BytesStore) WriteBytes(buf []byte) error {
    76  	var offset uint32 = 0
    77  	length := uint32(len(buf))
    78  	for length > 0 {
    79  		chunk := bs.blockSize - bs.nextWrite
    80  		if length <= chunk {
    81  			copy(bs.current[bs.nextWrite:], buf[offset:offset+length])
    82  			bs.nextWrite += length
    83  			break
    84  		} else {
    85  			if chunk > 0 {
    86  				copy(bs.current[bs.nextWrite:], buf[offset:offset+chunk])
    87  				offset += chunk
    88  				length -= chunk
    89  			}
    90  			bs.current = make([]byte, bs.blockSize)
    91  			bs.blocks = append(bs.blocks, bs.current)
    92  			bs.nextWrite = 0
    93  		}
    94  	}
    95  	return nil
    96  }
    97  
    98  func (s *BytesStore) writeBytesAt(dest int64, b []byte) {
    99  	length := len(b)
   100  	assert2(dest+int64(length) <= s.position(),
   101  		"dest=%v pos=%v len=%v", dest, s.position(), length)
   102  
   103  	end := dest + int64(length)
   104  	blockIndex := int(end >> s.blockBits)
   105  	downTo := int(end & int64(s.blockMask))
   106  	if downTo == 0 {
   107  		blockIndex--
   108  		downTo = int(s.blockSize)
   109  	}
   110  	block := s.blocks[blockIndex]
   111  
   112  	for length > 0 {
   113  		if length <= downTo {
   114  			copy(block[downTo-length:], b[:length])
   115  			break
   116  		}
   117  		length -= downTo
   118  		copy(block, b[length:length+downTo])
   119  		blockIndex--
   120  		block = s.blocks[blockIndex]
   121  		downTo = int(s.blockSize)
   122  	}
   123  }
   124  
   125  func (s *BytesStore) copyBytesInside(src, dest int64, length int) {
   126  	assert(src < dest)
   127  
   128  	end := src + int64(length)
   129  
   130  	blockIndex := int(end >> s.blockBits)
   131  	downTo := int(end & int64(s.blockMask))
   132  	if downTo == 0 {
   133  		blockIndex--
   134  		downTo = int(s.blockSize)
   135  	}
   136  	block := s.blocks[blockIndex]
   137  
   138  	for length > 0 {
   139  		if length <= downTo {
   140  			s.writeBytesAt(dest, block[downTo-length:downTo])
   141  			break
   142  		}
   143  		length -= downTo
   144  		s.writeBytesAt(dest+int64(length), block[:downTo])
   145  		blockIndex--
   146  		block = s.blocks[blockIndex]
   147  		downTo = int(s.blockSize)
   148  	}
   149  }
   150  
   151  /* Reverse from srcPos, inclusive, to destPos, inclusive. */
   152  func (s *BytesStore) reverse(srcPos, destPos int64) {
   153  	assert(srcPos < destPos)
   154  	assert(destPos < s.position())
   155  	// fmt.Printf("reverse src=%v dest=%v\n", srcPos, destPos)
   156  
   157  	srcBlockIndex := int(srcPos >> s.blockBits)
   158  	src := int(srcPos & int64(s.blockMask))
   159  	srcBlock := s.blocks[srcBlockIndex]
   160  
   161  	destBlockIndex := int(destPos >> s.blockBits)
   162  	dest := int(destPos & int64(s.blockMask))
   163  	destBlock := s.blocks[destBlockIndex]
   164  
   165  	// fmt.Printf("  srcBlock=%v destBlock=%v\n", srcBlockIndex, destBlockIndex)
   166  
   167  	limit := int((destPos - srcPos + 1) / 2)
   168  	for i := 0; i < limit; i++ {
   169  		// fmt.Printf("  cycle src=%v dest=%v\n", src, dest)
   170  		srcBlock[src], destBlock[dest] = destBlock[dest], srcBlock[src]
   171  		if src++; src == int(s.blockSize) {
   172  			srcBlockIndex++
   173  			srcBlock = s.blocks[srcBlockIndex]
   174  			fmt.Printf("  set destBlock=%v srcBlock=%v\n", destBlock, srcBlock)
   175  			src = 0
   176  		}
   177  
   178  		if dest--; dest == -1 {
   179  			destBlockIndex--
   180  			destBlock = s.blocks[destBlockIndex]
   181  			fmt.Printf("  set destBlock=%v srcBlock=%v\n", destBlock, srcBlock)
   182  			dest = int(s.blockSize - 1)
   183  		}
   184  	}
   185  }
   186  
   187  func (s *BytesStore) skipBytes(length int) {
   188  	for length > 0 {
   189  		chunk := int(s.blockSize) - int(s.nextWrite)
   190  		if length <= chunk {
   191  			s.nextWrite += uint32(length)
   192  			break
   193  		}
   194  		length -= chunk
   195  		s.current = make([]byte, s.blockSize)
   196  		s.blocks = append(s.blocks, s.current)
   197  		s.nextWrite = 0
   198  	}
   199  }
   200  
   201  func (s *BytesStore) position() int64 {
   202  	return int64(len(s.blocks)-1)*int64(s.blockSize) + int64(s.nextWrite)
   203  }
   204  
   205  func (s *BytesStore) finish() {
   206  	if s.current != nil {
   207  		lastBuffer := make([]byte, s.nextWrite)
   208  		copy(lastBuffer, s.current[:s.nextWrite])
   209  		s.blocks[len(s.blocks)-1] = lastBuffer
   210  		s.current = nil
   211  	}
   212  }
   213  
   214  /* Writes all of our bytes to the target DataOutput. */
   215  func (s *BytesStore) writeTo(out util.DataOutput) error {
   216  	for _, block := range s.blocks {
   217  		err := out.WriteBytes(block)
   218  		if err != nil {
   219  			return err
   220  		}
   221  	}
   222  	return nil
   223  }
   224  
   225  func (s *BytesStore) String() string {
   226  	return fmt.Sprintf("%v-bits x%v bytes store", s.blockBits, len(s.blocks))
   227  }
   228  
   229  type BytesStoreForwardReader struct {
   230  	*util.DataInputImpl
   231  	owner      *BytesStore
   232  	current    []byte
   233  	nextBuffer uint32
   234  	nextRead   uint32
   235  }
   236  
   237  func (r *BytesStoreForwardReader) ReadByte() (b byte, err error) {
   238  	if r.nextRead == r.owner.blockSize {
   239  		r.current = r.owner.blocks[r.nextBuffer]
   240  		r.nextBuffer++
   241  		r.nextRead = 0
   242  	}
   243  	b = r.current[r.nextRead]
   244  	r.nextRead++
   245  	return b, nil
   246  }
   247  
   248  func (r *BytesStoreForwardReader) ReadBytes(buf []byte) error {
   249  	var offset uint32 = 0
   250  	length := uint32(len(buf))
   251  	for length > 0 {
   252  		chunkLeft := r.owner.blockSize - r.nextRead
   253  		if length <= chunkLeft {
   254  			copy(buf[offset:], r.current[r.nextRead:r.nextRead+length])
   255  			r.nextRead += length
   256  			break
   257  		} else {
   258  			if chunkLeft > 0 {
   259  				copy(buf[offset:], r.current[r.nextRead:r.nextRead+chunkLeft])
   260  				offset += chunkLeft
   261  				length -= chunkLeft
   262  			}
   263  			r.current = r.owner.blocks[r.nextBuffer]
   264  			r.nextBuffer++
   265  			r.nextRead = 0
   266  		}
   267  	}
   268  	return nil
   269  }
   270  
   271  func (r *BytesStoreForwardReader) skipBytes(count int64) {
   272  	r.setPosition(r.getPosition() + count)
   273  }
   274  
   275  func (r *BytesStoreForwardReader) getPosition() int64 {
   276  	return (int64(r.nextBuffer)-1)*int64(r.owner.blockSize) + int64(r.nextRead)
   277  }
   278  
   279  func (r *BytesStoreForwardReader) setPosition(pos int64) {
   280  	bufferIndex := pos >> r.owner.blockBits
   281  	r.nextBuffer = uint32(bufferIndex + 1)
   282  	r.current = r.owner.blocks[bufferIndex]
   283  	r.nextRead = uint32(pos) & r.owner.blockMask
   284  	// assert self.getPosition() == pos
   285  }
   286  
   287  func (r *BytesStoreForwardReader) reversed() bool {
   288  	return false
   289  }
   290  
   291  func (bs *BytesStore) forwardReader() BytesReader {
   292  	if len(bs.blocks) == 1 {
   293  		return newForwardBytesReader(bs.blocks[0])
   294  	}
   295  	ans := &BytesStoreForwardReader{owner: bs, nextRead: bs.blockSize}
   296  	ans.DataInputImpl = util.NewDataInput(ans)
   297  	return ans
   298  }
   299  
   300  func (bs *BytesStore) reverseReader() BytesReader {
   301  	return bs.reverseReaderAllowSingle(true)
   302  }
   303  
   304  type BytesStoreReverseReader struct {
   305  	*util.DataInputImpl
   306  	owner      *BytesStore
   307  	current    []byte
   308  	nextBuffer int32
   309  	nextRead   int32
   310  }
   311  
   312  func newBytesStoreReverseReader(owner *BytesStore, current []byte) *BytesStoreReverseReader {
   313  	ans := &BytesStoreReverseReader{owner: owner, current: current, nextBuffer: -1}
   314  	ans.DataInputImpl = util.NewDataInput(ans)
   315  	return ans
   316  }
   317  
   318  func (r *BytesStoreReverseReader) ReadByte() (b byte, err error) {
   319  	if r.nextRead == -1 {
   320  		r.current = r.owner.blocks[r.nextBuffer]
   321  		r.nextBuffer--
   322  		r.nextRead = int32(r.owner.blockSize - 1)
   323  	}
   324  	r.nextRead--
   325  	return r.current[r.nextRead+1], nil
   326  }
   327  
   328  func (r *BytesStoreReverseReader) ReadBytes(buf []byte) error {
   329  	var err error
   330  	for i, _ := range buf {
   331  		buf[i], err = r.ReadByte()
   332  		if err != nil {
   333  			return err
   334  		}
   335  	}
   336  	return err
   337  }
   338  
   339  func (r *BytesStoreReverseReader) skipBytes(count int64) {
   340  	r.setPosition(r.getPosition() - count)
   341  }
   342  
   343  func (r *BytesStoreReverseReader) getPosition() int64 {
   344  	return (int64(r.nextBuffer)+1)*int64(r.owner.blockSize) + int64(r.nextRead)
   345  }
   346  
   347  func (r *BytesStoreReverseReader) setPosition(pos int64) {
   348  	// NOTE: a little weird because if you
   349  	// setPosition(0), the next byte you read is
   350  	// bytes[0] ... but I would expect bytes[-1] (ie,
   351  	// EOF)...?
   352  	bufferIndex := int32(pos >> r.owner.blockSize)
   353  	r.nextBuffer = bufferIndex - 1
   354  	r.current = r.owner.blocks[bufferIndex]
   355  	r.nextRead = int32(uint32(pos) & r.owner.blockMask)
   356  	// assert getPosition() == pos
   357  }
   358  
   359  func (r *BytesStoreReverseReader) reversed() bool {
   360  	return true
   361  }
   362  
   363  func (bs *BytesStore) reverseReaderAllowSingle(allowSingle bool) BytesReader {
   364  	if allowSingle && len(bs.blocks) == 1 {
   365  		return newReverseBytesReader(bs.blocks[0])
   366  	}
   367  	var current []byte = nil
   368  	if len(bs.blocks) > 0 {
   369  		current = bs.blocks[0]
   370  	}
   371  	return newBytesStoreReverseReader(bs, current)
   372  }
   373  
   374  type ForwardBytesReader struct {
   375  	*util.DataInputImpl
   376  	bytes []byte
   377  	pos   int
   378  }
   379  
   380  func (r *ForwardBytesReader) ReadByte() (b byte, err error) {
   381  	r.pos++
   382  	return r.bytes[r.pos-1], nil
   383  }
   384  
   385  func (r *ForwardBytesReader) ReadBytes(buf []byte) error {
   386  	copy(buf, r.bytes[r.pos:r.pos+len(buf)])
   387  	r.pos += len(buf)
   388  	return nil
   389  }
   390  
   391  func (r *ForwardBytesReader) skipBytes(count int64) {
   392  	r.pos += int(count)
   393  }
   394  
   395  func (r *ForwardBytesReader) getPosition() int64 {
   396  	return int64(r.pos)
   397  }
   398  
   399  func (r *ForwardBytesReader) setPosition(pos int64) {
   400  	r.pos = int(pos)
   401  }
   402  
   403  func (r *ForwardBytesReader) reversed() bool {
   404  	return false
   405  }
   406  
   407  func newForwardBytesReader(bytes []byte) BytesReader {
   408  	ans := &ForwardBytesReader{bytes: bytes}
   409  	ans.DataInputImpl = util.NewDataInput(ans)
   410  	return ans
   411  }
   412  
   413  type ReverseBytesReader struct {
   414  	*util.DataInputImpl
   415  	bytes []byte
   416  	pos   int
   417  }
   418  
   419  func (r *ReverseBytesReader) ReadByte() (b byte, err error) {
   420  	r.pos--
   421  	return r.bytes[r.pos+1], nil
   422  }
   423  
   424  func (r *ReverseBytesReader) ReadBytes(buf []byte) error {
   425  	for i, _ := range buf {
   426  		buf[i] = r.bytes[r.pos]
   427  		r.pos--
   428  	}
   429  	return nil
   430  }
   431  
   432  func newReverseBytesReader(bytes []byte) BytesReader {
   433  	ans := &ReverseBytesReader{bytes: bytes}
   434  	ans.DataInputImpl = util.NewDataInput(ans)
   435  	return ans
   436  }
   437  
   438  func (r *ReverseBytesReader) skipBytes(count int64) {
   439  	r.pos -= int(count)
   440  }
   441  
   442  func (r *ReverseBytesReader) getPosition() int64 {
   443  	return int64(r.pos)
   444  }
   445  
   446  func (r *ReverseBytesReader) setPosition(pos int64) {
   447  	r.pos = int(pos)
   448  }
   449  
   450  func (r *ReverseBytesReader) reversed() bool {
   451  	return true
   452  }
   453  
   454  func (r *ReverseBytesReader) String() string {
   455  	return fmt.Sprintf("BytesReader(reversed, [%v,%v])", r.pos, len(r.bytes))
   456  }