github.com/balzaczyy/golucene@v0.0.0-20151210033525-d0be9ee89713/core/codec/blocktree/segmentTermEnumFrame.go

package blocktree

import (
	"bytes"
	"fmt"
	. "github.com/balzaczyy/golucene/core/codec/spi"
	. "github.com/balzaczyy/golucene/core/index/model"
	"github.com/balzaczyy/golucene/core/store"
	"github.com/balzaczyy/golucene/core/util"
	"github.com/balzaczyy/golucene/core/util/fst"
)

type segmentTermsEnumFrame struct {
	// Our index in stack[]:
	ord int

	hasTerms     bool
	hasTermsOrig bool
	isFloor      bool

	arc *fst.Arc

	// File pointer where this block was loaded from
	fp     int64
	fpOrig int64
	fpEnd  int64

	suffixBytes    []byte
	suffixesReader store.ByteArrayDataInput

	statBytes   []byte
	statsReader store.ByteArrayDataInput

	floorData       []byte
	floorDataReader store.ByteArrayDataInput

	// Length of prefix shared by all terms in this block
	prefix int

	// Number of entries (term or sub-block) in this block
	entCount int

	// Which term we will next read, or -1 if the block
	// isn't loaded yet
	nextEnt int

	// True if this block is either not a floor block,
	// or, it's the last sub-block of a floor block
	isLastInFloor bool

	// True if all entries are terms
	isLeafBlock bool

	lastSubFP int64

	nextFloorLabel       int
	numFollowFloorBlocks int

	// Next term to decode metaData; we decode metaData
	// lazily so that scanning to find the matching term is
	// fast and only if you find a match and app wants the
	// stats or docs/positions enums, will we decode the
	// metaData
	metaDataUpto int

	state *BlockTermState

	// metadata buffer, holding monotonic values
	longs []int64
	// metadata buffer, holding general values
	bytes       []byte
	bytesReader *store.ByteArrayDataInput

	ste *SegmentTermsEnum

	startBytePos int
	suffix       int
	subCode      int64
}

func newFrame(ste *SegmentTermsEnum, ord int) *segmentTermsEnumFrame {
	f := &segmentTermsEnumFrame{
		suffixBytes: make([]byte, 128),
		statBytes:   make([]byte, 64),
		floorData:   make([]byte, 32),
		ste:         ste,
		ord:         ord,
		longs:       make([]int64, ste.fr.longsSize),
	}
	f.state = ste.fr.parent.postingsReader.NewTermState()
	f.state.TotalTermFreq = -1
	return f
}

func (f *segmentTermsEnumFrame) setFloorData(in *store.ByteArrayDataInput, source []byte) {
	numBytes := len(source) - in.Pos
	if numBytes > len(f.floorData) {
		// TODO over allocate
		f.floorData = make([]byte, numBytes)
	}
	copy(f.floorData, source[in.Pos:])
	f.floorDataReader.Reset(f.floorData)
	f.numFollowFloorBlocks, _ = asInt(f.floorDataReader.ReadVInt())
	b, _ := f.floorDataReader.ReadByte()
	f.nextFloorLabel = int(b)
	// fmt.Printf(" setFloorData fpOrig=%v bytes=%v numFollowFloorBlocks=%v nextFloorLabel=%x\n",
	// 	f.fpOrig, source[in.Pos:], f.numFollowFloorBlocks, f.nextFloorLabel)
}

func (f *segmentTermsEnumFrame) getTermBlockOrd() int {
	if f.isLeafBlock {
		return f.nextEnt
	}
	return f.state.TermBlockOrd
}
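
// A rough sketch of the block layout that loadBlock below decodes, inferred
// from the reads it performs (widths follow the ReadVInt/ReadBytes calls
// themselves); this is a reading aid, not a normative format spec:
//
//	vInt  code                entCount = code>>1, low bit = isLastInFloor
//	vInt  code                numSuffixBytes = code>>1, low bit = isLeafBlock
//	byte[numSuffixBytes]      term suffixes for this block
//	vInt  numStatBytes
//	byte[numStatBytes]        per-term stats (docFreq, totalTermFreq delta)
//	vInt  numMetaBytes
//	byte[numMetaBytes]        postings metadata, decoded lazily in decodeMetaData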

/* Does initial decode of next block of terms; this
doesn't actually decode the docFreq, totalTermFreq,
postings details (frq/prx offset, etc.) metadata;
it just loads them as byte[] blobs which are then
decoded on-demand if the metadata is ever requested
for any term in this block. This enables terms-only
intensive consumers (eg certain MTQs, respelling) to
not pay the price of decoding metadata they won't
use. */
func (f *segmentTermsEnumFrame) loadBlock() (err error) {
	// Clone the IndexInput lazily, so that consumers
	// that just pull a TermsEnum to
	// seekExact(TermState) don't pay this cost:
	f.ste.initIndexInput()

	if f.nextEnt != -1 {
		// Already loaded
		return
	}

	f.ste.in.Seek(f.fp)
	code, err := asInt(f.ste.in.ReadVInt())
	if err != nil {
		return err
	}
	f.entCount = int(uint(code) >> 1)
	assert(f.entCount > 0)
	f.isLastInFloor = (code & 1) != 0

	assert2(f.arc == nil || !f.isLastInFloor || !f.isFloor,
		"fp=%v arc=%v isFloor=%v isLastInFloor=%v",
		f.fp, f.arc, f.isFloor, f.isLastInFloor)

	// TODO: if suffixes were stored in random-access
	// array structure, then we could do binary search
	// instead of linear scan to find target term; eg
	// we could have simple array of offsets

	// term suffixes:
	code, err = asInt(f.ste.in.ReadVInt())
	if err != nil {
		return err
	}
	f.isLeafBlock = (code & 1) != 0
	numBytes := int(uint(code) >> 1)
	if len(f.suffixBytes) < numBytes {
		f.suffixBytes = make([]byte, numBytes)
	}
	err = f.ste.in.ReadBytes(f.suffixBytes[:numBytes])
	if err != nil {
		return err
	}
	f.suffixesReader.Reset(f.suffixBytes)

	// if f.arc == nil {
	// 	fmt.Printf(" loadBlock (next) fp=%v entCount=%v prefixLen=%v isLastInFloor=%v leaf?=%v\n",
	// 		f.fp, f.entCount, f.prefix, f.isLastInFloor, f.isLeafBlock)
	// } else {
	// 	fmt.Printf(" loadBlock (seek) fp=%v entCount=%v prefixLen=%v hasTerms?=%v isFloor?=%v isLastInFloor=%v leaf?=%v\n",
	// 		f.fp, f.entCount, f.prefix, f.hasTerms, f.isFloor, f.isLastInFloor, f.isLeafBlock)
	// }

	// stats
	numBytes, err = asInt(f.ste.in.ReadVInt())
	if err != nil {
		return err
	}
	if len(f.statBytes) < numBytes {
		f.statBytes = make([]byte, numBytes)
	}
	err = f.ste.in.ReadBytes(f.statBytes[:numBytes])
	if err != nil {
		return err
	}
	f.statsReader.Reset(f.statBytes)
	f.metaDataUpto = 0

	f.state.TermBlockOrd = 0
	f.nextEnt = 0
	f.lastSubFP = -1

	// TODO: we could skip this if !hasTerms; but
	// that's rare so won't help much
	// metadata
	if numBytes, err = asInt(f.ste.in.ReadVInt()); err != nil {
		return err
	}
	if f.bytes == nil {
		f.bytes = make([]byte, util.Oversize(numBytes, 1))
		f.bytesReader = store.NewEmptyByteArrayDataInput()
	} else if len(f.bytes) < numBytes {
		f.bytes = make([]byte, util.Oversize(numBytes, 1))
	}
	if err = f.ste.in.ReadBytes(f.bytes[:numBytes]); err != nil {
		return err
	}
	f.bytesReader.Reset(f.bytes)

	// Sub-blocks of a single floor block are always
	// written one after another -- tail recurse:
	f.fpEnd = f.ste.in.FilePointer()
	// fmt.Printf(" fpEnd=%v\n", f.fpEnd)
	return nil
}

func (f *segmentTermsEnumFrame) rewind() {
	// Force reload:
	f.fp = f.fpOrig
	f.nextEnt = -1
	f.hasTerms = f.hasTermsOrig
	if f.isFloor {
		f.floorDataReader.Rewind()
		f.numFollowFloorBlocks, _ = asInt(f.floorDataReader.ReadVInt())
		assert(f.numFollowFloorBlocks > 0)
		b, _ := f.floorDataReader.ReadByte()
		f.nextFloorLabel = int(b)
	}
}

func (f *segmentTermsEnumFrame) next() bool {
	if f.isLeafBlock {
		return f.nextLeaf()
	}
	return f.nextNonLeaf()
}
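
// Decoding of the next entry is not implemented in this port (the two stubs
// below panic). As a rough, untested sketch only: a leaf entry is encoded the
// same way scanToTermLeaf consumes it (vInt suffix length, then the suffix
// bytes), so a leaf decode would look roughly like:
//
//	f.nextEnt++
//	f.suffix, _ = asInt(f.suffixesReader.ReadVInt())
//	f.startBytePos = f.suffixesReader.Pos
//	f.suffixesReader.SkipBytes(int64(f.suffix))
//	f.fillTerm() // materialize prefix+suffix into ste.term
//	f.ste.termExists = true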

// Decodes next entry; returns true if it's a sub-block
func (f *segmentTermsEnumFrame) nextLeaf() bool {
	panic("not implemented yet")
}

func (f *segmentTermsEnumFrame) nextNonLeaf() bool {
	panic("not implemented yet")
}

// TODO: make this array'd so we can do bin search?
// likely not worth it? need to measure how many
// floor blocks we "typically" get
func (f *segmentTermsEnumFrame) scanToFloorFrame(target []byte) {
	if !f.isFloor || len(target) <= f.prefix {
		// fmt.Printf(" scanToFloorFrame skip: isFloor=%v target.length=%v vs prefix=%v\n",
		// 	f.isFloor, len(target), f.prefix)
		return
	}

	targetLabel := int(target[f.prefix])
	fmt.Printf(" scanToFloorFrame fpOrig=%v targetLabel=%x vs nextFloorLabel=%x numFollowFloorBlocks=%v\n",
		f.fpOrig, targetLabel, f.nextFloorLabel, f.numFollowFloorBlocks)
	if targetLabel < f.nextFloorLabel {
		fmt.Println(" already on correct block")
		return
	}

	assert(f.numFollowFloorBlocks != 0)

	var newFP int64
	for {
		code, _ := f.floorDataReader.ReadVLong() // ignore error
		newFP = f.fpOrig + int64(uint64(code)>>1)
		f.hasTerms = (code & 1) != 0
		// fmt.Printf(" label=%x fp=%v hasTerms?=%v numFollowFloor=%v\n",
		// 	f.nextFloorLabel, newFP, f.hasTerms, f.numFollowFloorBlocks)

		f.isLastInFloor = f.numFollowFloorBlocks == 1
		f.numFollowFloorBlocks--

		if f.isLastInFloor {
			f.nextFloorLabel = 256
			fmt.Printf(" stop! last block nextFloorLabel=%x\n", f.nextFloorLabel)
			break
		} else {
			panic("niy")
		}
	}

	if newFP != f.fp {
		// Force re-load of the block:
		fmt.Printf(" force switch to fp=%v oldFP=%v\n", newFP, f.fp)
		f.nextEnt = -1
		f.fp = newFP
	} else {
		//
	}
}
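
// Roughly how SegmentTermsEnum drives a frame during a term seek; this is a
// simplified sketch of the call order only, not an excerpt of seekExact:
//
//	f.scanToFloorFrame(target)            // pick the right floor sub-block; may retarget f.fp
//	if err := f.loadBlock(); err != nil { // decode header, suffixes, stats, metadata blobs
//		return err
//	}
//	status, err := f.scanToTerm(target, true)
//	if status == SEEK_STATUS_FOUND {
//		err = f.decodeMetaData() // stats/postings metadata are only decoded on demand
//	}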

func (f *segmentTermsEnumFrame) decodeMetaData() (err error) {
	// fmt.Printf("BTTR.decodeMetadata seg=%v mdUpto=%v vs termBlockOrd=%v\n",
	// 	f.ste.fr.parent.segment, f.metaDataUpto, f.state.TermBlockOrd)

	// lazily catch up on metadata decode:
	limit := f.getTermBlockOrd()
	absolute := f.metaDataUpto == 0
	assert(limit > 0)

	// TODO: better API would be "jump straight to term=N"???
	for f.metaDataUpto < limit {
		// TODO: we could make "tiers" of metadata, ie,
		// decode docFreq/totalTF but don't decode postings
		// metadata; this way caller could get
		// docFreq/totalTF w/o paying decode cost for
		// postings

		// TODO: if docFreq were bulk decoded we could
		// just skipN here:

		// stats
		if f.state.DocFreq, err = asInt(f.statsReader.ReadVInt()); err != nil {
			return err
		}
		// fmt.Printf(" dF=%v\n", f.state.DocFreq)
		if f.ste.fr.fieldInfo.IndexOptions() != INDEX_OPT_DOCS_ONLY {
			var n int64
			if n, err = f.statsReader.ReadVLong(); err != nil {
				return err
			}
			f.state.TotalTermFreq = int64(f.state.DocFreq) + n
			// fmt.Printf(" totTF=%v\n", f.state.TotalTermFreq)
		}

		// metadata
		for i := 0; i < f.ste.fr.longsSize; i++ {
			if f.longs[i], err = f.bytesReader.ReadVLong(); err != nil {
				return err
			}
		}

		if err = f.ste.fr.parent.postingsReader.DecodeTerm(f.longs,
			f.bytesReader, f.ste.fr.fieldInfo, f.state, absolute); err != nil {
			return err
		}
		f.metaDataUpto++
		absolute = false
	}

	f.state.TermBlockOrd = f.metaDataUpto
	return nil
}

// Used only by assert
func (f *segmentTermsEnumFrame) prefixMatches(target []byte) bool {
	for i := 0; i < f.prefix; i++ {
		if target[i] != f.ste.term.At(i) {
			return false
		}
	}
	return true
}

// NOTE: sets startBytePos/suffix as a side effect
func (f *segmentTermsEnumFrame) scanToTerm(target []byte, exactOnly bool) (status SeekStatus, err error) {
	if f.isLeafBlock {
		return f.scanToTermLeaf(target, exactOnly)
	}
	return f.scanToTermNonLeaf(target, exactOnly)
}

// Target's prefix matches this block's prefix; we
// scan the entries to check if the suffix matches.
func (f *segmentTermsEnumFrame) scanToTermLeaf(target []byte, exactOnly bool) (status SeekStatus, err error) {
	// fmt.Printf(" scanToTermLeaf: block fp=%v prefix=%v nextEnt=%v (of %v) target=%v term=%v\n",
	// 	f.fp, f.prefix, f.nextEnt, f.entCount, brToString(target), f.ste.term)
	assert(f.nextEnt != -1)

	f.ste.termExists = true
	f.subCode = 0
	if f.nextEnt == f.entCount {
		if exactOnly {
			f.fillTerm()
		}
		return SEEK_STATUS_END, nil
	}

	if !f.prefixMatches(target) {
		panic("assert fail")
	}

	// Loop over each entry (term or sub-block) in this block:
	// nextTerm: while(nextEnt < entCount) {
	for {
		f.nextEnt++
		f.suffix, err = asInt(f.suffixesReader.ReadVInt())
		if err != nil {
			return 0, err
		}

		// suffixReaderPos := f.suffixesReader.Pos
		// fmt.Printf(" cycle: term %v (of %v) suffix=%v\n",
		// 	f.nextEnt-1, f.entCount, brToString(f.suffixBytes[suffixReaderPos:suffixReaderPos+f.suffix]))

		termLen := f.prefix + f.suffix
		f.startBytePos = f.suffixesReader.Pos
		f.suffixesReader.SkipBytes(int64(f.suffix))

		targetLimit := termLen
		if len(target) < termLen {
			targetLimit = len(target)
		}
		targetPos := f.prefix

		// Loop over bytes in the suffix, comparing to
		// the target
		bytePos := f.startBytePos
		isDone := false
		for {
			var cmp int
			var stop bool
			if targetPos < targetLimit {
				cmp = int(f.suffixBytes[bytePos]) - int(target[targetPos])
				bytePos++
				targetPos++
				stop = false
			} else {
				if targetPos != targetLimit {
					panic("assert fail")
				}
				cmp = termLen - len(target)
				stop = true
			}

			if cmp < 0 {
				// Current entry is still before the target;
				// keep scanning

				if f.nextEnt == f.entCount {
					if exactOnly {
						f.fillTerm()
					}
					// We are done scanning this block
					isDone = true
				}
				break
			} else if cmp > 0 {
				// Done! Current entry is after target --
				// return NOT_FOUND:
				f.fillTerm()

				// fmt.Println(" not found")
				return SEEK_STATUS_NOT_FOUND, nil
			} else if stop {
				// Exact match!

				// This cannot be a sub-block because we
				// would have followed the index to this
				// sub-block from the start:

				assert(f.ste.termExists)
				f.fillTerm()
				// fmt.Println(" found!")
				return SEEK_STATUS_FOUND, nil
			}
		}
		if isDone {
			// double jump
			break
		}
	}

	// It is possible (and OK) that terms index pointed us
	// at this block, but, we scanned the entire block and
	// did not find the term to position to. This happens
	// when the target is after the last term in the block
	// (but, before the next term in the index). EG
	// target could be foozzz, and terms index pointed us
	// to the foo* block, but the last term in this block
	// was fooz (and, eg, first term in the next block will
	// be fop).
	fmt.Println(" block end")
	if exactOnly {
		f.fillTerm()
	}

	// TODO: not consistent that in the
	// not-exact case we don't next() into the next
	// frame here
	return SEEK_STATUS_END, nil
}
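
// In a non-leaf (mixed) block, decoded below, each entry starts with a vInt
// code: the suffix length is code>>1 and the low bit marks a sub-block rather
// than a term. Term entries advance state.TermBlockOrd; sub-block entries
// carry an extra vLong subCode, and the child block's file pointer is
// fp-subCode (recorded in lastSubFP).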

// Target's prefix matches this block's prefix; we
// scan the entries to check if the suffix matches.
func (f *segmentTermsEnumFrame) scanToTermNonLeaf(target []byte,
	exactOnly bool) (status SeekStatus, err error) {

	fmt.Printf(
		" scanToTermNonLeaf: block fp=%v prefix=%v nextEnt=%v (of %v) target=%v term=%v",
		f.fp, f.prefix, f.nextEnt, f.entCount, brToString(target), "" /*brToString(term)*/)

	assert(f.nextEnt != -1)

	if f.nextEnt == f.entCount {
		panic("not implemented yet")
	}

	assert(f.prefixMatches(target))

	// Loop over each entry (term or sub-block) in this block:
	for {
		f.nextEnt++

		code, _ := f.suffixesReader.ReadVInt() // no error
		f.suffix = int(uint32(code) >> 1)

		f.ste.termExists = (code & 1) == 0
		termLen := f.prefix + f.suffix
		f.startBytePos = f.suffixesReader.Position()
		f.suffixesReader.SkipBytes(int64(f.suffix))
		if f.ste.termExists {
			f.state.TermBlockOrd++
			f.subCode = 0
		} else {
			f.subCode, _ = f.suffixesReader.ReadVLong() // no error
			f.lastSubFP = f.fp - f.subCode
		}

		targetLimit := termLen
		if len(target) < termLen {
			targetLimit = len(target)
		}
		targetPos := f.prefix

		// Loop over bytes in the suffix, comparing to the target
		bytePos := f.startBytePos
		var toNextTerm, stopScan bool
		for {
			var cmp int
			var stop bool
			if targetPos < targetLimit {
				cmp = int(f.suffixBytes[bytePos]) - int(target[targetPos])
				bytePos++
				targetPos++
				stop = false
			} else {
				assert(targetPos == targetLimit)
				cmp = termLen - len(target)
				stop = true
			}

			if cmp < 0 {
				// Current entry is still before the target;
				// keep scanning

				if f.nextEnt == f.entCount {
					if exactOnly {
						f.fillTerm()
					}
					// We are done scanning this block
					stopScan = true
					break
				} else {
					toNextTerm = true
					break
				}
			} else if cmp > 0 {
				// Done! Current entry is after target -- return NOT_FOUND:
				f.fillTerm()

				if !exactOnly && !f.ste.termExists {
					panic("niy")
				}

				fmt.Println(" not found")
				return SEEK_STATUS_NOT_FOUND, nil
			} else if stop {
				// Exact match!

				// This cannot be a sub-block because we would have followed
				// the index to this sub-block from the start:

				assert(f.ste.termExists)
				f.fillTerm()
				fmt.Println(" found!")
				return SEEK_STATUS_FOUND, nil
			}
		}
		if toNextTerm {
			continue
		}
		if stopScan {
			break
		}
	}

	// It is possible (and OK) that terms index pointed us at this
	// block, but, we scanned the entire block and did not find the
	// term to position to. This happens when the target is after the
	// last term in the block (but, before the next term in the index).
	// E.g., target could be foozzz, and terms index pointed us to the
	// foo* block, but the last term in this block was fooz (and, e.g.,
	// first term in the next block will be fop).
	fmt.Println(" block end")
	if exactOnly {
		f.fillTerm()
	}

	return SEEK_STATUS_END, nil
}

// fillTerm materializes the current entry into ste.term: the shared prefix is
// already in place, and the suffix bytes just scanned are copied in after it.
func (f *segmentTermsEnumFrame) fillTerm() {
	termLength := f.prefix + f.suffix
	f.ste.term.SetLength(termLength)
	f.ste.term.Grow(termLength)
	copy(f.ste.term.Bytes()[f.prefix:], f.suffixBytes[f.startBytePos:f.startBytePos+f.suffix])
}

// for debugging
func brToString(b []byte) string {
	if b == nil {
		return "nil"
	}
	var buf bytes.Buffer
	buf.WriteString("[")
	for i, v := range b {
		if i > 0 {
			buf.WriteString(" ")
		}
		fmt.Fprintf(&buf, "%x", v)
	}
	buf.WriteString("]")
	return fmt.Sprintf("%v %v", utf8ToString(b), buf.String())
}

// Simpler version of Lucene's own method
func utf8ToString(iso8859_1_buf []byte) string {
	// buf := make([]rune, len(iso8859_1_buf))
	// for i, b := range iso8859_1_buf {
	// 	buf[i] = rune(b)
	// }
	// return string(buf)
	// TODO remove this method
	return string(iso8859_1_buf)
}

// // Lucene's BytesRef is basically Slice in Go, except that here
// // it's used as a local buffer whose data is filled while the
// // length stays unchanged temporarily.
// type bytesRef struct {
// 	/** The contents of the BytesRef. Should never be {@code null}. */
// 	bytes []byte
// 	/** Length of used bytes. */
// 	length int
// }

// func newBytesRef() *bytesRef {
// 	return &bytesRef{}
// }

// func (br *bytesRef) toBytes() []byte {
// 	return br.bytes[0:br.length]
// }

// func (br *bytesRef) ensureSize(minSize int) {
// 	assert(minSize >= 0)
// 	if cap(br.bytes) < minSize {
// 		next := make([]byte, util.Oversize(minSize, 1))
// 		copy(next, br.bytes)
// 		br.bytes = next
// 	}
// }

// func (br *bytesRef) String() string {
// 	return brToString(br.bytes[0:br.length])
// }

// /**
//  * Copies the bytes from the given {@link BytesRef}
//  * <p>
//  * NOTE: if this would exceed the array size, this method creates a
//  * new reference array.
//  */
// func (br *bytesRef) copyBytes(other []byte) {
// 	if cap(br.bytes) < len(other) {
// 		next := make([]byte, len(other))
// 		br.bytes = next
// 	} else if len(br.bytes) < len(other) {
// 		br.bytes = br.bytes[0:len(other)]
// 	}
// 	copy(br.bytes, other)
// 	br.length = len(other)
// }