github.com/balzaczyy/golucene@v0.0.0-20151210033525-d0be9ee89713/core/index/termsHashConsumerPerField.go

package index

import (
	// "fmt"
	. "github.com/balzaczyy/golucene/core/analysis/tokenattributes"
	"github.com/balzaczyy/golucene/core/codec"
	. "github.com/balzaczyy/golucene/core/codec/spi"
	. "github.com/balzaczyy/golucene/core/index/model"
	"github.com/balzaczyy/golucene/core/util"
)

// type TermsHashConsumerPerField interface {
// 	start([]IndexableField, int) (bool, error)
// 	finish() error
// 	startField(IndexableField) error
// 	newTerm(int) error
// 	streamCount() int
// 	createPostingsArray(int) *ParallelPostingsArray
// }

// index/TermVectorsConsumerPerField.java

type TermVectorsConsumerPerField struct {
	*TermsHashPerFieldImpl

	termVectorsPostingsArray *TermVectorsPostingArray

	termsWriter *TermVectorsConsumer

	doVectors, doVectorPositions, doVectorOffsets, doVectorPayloads bool

	payloadAttribute PayloadAttribute
	offsetAttribute  OffsetAttribute
	hasPayloads      bool // if enabled, and we actually saw any for this field
}

func newTermVectorsConsumerPerField(invertState *FieldInvertState,
	termsWriter *TermVectorsConsumer,
	fieldInfo *FieldInfo) *TermVectorsConsumerPerField {

	ans := &TermVectorsConsumerPerField{
		termsWriter: termsWriter,
	}
	ans.TermsHashPerFieldImpl = new(TermsHashPerFieldImpl)
	ans.TermsHashPerFieldImpl._constructor(
		ans, 2, invertState, termsWriter, nil, fieldInfo)
	return ans
}

func (c *TermVectorsConsumerPerField) start(field IndexableField, first bool) bool {
	t := field.FieldType()
	assert(t.Indexed())

	if first {

		if c.bytesHash.Size() != 0 {
			// only necessary if previous doc hit a non-aborting error
			// while writing vectors in this field:
			c.reset()
		}

		c.bytesHash.Reinit()

		c.hasPayloads = false

		if c.doVectors = t.StoreTermVectors(); c.doVectors {
			panic("not implemented yet")
		} else {
			assert2(!t.StoreTermVectorOffsets(),
				"cannot index term vector offsets when term vectors are not indexed (field='%v')",
				field.Name())
			assert2(!t.StoreTermVectorPositions(),
				"cannot index term vector positions when term vectors are not indexed (field='%v')",
				field.Name())
			assert2(!t.StoreTermVectorPayloads(),
				"cannot index term vector payloads when term vectors are not indexed (field='%v')",
				field.Name())
		}
	} else {
		panic("not implemented yet")
	}

	if c.doVectors {
		panic("not implemented yet")
	}

	return c.doVectors
}
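
// Note on the validation above: the per-vector flags are only meaningful
// when term vectors themselves are enabled. For example, a FieldType with
// StoreTermVectorOffsets() == true but StoreTermVectors() == false trips
// the first assert2 with a descriptive message instead of silently
// dropping the offsets.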

// /*
// Called once per field per document if term vectors are enabled, to
// write the vectors to RAMOutputStream, which is then quickly flushed
// to the real term vectors files in the Directory.
// */
// func (c *TermVectorsConsumerPerField) finish() error {
// 	if !c.doVectors || c.termsHashPerField.bytesHash.Size() == 0 {
// 		return nil
// 	}
// 	c.termsWriter.addFieldToFlush(c)
// 	return nil
// }

func (c *TermVectorsConsumerPerField) finishDocument() error {
	panic("not implemented yet")
}

// func (c *TermVectorsConsumerPerField) shrinkHash() {
// 	c.termsHashPerField.shrinkHash(c.maxNumPostings)
// 	c.maxNumPostings = 0
// }

// func (c *TermVectorsConsumerPerField) startField(f IndexableField) error {
// 	atts := c.fieldState.attributeSource
// 	if c.doVectorOffsets {
// 		c.offsetAttribute = atts.Add("OffsetAttribute").(OffsetAttribute)
// 	} else {
// 		c.offsetAttribute = nil
// 	}
// 	if c.doVectorPayloads && atts.Has("PayloadAttribute") {
// 		c.payloadAttribute = atts.Get("PayloadAttribute").(PayloadAttribute)
// 	} else {
// 		c.payloadAttribute = nil
// 	}
// 	return nil
// }

func (c *TermVectorsConsumerPerField) newTerm(termId int) {
	panic("not implemented yet")
}

func (c *TermVectorsConsumerPerField) addTerm(termId int) {
	panic("not implemented yet")
}

func (c *TermVectorsConsumerPerField) newPostingsArray() {
	if c.postingsArray != nil {
		c.termVectorsPostingsArray = c.postingsArray.PostingsArray.(*TermVectorsPostingArray)
	} else {
		c.termVectorsPostingsArray = nil
	}
}

func (c *TermVectorsConsumerPerField) createPostingsArray(size int) *ParallelPostingsArray {
	return newTermVectorsPostingArray(size)
}

type TermVectorsPostingArray struct {
	freqs         []int // How many times this term occurred in the current doc
	lastOffsets   []int // Last offset we saw
	lastPositions []int // Last position where this term occurred
}
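
// A note on the layout above (termId 7 is hypothetical): each unique term
// owns one slot, indexed by its termId, across every parallel array, so a
// lookup for that term touches freqs[7], lastOffsets[7] and
// lastPositions[7]. The arrays are sized and grown together through
// newParallelPostingsArray below.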

func newTermVectorsPostingArray(size int) *ParallelPostingsArray {
	ans := new(TermVectorsPostingArray)
	return newParallelPostingsArray(ans, size)
}

func (arr *TermVectorsPostingArray) newInstance(size int) PostingsArray {
	return newTermVectorsPostingArray(size)
}

func (arr *TermVectorsPostingArray) copyTo(toArray PostingsArray, numToCopy int) {
	panic("not implemented yet")
}

func (arr *TermVectorsPostingArray) bytesPerPosting() int {
	return BYTES_PER_POSTING + 3*util.NUM_BYTES_INT
}

// TODO: break into separate freq and prox writers as codecs; make
// separate container (tii/tis/skip/*) that can be configured as any
// number of files 1..N
type FreqProxTermsWriterPerField struct {
	*TermsHashPerFieldImpl

	freqProxPostingsArray *FreqProxPostingsArray

	// parent            *FreqProxTermsWriter
	// termsHashPerField *TermsHashPerField
	// fieldInfo         *FieldInfo
	// docState          *docState
	// fieldState        *FieldInvertState

	hasFreq          bool
	hasProx          bool
	hasOffsets       bool
	hasPayloads      bool
	payloadAttribute PayloadAttribute
	offsetAttribute  OffsetAttribute

	sawPayloads bool // true if any token had a payload in the current segment
}

func newFreqProxTermsWriterPerField(invertState *FieldInvertState,
	termsHash TermsHash, fieldInfo *FieldInfo,
	nextPerField TermsHashPerField) *FreqProxTermsWriterPerField {

	indexOptions := fieldInfo.IndexOptions()
	assert(int(indexOptions) != 0)
	hasProx := indexOptions >= INDEX_OPT_DOCS_AND_FREQS_AND_POSITIONS
	ans := &FreqProxTermsWriterPerField{
		hasFreq:    indexOptions >= INDEX_OPT_DOCS_AND_FREQS,
		hasProx:    hasProx,
		hasOffsets: indexOptions >= INDEX_OPT_DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS,
	}
	streamCount := map[bool]int{true: 2, false: 1}[hasProx]
	ans.TermsHashPerFieldImpl = new(TermsHashPerFieldImpl)
	ans.TermsHashPerFieldImpl._constructor(
		ans, streamCount, invertState,
		termsHash, nextPerField, fieldInfo,
	)
	return ans
}
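
// The streamCount chosen above maps onto the two byte streams used by the
// methods below: stream 0 carries the doc/freq deltas (the writeVInt(0, ...)
// calls in addTerm) and, when hasProx is set, stream 1 carries positions and
// payloads (see writeProx). Fields indexed without positions therefore get
// by with a single stream.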

// func (w *FreqProxTermsWriterPerField) streamCount() int {
// 	if !w.hasProx {
// 		return 1
// 	}
// 	return 2
// }

func (w *FreqProxTermsWriterPerField) finish() error {
	err := w.TermsHashPerFieldImpl.finish()
	if err == nil && w.sawPayloads {
		panic("not implemented yet")
		// w.fieldInfo.SetStorePayloads()
	}
	return err
}

/* Called after flush */
// func (w *FreqProxTermsWriterPerField) reset() {
// 	// record, up front, whether our in-RAM format will be
// 	// with or without term freqs:
// 	w.setIndexOptions(w.fieldInfo.IndexOptions())
// 	w.payloadAttribute = nil
// }

// func (w *FreqProxTermsWriterPerField) setIndexOptions(indexOptions IndexOptions) {
// 	if n := int(indexOptions); n > 0 {
// 		w.hasFreq = n >= int(INDEX_OPT_DOCS_AND_FREQS)
// 		w.hasProx = n >= int(INDEX_OPT_DOCS_AND_FREQS_AND_POSITIONS)
// 		w.hasOffsets = n >= int(INDEX_OPT_DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
// 	} else {
// 		// field could later be updated with indexed=true, so set everything on
// 		w.hasFreq = true
// 		w.hasProx = true
// 		w.hasOffsets = true
// 	}
// }

// func (w *FreqProxTermsWriterPerField) start(fields []IndexableField, count int) (bool, error) {
// 	for _, field := range fields[:count] {
// 		if field.FieldType().Indexed() {
// 			return true, nil
// 		}
// 	}
// 	return false, nil
// }

func (w *FreqProxTermsWriterPerField) start(f IndexableField, first bool) bool {
	w.TermsHashPerFieldImpl.start(f, first)
	w.payloadAttribute = w.fieldState.payloadAttribute
	w.offsetAttribute = w.fieldState.offsetAttribute
	return true
}

func (w *FreqProxTermsWriterPerField) writeProx(termId, proxCode int) {
	if w.payloadAttribute == nil {
		w.writeVInt(1, proxCode<<1)
	} else {
		payload := w.payloadAttribute.Payload()
		if len(payload) > 0 {
			panic("not implemented yet")
		} else {
			w.writeVInt(1, proxCode<<1)
		}
	}

	assert(w.postingsArray.PostingsArray == w.freqProxPostingsArray)
	w.freqProxPostingsArray.lastPositions[termId] = w.fieldState.position
}
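
// Worked example of the prox encoding above (a sketch with hypothetical
// values, not part of the original code):
//
// 	proxCode := 5         // term at position 5, no payload
// 	code := proxCode << 1 // -> 10, low bit clear
//
// The not-yet-implemented payload branch would set the low bit instead,
// writing (proxCode<<1)|1 = 11 followed by the payload, which is why the
// flush loop below checks (code & 1) before decoding the position delta
// with uint(code) >> 1.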

func (w *FreqProxTermsWriterPerField) writeOffsets(termId, offsetAccum int) {
	panic("not implemented yet")
}

func (w *FreqProxTermsWriterPerField) newTerm(termId int) {
	// First time we're seeing this term since the last flush
	w.docState.testPoint("FreqProxTermsWriterPerField.newTerm start")

	postings := w.freqProxPostingsArray
	assert(postings != nil)

	postings.lastDocIDs[termId] = w.docState.docID
	if !w.hasFreq {
		assert(postings.termFreqs == nil)
		postings.lastDocCodes[termId] = w.docState.docID
	} else {
		postings.lastDocCodes[termId] = w.docState.docID << 1
		postings.termFreqs[termId] = 1
		if w.hasProx {
			w.writeProx(termId, w.fieldState.position)
			if w.hasOffsets {
				w.writeOffsets(termId, w.fieldState.offset)
			}
		} else {
			assert(!w.hasOffsets)
		}
	}
	if w.fieldState.maxTermFrequency < 1 {
		w.fieldState.maxTermFrequency = 1
	}
	w.fieldState.uniqueTermCount++
}
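
// A sketch of the deferred doc-code state after newTerm (docID 3 is
// hypothetical), assuming freqs are enabled:
//
// 	postings.lastDocIDs[termId] = 3   // current doc
// 	postings.lastDocCodes[termId] = 6 // 3<<1, held back, not yet written
// 	postings.termFreqs[termId] = 1
//
// Nothing goes to stream 0 yet: the code is drained either by addTerm when
// the term reappears in a later doc, or by flush for the final doc, because
// only then is the term's frequency for doc 3 known.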

func (w *FreqProxTermsWriterPerField) addTerm(termId int) {
	w.docState.testPoint("FreqProxTermsWriterPerField.addTerm start")

	postings := w.freqProxPostingsArray

	assert(!w.hasFreq || postings.termFreqs[termId] > 0)

	if !w.hasFreq {
		panic("not implemented yet")
	} else if w.docState.docID != postings.lastDocIDs[termId] {
		assert2(w.docState.docID > postings.lastDocIDs[termId],
			"id: %v postings ID: %v termID: %v",
			w.docState.docID, postings.lastDocIDs[termId], termId)
		// Term not yet seen in the current doc but previously seen in
		// other doc(s) since the last flush

		// Now that we know doc freq for previous doc, write it & lastDocCode
		if postings.termFreqs[termId] == 1 {
			w.writeVInt(0, postings.lastDocCodes[termId]|1)
		} else {
			w.writeVInt(0, postings.lastDocCodes[termId])
			w.writeVInt(0, postings.termFreqs[termId])
		}

		// Init freq for the current document
		postings.termFreqs[termId] = 1
		if w.fieldState.maxTermFrequency < 1 {
			w.fieldState.maxTermFrequency = 1
		}
		postings.lastDocCodes[termId] = (w.docState.docID - postings.lastDocIDs[termId]) << 1
		postings.lastDocIDs[termId] = w.docState.docID
		if w.hasProx {
			w.writeProx(termId, w.fieldState.position)
			if w.hasOffsets {
				panic("not implemented yet")
			}
		} else {
			assert(!w.hasOffsets)
		}
		w.fieldState.uniqueTermCount++
	} else {
		postings.termFreqs[termId]++
		if n := postings.termFreqs[termId]; n > w.fieldState.maxTermFrequency {
			w.fieldState.maxTermFrequency = n
		}
		if w.hasProx {
			w.writeProx(termId, w.fieldState.position-postings.lastPositions[termId])
			if w.hasOffsets {
				w.writeOffsets(termId, w.fieldState.offset)
			}
		}
	}
}
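
// Continuing the sketch from newTerm (docIDs hypothetical): if the term
// from doc 3 next appears in doc 5, addTerm writes the pending code
// 6|1 = 7 as a single VInt (an odd code means "freq was exactly 1", so no
// second VInt is needed), or writes 6 followed by the freq when it was
// higher, then re-arms lastDocCodes[termId] = (5-3)<<1 = 4 as a delta.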

func (w *FreqProxTermsWriterPerField) newPostingsArray() {
	if arr := w.postingsArray; arr != nil {
		w.freqProxPostingsArray = arr.PostingsArray.(*FreqProxPostingsArray)
	} else {
		w.freqProxPostingsArray = nil
	}
}

func (w *FreqProxTermsWriterPerField) createPostingsArray(size int) *ParallelPostingsArray {
	indexOptions := w.fieldInfo.IndexOptions()
	assert(indexOptions != 0)
	hasFreq := indexOptions >= INDEX_OPT_DOCS_AND_FREQS
	hasProx := indexOptions >= INDEX_OPT_DOCS_AND_FREQS_AND_POSITIONS
	hasOffsets := indexOptions >= INDEX_OPT_DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS
	return newFreqProxPostingsArray(size, hasFreq, hasProx, hasOffsets)
}

type FreqProxPostingsArray struct {
	*ParallelPostingsArray
	termFreqs     []int // # times this term occurs in the current doc
	lastDocIDs    []int // Last docID where this term occurred
	lastDocCodes  []int // Code for prior doc
	lastPositions []int // Last position where this term occurred
	lastOffsets   []int // Last endOffset where this term occurred
}

func newFreqProxPostingsArray(size int, writeFreqs, writeProx, writeOffsets bool) *ParallelPostingsArray {
	ans := new(FreqProxPostingsArray)
	ans.ParallelPostingsArray = newParallelPostingsArray(ans, size)
	if writeFreqs {
		ans.termFreqs = make([]int, size)
	}
	ans.lastDocIDs = make([]int, size)
	ans.lastDocCodes = make([]int, size)
	if writeProx {
		ans.lastPositions = make([]int, size)
		if writeOffsets {
			ans.lastOffsets = make([]int, size)
		}
	} else {
		assert(!writeOffsets)
	}
	// fmt.Printf("PA init freqs=%v pos=%v offs=%v\n", writeFreqs, writeProx, writeOffsets)
	return ans.ParallelPostingsArray
}

func (arr *FreqProxPostingsArray) newInstance(size int) PostingsArray {
	return newFreqProxPostingsArray(size, arr.termFreqs != nil,
		arr.lastPositions != nil, arr.lastOffsets != nil)
}

func (arr *FreqProxPostingsArray) copyTo(toArray PostingsArray, numToCopy int) {
	to, ok := toArray.(*ParallelPostingsArray).PostingsArray.(*FreqProxPostingsArray)
	assert(ok)

	arr.ParallelPostingsArray.copyTo(toArray, numToCopy)

	copy(to.lastDocIDs[:numToCopy], arr.lastDocIDs[:numToCopy])
	copy(to.lastDocCodes[:numToCopy], arr.lastDocCodes[:numToCopy])
	if arr.lastPositions != nil {
		assert(to.lastPositions != nil)
		copy(to.lastPositions[:numToCopy], arr.lastPositions[:numToCopy])
	}
	if arr.lastOffsets != nil {
		assert(to.lastOffsets != nil)
		copy(to.lastOffsets[:numToCopy], arr.lastOffsets[:numToCopy])
	}
	if arr.termFreqs != nil {
		assert(to.termFreqs != nil)
		copy(to.termFreqs[:numToCopy], arr.termFreqs[:numToCopy])
	}
}

func (arr *FreqProxPostingsArray) bytesPerPosting() int {
	bytes := BYTES_PER_POSTING + 2*util.NUM_BYTES_INT
	if arr.lastPositions != nil {
		bytes += util.NUM_BYTES_INT
	}
	if arr.lastOffsets != nil {
		bytes += util.NUM_BYTES_INT
	}
	if arr.termFreqs != nil {
		bytes += util.NUM_BYTES_INT
	}
	return bytes
}
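
// Rough arithmetic for the accounting above, assuming 4-byte ints and a
// BYTES_PER_POSTING covering the three base parallel arrays as in Lucene:
// a field indexed with freqs, positions and offsets costs
// 12 + 2*4 (docIDs/codes) + 3*4 (freqs, positions, offsets) = 32 bytes per
// unique term.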

/*
Walk through all unique text tokens (Posting instances) found in this
field and serialize them into a single RAM segment.
*/
func (w *FreqProxTermsWriterPerField) flush(fieldName string,
	consumer FieldsConsumer, state *SegmentWriteState) error {
	if !w.fieldInfo.IsIndexed() {
		return nil // nothing to flush, don't bother the codec with the unindexed field
	}

	termsConsumer, err := consumer.AddField(w.fieldInfo)
	if err != nil {
		return err
	}
	termComp := termsConsumer.Comparator()

	// CONFUSING: this.indexOptions holds the index options that were
	// current when we first saw this field. But it's possible this has
	// changed, e.g. when other documents are indexed that cause a
	// "downgrade" of the IndexOptions. So we must decode the in-RAM
	// buffer according to this.indexOptions, but then write the new
	// segment to the directory according to currentFieldIndexOptions:
	currentFieldIndexOptions := w.fieldInfo.IndexOptions()
	assert(int(currentFieldIndexOptions) != 0)

	writeTermFreq := int(currentFieldIndexOptions) >= int(INDEX_OPT_DOCS_AND_FREQS)
	writePositions := int(currentFieldIndexOptions) >= int(INDEX_OPT_DOCS_AND_FREQS_AND_POSITIONS)
	writeOffsets := int(currentFieldIndexOptions) >= int(INDEX_OPT_DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)

	readTermFreq := w.hasFreq
	readPositions := w.hasProx
	readOffsets := w.hasOffsets

	// fmt.Printf("flush readTF=%v readPos=%v readOffs=%v\n",
	// 	readTermFreq, readPositions, readOffsets)

	// Make sure FieldInfo.update is working correctly
	assert(!writeTermFreq || readTermFreq)
	assert(!writePositions || readPositions)
	assert(!writeOffsets || readOffsets)

	assert(!writeOffsets || writePositions)
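
	// Example of the downgrade described in the comment above (scenario
	// hypothetical): a field first indexed with positions (readPositions
	// true) may later be downgraded to docs+freqs only; the in-RAM buffer
	// is still decoded with positions, but writePositions is false, so they
	// are dropped on the way out. The reverse (writing more than was read)
	// is exactly what these asserts rule out.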

	var segUpdates map[*Term]int
	if state.SegUpdates != nil && len(state.SegUpdates.(*BufferedUpdates).terms) > 0 {
		segUpdates = state.SegUpdates.(*BufferedUpdates).terms
	}

	termIDs := w.sortPostings(termComp)
	numTerms := w.bytesHash.Size()
	text := new(util.BytesRef)
	postings := w.freqProxPostingsArray
	freq := newByteSliceReader()
	prox := newByteSliceReader()

	visitedDocs := util.NewFixedBitSetOf(state.SegmentInfo.DocCount())
	sumTotalTermFreq := int64(0)
	sumDocFreq := int64(0)

	protoTerm := NewEmptyTerm(fieldName)
	for i := 0; i < numTerms; i++ {
		termId := termIDs[i]
		// fmt.Printf("term=%v\n", termId)
		// Get BytesRef
		textStart := postings.textStarts[termId]
		w.bytePool.SetBytesRef(text, textStart)

		w.initReader(freq, termId, 0)
		if readPositions || readOffsets {
			w.initReader(prox, termId, 1)
		}

		// TODO: really TermsHashPerField should take over most of this
		// loop, including merge sort of terms from multiple threads and
		// interacting with the TermsConsumer, only calling out to us
		// (passing us the DocConsumer) to handle delivery of docs/positions

		postingsConsumer, err := termsConsumer.StartTerm(text.ToBytes())
		if err != nil {
			return err
		}

		delDocLimit := 0
		if segUpdates != nil {
			protoTerm.Bytes = text.ToBytes()
			if docIDUpto, ok := segUpdates[protoTerm]; ok {
				delDocLimit = docIDUpto
			}
		}

		// Now termStates has numToMerge FieldMergeStates which all
		// share the same term. Now we must interleave the docID streams.
		docFreq := 0
		totalTermFreq := int64(0)
		docId := 0

		for {
			// fmt.Println("  cycle")
			var termFreq int
			if freq.eof() {
				if postings.lastDocCodes[termId] != -1 {
					// return last doc
					docId = postings.lastDocIDs[termId]
					if readTermFreq {
						termFreq = postings.termFreqs[termId]
					} else {
						termFreq = -1
					}
					postings.lastDocCodes[termId] = -1
				} else {
					// EOF
					break
				}
			} else {
				code, err := freq.ReadVInt()
				if err != nil {
					return err
				}
				if !readTermFreq {
					docId += int(code)
					termFreq = -1
				} else {
					docId += int(uint(code) >> 1)
					if (code & 1) != 0 {
						termFreq = 1
					} else {
						n, err := freq.ReadVInt()
						if err != nil {
							return err
						}
						termFreq = int(n)
					}
				}

				assert(docId != postings.lastDocIDs[termId])
			}
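
			// Decode side of the scheme sketched at addTerm (values
			// hypothetical): a VInt of 7 gives docId += 7>>1 = 3 with the
			// low bit set, so termFreq is 1 with no second read; a VInt of
			// 6 gives the same delta, but the real frequency follows as a
			// separate VInt.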

			docFreq++
			assert2(docId < state.SegmentInfo.DocCount(),
				"doc=%v maxDoc=%v", docId, state.SegmentInfo.DocCount())

			// NOTE: we could check here if the docID was deleted, and skip
			// it. However, this is somewhat dangerous because it can yield
			// non-deterministic behavior since we may see the docID before
			// we see the term that caused it to be deleted. This would
			// mean some (but not all) of its postings may make it into the
			// index, which'd alter the docFreq for those terms. We could
			// fix this by doing two passes, i.e. first sweep marks all del
			// docs, and 2nd sweep does the real flush, but I suspect
			// that'd add too much time to flush.
			visitedDocs.Set(docId)
			err := postingsConsumer.StartDoc(docId,
				map[bool]int{true: termFreq, false: -1}[writeTermFreq])
			if err != nil {
				return err
			}
			if docId < delDocLimit {
				panic("not implemented yet")
			}

			totalTermFreq += int64(termFreq)

			// Carefully copy over the prox + payload info, changing the
			// format to match Lucene's segment format.

			if readPositions || readOffsets {
				// we did record positions (& maybe payload) and/or offsets
				position := 0
				// offset := 0
				for j := 0; j < termFreq; j++ {
					var thisPayload []byte

					if readPositions {
						code, err := prox.ReadVInt()
						if err != nil {
							return err
						}
						position += int(uint(code) >> 1)

						if (code & 1) != 0 {
							panic("not implemented yet")
						}

						if readOffsets {
							panic("not implemented yet")
						} else if writePositions {
							err = postingsConsumer.AddPosition(position, thisPayload, -1, -1)
							if err != nil {
								return err
							}
						}
					}
				}
			}
			err = postingsConsumer.FinishDoc()
			if err != nil {
				return err
			}
		}
		err = termsConsumer.FinishTerm(text.ToBytes(), codec.NewTermStats(docFreq,
			map[bool]int64{true: totalTermFreq, false: -1}[writeTermFreq]))
		if err != nil {
			return err
		}
		sumTotalTermFreq += totalTermFreq
		sumDocFreq += int64(docFreq)
	}

	return termsConsumer.Finish(
		map[bool]int64{true: sumTotalTermFreq, false: -1}[writeTermFreq],
		sumDocFreq, visitedDocs.Cardinality())
}