github.com/balzaczyy/golucene@v0.0.0-20151210033525-d0be9ee89713/core/index/termsHashConsumer.go (about)

     1  package index
     2  
     3  import (
     4  	. "github.com/balzaczyy/golucene/core/codec/spi"
     5  	"github.com/balzaczyy/golucene/core/index/model"
     6  	// "github.com/balzaczyy/golucene/core/store"
     7  	"github.com/balzaczyy/golucene/core/util"
     8  )
     9  
    10  // index/TermsHashConsumer.java
    11  
    12  // type TermsHashConsumer interface {
    13  // 	flush(map[string]TermsHashConsumerPerField, *model.SegmentWriteState) error
    14  // 	abort()
    15  // 	startDocument()
    16  // 	finishDocument(*TermsHash) error
    17  // 	addField(*TermsHashPerField, *model.FieldInfo) TermsHashConsumerPerField
    18  // }
    19  
    20  // index/TermVectorsConsumer.java
    21  
    22  type TermVectorsConsumer struct {
    23  	*TermsHashImpl
    24  
    25  	writer TermVectorsWriter
    26  
    27  	docWriter *DocumentsWriterPerThread
    28  
    29  	hasVectors       bool
    30  	numVectorsFields int
    31  	lastDocId        int
    32  	perFields        []*TermVectorsConsumerPerField
    33  }
    34  
    35  func newTermVectorsConsumer(docWriter *DocumentsWriterPerThread) *TermVectorsConsumer {
    36  	ans := &TermVectorsConsumer{
    37  		docWriter: docWriter,
    38  	}
    39  	ans.TermsHashImpl = newTermsHash(ans, docWriter, false, nil)
    40  	return ans
    41  }
    42  
    43  func (tvc *TermVectorsConsumer) flush(fieldsToFlush map[string]TermsHashPerField,
    44  	state *model.SegmentWriteState) (err error) {
    45  	if tvc.writer != nil {
    46  		numDocs := state.SegmentInfo.DocCount()
    47  		assert(numDocs > 0)
    48  		// At least one doc in this run had term vectors enabled
    49  		func() {
    50  			defer func() {
    51  				err = mergeError(err, util.Close(tvc.writer))
    52  				tvc.writer = nil
    53  				tvc.lastDocId = 0
    54  				tvc.hasVectors = false
    55  			}()
    56  
    57  			err = tvc.fill(numDocs)
    58  			if err == nil {
    59  				err = tvc.writer.Finish(state.FieldInfos, numDocs)
    60  			}
    61  		}()
    62  		if err != nil {
    63  			return err
    64  		}
    65  	}
    66  
    67  	return
    68  }
    69  
    70  /*
    71  Fills in no-term-vectors for all docs we haven't seen since the last
    72  doc that had term vectors.
    73  */
    74  func (c *TermVectorsConsumer) fill(docId int) error {
    75  	for c.lastDocId < docId {
    76  		c.writer.StartDocument(0)
    77  		err := c.writer.FinishDocument()
    78  		if err != nil {
    79  			return err
    80  		}
    81  		c.lastDocId++
    82  	}
    83  	return nil
    84  }
    85  
    86  func (c *TermVectorsConsumer) initTermVectorsWriter() error {
    87  	if c.writer == nil {
    88  		panic("not implemented yet")
    89  	}
    90  	return nil
    91  }
    92  
    93  type TermVectorsConsumerPerFields []*TermVectorsConsumerPerField
    94  
    95  func (a TermVectorsConsumerPerFields) Len() int      { return len(a) }
    96  func (a TermVectorsConsumerPerFields) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
    97  func (a TermVectorsConsumerPerFields) Less(i, j int) bool {
    98  	return a[i].fieldInfo.Name < a[j].fieldInfo.Name
    99  }
   100  
   101  func (c *TermVectorsConsumer) finishDocument() (err error) {
   102  	c.docWriter.testPoint("TermVectorsTermsWriter.finishDocument start")
   103  
   104  	if !c.hasVectors {
   105  		return
   106  	}
   107  
   108  	// Fields in term vectors are UTF16 sorted: (?)
   109  	util.IntroSort(TermVectorsConsumerPerFields(c.perFields[:c.numVectorsFields]))
   110  
   111  	if err = c.initTermVectorsWriter(); err != nil {
   112  		return
   113  	}
   114  
   115  	if err = c.fill(c.docState.docID); err != nil {
   116  		return
   117  	}
   118  
   119  	// Append term vectors to the real outputs:
   120  	if err = c.writer.StartDocument(c.numVectorsFields); err != nil {
   121  		return
   122  	}
   123  	for i := 0; i < c.numVectorsFields; i++ {
   124  		if err = c.perFields[i].finishDocument(); err != nil {
   125  			return
   126  		}
   127  	}
   128  	if err = c.writer.FinishDocument(); err != nil {
   129  		return
   130  	}
   131  
   132  	assert2(c.lastDocId == c.docState.docID,
   133  		"lastDocID=%v docState.docID=%v",
   134  		c.lastDocId, c.docState.docID)
   135  
   136  	c.lastDocId++
   137  
   138  	c.TermsHashImpl.reset()
   139  	c.resetFields()
   140  	c.docWriter.testPoint("TermVectorsTermsWriter.finishDocument end")
   141  	return
   142  }
   143  
   144  func (tvc *TermVectorsConsumer) abort() {
   145  	tvc.hasVectors = false
   146  
   147  	defer func() {
   148  		if tvc.writer != nil {
   149  			tvc.writer.Abort()
   150  			tvc.writer = nil
   151  		}
   152  
   153  		tvc.lastDocId = 0
   154  		tvc.reset()
   155  	}()
   156  
   157  	tvc.TermsHashImpl.abort()
   158  }
   159  
   160  func (tvc *TermVectorsConsumer) resetFields() {
   161  	tvc.perFields = nil
   162  	tvc.numVectorsFields = 0
   163  }
   164  
   165  func (tvc *TermVectorsConsumer) addField(invertState *FieldInvertState,
   166  	fieldInfo *model.FieldInfo) TermsHashPerField {
   167  	return newTermVectorsConsumerPerField(invertState, tvc, fieldInfo)
   168  }
   169  
   170  func (c *TermVectorsConsumer) addFieldToFlush(fieldToFlush *TermVectorsConsumerPerField) {
   171  	panic("not implemented yet")
   172  }
   173  
   174  func (c *TermVectorsConsumer) startDocument() {
   175  	c.resetFields()
   176  	c.numVectorsFields = 0
   177  }
   178  
   179  // func (c *TermVectorsConsumer) clearLastVectorFieldName() bool {
   180  // 	c.lastVectorFieldName = ""
   181  // 	return true
   182  // }
   183  
   184  // index/FreqProxTermsWriter.java
   185  
   186  type FreqProxTermsWriter struct {
   187  	*TermsHashImpl
   188  }
   189  
   190  func newFreqProxTermsWriter(docWriter *DocumentsWriterPerThread, termVectors TermsHash) *FreqProxTermsWriter {
   191  	ans := &FreqProxTermsWriter{}
   192  	ans.TermsHashImpl = newTermsHash(ans, docWriter, true, termVectors)
   193  	return ans
   194  }
   195  
   196  func (w *FreqProxTermsWriter) flush(fieldsToFlush map[string]TermsHashPerField,
   197  	state *model.SegmentWriteState) (err error) {
   198  
   199  	if err = w.TermsHashImpl.flush(fieldsToFlush, state); err != nil {
   200  		return
   201  	}
   202  
   203  	// Gather all FieldData's that have postings, across all ThreadStates
   204  	var allFields []*FreqProxTermsWriterPerField
   205  
   206  	for _, f := range fieldsToFlush {
   207  		if perField := f.(*FreqProxTermsWriterPerField); perField.bytesHash.Size() > 0 {
   208  			allFields = append(allFields, perField)
   209  		}
   210  	}
   211  
   212  	// Sort by field name
   213  	util.IntroSort(FreqProxTermsWriterPerFields(allFields))
   214  
   215  	var consumer FieldsConsumer
   216  	if consumer, err = state.SegmentInfo.Codec().(Codec).PostingsFormat().FieldsConsumer(state); err != nil {
   217  		return
   218  	}
   219  
   220  	var success = false
   221  	defer func() {
   222  		if success {
   223  			err = util.Close(consumer)
   224  		} else {
   225  			util.CloseWhileSuppressingError(consumer)
   226  		}
   227  	}()
   228  
   229  	var termsHash TermsHash
   230  	// Current writer chain:
   231  	// FieldsConsumer
   232  	// -> IMPL: FormatPostingsTermsDictWriter
   233  	// -> TermsConsumer
   234  	// -> IMPL: FormatPostingsTermsDictWriter.TermsWriter
   235  	// -> DocsConsumer
   236  	// -> IMPL: FormatPostingsDocWriter
   237  	// -> PositionsConsumer
   238  	// -> IMPL: FormatPostingsPositionsWriter
   239  
   240  	for _, fieldWriter := range allFields {
   241  		fieldInfo := fieldWriter.fieldInfo
   242  
   243  		// If this field has postings then add them to the segment
   244  		if err = fieldWriter.flush(fieldInfo.Name, consumer, state); err != nil {
   245  			return
   246  		}
   247  
   248  		assert(termsHash == nil || termsHash == fieldWriter.termsHash)
   249  		termsHash = fieldWriter.termsHash
   250  		fieldWriter.reset()
   251  	}
   252  
   253  	if termsHash != nil {
   254  		termsHash.reset()
   255  	}
   256  	success = true
   257  	return nil
   258  }
   259  
   260  type FreqProxTermsWriterPerFields []*FreqProxTermsWriterPerField
   261  
   262  func (a FreqProxTermsWriterPerFields) Len() int      { return len(a) }
   263  func (a FreqProxTermsWriterPerFields) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
   264  func (a FreqProxTermsWriterPerFields) Less(i, j int) bool {
   265  	return a[i].fieldInfo.Name < a[j].fieldInfo.Name
   266  }
   267  
   268  func (w *FreqProxTermsWriter) addField(invertState *FieldInvertState,
   269  	fieldInfo *model.FieldInfo) TermsHashPerField {
   270  
   271  	return newFreqProxTermsWriterPerField(invertState, w, fieldInfo,
   272  		w.nextTermsHash.addField(invertState, fieldInfo))
   273  }