github.com/balzaczyy/golucene@v0.0.0-20151210033525-d0be9ee89713/core/index/segments.go

package index

import (
	"fmt"
	"strconv"
	"sync/atomic"

	// docu "github.com/balzaczyy/golucene/core/document"
	. "github.com/balzaczyy/golucene/core/codec/spi"
	. "github.com/balzaczyy/golucene/core/index/model"
	"github.com/balzaczyy/golucene/core/store"
	"github.com/balzaczyy/golucene/core/util"
)

// index/SegmentReader.java

/**
 * IndexReader implementation over a single segment.
 * <p>
 * Instances pointing to the same segment (but with different deletes, etc)
 * may share the same core data.
 * @lucene.experimental
 */
type SegmentReader struct {
	*AtomicReaderImpl
	si       *SegmentCommitInfo
	liveDocs util.Bits
	// Normally set to si.docCount - si.delDocCount, unless we
	// were created as an NRT reader from IW, in which case IW
	// tells us the docCount:
	numDocs int
	core    *SegmentCoreReaders

	fieldInfos FieldInfos
}

/**
 * Constructs a new SegmentReader with a new core.
 * @throws CorruptIndexException if the index is corrupt
 * @throws IOException if there is a low-level IO error
 */
// TODO: why is this public?
func NewSegmentReader(si *SegmentCommitInfo,
	termInfosIndexDivisor int, context store.IOContext) (r *SegmentReader, err error) {

	r = &SegmentReader{}
	r.AtomicReaderImpl = newAtomicReader(r)
	r.ARFieldsReader = r

	r.si = si
	if r.fieldInfos, err = ReadFieldInfos(si); err != nil {
		return nil, err
	}
	// log.Print("Obtaining SegmentCoreReaders...")
	if r.core, err = newSegmentCoreReaders(r, si.Info.Dir, si, context, termInfosIndexDivisor); err != nil {
		return nil, err
	}
	// r.segDocValues = newSegmentDocValues()

	var success = false
	defer func() {
		// With lock-less commits, it's entirely possible (and
		// fine) to hit a FileNotFound exception above.  In
		// this case, we want to explicitly close any subset
		// of things that were opened so that we don't have to
		// wait for a GC to do so.
		if !success {
			// log.Printf("Failed to initialize SegmentReader.")
			r.core.decRef()
		}
	}()

	codec := si.Info.Codec().(Codec)
	if si.HasDeletions() {
		panic("not supported yet")
	} else {
		assert(si.DelCount() == 0)
	}
	r.numDocs = si.Info.DocCount() - si.DelCount()

	if r.fieldInfos.HasDocValues {
		// propagate the error instead of dropping it; the deferred decRef above
		// cleans up the core readers because success is still false
		if err = r.initDocValuesProducers(codec); err != nil {
			return nil, err
		}
	}
	success = true
	return r, nil
}
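
// A hedged usage sketch (not part of the original source): given a
// *SegmentCommitInfo "si" obtained elsewhere (for example from a SegmentInfos
// read out of the index directory), a reader over that single segment might be
// opened like this; the divisor value 1 and store.IO_CONTEXT_READ are
// assumptions, not taken from this file:
//
//	r, err := NewSegmentReader(si, 1, store.IO_CONTEXT_READ)
//	if err != nil {
//		return err
//	}
//	fmt.Println(r.SegmentName(), ":", r.NumDocs(), "live docs of", r.MaxDoc())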

/* initialize the per-field DocValuesProducer */
func (r *SegmentReader) initDocValuesProducers(codec Codec) error {
	// var dir store.Directory
	// if r.core.cfsReader != nil {
	// 	dir = r.core.cfsReader
	// } else {
	// 	dir = r.si.Info.Dir
	// }
	// dvFormat := codec.DocValuesFormat()

	// termsIndexDivisor := r.core.termsIndexDivisor
	if !r.si.HasFieldUpdates() {
		panic("not implemented yet")
	}

	panic("not implemented yet")
}

/* Reads the most recent FieldInfos of the given segment info. */
func ReadFieldInfos(info *SegmentCommitInfo) (fis FieldInfos, err error) {
	var dir store.Directory
	var closeDir bool
	if info.FieldInfosGen() == -1 && info.Info.IsCompoundFile() {
		// no fieldInfos gen and segment uses a compound file
		if dir, err = store.NewCompoundFileDirectory(info.Info.Dir,
			util.SegmentFileName(info.Info.Name, "", store.COMPOUND_FILE_EXTENSION),
			store.IO_CONTEXT_READONCE, false); err != nil {
			return
		}
		closeDir = true
	} else {
		// gen'd FIS are read outside CFS, or the segment doesn't use a compound file
		dir = info.Info.Dir
		closeDir = false
	}

	defer func() {
		if closeDir {
			err = mergeError(err, dir.Close())
		}
	}()

	var segmentSuffix string
	if n := info.FieldInfosGen(); n != -1 {
		segmentSuffix = strconv.FormatInt(n, 36)
	}
	codec := info.Info.Codec().(Codec)
	fisFormat := codec.FieldInfosFormat()
	return fisFormat.FieldInfosReader()(dir, info.Info.Name, segmentSuffix, store.IO_CONTEXT_READONCE)
}
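
// A minimal sketch (not part of the original source) of how ReadFieldInfos can
// be used to peek at a segment's schema without opening a full SegmentReader;
// "si" is again a *SegmentCommitInfo obtained elsewhere:
//
//	fis, err := ReadFieldInfos(si)
//	if err != nil {
//		return err
//	}
//	fmt.Println("norms:", fis.HasNorms, "doc values:", fis.HasDocValues, "vectors:", fis.HasVectors)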

func (r *SegmentReader) LiveDocs() util.Bits {
	r.ensureOpen()
	return r.liveDocs
}

func (r *SegmentReader) doClose() error {
	panic("not implemented yet")
	// TODO: the statements below are unreachable until the rest of close() is ported:
	// r.core.decRef()
	// return nil
}

func (r *SegmentReader) FieldInfos() FieldInfos {
	r.ensureOpen()
	return r.fieldInfos
}

// Expert: retrieve thread-private StoredFieldsReader
func (r *SegmentReader) FieldsReader() StoredFieldsReader {
	r.ensureOpen()
	return r.core.fieldsReaderLocal()
}

func (r *SegmentReader) VisitDocument(docID int, visitor StoredFieldVisitor) error {
	r.checkBounds(docID)
	return r.FieldsReader().VisitDocument(docID, visitor)
}

func (r *SegmentReader) Fields() Fields {
	r.ensureOpen()
	return r.core.fields
}

func (r *SegmentReader) NumDocs() int {
	// Don't call ensureOpen() here (it could affect performance)
	return r.numDocs
}

func (r *SegmentReader) MaxDoc() int {
	// Don't call ensureOpen() here (it could affect performance)
	return r.si.Info.DocCount()
}

func (r *SegmentReader) TermVectorsReader() TermVectorsReader {
	panic("not implemented yet")
}

func (r *SegmentReader) TermVectors(docID int) (fs Fields, err error) {
	panic("not implemented yet")
}

func (r *SegmentReader) checkBounds(docID int) {
	if docID < 0 || docID >= r.MaxDoc() {
		panic(fmt.Sprintf("docID must be >= 0 and < maxDoc=%v (got docID=%v)", r.MaxDoc(), docID))
	}
}

// SegmentReader.java L179
func (r *SegmentReader) String() string {
	// SegmentInfo.toString takes dir and number of
	// *pending* deletions; so we reverse compute that here:
	return r.si.StringOf(r.si.Info.Dir, r.si.Info.DocCount()-r.numDocs-r.si.DelCount())
}
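
// Worked example with hypothetical numbers (not from the original source): for a
// segment with DocCount()=100, DelCount()=3 already committed, and numDocs=95
// live docs as reported by IndexWriter, the pending (not yet committed) deletion
// count passed to StringOf is 100 - 95 - 3 = 2.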

func (r *SegmentReader) SegmentName() string {
	return r.si.Info.Name
}

func (r *SegmentReader) SegmentInfos() *SegmentCommitInfo {
	return r.si
}

func (r *SegmentReader) Directory() store.Directory {
	// Don't ensureOpen here -- in certain cases, when a
	// cloned/reopened reader needs to commit, it may call
	// this method on the closed original reader
	return r.si.Info.Dir
}

func (r *SegmentReader) CoreCacheKey() interface{} {
	return r.core
}

func (r *SegmentReader) CombinedCoreAndDeletesKey() interface{} {
	return r
}

func (r *SegmentReader) TermInfosIndexDivisor() int {
	return r.core.termsIndexDivisor
}

func (r *SegmentReader) NumericDocValues(field string) (v NumericDocValues, err error) {
	r.ensureOpen()
	panic("not implemented yet")
}

func (r *SegmentReader) BinaryDocValues(field string) (v BinaryDocValues, err error) {
	r.ensureOpen()
	panic("not implemented yet")
}

func (r *SegmentReader) SortedDocValues(field string) (v SortedDocValues, err error) {
	r.ensureOpen()
	panic("not implemented yet")
}

func (r *SegmentReader) SortedSetDocValues(field string) (v SortedSetDocValues, err error) {
	r.ensureOpen()
	panic("not implemented yet")
}

func (r *SegmentReader) NormValues(field string) (v NumericDocValues, err error) {
	r.ensureOpen()
	return r.core.normValues(r.fieldInfos, field)
}

type CoreClosedListener interface {
	onClose(r interface{})
}
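
// A hedged example (not part of the original source) of a CoreClosedListener
// implementation: since onClose is unexported, implementations live in this
// package; "coreCache" here is a hypothetical cache keyed by CoreCacheKey():
//
//	type cacheEvictingListener struct {
//		coreCache map[interface{}][]int
//	}
//
//	func (l *cacheEvictingListener) onClose(ownerCoreCacheKey interface{}) {
//		delete(l.coreCache, ownerCoreCacheKey)
//	}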

// index/SegmentCoreReaders.java

type SegmentCoreReaders struct {
	refCount int32 // synchronized

	fields        FieldsProducer
	normsProducer DocValuesProducer

	termsIndexDivisor int

	owner *SegmentReader

	fieldsReaderOrig      StoredFieldsReader
	termVectorsReaderOrig TermVectorsReader
	cfsReader             *store.CompoundFileDirectory

	/*
	 Lucene Java uses ThreadLocal as a thread-level cache, to avoid expensive
	 repeated reads while limiting memory consumption. Since Go has no thread- or
	 goroutine-local storage, a new object is always returned here instead.

	 TODO: redesign when ported to goroutines (see the sketch after this struct)
	*/
	fieldsReaderLocal func() StoredFieldsReader
	normsLocal        func() map[string]interface{}

	addListener    chan CoreClosedListener
	removeListener chan CoreClosedListener
	notifyListener chan bool
}
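
// A possible redesign sketch for the TODO above (not part of the original
// source): in newSegmentCoreReaders below, fieldsReaderLocal could be backed by
// a sync.Pool so cloned StoredFieldsReaders are reused across goroutines rather
// than re-created on every call; the pool variable and the Put call are
// assumptions, not existing code:
//
//	pool := &sync.Pool{New: func() interface{} {
//		return self.fieldsReaderOrig.Clone()
//	}}
//	self.fieldsReaderLocal = func() StoredFieldsReader {
//		return pool.Get().(StoredFieldsReader)
//	}
//	// callers would then hand readers back via pool.Put when finished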

func newSegmentCoreReaders(owner *SegmentReader, dir store.Directory, si *SegmentCommitInfo,
	context store.IOContext, termsIndexDivisor int) (self *SegmentCoreReaders, err error) {

	assert2(termsIndexDivisor != 0,
		"indexDivisor must be < 0 (don't load terms index) or greater than 0 (got 0)")
	// fmt.Println("Initializing SegmentCoreReaders from directory:", dir)

	self = &SegmentCoreReaders{
		refCount: 1,
		normsLocal: func() map[string]interface{} {
			return make(map[string]interface{})
		},
	}
	self.fieldsReaderLocal = func() StoredFieldsReader {
		return self.fieldsReaderOrig.Clone()
	}

	// fmt.Println("Initializing listeners...")
	self.addListener = make(chan CoreClosedListener)
	self.removeListener = make(chan CoreClosedListener)
	self.notifyListener = make(chan bool)
	// TODO re-enable later
	go func() { // ensure listeners are synchronized
		coreClosedListeners := make([]CoreClosedListener, 0)
		isRunning := true
		var listener CoreClosedListener
		for isRunning {
			// fmt.Println("Listening for events...")
			select {
			case listener = <-self.addListener:
				coreClosedListeners = append(coreClosedListeners, listener)
			case listener = <-self.removeListener:
				n := len(coreClosedListeners)
				for i, v := range coreClosedListeners {
					if v == listener {
						newListeners := make([]CoreClosedListener, 0, n-1)
						newListeners = append(newListeners, coreClosedListeners[0:i]...)
						newListeners = append(newListeners, coreClosedListeners[i+1:]...)
						coreClosedListeners = newListeners
						break
					}
				}
			case <-self.notifyListener:
				fmt.Println("Shutting down SegmentCoreReaders...")
				isRunning = false
				for _, v := range coreClosedListeners {
					v.onClose(self)
				}
			}
		}
		fmt.Println("Listeners are done.")
	}()
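
	// A hedged usage sketch (not part of the original source): with the goroutine
	// above serializing access to the listener slice, other goroutines register
	// and deregister listeners simply by sending on the channels, e.g.:
	//
	//	self.addListener <- someListener    // start receiving the onClose callback
	//	self.removeListener <- someListener // stop receiving it
	//
	// The callbacks themselves run inside that goroutine once decRef drops the
	// reference count to zero and signals notifyListener.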

	var success = false
	ans := self
	defer func() {
		if !success {
			fmt.Println("Failed to initialize SegmentCoreReaders.")
			ans.decRef()
		}
	}()

	codec := si.Info.Codec().(Codec)
	// fmt.Println("Obtaining CFS Directory...")
	var cfsDir store.Directory // confusing name: if compound (cfs), it's the cfsDir; otherwise it's the segment's directory.
	if si.Info.IsCompoundFile() {
		// fmt.Println("Detected CompoundFile.")
		name := util.SegmentFileName(si.Info.Name, "", store.COMPOUND_FILE_EXTENSION)
		if self.cfsReader, err = store.NewCompoundFileDirectory(dir, name, context, false); err != nil {
			return nil, err
		}
		// fmt.Println("CompoundFileDirectory: ", self.cfsReader)
		cfsDir = self.cfsReader
	} else {
		cfsDir = dir
	}
	// fmt.Println("CFS Directory:", cfsDir)

	// fmt.Println("Reading FieldInfos...")
	fieldInfos := owner.fieldInfos

	self.termsIndexDivisor = termsIndexDivisor
	format := codec.PostingsFormat()

	// fmt.Println("Obtaining SegmentReadState...")
	segmentReadState := NewSegmentReadState(cfsDir, si.Info, fieldInfos, context, termsIndexDivisor)
	// Ask codec for its Fields
	// fmt.Println("Obtaining FieldsProducer...")
	if self.fields, err = format.FieldsProducer(segmentReadState); err != nil {
		return nil, err
	}
	assert(self.fields != nil)
	// ask codec for its Norms:
	// TODO: since we don't write any norms file if there are no norms, it's kind of
	// janky to assume the codec handles the case of no norms file at all gracefully?!

	if fieldInfos.HasNorms {
		// fmt.Println("Obtaining NormsDocValuesProducer...")
		if self.normsProducer, err = codec.NormsFormat().NormsProducer(segmentReadState); err != nil {
			return nil, err
		}
		assert(self.normsProducer != nil)
	}

	// fmt.Println("Obtaining StoredFieldsReader...")
	if self.fieldsReaderOrig, err = si.Info.Codec().(Codec).StoredFieldsFormat().FieldsReader(cfsDir, si.Info, fieldInfos, context); err != nil {
		return nil, err
	}

	if fieldInfos.HasVectors { // open term vector files only as needed
		// fmt.Println("Obtaining TermVectorsReader...")
		if self.termVectorsReaderOrig, err = si.Info.Codec().(Codec).TermVectorsFormat().VectorsReader(cfsDir, si.Info, fieldInfos, context); err != nil {
			return nil, err
		}
	}

	// fmt.Println("Success")
	success = true

	return self, nil
}

func (r *SegmentCoreReaders) normValues(infos FieldInfos,
	field string) (ndv NumericDocValues, err error) {

	if norms, ok := r.normsLocal()[field]; ok {
		ndv = norms.(NumericDocValues)
	} else if fi := infos.FieldInfoByName(field); fi != nil && fi.HasNorms() {
		assert(r.normsProducer != nil)
		if ndv, err = r.normsProducer.Numeric(fi); err == nil {
			// cache the freshly loaded value, not the nil "norms" placeholder
			r.normsLocal()[field] = ndv
		} // else field does not exist
	}
	return
}

func (r *SegmentCoreReaders) decRef() {
	if atomic.AddInt32(&r.refCount, -1) == 0 {
		fmt.Println("--- closing core readers")
		util.Close( /*self.termVectorsLocal, self.fieldsReaderLocal,  r.normsLocal,*/
			r.fields, r.termVectorsReaderOrig, r.fieldsReaderOrig,
			r.cfsReader, r.normsProducer)
		r.notifyListener <- true
	}
}
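
// A minimal sketch (not part of the original source) of the ref-counting
// contract decRef pairs with: a reader that starts sharing this core would bump
// the count first, via a counterpart along the lines of
//
//	func (r *SegmentCoreReaders) incRef() {
//		atomic.AddInt32(&r.refCount, 1)
//	}
//
// so the underlying producers are closed, and listeners notified, exactly once,
// when the last sharing SegmentReader releases them.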