github.com/balzaczyy/golucene@v0.0.0-20151210033525-d0be9ee89713/core/index/reader.go (about)

     1  package index
     2  
     3  import (
     4  	// "errors"
     5  	"errors"
     6  	"fmt"
     7  	. "github.com/balzaczyy/golucene/core/codec/spi"
     8  	docu "github.com/balzaczyy/golucene/core/document"
     9  	. "github.com/balzaczyy/golucene/core/index/model"
    10  	"github.com/balzaczyy/golucene/core/util"
    11  	"io"
    12  	"reflect"
    13  	"sync"
    14  	"sync/atomic"
    15  )
    16  
// IndexReader is the contract shared by the readers in this package.
// It combines life-cycle management (Close, decRef, ensureOpen,
// registerParentReader) with document access and reader-context
// navigation.
type IndexReader interface {
	io.Closer
	// decRef releases one reference to the reader and may return an
	// error from closing it.
	decRef() error
	// ensureOpen panics if the reader can no longer be used.
	ensureOpen()
	// registerParentReader records r as a parent of this reader.
	registerParentReader(r IndexReader)
	// NumDocs and MaxDoc are document counts; numDeletedDocs in this
	// file derives the number of deleted documents as
	// MaxDoc() - NumDocs().
	NumDocs() int
	MaxDoc() int
	// VisitDocument is an expert API that visits the fields of a stored
	// document, for custom processing/loading of each field. If you
	// simply want to load all fields, use Document. If you want to load
	// a subset, use a DocumentStoredFieldVisitor.
	VisitDocument(docID int, visitor StoredFieldVisitor) error
	// Document returns the stored fields of the n-th Document in this
	// index. This is just sugar for using a DocumentStoredFieldVisitor.
	//
	// NOTE: for performance reasons, this method does not check if the
	// requested document is deleted, and therefore asking for a deleted
	// document may yield unspecified results. Usually this is not
	// required, however you can test if the doc is deleted by checking
	// the Bits of live documents (MultiFields#getLiveDocs in Lucene).
	//
	// NOTE: only the content of a field is returned, if that field was
	// stored during indexing. Metadata like boost, omitNorm,
	// IndexOptions, tokenized, etc., are not preserved.
	//
	// An error is returned if there is a low-level IO error.
	//
	// TODO: we need a separate StoredField, so that the Document
	// returned here contains that class not model.IndexableField
	Document(docID int) (doc *docu.Document, err error)
	// doClose performs the implementation-specific close work.
	doClose() error
	// Context returns the reader context of this reader.
	Context() IndexReaderContext
	// Leaves returns the atomic (leaf) reader contexts of this reader.
	Leaves() []*AtomicReaderContext
	// DocFreq returns the number of documents containing the term. This
	// method returns 0 if the term or field does not exist. This method
	// does not take into account deleted documents that have not yet
	// been merged away.
	DocFreq(*Term) (int, error)
}
    61  
// ReaderClosedListener is a custom listener that is invoked when an
// IndexReader is closed. onClose receives the reader being closed.
type ReaderClosedListener interface {
	onClose(IndexReader)
}
    66  
// IndexReaderImplSPI lists the methods a concrete reader must supply.
// IndexReaderImpl embeds this interface, so calls to these methods on
// an IndexReaderImpl are forwarded to the supplied implementation.
type IndexReaderImplSPI interface {
	NumDocs() int
	MaxDoc() int
	VisitDocument(int, StoredFieldVisitor) error
	// doClose is called by IndexReaderImpl.decRef once the reference
	// count drops to zero.
	doClose() error
	Context() IndexReaderContext
	DocFreq(*Term) (int, error)
}
    75  
// IndexReaderImpl holds the reference-counting and close-notification
// state shared by readers, and delegates the reader-specific
// operations to the embedded IndexReaderImplSPI.
type IndexReaderImpl struct {
	IndexReaderImplSPI

	// lock serializes Close.
	lock sync.Mutex
	// closed is set once the reader has been closed (by decRef when the
	// reference count reaches zero, or by Close).
	closed bool
	// closedByChild is set by a child reader's
	// reportCloseToParentReaders and makes ensureOpen panic.
	closedByChild bool
	// refCount is read and modified through sync/atomic.
	refCount int32 // synchronized
	// parentReaders is the set of registered parent readers; guarded by
	// parentReadersLock.
	parentReaders     map[IndexReader]bool
	parentReadersLock sync.RWMutex
	// readerClosedListeners is the set of listeners notified on close;
	// read under readerClosedListenersLock.
	readerClosedListeners     map[ReaderClosedListener]bool
	readerClosedListenersLock sync.RWMutex
}
    88  
    89  func newIndexReader(spi IndexReaderImplSPI) *IndexReaderImpl {
    90  	return &IndexReaderImpl{
    91  		IndexReaderImplSPI: spi,
    92  		refCount:           1,
    93  		parentReaders:      make(map[IndexReader]bool),
    94  	}
    95  }
    96  
    97  func (r *IndexReaderImpl) decRef() error {
    98  	// only check refcount here (don't call ensureOpen()), so we can
    99  	// still close the reader if it was made invalid by a child:
   100  	assert2(r.refCount > 0, "this IndexReader is closed")
   101  
   102  	rc := atomic.AddInt32(&r.refCount, -1)
   103  	assert2(rc >= 0, "too many decRef calls: refCount is %v after decrement", rc)
   104  	if rc == 0 {
   105  		r.closed = true
   106  		var err error
   107  		defer func() {
   108  			defer r.notifyReaderClosedListeners(err)
   109  			r.reportCloseToParentReaders()
   110  		}()
   111  		return r.doClose()
   112  	}
   113  	return nil
   114  }
   115  
   116  func (r *IndexReaderImpl) ensureOpen() {
   117  	if atomic.LoadInt32(&r.refCount) <= 0 {
   118  		panic("this IndexReader is closed")
   119  	}
   120  	// the happens before rule on reading the refCount, which must be after the fake write,
   121  	// ensures that we see the value:
   122  	if r.closedByChild {
   123  		panic("this IndexReader cannot be used anymore as one of its child readers was closed")
   124  	}
   125  }
   126  
   127  func (r *IndexReaderImpl) registerParentReader(reader IndexReader) {
   128  	r.ensureOpen()
   129  	r.parentReadersLock.Lock()
   130  	defer r.parentReadersLock.Unlock()
   131  	r.parentReaders[reader] = true
   132  }
   133  
   134  func (r *IndexReaderImpl) notifyReaderClosedListeners(err error) {
   135  	r.readerClosedListenersLock.RLock()
   136  	defer r.readerClosedListenersLock.RUnlock()
   137  	for listener, _ := range r.readerClosedListeners {
   138  		func() {
   139  			defer func() {
   140  				if e := recover(); e != nil {
   141  					err = mergeError(err, errors.New(fmt.Sprintf("%v", e)))
   142  				}
   143  			}()
   144  			listener.onClose(r)
   145  		}()
   146  	}
   147  	return
   148  }
   149  
   150  func (r *IndexReaderImpl) reportCloseToParentReaders() {
   151  	r.parentReadersLock.RLock()
   152  	defer r.parentReadersLock.RUnlock()
   153  	for parent, _ := range r.parentReaders {
   154  		if p, ok := parent.(*IndexReaderImpl); ok {
   155  			p.closedByChild = true
   156  			// cross memory barrier by a fake write:
   157  			// FIXME do we need it in Go?
   158  			atomic.AddInt32(&p.refCount, 0)
   159  			// recurse:
   160  			p.reportCloseToParentReaders()
   161  		} else if p, ok := parent.(*BaseCompositeReader); ok {
   162  			p.closedByChild = true
   163  			// cross memory barrier by a fake write:
   164  			// FIXME do we need it in Go?
   165  			atomic.AddInt32(&p.refCount, 0)
   166  			// recurse:
   167  			p.reportCloseToParentReaders()
   168  		} else {
   169  			panic(fmt.Sprintf("Unknown IndexReader type: %v", reflect.TypeOf(parent).Name()))
   170  		}
   171  	}
   172  }
   173  
   174  /* Returns the number of deleted documents. */
   175  func (r *IndexReaderImpl) numDeletedDocs() int {
   176  	return r.MaxDoc() - r.NumDocs()
   177  }
   178  
   179  func (r *IndexReaderImpl) Document(docID int) (doc *docu.Document, err error) {
   180  	visitor := docu.NewDocumentStoredFieldVisitor()
   181  	if err = r.VisitDocument(docID, visitor); err != nil {
   182  		return nil, err
   183  	}
   184  	return visitor.Document(), nil
   185  }
   186  
   187  /*
   188  Returns true if any documents have been deleted. Implementers should
   189  consider overriding this method if maxDoc() or numDocs() are not
   190  constant-time operations.
   191  */
   192  func (r *IndexReaderImpl) hasDeletions() bool {
   193  	return r.numDeletedDocs() > 0
   194  }
   195  
   196  func (r *IndexReaderImpl) Close() error {
   197  	r.lock.Lock()
   198  	defer r.lock.Unlock()
   199  	if !r.closed {
   200  		if err := r.decRef(); err != nil {
   201  			return err
   202  		}
   203  		r.closed = true
   204  	}
   205  	return nil
   206  }
   207  
   208  func (r *IndexReaderImpl) Leaves() []*AtomicReaderContext {
   209  	return r.Context().Leaves()
   210  }
   211  
// IndexReaderContext describes the position of an IndexReader within a
// hierarchy of readers.
type IndexReaderContext interface {
	// Reader returns the reader this context belongs to.
	Reader() IndexReader
	// Parent returns the parent context.
	Parent() *CompositeReaderContext
	// Leaves returns the atomic (leaf) contexts below this context.
	Leaves() []*AtomicReaderContext
	// Children returns the immediate child contexts.
	Children() []IndexReaderContext
}
   218  
// IndexReaderContextImpl stores the data common to reader contexts.
type IndexReaderContextImpl struct {
	// parent is the enclosing composite context, if any.
	parent *CompositeReaderContext
	// isTopLevel is set by newIndexReaderContext to parent == nil.
	isTopLevel bool
	// docBaseInParent and ordInParent are stored as passed to
	// newIndexReaderContext.
	docBaseInParent int
	ordInParent     int
}
   225  
   226  func newIndexReaderContext(parent *CompositeReaderContext, ordInParent, docBaseInParent int) *IndexReaderContextImpl {
   227  	return &IndexReaderContextImpl{
   228  		parent:          parent,
   229  		isTopLevel:      parent == nil,
   230  		docBaseInParent: docBaseInParent,
   231  		ordInParent:     ordInParent}
   232  }
   233  
// Parent returns the parent context stored at construction time; it is
// nil for a top-level context.
func (ctx *IndexReaderContextImpl) Parent() *CompositeReaderContext {
	return ctx.parent
}
   237  
// ARFieldsReader groups the field-level accessors of an atomic reader.
type ARFieldsReader interface {
	// Terms returns the terms of the given field.
	Terms(field string) Terms
	// Fields returns the fields of this reader. Callers in this file
	// treat a nil result as "no fields".
	Fields() Fields
	LiveDocs() util.Bits
	// NormValues returns NumericDocValues representing norms for this
	// field, or nil if no NumericDocValues were indexed. The returned
	// instance should only be used by a single thread.
	NormValues(field string) (ndv NumericDocValues, err error)
}
   248  
// AtomicReader is an IndexReader that additionally exposes the
// field-level accessors of ARFieldsReader.
type AtomicReader interface {
	IndexReader
	ARFieldsReader
}
   253  
// AtomicReaderImplSPI lists everything a concrete atomic reader must
// supply to newAtomicReader: the IndexReaderImplSPI methods plus the
// ARFieldsReader accessors.
type AtomicReaderImplSPI interface {
	IndexReaderImplSPI
	ARFieldsReader
}
   258  
// AtomicReaderImpl combines the shared IndexReaderImpl state with the
// field accessors of a concrete atomic reader.
type AtomicReaderImpl struct {
	*IndexReaderImpl
	ARFieldsReader

	// readerContext is created once in newAtomicReader and returned by
	// Context.
	readerContext *AtomicReaderContext
}
   265  
   266  func newAtomicReader(spi AtomicReaderImplSPI) *AtomicReaderImpl {
   267  	r := &AtomicReaderImpl{
   268  		IndexReaderImpl: newIndexReader(spi),
   269  		ARFieldsReader:  spi,
   270  	}
   271  	r.readerContext = newAtomicReaderContextFromReader(r)
   272  	return r
   273  }
   274  
// Context returns the context created for this reader in
// newAtomicReader. It panics (via ensureOpen) if the reader is no
// longer usable.
func (r *AtomicReaderImpl) Context() IndexReaderContext {
	r.ensureOpen()
	return r.readerContext
}
   279  
   280  func (r *AtomicReaderImpl) DocFreq(term *Term) (int, error) {
   281  	if fields := r.Fields(); fields != nil {
   282  		if terms := fields.Terms(term.Field); terms != nil {
   283  			termsEnum := terms.Iterator(nil)
   284  			ok, err := termsEnum.SeekExact(term.Bytes)
   285  			if err != nil {
   286  				return 0, err
   287  			}
   288  			if ok {
   289  				return termsEnum.DocFreq()
   290  			}
   291  		}
   292  	}
   293  	return 0, nil
   294  }
   295  
// TotalTermFreq is not implemented yet; calling it always panics.
func (r *AtomicReaderImpl) TotalTermFreq(term *Term) (n int64, err error) {
	panic("not implemented yet")
}
   299  
// SumDocFreq is not implemented yet; calling it always panics.
func (r *AtomicReaderImpl) SumDocFreq(field string) (n int64, err error) {
	panic("not implemented yet")
}
   303  
// DocCount is not implemented yet; calling it always panics.
func (r *AtomicReaderImpl) DocCount(field string) (n int, err error) {
	panic("not implemented yet")
}
   307  
// SumTotalTermFreq is not implemented yet; calling it always panics.
func (r *AtomicReaderImpl) SumTotalTermFreq(field string) (n int64, err error) {
	panic("not implemented yet")
}
   311  
   312  func (r *AtomicReaderImpl) Terms(field string) Terms {
   313  	fields := r.Fields()
   314  	if fields == nil {
   315  		return nil
   316  	}
   317  	return fields.Terms(field)
   318  }
   319  
// AtomicReaderContext is the IndexReaderContext of an AtomicReader.
type AtomicReaderContext struct {
	*IndexReaderContextImpl
	// Ord and DocBase hold the leafOrd and leafDocBase values passed to
	// newAtomicReaderContext.
	Ord, DocBase int
	// reader is the atomic reader this context belongs to.
	reader AtomicReader
	// leaves is populated only for top-level contexts, where it
	// contains just this context.
	leaves []*AtomicReaderContext
}
   326  
   327  func (ctx *AtomicReaderContext) String() string {
   328  	return fmt.Sprintf("AtomicReaderContext{%v ord=%v docBase=%v %v}",
   329  		ctx.IndexReaderContextImpl, ctx.Ord, ctx.DocBase, ctx.reader)
   330  }
   331  
   332  func newAtomicReaderContextFromReader(r AtomicReader) *AtomicReaderContext {
   333  	return newAtomicReaderContext(nil, r, 0, 0, 0, 0)
   334  }
   335  
   336  func newAtomicReaderContext(parent *CompositeReaderContext, reader AtomicReader, ord, docBase, leafOrd, leafDocBase int) *AtomicReaderContext {
   337  	ans := &AtomicReaderContext{}
   338  	ans.IndexReaderContextImpl = newIndexReaderContext(parent, ord, docBase)
   339  	ans.Ord = leafOrd
   340  	ans.DocBase = leafDocBase
   341  	ans.reader = reader
   342  	if ans.isTopLevel {
   343  		ans.leaves = []*AtomicReaderContext{ans}
   344  	}
   345  	return ans
   346  }
   347  
   348  func (ctx *AtomicReaderContext) Leaves() []*AtomicReaderContext {
   349  	if !ctx.IndexReaderContextImpl.isTopLevel {
   350  		panic("This is not a top-level context.")
   351  	}
   352  	// assert leaves != null
   353  	return ctx.leaves
   354  }
   355  
// Children always returns nil for an atomic reader context.
func (ctx *AtomicReaderContext) Children() []IndexReaderContext {
	return nil
}
   359  
// Reader returns the atomic reader this context was created for.
func (ctx *AtomicReaderContext) Reader() IndexReader {
	return ctx.reader
}