github.com/balzaczyy/golucene@v0.0.0-20151210033525-d0be9ee89713/core/index/reader.go (about) 1 package index 2 3 import ( 4 // "errors" 5 "errors" 6 "fmt" 7 . "github.com/balzaczyy/golucene/core/codec/spi" 8 docu "github.com/balzaczyy/golucene/core/document" 9 . "github.com/balzaczyy/golucene/core/index/model" 10 "github.com/balzaczyy/golucene/core/util" 11 "io" 12 "reflect" 13 "sync" 14 "sync/atomic" 15 ) 16 17 type IndexReader interface { 18 io.Closer 19 decRef() error 20 ensureOpen() 21 registerParentReader(r IndexReader) 22 NumDocs() int 23 MaxDoc() int 24 /** Expert: visits the fields of a stored document, for 25 * custom processing/loading of each field. If you 26 * simply want to load all fields, use {@link 27 * #document(int)}. If you want to load a subset, use 28 * {@link DocumentStoredFieldVisitor}. */ 29 VisitDocument(docID int, visitor StoredFieldVisitor) error 30 /** 31 * Returns the stored fields of the <code>n</code><sup>th</sup> 32 * <code>Document</code> in this index. This is just 33 * sugar for using {@link DocumentStoredFieldVisitor}. 34 * <p> 35 * <b>NOTE:</b> for performance reasons, this method does not check if the 36 * requested document is deleted, and therefore asking for a deleted document 37 * may yield unspecified results. Usually this is not required, however you 38 * can test if the doc is deleted by checking the {@link 39 * Bits} returned from {@link MultiFields#getLiveDocs}. 40 * 41 * <b>NOTE:</b> only the content of a field is returned, 42 * if that field was stored during indexing. Metadata 43 * like boost, omitNorm, IndexOptions, tokenized, etc., 44 * are not preserved. 45 * 46 * @throws IOException if there is a low-level IO error 47 */ 48 // TODO: we need a separate StoredField, so that the 49 // Document returned here contains that class not 50 //model.IndexableField 51 Document(docID int) (doc *docu.Document, err error) 52 doClose() error 53 Context() IndexReaderContext 54 Leaves() []*AtomicReaderContext 55 // Returns the number of documents containing the term. This method 56 // returns 0 if the term of field does not exists. This method does 57 // not take into account deleted documents that have not yet been 58 // merged away. 59 DocFreq(*Term) (int, error) 60 } 61 62 /* A custom listener that's invoked when the IndexReader is closed. */ 63 type ReaderClosedListener interface { 64 onClose(IndexReader) 65 } 66 67 type IndexReaderImplSPI interface { 68 NumDocs() int 69 MaxDoc() int 70 VisitDocument(int, StoredFieldVisitor) error 71 doClose() error 72 Context() IndexReaderContext 73 DocFreq(*Term) (int, error) 74 } 75 76 type IndexReaderImpl struct { 77 IndexReaderImplSPI 78 79 lock sync.Mutex 80 closed bool 81 closedByChild bool 82 refCount int32 // synchronized 83 parentReaders map[IndexReader]bool 84 parentReadersLock sync.RWMutex 85 readerClosedListeners map[ReaderClosedListener]bool 86 readerClosedListenersLock sync.RWMutex 87 } 88 89 func newIndexReader(spi IndexReaderImplSPI) *IndexReaderImpl { 90 return &IndexReaderImpl{ 91 IndexReaderImplSPI: spi, 92 refCount: 1, 93 parentReaders: make(map[IndexReader]bool), 94 } 95 } 96 97 func (r *IndexReaderImpl) decRef() error { 98 // only check refcount here (don't call ensureOpen()), so we can 99 // still close the reader if it was made invalid by a child: 100 assert2(r.refCount > 0, "this IndexReader is closed") 101 102 rc := atomic.AddInt32(&r.refCount, -1) 103 assert2(rc >= 0, "too many decRef calls: refCount is %v after decrement", rc) 104 if rc == 0 { 105 r.closed = true 106 var err error 107 defer func() { 108 defer r.notifyReaderClosedListeners(err) 109 r.reportCloseToParentReaders() 110 }() 111 return r.doClose() 112 } 113 return nil 114 } 115 116 func (r *IndexReaderImpl) ensureOpen() { 117 if atomic.LoadInt32(&r.refCount) <= 0 { 118 panic("this IndexReader is closed") 119 } 120 // the happens before rule on reading the refCount, which must be after the fake write, 121 // ensures that we see the value: 122 if r.closedByChild { 123 panic("this IndexReader cannot be used anymore as one of its child readers was closed") 124 } 125 } 126 127 func (r *IndexReaderImpl) registerParentReader(reader IndexReader) { 128 r.ensureOpen() 129 r.parentReadersLock.Lock() 130 defer r.parentReadersLock.Unlock() 131 r.parentReaders[reader] = true 132 } 133 134 func (r *IndexReaderImpl) notifyReaderClosedListeners(err error) { 135 r.readerClosedListenersLock.RLock() 136 defer r.readerClosedListenersLock.RUnlock() 137 for listener, _ := range r.readerClosedListeners { 138 func() { 139 defer func() { 140 if e := recover(); e != nil { 141 err = mergeError(err, errors.New(fmt.Sprintf("%v", e))) 142 } 143 }() 144 listener.onClose(r) 145 }() 146 } 147 return 148 } 149 150 func (r *IndexReaderImpl) reportCloseToParentReaders() { 151 r.parentReadersLock.RLock() 152 defer r.parentReadersLock.RUnlock() 153 for parent, _ := range r.parentReaders { 154 if p, ok := parent.(*IndexReaderImpl); ok { 155 p.closedByChild = true 156 // cross memory barrier by a fake write: 157 // FIXME do we need it in Go? 158 atomic.AddInt32(&p.refCount, 0) 159 // recurse: 160 p.reportCloseToParentReaders() 161 } else if p, ok := parent.(*BaseCompositeReader); ok { 162 p.closedByChild = true 163 // cross memory barrier by a fake write: 164 // FIXME do we need it in Go? 165 atomic.AddInt32(&p.refCount, 0) 166 // recurse: 167 p.reportCloseToParentReaders() 168 } else { 169 panic(fmt.Sprintf("Unknown IndexReader type: %v", reflect.TypeOf(parent).Name())) 170 } 171 } 172 } 173 174 /* Returns the number of deleted documents. */ 175 func (r *IndexReaderImpl) numDeletedDocs() int { 176 return r.MaxDoc() - r.NumDocs() 177 } 178 179 func (r *IndexReaderImpl) Document(docID int) (doc *docu.Document, err error) { 180 visitor := docu.NewDocumentStoredFieldVisitor() 181 if err = r.VisitDocument(docID, visitor); err != nil { 182 return nil, err 183 } 184 return visitor.Document(), nil 185 } 186 187 /* 188 Returns true if any documents have been deleted. Implementers should 189 consider overriding this method if maxDoc() or numDocs() are not 190 constant-time operations. 191 */ 192 func (r *IndexReaderImpl) hasDeletions() bool { 193 return r.numDeletedDocs() > 0 194 } 195 196 func (r *IndexReaderImpl) Close() error { 197 r.lock.Lock() 198 defer r.lock.Unlock() 199 if !r.closed { 200 if err := r.decRef(); err != nil { 201 return err 202 } 203 r.closed = true 204 } 205 return nil 206 } 207 208 func (r *IndexReaderImpl) Leaves() []*AtomicReaderContext { 209 return r.Context().Leaves() 210 } 211 212 type IndexReaderContext interface { 213 Reader() IndexReader 214 Parent() *CompositeReaderContext 215 Leaves() []*AtomicReaderContext 216 Children() []IndexReaderContext 217 } 218 219 type IndexReaderContextImpl struct { 220 parent *CompositeReaderContext 221 isTopLevel bool 222 docBaseInParent int 223 ordInParent int 224 } 225 226 func newIndexReaderContext(parent *CompositeReaderContext, ordInParent, docBaseInParent int) *IndexReaderContextImpl { 227 return &IndexReaderContextImpl{ 228 parent: parent, 229 isTopLevel: parent == nil, 230 docBaseInParent: docBaseInParent, 231 ordInParent: ordInParent} 232 } 233 234 func (ctx *IndexReaderContextImpl) Parent() *CompositeReaderContext { 235 return ctx.parent 236 } 237 238 type ARFieldsReader interface { 239 Terms(field string) Terms 240 Fields() Fields 241 LiveDocs() util.Bits 242 /** Returns {@link NumericDocValues} representing norms 243 * for this field, or null if no {@link NumericDocValues} 244 * were indexed. The returned instance should only be 245 * used by a single thread. */ 246 NormValues(field string) (ndv NumericDocValues, err error) 247 } 248 249 type AtomicReader interface { 250 IndexReader 251 ARFieldsReader 252 } 253 254 type AtomicReaderImplSPI interface { 255 IndexReaderImplSPI 256 ARFieldsReader 257 } 258 259 type AtomicReaderImpl struct { 260 *IndexReaderImpl 261 ARFieldsReader 262 263 readerContext *AtomicReaderContext 264 } 265 266 func newAtomicReader(spi AtomicReaderImplSPI) *AtomicReaderImpl { 267 r := &AtomicReaderImpl{ 268 IndexReaderImpl: newIndexReader(spi), 269 ARFieldsReader: spi, 270 } 271 r.readerContext = newAtomicReaderContextFromReader(r) 272 return r 273 } 274 275 func (r *AtomicReaderImpl) Context() IndexReaderContext { 276 r.ensureOpen() 277 return r.readerContext 278 } 279 280 func (r *AtomicReaderImpl) DocFreq(term *Term) (int, error) { 281 if fields := r.Fields(); fields != nil { 282 if terms := fields.Terms(term.Field); terms != nil { 283 termsEnum := terms.Iterator(nil) 284 ok, err := termsEnum.SeekExact(term.Bytes) 285 if err != nil { 286 return 0, err 287 } 288 if ok { 289 return termsEnum.DocFreq() 290 } 291 } 292 } 293 return 0, nil 294 } 295 296 func (r *AtomicReaderImpl) TotalTermFreq(term *Term) (n int64, err error) { 297 panic("not implemented yet") 298 } 299 300 func (r *AtomicReaderImpl) SumDocFreq(field string) (n int64, err error) { 301 panic("not implemented yet") 302 } 303 304 func (r *AtomicReaderImpl) DocCount(field string) (n int, err error) { 305 panic("not implemented yet") 306 } 307 308 func (r *AtomicReaderImpl) SumTotalTermFreq(field string) (n int64, err error) { 309 panic("not implemented yet") 310 } 311 312 func (r *AtomicReaderImpl) Terms(field string) Terms { 313 fields := r.Fields() 314 if fields == nil { 315 return nil 316 } 317 return fields.Terms(field) 318 } 319 320 type AtomicReaderContext struct { 321 *IndexReaderContextImpl 322 Ord, DocBase int 323 reader AtomicReader 324 leaves []*AtomicReaderContext 325 } 326 327 func (ctx *AtomicReaderContext) String() string { 328 return fmt.Sprintf("AtomicReaderContext{%v ord=%v docBase=%v %v}", 329 ctx.IndexReaderContextImpl, ctx.Ord, ctx.DocBase, ctx.reader) 330 } 331 332 func newAtomicReaderContextFromReader(r AtomicReader) *AtomicReaderContext { 333 return newAtomicReaderContext(nil, r, 0, 0, 0, 0) 334 } 335 336 func newAtomicReaderContext(parent *CompositeReaderContext, reader AtomicReader, ord, docBase, leafOrd, leafDocBase int) *AtomicReaderContext { 337 ans := &AtomicReaderContext{} 338 ans.IndexReaderContextImpl = newIndexReaderContext(parent, ord, docBase) 339 ans.Ord = leafOrd 340 ans.DocBase = leafDocBase 341 ans.reader = reader 342 if ans.isTopLevel { 343 ans.leaves = []*AtomicReaderContext{ans} 344 } 345 return ans 346 } 347 348 func (ctx *AtomicReaderContext) Leaves() []*AtomicReaderContext { 349 if !ctx.IndexReaderContextImpl.isTopLevel { 350 panic("This is not a top-level context.") 351 } 352 // assert leaves != null 353 return ctx.leaves 354 } 355 356 func (ctx *AtomicReaderContext) Children() []IndexReaderContext { 357 return nil 358 } 359 360 func (ctx *AtomicReaderContext) Reader() IndexReader { 361 return ctx.reader 362 }