github.com/balzaczyy/golucene@v0.0.0-20151210033525-d0be9ee89713/core/index/segments.go (about) 1 package index 2 3 import ( 4 "strconv" 5 ) 6 7 import ( 8 "fmt" 9 // docu "github.com/balzaczyy/golucene/core/document" 10 . "github.com/balzaczyy/golucene/core/codec/spi" 11 . "github.com/balzaczyy/golucene/core/index/model" 12 "github.com/balzaczyy/golucene/core/store" 13 "github.com/balzaczyy/golucene/core/util" 14 "sync/atomic" 15 ) 16 17 // index/SegmentReader.java 18 19 /** 20 * IndexReader implementation over a single segment. 21 * <p> 22 * Instances pointing to the same segment (but with different deletes, etc) 23 * may share the same core data. 24 * @lucene.experimental 25 */ 26 type SegmentReader struct { 27 *AtomicReaderImpl 28 si *SegmentCommitInfo 29 liveDocs util.Bits 30 // Normally set to si.docCount - si.delDocCount, unless we 31 // were created as an NRT reader from IW, in which case IW 32 // tells us the docCount: 33 numDocs int 34 core *SegmentCoreReaders 35 36 fieldInfos FieldInfos 37 } 38 39 /** 40 * Constructs a new SegmentReader with a new core. 41 * @throws CorruptIndexException if the index is corrupt 42 * @throws IOException if there is a low-level IO error 43 */ 44 // TODO: why is this public? 45 func NewSegmentReader(si *SegmentCommitInfo, 46 termInfosIndexDivisor int, context store.IOContext) (r *SegmentReader, err error) { 47 48 r = &SegmentReader{} 49 r.AtomicReaderImpl = newAtomicReader(r) 50 r.ARFieldsReader = r 51 52 r.si = si 53 if r.fieldInfos, err = ReadFieldInfos(si); err != nil { 54 return nil, err 55 } 56 // log.Print("Obtaining SegmentCoreReaders...") 57 if r.core, err = newSegmentCoreReaders(r, si.Info.Dir, si, context, termInfosIndexDivisor); err != nil { 58 return nil, err 59 } 60 // r.segDocValues = newSegmentDocValues() 61 62 var success = false 63 defer func() { 64 // With lock-less commits, it's entirely possible (and 65 // fine) to hit a FileNotFound exception above. In 66 // this case, we want to explicitly close any subset 67 // of things that were opened so that we don't have to 68 // wait for a GC to do so. 69 if !success { 70 // log.Printf("Failed to initialize SegmentReader.") 71 r.core.decRef() 72 } 73 }() 74 75 codec := si.Info.Codec().(Codec) 76 if si.HasDeletions() { 77 panic("not supported yet") 78 } else { 79 assert(si.DelCount() == 0) 80 } 81 r.numDocs = si.Info.DocCount() - si.DelCount() 82 83 if r.fieldInfos.HasDocValues { 84 r.initDocValuesProducers(codec) 85 } 86 success = true 87 return r, nil 88 } 89 90 /* initialize the per-field DocValuesProducer */ 91 func (r *SegmentReader) initDocValuesProducers(codec Codec) error { 92 // var dir store.Directory 93 // if r.core.cfsReader != nil { 94 // dir = r.core.cfsReader 95 // } else { 96 // dir = r.si.Info.Dir 97 // } 98 // dvFormat := codec.DocValuesFormat() 99 100 // termsIndexDivisor := r.core.termsIndexDivisor 101 if !r.si.HasFieldUpdates() { 102 panic("not implemented yet") 103 } 104 105 panic("not implemented yet") 106 } 107 108 /* Reads the most recent FieldInfos of the given segment info. */ 109 func ReadFieldInfos(info *SegmentCommitInfo) (fis FieldInfos, err error) { 110 var dir store.Directory 111 var closeDir bool 112 if info.FieldInfosGen() == -1 && info.Info.IsCompoundFile() { 113 // no fieldInfos gen and segment uses a compound file 114 if dir, err = store.NewCompoundFileDirectory(info.Info.Dir, 115 util.SegmentFileName(info.Info.Name, "", store.COMPOUND_FILE_EXTENSION), 116 store.IO_CONTEXT_READONCE, false); err != nil { 117 return 118 } 119 closeDir = true 120 } else { 121 // gen'd FIS are read outside CFS, or the segment doesn't use a compound file 122 dir = info.Info.Dir 123 closeDir = false 124 } 125 126 defer func() { 127 if closeDir { 128 err = mergeError(err, dir.Close()) 129 } 130 }() 131 132 var segmentSuffix string 133 if n := info.FieldInfosGen(); n != -1 { 134 segmentSuffix = strconv.FormatInt(n, 36) 135 } 136 codec := info.Info.Codec().(Codec) 137 fisFormat := codec.FieldInfosFormat() 138 return fisFormat.FieldInfosReader()(dir, info.Info.Name, segmentSuffix, store.IO_CONTEXT_READONCE) 139 } 140 141 func (r *SegmentReader) LiveDocs() util.Bits { 142 r.ensureOpen() 143 return r.liveDocs 144 } 145 146 func (r *SegmentReader) doClose() error { 147 panic("not implemented yet") 148 r.core.decRef() 149 return nil 150 } 151 152 func (r *SegmentReader) FieldInfos() FieldInfos { 153 r.ensureOpen() 154 return r.fieldInfos 155 } 156 157 // Expert: retrieve thread-private StoredFieldsReader 158 func (r *SegmentReader) FieldsReader() StoredFieldsReader { 159 r.ensureOpen() 160 return r.core.fieldsReaderLocal() 161 } 162 163 func (r *SegmentReader) VisitDocument(docID int, visitor StoredFieldVisitor) error { 164 r.checkBounds(docID) 165 return r.FieldsReader().VisitDocument(docID, visitor) 166 } 167 168 func (r *SegmentReader) Fields() Fields { 169 r.ensureOpen() 170 return r.core.fields 171 } 172 173 func (r *SegmentReader) NumDocs() int { 174 // Don't call ensureOpen() here (it could affect performance) 175 return r.numDocs 176 } 177 178 func (r *SegmentReader) MaxDoc() int { 179 // Don't call ensureOpen() here (it could affect performance) 180 return r.si.Info.DocCount() 181 } 182 183 func (r *SegmentReader) TermVectorsReader() TermVectorsReader { 184 panic("not implemented yet") 185 } 186 187 func (r *SegmentReader) TermVectors(docID int) (fs Fields, err error) { 188 panic("not implemented yet") 189 } 190 191 func (r *SegmentReader) checkBounds(docID int) { 192 if docID < 0 || docID >= r.MaxDoc() { 193 panic(fmt.Sprintf("docID must be >= 0 and < maxDoc=%v (got docID=%v)", r.MaxDoc(), docID)) 194 } 195 } 196 197 // SegmentReader.java L179 198 func (r *SegmentReader) String() string { 199 // SegmentInfo.toString takes dir and number of 200 // *pending* deletions; so we reverse compute that here: 201 return r.si.StringOf(r.si.Info.Dir, r.si.Info.DocCount()-r.numDocs-r.si.DelCount()) 202 } 203 204 func (r *SegmentReader) SegmentName() string { 205 return r.si.Info.Name 206 } 207 208 func (r *SegmentReader) SegmentInfos() *SegmentCommitInfo { 209 return r.si 210 } 211 212 func (r *SegmentReader) Directory() store.Directory { 213 // Don't ensureOpen here -- in certain cases, when a 214 // cloned/reopened reader needs to commit, it may call 215 // this method on the closed original reader 216 return r.si.Info.Dir 217 } 218 219 func (r *SegmentReader) CoreCacheKey() interface{} { 220 return r.core 221 } 222 223 func (r *SegmentReader) CombinedCoreAndDeletesKey() interface{} { 224 return r 225 } 226 227 func (r *SegmentReader) TermInfosIndexDivisor() int { 228 return r.core.termsIndexDivisor 229 } 230 231 func (r *SegmentReader) NumericDocValues(field string) (v NumericDocValues, err error) { 232 r.ensureOpen() 233 panic("not implemented yet") 234 } 235 236 func (r *SegmentReader) BinaryDocValues(field string) (v BinaryDocValues, err error) { 237 r.ensureOpen() 238 panic("not implemented yet") 239 } 240 241 func (r *SegmentReader) SortedDocValues(field string) (v SortedDocValues, err error) { 242 r.ensureOpen() 243 panic("not implemented yet") 244 } 245 246 func (r *SegmentReader) SortedSetDocValues(field string) (v SortedSetDocValues, err error) { 247 r.ensureOpen() 248 panic("not implemented yet") 249 } 250 251 func (r *SegmentReader) NormValues(field string) (v NumericDocValues, err error) { 252 r.ensureOpen() 253 return r.core.normValues(r.fieldInfos, field) 254 } 255 256 type CoreClosedListener interface { 257 onClose(r interface{}) 258 } 259 260 // index/SegmentCoreReaders.java 261 262 type SegmentCoreReaders struct { 263 refCount int32 // synchronized 264 265 fields FieldsProducer 266 normsProducer DocValuesProducer 267 268 termsIndexDivisor int 269 270 owner *SegmentReader 271 272 fieldsReaderOrig StoredFieldsReader 273 termVectorsReaderOrig TermVectorsReader 274 cfsReader *store.CompoundFileDirectory 275 276 /* 277 Lucene Java use ThreadLocal to serve as thread-level cache, to avoid 278 expensive read actions while limit memory consumption. Since Go doesn't 279 have thread or routine Local, a new object is always returned. 280 281 TODO redesign when ported to goroutines 282 */ 283 fieldsReaderLocal func() StoredFieldsReader 284 normsLocal func() map[string]interface{} 285 286 addListener chan CoreClosedListener 287 removeListener chan CoreClosedListener 288 notifyListener chan bool 289 } 290 291 func newSegmentCoreReaders(owner *SegmentReader, dir store.Directory, si *SegmentCommitInfo, 292 context store.IOContext, termsIndexDivisor int) (self *SegmentCoreReaders, err error) { 293 294 assert2(termsIndexDivisor != 0, 295 "indexDivisor must be < 0 (don't load terms index) or greater than 0 (got 0)") 296 // fmt.Println("Initializing SegmentCoreReaders from directory:", dir) 297 298 self = &SegmentCoreReaders{ 299 refCount: 1, 300 normsLocal: func() map[string]interface{} { 301 return make(map[string]interface{}) 302 }, 303 } 304 self.fieldsReaderLocal = func() StoredFieldsReader { 305 return self.fieldsReaderOrig.Clone() 306 } 307 308 // fmt.Println("Initializing listeners...") 309 self.addListener = make(chan CoreClosedListener) 310 self.removeListener = make(chan CoreClosedListener) 311 self.notifyListener = make(chan bool) 312 // TODO re-enable later 313 go func() { // ensure listners are synchronized 314 coreClosedListeners := make([]CoreClosedListener, 0) 315 isRunning := true 316 var listener CoreClosedListener 317 for isRunning { 318 // fmt.Println("Listening for events...") 319 select { 320 case listener = <-self.addListener: 321 coreClosedListeners = append(coreClosedListeners, listener) 322 case listener = <-self.removeListener: 323 n := len(coreClosedListeners) 324 for i, v := range coreClosedListeners { 325 if v == listener { 326 newListeners := make([]CoreClosedListener, 0, n-1) 327 newListeners = append(newListeners, coreClosedListeners[0:i]...) 328 newListeners = append(newListeners, coreClosedListeners[i+1:]...) 329 coreClosedListeners = newListeners 330 break 331 } 332 } 333 case <-self.notifyListener: 334 fmt.Println("Shutting down SegmentCoreReaders...") 335 isRunning = false 336 for _, v := range coreClosedListeners { 337 v.onClose(self) 338 } 339 } 340 } 341 fmt.Println("Listeners are done.") 342 }() 343 344 var success = false 345 ans := self 346 defer func() { 347 if !success { 348 fmt.Println("Failed to initialize SegmentCoreReaders.") 349 ans.decRef() 350 } 351 }() 352 353 codec := si.Info.Codec().(Codec) 354 // fmt.Println("Obtaining CFS Directory...") 355 var cfsDir store.Directory // confusing name: if (cfs) its the cfsdir, otherwise its the segment's directory. 356 if si.Info.IsCompoundFile() { 357 // fmt.Println("Detected CompoundFile.") 358 name := util.SegmentFileName(si.Info.Name, "", store.COMPOUND_FILE_EXTENSION) 359 if self.cfsReader, err = store.NewCompoundFileDirectory(dir, name, context, false); err != nil { 360 return nil, err 361 } 362 // fmt.Println("CompoundFileDirectory: ", self.cfsReader) 363 cfsDir = self.cfsReader 364 } else { 365 cfsDir = dir 366 } 367 // fmt.Println("CFS Directory:", cfsDir) 368 369 // fmt.Println("Reading FieldInfos...") 370 fieldInfos := owner.fieldInfos 371 372 self.termsIndexDivisor = termsIndexDivisor 373 format := codec.PostingsFormat() 374 375 // fmt.Println("Obtaining SegmentReadState...") 376 segmentReadState := NewSegmentReadState(cfsDir, si.Info, fieldInfos, context, termsIndexDivisor) 377 // Ask codec for its Fields 378 // fmt.Println("Obtaining FieldsProducer...") 379 if self.fields, err = format.FieldsProducer(segmentReadState); err != nil { 380 return nil, err 381 } 382 assert(self.fields != nil) 383 // ask codec for its Norms: 384 // TODO: since we don't write any norms file if there are no norms, 385 // kinda jaky to assume the codec handles the case of no norms file at all gracefully?! 386 387 if fieldInfos.HasNorms { 388 // fmt.Println("Obtaining NormsDocValuesProducer...") 389 if self.normsProducer, err = codec.NormsFormat().NormsProducer(segmentReadState); err != nil { 390 return nil, err 391 } 392 assert(self.normsProducer != nil) 393 } 394 395 // fmt.Println("Obtaining StoredFieldsReader...") 396 if self.fieldsReaderOrig, err = si.Info.Codec().(Codec).StoredFieldsFormat().FieldsReader(cfsDir, si.Info, fieldInfos, context); err != nil { 397 return nil, err 398 } 399 400 if fieldInfos.HasVectors { // open term vector files only as needed 401 // fmt.Println("Obtaining TermVectorsReader...") 402 if self.termVectorsReaderOrig, err = si.Info.Codec().(Codec).TermVectorsFormat().VectorsReader(cfsDir, si.Info, fieldInfos, context); err != nil { 403 return nil, err 404 } 405 } 406 407 // fmt.Println("Success") 408 success = true 409 410 return self, nil 411 } 412 413 func (r *SegmentCoreReaders) normValues(infos FieldInfos, 414 field string) (ndv NumericDocValues, err error) { 415 416 if norms, ok := r.normsLocal()[field]; ok { 417 ndv = norms.(NumericDocValues) 418 } else if fi := infos.FieldInfoByName(field); fi != nil && fi.HasNorms() { 419 assert(r.normsProducer != nil) 420 if ndv, err = r.normsProducer.Numeric(fi); err == nil { 421 r.normsLocal()[field] = norms 422 } // else Field does not exist 423 } 424 return 425 } 426 427 func (r *SegmentCoreReaders) decRef() { 428 if atomic.AddInt32(&r.refCount, -1) == 0 { 429 fmt.Println("--- closing core readers") 430 util.Close( /*self.termVectorsLocal, self.fieldsReaderLocal, r.normsLocal,*/ 431 r.fields, r.termVectorsReaderOrig, r.fieldsReaderOrig, 432 r.cfsReader, r.normsProducer) 433 r.notifyListener <- true 434 } 435 }