github.com/balzaczyy/golucene@v0.0.0-20151210033525-d0be9ee89713/core/index/writer.go (about) 1 package index 2 3 import ( 4 "container/list" 5 "errors" 6 "fmt" 7 "github.com/balzaczyy/golucene/core/analysis" 8 . "github.com/balzaczyy/golucene/core/codec/spi" 9 . "github.com/balzaczyy/golucene/core/index/model" 10 "github.com/balzaczyy/golucene/core/store" 11 "github.com/balzaczyy/golucene/core/util" 12 "log" 13 "math" 14 "os" 15 "runtime" 16 "sort" 17 "strconv" 18 "sync" 19 "sync/atomic" 20 "time" 21 ) 22 23 // index/IndexCommit.java 24 25 /* 26 Expert: represents a single commit into an index as seen by the 27 IndexDeletionPolicy or IndexReader. 28 29 Changes to the content of an index are made visible only after the 30 writer who made that change commits by writing a new segments file 31 (segments_N). This point in time, when the action of writing of a new 32 segments file to the directory is completed, is an index commit. 33 34 Each index commit oint has a unique segments file associated with it. 35 The segments file associated with a later index commit point would 36 have a larger N. 37 */ 38 type IndexCommit interface { 39 // Get the segments file (segments_N) associated with the commit point. 40 SegmentsFileName() string 41 // Returns all index files referenced by this commit point. 42 FileNames() []string 43 // Returns the Directory for the index. 44 Directory() store.Directory 45 /* 46 Delete this commit point. This only applies when using the commit 47 point in the context of IndexWriter's IndexDeletionPolicy. 48 49 Upon calling this, the writer is notified that this commit point 50 should be deleted. 51 52 Decision that a commit-point should be deleted is taken by the 53 IndexDeletionPolicy in effect and therefore this should only be 54 called by its onInit() or onCommit() methods. 55 */ 56 Delete() 57 // Returns true if this commit should be deleted; this is only used 58 // by IndexWriter after invoking the IndexDeletionPolicy. 
59 IsDeleted() bool 60 // returns number of segments referenced by this commit. 61 SegmentCount() int 62 // Returns the generation (the _N in segments_N) for this IndexCommit 63 Generation() int64 64 // Returns userData, previously passed to SetCommitData(map) for this commit. 65 UserData() map[string]string 66 } 67 68 type IndexCommits []IndexCommit 69 70 func (s IndexCommits) Len() int { return len(s) } 71 func (s IndexCommits) Swap(i, j int) { s[i], s[j] = s[j], s[i] } 72 func (s IndexCommits) Less(i, j int) bool { 73 if s[i].Directory() != s[j].Directory() { 74 panic("cannot compare IndexCommits from different Directory instances") 75 } 76 return s[i].Generation() < s[j].Generation() 77 } 78 79 // Used by search package to assign a default similarity 80 var DefaultSimilarity func() Similarity 81 82 // index/IndexWriter.java 83 84 // Use a seprate goroutine to protect closing control 85 type ClosingControl struct { 86 _closed bool // volatile 87 _closing bool // volatile 88 closer chan func() (bool, error) 89 done chan error 90 } 91 92 func newClosingControl() *ClosingControl { 93 ans := &ClosingControl{ 94 closer: make(chan func() (bool, error)), 95 done: make(chan error), 96 } 97 go ans.daemon() 98 return ans 99 } 100 101 func (cc *ClosingControl) daemon() { 102 var err error 103 for !cc._closed { 104 err = nil 105 select { 106 case f := <-cc.closer: 107 log.Println("...closing...") 108 if !cc._closed { 109 cc._closing = true 110 cc._closed, err = f() 111 cc._closing = false 112 } 113 cc.done <- err 114 } 115 } 116 log.Println("IW CC daemon is stopped.") 117 } 118 119 // Used internally to throw an AlreadyClosedError if this IndexWriter 120 // has been closed or is in the process of closing. 
121 func (cc *ClosingControl) ensureOpen(failIfClosing bool) { 122 assert2(!cc._closed && (!failIfClosing || !cc._closing), "this IndexWriter is closed") 123 } 124 125 func (cc *ClosingControl) close(f func() (ok bool, err error)) error { 126 if cc._closed { 127 return nil // already closed 128 } 129 cc.closer <- f 130 log.Println("Closing IW...") 131 return <-cc.done 132 } 133 134 /* 135 Hard limit on maximum number of documents that may be added to the 136 index. If you try to add more than this, you'll hit panic. 137 */ 138 const MAX_DOCS = math.MaxInt32 - 128 139 140 /* test only */ 141 var actualMaxDocs = MAX_DOCS 142 143 const UNBOUNDED_MAX_MERGE_SEGMENTS = -1 144 145 /* Name of the write lock in the index. */ 146 const WRITE_LOCK_NAME = "write.lock" 147 148 /* Source of a segment which results from a flush. */ 149 const SOURCE_FLUSH = "flush" 150 151 /* 152 Absolute hard maximum length for a term, in bytes once encoded as 153 UTF8. If a term arrives from the analyzer longer than this length, 154 it panics and a message is printed to infoStream, if set (see 155 SetInfoStream()). 156 */ 157 const MAX_TERM_LENGTH = MAX_TERM_LENGTH_UTF8 158 159 /* 160 An IndexWriter creates and maintains an index. 161 162 The OpenMode option on IndexWriterConfig.SetOpenMode() determines 163 whether a new index is created, or whether an existing index is 164 opened. Note that you can open an index with OPEN_MODE_CREATE even 165 while readers are using the index. The old readers will continue to 166 search the "point in time" snapshot they had opened, and won't see 167 the newly created index until they re-open. If OPEN_MODE_CREATE_OR_APPEND 168 is used, IndexWriter will create a new index if there is not already 169 an index at the provided path and otherwise open th existing index. 170 171 In either case, documents are added with AddDocument() and removed 172 with DeleteDocumentsByTerm() or DeleteDocumentsByQuery(). 
A document 173 can be updated with UpdateDocuments() (which just deletes and then 174 adds the entire document). When finished adding, deleting and 175 updating documents, Close() should be called. 176 177 ... 178 */ 179 type IndexWriter struct { 180 sync.Locker 181 *ClosingControl 182 *MergeControl 183 184 // when unrecoverable disaster strikes, we populate this with the 185 // reason that we had to close IndexWriter 186 tragedy error // volatile 187 188 directory store.Directory // where this index resides 189 analyzer analysis.Analyzer // how to analyze text 190 191 changeCount int64 // volatile, increments every time a change is completed 192 lastCommitChangeCount int64 // volatile, last changeCount that was committed 193 194 rollbackSegments []*SegmentCommitInfo // list of segmentInfo we will fallback to if the commit fails 195 196 pendingCommit *SegmentInfos // set when a commit is pending (after prepareCommit() & before commit()) 197 pendingCommitChangeCount int64 // volatile 198 199 filesToCommit []string 200 201 segmentInfos *SegmentInfos // the segments 202 globalFieldNumberMap *FieldNumbers 203 204 docWriter *DocumentsWriter 205 eventQueue *list.List 206 deleter *IndexFileDeleter 207 208 // used by forceMerge to note those needing merging 209 segmentsToMerge map[*SegmentCommitInfo]bool 210 211 writeLock store.Lock 212 213 mergeScheduler MergeScheduler 214 mergeExceptions []*OneMerge 215 didMessageState bool 216 217 flushCount int32 // atomic 218 flushDeletesCount int32 // atomic 219 220 readerPool *ReaderPool 221 bufferedUpdatesStream *BufferedUpdatesStream 222 223 bufferedUpdatesStreamLock sync.Locker 224 225 // This is a "write once" variable (like the organic dye on a DVD-R 226 // that may or may not be heated by a laser and then cooled to 227 // permanently record the event): it's false, until Reader() is 228 // called for the first time, at which point it's switched to true 229 // and never changes back to false. 
Once this is true, we hold open 230 // and reuse SegmentReader instances internally for applying 231 // deletes, doing merges, and reopening near real-time readers. 232 poolReaders bool 233 234 // The instance that we passed to the constructor. It is saved only 235 // in order to allow users to query an IndexWriter settings. 236 config LiveIndexWriterConfig 237 238 // time.Now() when commits started; used to write an infoStream 239 // message about how long commit took. 240 startCommitTime time.Time 241 242 // How many documents are in the index, or are in the process of 243 // being added (reserved). E.g., operations like addIndexes will 244 // first reserve the right to add N docs, before they actually 245 // charge the index, much like how hotels place an "authorization 246 // hold" on your credit card to make sure they can later charge you 247 // when you checkout. 248 pendingNumDocs int64 249 250 codec Codec // for writing new segments 251 252 // If non-nil, information about merges will be printed to this. 253 infoStream util.InfoStream 254 255 // A hook for extending classes to execute operations after pending 256 // and deleted documents have been flushed ot the Directory but 257 // before the change is committed (new segments_N file written). 258 doAfterFlush func() error 259 // A hook for extending classes to execute operations before 260 // pending added and deleted documents are flushed to the Directory. 261 doBeforeFlush func() error 262 263 // Used only by commit and prepareCommit, below; lock order is 264 // commitLock -> IW 265 commitLock sync.Locker 266 267 // Ensures only one flush() is actually flushing segments at a time: 268 fullFlushLock sync.Locker 269 270 keepFullyDeletedSegments bool // test only 271 } 272 273 /* 274 Used internally to throw an AlreadyClosedError if this IndexWriter 275 has been closed or is in the process of closing. 276 277 Calls ensureOpen(true). 
278 */ 279 func (w *IndexWriter) ensureOpen() { 280 w.ClosingControl.ensureOpen(true) 281 } 282 283 /* 284 Constructs a new IndexWriter per the settings given in conf. If you want to 285 make "live" changes to this writer instance, use Config(). 286 287 NOTE: after this writer is created, the given configuration instance cannot be 288 passed to another writer. If you intend to do so, you should clone it 289 beforehand. 290 */ 291 func NewIndexWriter(d store.Directory, conf *IndexWriterConfig) (w *IndexWriter, err error) { 292 ans := &IndexWriter{ 293 Locker: &sync.Mutex{}, 294 ClosingControl: newClosingControl(), 295 296 segmentsToMerge: make(map[*SegmentCommitInfo]bool), 297 mergeExceptions: make([]*OneMerge, 0), 298 doAfterFlush: func() error { return nil }, 299 doBeforeFlush: func() error { return nil }, 300 commitLock: &sync.Mutex{}, 301 fullFlushLock: &sync.Mutex{}, 302 303 config: conf, 304 directory: d, 305 analyzer: conf.analyzer, 306 infoStream: conf.infoStream, 307 mergeScheduler: conf.mergeScheduler, 308 codec: conf.codec, 309 310 bufferedUpdatesStream: newBufferedUpdatesStream(conf.infoStream), 311 poolReaders: conf.readerPooling, 312 313 bufferedUpdatesStreamLock: &sync.Mutex{}, 314 315 writeLock: d.MakeLock(WRITE_LOCK_NAME), 316 } 317 ans.readerPool = newReaderPool(ans) 318 ans.MergeControl = newMergeControl(conf.infoStream, ans.readerPool) 319 320 conf.setIndexWriter(ans) 321 322 // obtain write lock 323 if ok, err := ans.writeLock.ObtainWithin(conf.writeLockTimeout); !ok || err != nil { 324 if err != nil { 325 return nil, err 326 } 327 return nil, errors.New(fmt.Sprintf("Index locked for write: %v", ans.writeLock)) 328 } 329 330 var success bool = false 331 defer func() { 332 if !success { 333 if ans.infoStream.IsEnabled("IW") { 334 ans.infoStream.Message("IW", "init: hit exception on init; releasing write lock") 335 } 336 util.CloseWhileSuppressingError(ans.writeLock) // don't mask the original exception 337 ans.writeLock = nil 338 } 339 }() 340 
341 var create bool 342 switch conf.openMode { 343 case OPEN_MODE_CREATE: 344 create = true 345 case OPEN_MODE_APPEND: 346 create = false 347 default: 348 // CREATE_OR_APPEND - create only if an index does not exist 349 ok, err := IsIndexExists(d) 350 if err != nil { 351 return nil, err 352 } 353 create = !ok 354 } 355 356 // If index is too old, reading the segments will return 357 // IndexFormatTooOldError 358 ans.segmentInfos = &SegmentInfos{} 359 360 var initialIndexExists bool = true 361 362 if create { 363 // Try to read first. This is to allow create against an index 364 // that's currently open for searching. In this case we write the 365 // next segments_N file with no segments: 366 err = ans.segmentInfos.ReadAll(d) 367 if err == nil { 368 ans.segmentInfos.Clear() 369 } else { 370 // Likely this means it's a fresh directory 371 initialIndexExists = false 372 err = nil 373 } 374 375 // Record that we have a change (zero out all segments) pending: 376 ans.changed() 377 } else { 378 err = ans.segmentInfos.ReadAll(d) 379 if err != nil { 380 return 381 } 382 383 if commit := conf.commit; commit != nil { 384 // Swap out all segments, but, keep metadta in SegmentInfos, 385 // like version & generation, to preserve write-once. This is 386 // important if readers are open against the future commit 387 // points. 
388 assert2(commit.Directory() == d, 389 "IndexCommit's directory doesn't match my directory") 390 oldInfos := &SegmentInfos{} 391 ans.segmentInfos.replace(oldInfos) 392 ans.changed() 393 ans.infoStream.Message("IW", "init: loaded commit '%v'", 394 commit.SegmentsFileName()) 395 } 396 } 397 398 ans.rollbackSegments = ans.segmentInfos.createBackupSegmentInfos() 399 400 // start with previous field numbers, but new FieldInfos 401 ans.globalFieldNumberMap, err = ans.fieldNumberMap() 402 if err != nil { 403 return 404 } 405 ans.config.flushPolicy().init(ans.config) 406 ans.docWriter = newDocumentsWriter(ans, ans.config, d) 407 ans.eventQueue = ans.docWriter.events 408 409 // Default deleter (for backwards compatibility) is 410 // KeepOnlyLastCommitDeleter: 411 ans.deleter, err = newIndexFileDeleter(d, conf.delPolicy, 412 ans.segmentInfos, ans.infoStream, ans, initialIndexExists) 413 if err != nil { 414 return 415 } 416 417 if ans.deleter.startingCommitDeleted { 418 // Deletion policy deleted the "head" commit point. We have to 419 // mark outsef as changed so that if we are closed w/o any 420 // further changes we write a new segments_N file. 
421 ans.changed() 422 } 423 424 if ans.infoStream.IsEnabled("IW") { 425 ans.infoStream.Message("IW", "init: create=%v", create) 426 ans.messageState() 427 } 428 429 success = true 430 return ans, nil 431 } 432 433 // func (w *IndexWriter) fieldInfos(info *SegmentInfo) (infos FieldInfos, err error) { 434 // var cfsDir store.Directory 435 // if info.IsCompoundFile() { 436 // cfsDir, err = store.NewCompoundFileDirectory( 437 // info.Dir, 438 // util.SegmentFileName(info.Name, "", store.COMPOUND_FILE_EXTENSION), 439 // store.IO_CONTEXT_READONCE, 440 // false, 441 // ) 442 // if err != nil { 443 // return 444 // } 445 // assert(cfsDir != nil) 446 // defer func() { 447 // err = mergeError(err, cfsDir.Close()) 448 // }() 449 // } else { 450 // cfsDir = info.Dir 451 // } 452 // return info.Codec().(Codec).FieldInfosFormat().FieldInfosReader()( 453 // cfsDir, info.Name, store.IO_CONTEXT_READONCE) 454 // } 455 456 /* 457 Loads or returns the alread loaded the global field number map for 458 this SegmentInfos. If this SegmentInfos has no global field number 459 map the returned instance is empty. 460 */ 461 func (w *IndexWriter) fieldNumberMap() (m *FieldNumbers, err error) { 462 m = NewFieldNumbers() 463 for _, info := range w.segmentInfos.Segments { 464 fis, err := ReadFieldInfos(info) 465 if err != nil { 466 return nil, err 467 } 468 for _, fi := range fis.Values { 469 m.AddOrGet(fi) 470 } 471 } 472 return m, nil 473 } 474 475 func (w *IndexWriter) messageState() { 476 if w.infoStream.IsEnabled("IW") && !w.didMessageState { 477 w.didMessageState = true 478 w.infoStream.Message("IW", "\ndir=%v\nindex=%v\nversion=%v\n%v", 479 w.directory, w.segString(), util.VERSION_LATEST, w.config) 480 } 481 } 482 483 /* 484 Commits all changes to an index, wait for pending merges to complete, 485 and closes all associate files. 486 487 Note that: 488 1. If you called prepare Commit but failed to call commit, this 489 method will panic and the IndexWriter will not be closed. 490 2. 
If this method throws any other exception, the IndexWriter will 491 be closed, but changes may have been lost. 492 493 Note that this may be a costly operation, so, try to re-use a single 494 writer instead of closing and opening a new one. See commit() for 495 caveats about write caching done by some IO devices. 496 497 NOTE: You must ensure no other threads are still making changes at 498 the same time that this method is invoked. 499 */ 500 func (w *IndexWriter) Close() error { 501 assert2(w.pendingCommit == nil, 502 "cannot close: prepareCommit was already called with no corresponding call to commit") 503 // Ensure that only one goroutine actaully gets to do the closing 504 w.commitLock.Lock() 505 defer w.commitLock.Unlock() 506 return w.close(func() (ok bool, err error) { 507 defer func() { 508 if !ok { // be certain to close the index on any error 509 defer recover() // suppress so we keep returning original error 510 w.rollbackInternal() 511 } 512 }() 513 if w.infoStream.IsEnabled("IW") { 514 w.infoStream.Message("IW", "now flush at close") 515 } 516 if err = w.flush(true, true); err != nil { 517 return 518 } 519 w.waitForMerges() 520 if err = w.commitInternal(w.config.MergePolicy()); err != nil { 521 return 522 } 523 return w.rollbackInternal() // ie close, since we just committed 524 }) 525 } 526 527 // Retuns the Directory used by this index. 528 func (w *IndexWriter) Directory() store.Directory { 529 return w.directory 530 } 531 532 // L1201 533 /* 534 Adds a document to this index. 535 536 Note that if an Error is hit (for example disk full) then the index 537 will be consistent, but this document may not have been added. 538 Furthermore, it's possible the index will have one segment in 539 non-compound format even when using compound files (when a merge has 540 partially succeeded). 
541 542 This method periodically flushes pending documents to the Directory 543 (see flush()), and also periodically triggers segment merges in the 544 index according to the MergePolicy in use. 545 546 Merges temporarily consume space in the directory. The amount of 547 space required is up to 1X the size of all segments being merged, 548 when no readers/searchers are open against the index, and up to 2X 549 the size of all segments being merged when readers/searchers are open 550 against the index (see forceMerge() for details). The sequence of 551 primitive merge operations performed is governed by the merge policy. 552 553 Note that each term in the document can be no longer than 554 MAX_TERM_LENGTH in bytes, otherwise error will be returned. 555 556 Note that it's possible to creat an invalid Unicode string in Java if 557 a UTF16 surrogate pair is malformed. In this case, the invalid 558 characters are silently replaced with the Unicode replacement 559 character U+FFFD. 560 */ 561 func (w *IndexWriter) AddDocument(doc []IndexableField) error { 562 return w.AddDocumentWithAnalyzer(doc, w.analyzer) 563 } 564 565 /* 566 Adds a document to this index, using the provided analyzer instead of 567 the value of Analyzer(). 568 569 See AddDocument() for details on index and IndexWriter state after an 570 error, and flushing/merging temporary free space requirements. 571 572 NOTE: if this method hits a memory issue, you hsould immediately 573 close the writer. See above for details. 574 */ 575 func (w *IndexWriter) AddDocumentWithAnalyzer(doc []IndexableField, analyzer analysis.Analyzer) error { 576 return w.UpdateDocument(nil, doc, analyzer) 577 } 578 579 // L1545 580 /* 581 Updates a document by first deleting the document(s) containing term 582 and then adding the new document. The delete and then add are atomic 583 as seen by a reader on the same index (flush may happen only after 584 the add). 
585 */ 586 func (w *IndexWriter) UpdateDocument(term *Term, doc []IndexableField, analyzer analysis.Analyzer) error { 587 w.ensureOpen() 588 var success = false 589 defer func() { 590 if !success { 591 if w.infoStream.IsEnabled("IW") { 592 w.infoStream.Message("IW", "hit error updating document") 593 } 594 } 595 }() 596 597 ok, err := w.docWriter.updateDocument(doc, analyzer, term) 598 if err != nil { 599 return err 600 } 601 if ok { 602 _, err = w.docWriter.processEvents(w, true, false) 603 if err != nil { 604 return err 605 } 606 } 607 success = true 608 return nil 609 } 610 611 func (w *IndexWriter) newSegmentName() string { 612 // Cannot synchronize on IndexWriter because that causes deadlook 613 // Ian: but why? 614 w.Lock() 615 defer w.Unlock() 616 // Important to increment changeCount so that the segmentInfos is 617 // written on close. Otherwise we could close, re-open and 618 // re-return the same segment name that was previously returned 619 // which can cause problems at least with ConcurrentMergeScheculer. 620 w.changeCount++ 621 w.segmentInfos.changed() 622 defer func() { w.segmentInfos.counter++ }() 623 return fmt.Sprintf("_%v", strconv.FormatInt(int64(w.segmentInfos.counter), 36)) 624 } 625 626 /* 627 Forces merge policy to merge segments until there are <= 628 maxNumSegments. The actual merge to be executed are determined by the 629 MergePolicy. 630 631 This is a horribly costly operation, especially when you pass a small 632 maxNumSegments; usually you should only call this if the index is 633 static (will no longer be changed). 634 635 Note that this requires up to 2X the index size free space in your 636 Directory (3X if you're using compound file format). For example, if 637 your index size is 10 MB, then you need up to 20 MB free for this to 638 complete (30 MB if you're using compound file format). Also, it's 639 best to call commit() afterwards, to allow IndexWriter to free up 640 disk space. 
641 642 If some but not all readers re-open while merging is underway, this 643 will cause > 2X temporary space to be consumed as those new readers 644 will then hold open the temporary segments at that time. it is best 645 not to re-open readers while merging is running. 646 647 The actual temporary usage could be much less than these figures (it 648 depends on many factors). 649 650 In general, once this completes, the total size of the index will be 651 less than the size of the starting index. It could be quite a bit 652 smaller (if there were many pending deletes) or just slightly smaller. 653 654 If an error is hit, for example, due to disk full, the index will not 655 be corrupted and no documents will be list. However, it may have been 656 partially merged (some segments were merged but not all), and it's 657 possible that one of the segments in the index will be in 658 non-compound format even when using compound file format. This will 659 occur when the error is hit during conversion of the segment into 660 compound format. 661 662 This call will merge those segments present in the index when call 663 started. If other routines are still adding documents and flushing 664 segments, those newly created segments will not be merged unless you 665 call forceMerge again. 666 667 NOTE: if you call CloseAndWait() with false, which aborts all running 668 merges, then any routine still running this method might hit a 669 MergeAbortedError. 670 */ 671 func (w *IndexWriter) forceMerge(maxNumSegments int) error { 672 return w.forceMergeAndWait(maxNumSegments, true) 673 } 674 675 /* 676 Just like forceMerge(), except you can specify whether the call 677 should block until all merging completes. This is only meaningful 678 with a Mergecheduler that is able to run merges in background 679 routines. 
680 */ 681 func (w *IndexWriter) forceMergeAndWait(maxNumSegments int, doWait bool) error { 682 panic("not implemented yet") 683 } 684 685 // Returns true if any merges in pendingMerges or runningMerges 686 // are maxNumSegments merges. 687 func (w *IndexWriter) maxNumSegmentsMergePending() bool { 688 w.Lock() // synchronized 689 defer w.Unlock() 690 691 panic("not implemented yet") 692 } 693 694 func (w *IndexWriter) maybeMerge(mergePolicy MergePolicy, 695 trigger MergeTrigger, maxNumSegments int) error { 696 697 w.ClosingControl.ensureOpen(false) 698 newMergesFound, err := w.updatePendingMerges(mergePolicy, trigger, maxNumSegments) 699 if err == nil { 700 err = w.mergeScheduler.Merge(w, trigger, newMergesFound) 701 } 702 return err 703 } 704 705 func (w *IndexWriter) updatePendingMerges(mergePolicy MergePolicy, 706 trigger MergeTrigger, maxNumSegments int) (found bool, err error) { 707 708 w.Lock() // synchronized 709 defer w.Unlock() 710 711 // in case infoStream was disabled on init, but then enabled at some 712 // point, try again to log the config here: 713 w.messageState() 714 715 assert(maxNumSegments == -1 || maxNumSegments > 0) 716 if w.stopMerges { 717 return false, nil 718 } 719 720 // Do not start new merges if disaster struck 721 if w.tragedy != nil { 722 return false, nil 723 } 724 725 var spec MergeSpecification 726 if maxNumSegments != UNBOUNDED_MAX_MERGE_SEGMENTS { 727 assertn(trigger == MERGE_TRIGGER_EXPLICIT || trigger == MERGE_FINISHED, 728 "Expected EXPLIT or MEGE_FINISHED as trigger even with maxNumSegments set but was: %v", 729 MergeTriggerName(trigger)) 730 if spec, err = mergePolicy.FindForcedMerges( 731 w.segmentInfos, 732 maxNumSegments, 733 w.segmentsToMerge, w); err != nil { 734 return false, err 735 } 736 if found = spec != nil; found { 737 for _, merge := range spec { 738 merge.maxNumSegments = maxNumSegments 739 } 740 } 741 } else { 742 if spec, err = mergePolicy.FindMerges(trigger, w.segmentInfos, w); err != nil { 743 return false, 
err 744 } 745 } 746 747 if found = spec != nil; found { 748 for _, merge := range spec { 749 if _, err = w.registerMerge(merge); err != nil { 750 return false, err 751 } 752 } 753 } 754 return true, nil 755 } 756 757 /* 758 Experts: to be used by a MergePolicy to avoid selecting merges for 759 segments already being merged. The returned collection is not cloned, 760 and thus is only safe to access if you hold IndexWriter's lock (which 761 you do when IndexWriter invokes the MergePolicy). 762 */ 763 func (w *IndexWriter) MergingSegments() map[*SegmentCommitInfo]bool { 764 // no need to synchronized but should be 765 return w.mergingSegments 766 } 767 768 /* 769 Expert: the MergeScheduler calls this method to retrieve the next 770 merge requested by the MergePolicy. 771 */ 772 func (w *IndexWriter) nextMerge() *OneMerge { 773 w.Lock() // synchronized 774 defer w.Unlock() 775 776 if w.pendingMerges.Len() == 0 { 777 return nil 778 } 779 // Advance the merge from pending to running 780 merge := w.pendingMerges.Front().Value.(*OneMerge) 781 w.pendingMerges.Remove(w.pendingMerges.Front()) 782 w.runningMerges[merge] = true 783 return merge 784 } 785 786 // Expert: returns true if there are merges waiting to be scheduled. 787 func (w *IndexWriter) hasPendingMerges() bool { 788 return w.pendingMerges.Len() > 0 789 } 790 791 /* 792 Close the IndexWriter without committing any changes that have 793 occurred since the last commit (or since it was opened, if commit 794 hasn't been called). This removes any temporary files that had been 795 created, after which the state of the index will be the same as it 796 was when commit() was last called or when this writer was first 797 opened. 
This also clears a previous call to prepareCommit() 798 */ 799 func (w *IndexWriter) Rollback() error { 800 // don't call ensureOpen here; this acts like close() in closeable 801 802 return w.close(w.rollbackInternal) 803 } 804 805 func (w *IndexWriter) rollbackInternal() (ok bool, err error) { 806 if w.infoStream.IsEnabled("IW") { 807 w.infoStream.Message("IW", "rollback") 808 } 809 810 err = func() error { 811 var success = false 812 defer func() { 813 if !success { 814 // Must not hold IW's lock while closing mergeScheduler: this could lead to deadlock 815 util.CloseWhileSuppressingError(w.mergeScheduler) 816 } 817 w.Lock() 818 defer w.Unlock() 819 820 if !success { 821 func() { 822 defer recover() // ignore any error 823 // we tried to be nice about it: do the minimum 824 // don't leak a segments_N file if there is a pending commit 825 if w.pendingCommit != nil { 826 w.pendingCommit.rollbackCommit(w.directory) 827 w.deleter.decRefInfos(w.pendingCommit) 828 } 829 w.pendingCommit = nil 830 }() 831 832 // close all the closeables we can (but important is readerPool and writeLock to prevent leaks) 833 util.CloseWhileSuppressingError(w.readerPool, w.deleter, w.writeLock) 834 w.writeLock = nil 835 } 836 }() 837 838 func() { 839 w.Lock() 840 defer w.Unlock() 841 842 w.abortAllMerges() 843 w.stopMerges = true 844 }() 845 846 if w.infoStream.IsEnabled("IW") { 847 w.infoStream.Message("IW", "rollback: done finish merges") 848 } 849 850 // Must pre-close in case it increments changeCount so that we 851 // then set it to false before calling closeInternal 852 if err = w.mergeScheduler.Close(); err != nil { 853 return err 854 } 855 856 w.bufferedUpdatesStream.clear() 857 w.docWriter.close() // mark it as closed first to prevent subsequent indexing actions/flushes 858 w.docWriter.abort(w) // don't sync on IW here 859 860 if err = func() error { 861 w.Lock() 862 defer w.Unlock() 863 864 if w.pendingCommit != nil { 865 w.pendingCommit.rollbackCommit(w.directory) 866 
w.deleter.decRefInfos(w.pendingCommit) 867 w.pendingCommit = nil 868 } 869 870 // Don't bother saving any changes in our segmentInfos 871 if err = w.readerPool.dropAll(false); err != nil { 872 return err 873 } 874 875 // Keep the same segmentInfos instance but replace all of its 876 // SegmentInfo instances. This is so the next attempt to commit 877 // using this instance of IndexWriter will always write to a 878 // new generation ("write once"). 879 w.segmentInfos.rollbackSegmentInfos(w.rollbackSegments) 880 if w.infoStream.IsEnabled("IW") { 881 w.infoStream.Message("IW", "rollback: infos=%v", w.readerPool.segmentsToString(w.segmentInfos.Segments)) 882 } 883 884 w.testPoint("rollback before checkpoint") 885 886 // Ask deleter to locate unreferenced files & remove them: 887 if err = w.deleter.checkpoint(w.segmentInfos, false); err == nil { 888 if err = w.deleter.refreshList(); err == nil { 889 890 w.lastCommitChangeCount = w.changeCount 891 892 if err = w.deleter.refreshList(); err == nil { 893 if err = w.deleter.Close(); err == nil { 894 895 // Must set closed while inside same sync block where 896 // we call deleter.refresh, else concurrent routines 897 // may try to sneak a flush in, after we leave this 898 // sync block and before we enter the sync block in the 899 // finally clause below that sets closed: 900 // w._closed = true 901 // For GoLucene, it may not be necessary. 902 903 if err = util.Close(w.writeLock); err == nil { // release write lock 904 w.writeLock = nil 905 } 906 } 907 } 908 } 909 } 910 911 success = err != nil 912 return err 913 }(); err != nil { 914 return err 915 } 916 917 success = true 918 return nil 919 }() 920 921 return err != nil, err 922 } 923 924 /* 925 Called whenever the SegmentInfos has been updatd and the index files 926 referenced exist (correctly) in the index directory. 
927 */ 928 func (w *IndexWriter) checkpoint() error { 929 w.Lock() // synchronized 930 defer w.Unlock() 931 return w._checkpoint() 932 } 933 934 func (w *IndexWriter) _checkpoint() error { 935 w.changeCount++ 936 w.segmentInfos.changed() 937 return w.deleter.checkpoint(w.segmentInfos, false) 938 } 939 940 /* 941 Checkpoints with IndexFileDeleter, so it's aware of new files, and 942 increments changeCount, so on close/commit we will write a new 943 segments file, but does NOT bump segmentInfos.version. 944 */ 945 func (w *IndexWriter) checkpointNoSIS() (err error) { 946 w.Lock() // synchronized 947 defer w.Unlock() 948 w.changeCount++ 949 return w.deleter.checkpoint(w.segmentInfos, false) 950 } 951 952 /* Called internally if any index state has changed. */ 953 func (w *IndexWriter) changed() { 954 w.Lock() 955 defer w.Unlock() 956 w.changeCount++ 957 w.segmentInfos.changed() 958 } 959 960 func (w *IndexWriter) publishFrozenUpdates(packet *FrozenBufferedUpdates) { 961 w.Lock() 962 defer w.Unlock() 963 assert(packet != nil && packet.any()) 964 w.bufferedUpdatesStreamLock.Lock() 965 defer w.bufferedUpdatesStreamLock.Unlock() 966 w.bufferedUpdatesStream.push(packet) 967 } 968 969 /* 970 Atomically adds the segment private delete packet and publishes the 971 flushed segments SegmentInfo to the index writer. 
972 */ 973 func (w *IndexWriter) publishFlushedSegment(newSegment *SegmentCommitInfo, 974 packet *FrozenBufferedUpdates, globalPacket *FrozenBufferedUpdates) (err error) { 975 defer func() { 976 atomic.AddInt32(&w.flushCount, 1) 977 err = mergeError(err, w.doAfterFlush()) 978 }() 979 980 // Lock order IW -> BDS 981 w.Lock() 982 defer w.Unlock() 983 w.ClosingControl.ensureOpen(false) 984 w.bufferedUpdatesStreamLock.Lock() 985 defer w.bufferedUpdatesStreamLock.Unlock() 986 987 if w.infoStream.IsEnabled("IW") { 988 w.infoStream.Message("IW", "publishFlushedSegment") 989 } 990 991 if globalPacket != nil && globalPacket.any() { 992 w.bufferedUpdatesStream.push(globalPacket) 993 } 994 // Publishing the segment must be synched on IW -> BDS to make sure 995 // that no merge prunes away the seg. private delete packet 996 var nextGen int64 997 if packet != nil && packet.any() { 998 nextGen = w.bufferedUpdatesStream.push(packet) 999 } else { 1000 // Since we don't have a delete packet to apply we can get a new 1001 // generation right away 1002 nextGen = w.bufferedUpdatesStream.nextGen 1003 } 1004 if w.infoStream.IsEnabled("IW") { 1005 w.infoStream.Message("IW", "publish sets newSegment delGen=%v seg=%v", nextGen, w.readerPool.segmentToString(newSegment)) 1006 } 1007 newSegment.SetBufferedUpdatesGen(nextGen) 1008 w.segmentInfos.Segments = append(w.segmentInfos.Segments, newSegment) 1009 return w._checkpoint() 1010 } 1011 1012 func (w *IndexWriter) resetMergeExceptions() { 1013 w.Lock() // synchronized 1014 defer w.Unlock() 1015 panic("not implemented yet") 1016 } 1017 1018 /* 1019 Requires commitLock 1020 */ 1021 func (w *IndexWriter) prepareCommitInternal(mergePolicy MergePolicy) error { 1022 w.startCommitTime = time.Now() 1023 w.ClosingControl.ensureOpen(false) 1024 if w.infoStream.IsEnabled("IW") { 1025 w.infoStream.Message("IW", "prepareCommit: flush") 1026 w.infoStream.Message("IW", " index before flush %v", w.segString()) 1027 } 1028 1029 assert2(w.tragedy == nil, "this 
writer hit an unrecoverable error; cannot commit\n%v", w.tragedy) 1030 assert2(w.pendingCommit == nil, "prepareCommit was already called with no corresponding call to commit") 1031 1032 err := w.doBeforeFlush() 1033 if err != nil { 1034 return err 1035 } 1036 w.testPoint("startDoFlush") 1037 1038 // This is copied from doFLush, except it's modified to clone & 1039 // incRef the flushed SegmentInfos inside the sync block: 1040 1041 toCommit, anySegmentsFlushed, err := func() (toCommit *SegmentInfos, anySegmentsFlushed bool, err error) { 1042 w.fullFlushLock.Lock() 1043 defer w.fullFlushLock.Unlock() 1044 1045 var flushSuccess = false 1046 var success = false 1047 defer func() { 1048 if !success { 1049 if w.infoStream.IsEnabled("IW") { 1050 w.infoStream.Message("IW", "hit error during prepareCommit") 1051 } 1052 } 1053 // Done: finish the full flush! 1054 w.docWriter.finishFullFlush(flushSuccess) 1055 err2 := w.doAfterFlush() 1056 if err2 != nil { 1057 log.Printf("Error in doAfterFlush: %v", err2) 1058 } 1059 }() 1060 1061 anySegmentsFlushed, err = w.docWriter.flushAllThreads(w) 1062 if err != nil { 1063 return 1064 } 1065 if !anySegmentsFlushed { 1066 // prevent double increment since docWriter.doFlush increments 1067 // the flushCount if we flushed anything. 
1068 atomic.AddInt32(&w.flushCount, -1) 1069 } 1070 w.docWriter.processEvents(w, false, true) 1071 flushSuccess = true 1072 1073 err = func() (err error) { 1074 w.Lock() 1075 defer w.Unlock() 1076 1077 err = w._maybeApplyDeletes(true) 1078 if err != nil { 1079 return 1080 } 1081 1082 err = w.readerPool.commit(w.segmentInfos) 1083 if err != nil { 1084 return 1085 } 1086 1087 // Must clone the segmentInfos while we still 1088 // hold fullFlushLock and while sync'd so that 1089 // no partial changes (eg a delete w/o 1090 // corresponding add from an updateDocument) can 1091 // sneak into the commit point: 1092 toCommit = w.segmentInfos.Clone() 1093 1094 w.pendingCommitChangeCount = w.changeCount 1095 1096 // This protects the segmentInfos we are now going 1097 // to commit. This is important in case, eg, while 1098 // we are trying to sync all referenced files, a 1099 // merge completes which would otherwise have 1100 // removed the files we are now syncing. 1101 w.filesToCommit = toCommit.files(w.directory, false) 1102 w.deleter.incRefFiles(w.filesToCommit) 1103 return 1104 }() 1105 if err != nil { 1106 return 1107 } 1108 success = true 1109 return 1110 }() 1111 1112 var success = false 1113 defer func() { 1114 if !success { 1115 func() { 1116 w.Lock() 1117 defer w.Unlock() 1118 if w.filesToCommit != nil { 1119 w.deleter.decRefFiles(w.filesToCommit) 1120 w.filesToCommit = nil 1121 } 1122 }() 1123 } 1124 }() 1125 if anySegmentsFlushed { 1126 err := w.maybeMerge(mergePolicy, MERGE_TRIGGER_FULL_FLUSH, UNBOUNDED_MAX_MERGE_SEGMENTS) 1127 if err != nil { 1128 return err 1129 } 1130 } 1131 if err := w.startCommit(toCommit); err != nil { 1132 return err 1133 } 1134 success = true 1135 return nil 1136 } 1137 1138 /* 1139 Commits all pending changes (added & deleted documents, segment 1140 merges, added indexes, etc.) 
to the index, and syncs all referenced
index files, such that a reader will see the changes and the index
updates will survive an OS or machine crash or power loss. Note that
this does not wait for any running background merges to finish. This
may be a costly operation, so you should test the cost in your
application and do it only when really necessary.

Note that this operation calls Directory.sync on the index files.
That call should not return until the file contents & metadata are
on stable storage. For FSDirectory, this calls the OS's fsync. But,
beware: some hardware devices may in fact cache writes even during
fsync, and return before the bits are actually on stable storage, to
give the appearance of faster performance. If you have such a device,
and it does not have a battery backup (for example) then on power loss
it may still lose data. Lucene cannot guarantee consistency on such
devices.
*/
func (w *IndexWriter) Commit() error {
	w.ensureOpen()
	// commitInternal assumes commitLock is held, so take it here for
	// the whole commit.
	w.commitLock.Lock()
	defer w.commitLock.Unlock()
	return w.commitInternal(w.config.MergePolicy())
}

/*
Assume commitLock is locked.
1166 */ 1167 func (w *IndexWriter) commitInternal(mergePolicy MergePolicy) error { 1168 if w.infoStream.IsEnabled("IW") { 1169 w.infoStream.Message("IW", "commit: start") 1170 } 1171 1172 w.ClosingControl.ensureOpen(false) 1173 1174 if w.infoStream.IsEnabled("IW") { 1175 w.infoStream.Message("IW", "commit: enter lock") 1176 } 1177 1178 if w.pendingCommit == nil { 1179 if w.infoStream.IsEnabled("IW") { 1180 w.infoStream.Message("IW", "commit: now prepare") 1181 } 1182 err := w.prepareCommitInternal(mergePolicy) 1183 if err != nil { 1184 return err 1185 } 1186 } else { 1187 if w.infoStream.IsEnabled("IW") { 1188 w.infoStream.Message("IW", "commit: already prepared") 1189 } 1190 } 1191 return w.finishCommit() 1192 } 1193 1194 func (w *IndexWriter) finishCommit() (err error) { 1195 var commitCompleted bool 1196 var finished bool 1197 var committedSegmentsFileName string 1198 1199 defer func() { 1200 if err != nil { 1201 if w.infoStream.IsEnabled("IW") { 1202 w.infoStream.Message("IW", "hit error during finishCommit: %v", err) 1203 } 1204 if commitCompleted { 1205 w.tragicEvent(err, "finishComit") 1206 err = nil 1207 } 1208 } 1209 }() 1210 1211 w.Lock() // synchronized 1212 defer w.Unlock() 1213 1214 if w.pendingCommit == nil { 1215 assert(w.filesToCommit == nil) 1216 if w.infoStream.IsEnabled("IW") { 1217 w.infoStream.Message("IW", "commit: pendingCommit == nil; skip") 1218 } 1219 return nil 1220 } 1221 1222 defer func() { 1223 defer func() { 1224 w.filesToCommit = nil 1225 w.pendingCommit = nil 1226 }() 1227 1228 if finished { // all is good 1229 w.deleter.decRefFiles(w.filesToCommit) 1230 } else if !commitCompleted { // error happened in finishCommit: not a tragedy 1231 w.deleter.decRefFilesWhileSuppressingError(w.filesToCommit) 1232 } 1233 // TODO check if any wait() 1234 }() 1235 1236 if w.infoStream.IsEnabled("IW") { 1237 w.infoStream.Message("IW", "commit: pendingCommit != nil") 1238 } 1239 if committedSegmentsFileName, err = 
w.pendingCommit.finishCommit(w.directory); err != nil { 1240 return 1241 } 1242 1243 // we committed, if anything goes wrong after this, we are screwed and it's a tragedy 1244 commitCompleted = true 1245 1246 // NOTE: don't use this.checkpoint() here, because 1247 // we do not want to increment changeCount: 1248 if err = w.deleter.checkpoint(w.pendingCommit, true); err != nil { 1249 return 1250 } 1251 1252 w.lastCommitChangeCount = w.pendingCommitChangeCount 1253 w.rollbackSegments = w.pendingCommit.createBackupSegmentInfos() 1254 1255 finished = true 1256 1257 if w.infoStream.IsEnabled("IW") { 1258 w.infoStream.Message("IW", "commit: wrote segments file '%v'", committedSegmentsFileName) 1259 w.infoStream.Message("IW", fmt.Sprintf("commit: took %v", time.Now().Sub(w.startCommitTime))) 1260 w.infoStream.Message("IW", "commit: done") 1261 } 1262 return nil 1263 } 1264 1265 /* 1266 Flush all in-memory buffered updates (adds and deletes) to the 1267 Directory. 1268 */ 1269 func (w *IndexWriter) flush(triggerMerge bool, applyAllDeletes bool) error { 1270 // NOTE: this method cannot be sync'd because 1271 // maybeMerge() in turn calls mergeScheduler.merge which 1272 // in turn can take a long time to run and we don't want 1273 // to hold the lock for that. In the case of 1274 // ConcurrentMergeScheduler this can lead to deadlock 1275 // when it stalls due to too many running merges. 
1276 1277 // We can be called during close, when closing==true, so we must pass false to ensureOpen: 1278 w.ClosingControl.ensureOpen(false) 1279 ok, err := w.doFlush(applyAllDeletes) 1280 if err != nil { 1281 return err 1282 } 1283 if ok && triggerMerge { 1284 return w.maybeMerge(w.config.MergePolicy(), MERGE_TRIGGER_FULL_FLUSH, UNBOUNDED_MAX_MERGE_SEGMENTS) 1285 } 1286 return nil 1287 } 1288 1289 func (w *IndexWriter) doFlush(applyAllDeletes bool) (bool, error) { 1290 assert2(w.tragedy == nil, "this writer hit an unrecoverable error; cannot flush\n%v", w.tragedy) 1291 1292 err := w.doBeforeFlush() 1293 if err != nil { 1294 return false, err 1295 } 1296 if w.infoStream.IsEnabled("TP") { 1297 w.infoStream.Message("TP", "startDoFlush") 1298 } 1299 1300 success := false 1301 defer func() { 1302 if !success && w.infoStream.IsEnabled("IW") { 1303 w.infoStream.Message("IW", "hit error during flush") 1304 } 1305 }() 1306 1307 if w.infoStream.IsEnabled("IW") { 1308 w.infoStream.Message("IW", " start flush: applyAllDeletes=%v", applyAllDeletes) 1309 w.infoStream.Message("IW", " index before flush %v", w.segString()) 1310 } 1311 1312 anySegmentFlushed, err := func() (ok bool, err error) { 1313 w.fullFlushLock.Lock() 1314 defer w.fullFlushLock.Unlock() 1315 1316 flushSuccess := false 1317 defer func() { 1318 w.docWriter.finishFullFlush(flushSuccess) 1319 w.docWriter.processEvents(w, false, true) 1320 }() 1321 1322 if ok, err = w.docWriter.flushAllThreads(w); err == nil { 1323 flushSuccess = true 1324 } 1325 return 1326 }() 1327 if err != nil { 1328 return false, err 1329 } 1330 1331 err = func() error { 1332 w.Lock() 1333 defer w.Unlock() 1334 err := w._maybeApplyDeletes(applyAllDeletes) 1335 if err != nil { 1336 return err 1337 } 1338 err = w.doAfterFlush() 1339 if err != nil { 1340 return err 1341 } 1342 if !anySegmentFlushed { 1343 //flushCount is incremented in flushAllThreads 1344 atomic.AddInt32(&w.flushCount, 1) 1345 } 1346 return nil 1347 }() 1348 if err != nil { 
1349 return false, err 1350 } 1351 1352 success = true 1353 return anySegmentFlushed, nil 1354 } 1355 1356 func (w *IndexWriter) _maybeApplyDeletes(applyAllDeletes bool) error { 1357 if applyAllDeletes { 1358 if w.infoStream.IsEnabled("IW") { 1359 w.infoStream.Message("IW", "apply all deletes during flush") 1360 } 1361 return w._applyAllDeletesAndUpdates() 1362 } else if w.infoStream.IsEnabled("IW") { 1363 w.infoStream.Message("IW", "don't apply deletes now delTermCount=%v bytesUsed=%v", 1364 atomic.LoadInt32(&w.bufferedUpdatesStream.numTerms), 1365 atomic.LoadInt64(&w.bufferedUpdatesStream.bytesUsed)) 1366 } 1367 return nil 1368 } 1369 1370 func (w *IndexWriter) applyAllDeletesAndUpdates() error { 1371 w.Lock() // synchronized 1372 defer w.Unlock() 1373 return w._applyAllDeletesAndUpdates() 1374 } 1375 1376 func (w *IndexWriter) _applyAllDeletesAndUpdates() error { 1377 atomic.AddInt32(&w.flushDeletesCount, 1) 1378 result, err := w.bufferedUpdatesStream.applyDeletesAndUpdates(w.readerPool, w.segmentInfos.Segments) 1379 if err != nil { 1380 return err 1381 } 1382 if result.anyDeletes { 1383 err = w.checkpoint() 1384 if err != nil { 1385 return err 1386 } 1387 } 1388 if !w.keepFullyDeletedSegments && result.allDeleted != nil { 1389 if w.infoStream.IsEnabled("IW") { 1390 w.infoStream.Message("IW", "drop 100%% deleted segments: %v", 1391 w.readerPool.segmentsToString(result.allDeleted)) 1392 } 1393 for _, info := range result.allDeleted { 1394 // If a merge has already registered for this segment, we leave 1395 // it in the readerPool; the merge will skip merging it and 1396 // will then drop it once it's done: 1397 if _, ok := w.mergingSegments[info]; !ok { 1398 w.segmentInfos.remove(info) 1399 atomic.AddInt64(&w.pendingNumDocs, -int64(info.Info.DocCount())) 1400 err = w.readerPool.drop(info) 1401 if err != nil { 1402 return err 1403 } 1404 } 1405 } 1406 err = w.checkpoint() 1407 if err != nil { 1408 return err 1409 } 1410 } 1411 
w.bufferedUpdatesStream.prune(w.segmentInfos) 1412 return nil 1413 } 1414 1415 // L3440 1416 /* 1417 Merges the indicated segments, replacing them in the stack with a 1418 single segment. 1419 */ 1420 func (w *IndexWriter) merge(merge *OneMerge) error { 1421 panic("not implemented yet") 1422 } 1423 1424 /* 1425 Checks whether this merge involves any segments already participating 1426 in a merge. If not, this merge is "registered", meaning we record 1427 that its semgents are now participating in a merge, and true is 1428 returned. Else (the merge conflicts) false is returned. 1429 */ 1430 func (w *IndexWriter) registerMerge(merge *OneMerge) (bool, error) { 1431 panic("not implemented yet") 1432 } 1433 1434 func setDiagnostics(info *SegmentInfo, source string) { 1435 setDiagnosticsAndDetails(info, source, nil) 1436 } 1437 1438 func setDiagnosticsAndDetails(info *SegmentInfo, source string, details map[string]string) { 1439 ans := map[string]string{ 1440 "source": source, 1441 "lucene.version": util.VERSION_LATEST.String(), 1442 "os": runtime.GOOS, 1443 "os.arch": runtime.GOARCH, 1444 "go.version": runtime.Version(), 1445 "timestamp": fmt.Sprintf("%v", time.Now().Unix()), 1446 } 1447 if details != nil { 1448 for k, v := range details { 1449 ans[k] = v 1450 } 1451 } 1452 info.SetDiagnostics(ans) 1453 } 1454 1455 // Returns a string description of all segments, for debugging. 
1456 func (w *IndexWriter) segString() string { 1457 // TODO synchronized 1458 return w.readerPool.segmentsToString(w.segmentInfos.Segments) 1459 } 1460 1461 // called only from assert 1462 func (w *IndexWriter) assertFilesExist(toSync *SegmentInfos) error { 1463 files := toSync.files(w.directory, false) 1464 for _, filename := range files { 1465 allFiles, err := w.directory.ListAll() 1466 if err != nil { 1467 return err 1468 } 1469 ok, err := w.slowFileExists(w.directory, filename) 1470 if err != nil { 1471 return err 1472 } 1473 assert2(ok, "file %v does not exist; files=%v", filename, allFiles) 1474 // If this trips it means we are missing a call to checkpoint 1475 // somewhere, because by the time we are called, deleter should 1476 // know about every file referenced by the current head 1477 // segmentInfos: 1478 assert2(w.deleter.exists(filename), "IndexFileDeleter doesn't know about file %v", filename) 1479 } 1480 return nil 1481 } 1482 1483 /* For infoStream output */ 1484 func (w *IndexWriter) toLiveInfos(sis *SegmentInfos) *SegmentInfos { 1485 w.Lock() // synchronized 1486 defer w.Unlock() 1487 return w._toLiveInfos(sis) 1488 } 1489 1490 func (w *IndexWriter) _toLiveInfos(sis *SegmentInfos) *SegmentInfos { 1491 newSIS := new(SegmentInfos) 1492 // liveSIS := make(map[*SegmentCommitInfo]bool) 1493 // for _, info := range w.segmentInfos.Segments { 1494 // liveSIS[info] = true 1495 // } 1496 for _, info := range sis.Segments { 1497 // if _, ok := liveSIS[info] ; ok { 1498 newSIS.Segments = append(newSIS.Segments, info) 1499 // } 1500 } 1501 return newSIS 1502 } 1503 1504 /* 1505 Walk through all files referenced by the current segmentInfos and ask 1506 the Directory to sync each file, if it wans't already. If that 1507 succeeds, then we prepare a new segments_N file but do not fully 1508 commit it. 
1509 */ 1510 func (w *IndexWriter) startCommit(toSync *SegmentInfos) error { 1511 w.testPoint("startStartCommit") 1512 assert(w.pendingCommit == nil) 1513 assert2(w.tragedy == nil, "this writer hit an unrecoverable error; cannot commit\n%v", w.tragedy) 1514 1515 if w.infoStream.IsEnabled("IW") { 1516 w.infoStream.Message("IW", "startCommit(): start") 1517 } 1518 1519 if err := func() error { 1520 w.Lock() 1521 defer w.Unlock() 1522 1523 assertn(w.lastCommitChangeCount <= w.changeCount, 1524 "lastCommitChangeCount=%v changeCount=%v", w.lastCommitChangeCount, w.changeCount) 1525 if w.pendingCommitChangeCount == w.lastCommitChangeCount { 1526 if w.infoStream.IsEnabled("IW") { 1527 w.infoStream.Message("IW", " skip startCommit(): no changes pending") 1528 } 1529 w.deleter.decRefFiles(w.filesToCommit) 1530 w.filesToCommit = nil 1531 return nil 1532 } 1533 1534 if w.infoStream.IsEnabled("IW") { 1535 w.infoStream.Message("IW", "startCommit index=%v changeCount=%v", 1536 w.readerPool.segmentsToString(toSync.Segments), w.changeCount) 1537 } 1538 1539 return w.assertFilesExist(toSync) 1540 }(); err != nil { 1541 return err 1542 } 1543 1544 w.testPoint("midStartCommit") 1545 1546 var pendingCommitSet = false 1547 defer func() { 1548 w.Lock() 1549 defer w.Unlock() 1550 1551 // Have out master segmentInfos record the generations we just 1552 // prepared. We do this on error or success so we don't 1553 // double-write a segments_N file. 
1554 w.segmentInfos.updateGeneration(toSync) 1555 1556 if !pendingCommitSet { 1557 if w.infoStream.IsEnabled("IW") { 1558 w.infoStream.Message("IW", "hit error committing segments file") 1559 } 1560 1561 // Hit error 1562 w.deleter.decRefFiles(w.filesToCommit) 1563 w.filesToCommit = nil 1564 } 1565 }() 1566 1567 w.testPoint("midStartCommit2") 1568 err := func() (err error) { 1569 w.Lock() 1570 defer w.Unlock() 1571 1572 assert(w.pendingCommit == nil) 1573 assert(w.segmentInfos.generation == toSync.generation) 1574 1575 // Eror here means nothing is prepared (this method unwinds 1576 // everything it did on an error) 1577 err = toSync.prepareCommit(w.directory) 1578 if err != nil { 1579 return err 1580 } 1581 fmt.Println("DONE prepareCommit") 1582 1583 pendingCommitSet = true 1584 w.pendingCommit = toSync 1585 return nil 1586 }() 1587 if err != nil { 1588 return err 1589 } 1590 1591 // This call can take a long time -- 10s of seconds or more. We do 1592 // it without syncing on this: 1593 var success = false 1594 var filesToSync []string 1595 defer func() { 1596 if !success { 1597 pendingCommitSet = false 1598 w.pendingCommit = nil 1599 toSync.rollbackCommit(w.directory) 1600 } 1601 }() 1602 1603 filesToSync = toSync.files(w.directory, false) 1604 err = w.directory.Sync(filesToSync) 1605 if err != nil { 1606 return err 1607 } 1608 success = true 1609 1610 if w.infoStream.IsEnabled("IW") { 1611 w.infoStream.Message("IW", "done all syncs: %v", filesToSync) 1612 } 1613 1614 w.testPoint("midStartCommitSuccess") 1615 w.testPoint("finishStartCommit") 1616 return nil 1617 } 1618 1619 func (w *IndexWriter) tragicEvent(tragedy error, location string) { 1620 panic("niy") 1621 } 1622 1623 /* 1624 Used only by assert for testing. 
Current points:
  - startDoFlush
  - startCommitMerge
  - startStartCommit
  - midStartCommit
  - midStartCommit2
  - midStartCommitSuccess
  - finishStartCommit
  - startCommitMergeDeletes
  - startMergeInit
  - DocumentsWriter.ThreadState.init start
*/
func (w *IndexWriter) testPoint(message string) {
	if w.infoStream.IsEnabled("TP") {
		w.infoStream.Message("TP", message)
	}
}

// L4356

/* Called by DirectoryReader.doClose() */
func (w *IndexWriter) deletePendingFiles() {
	w.deleter.deletePendingFiles()
}

/*
NOTE: this method creates a compound file for all files returned by
info.files(). While, generally, this may include separate norms and
deletion files, this SegmentInfos must not reference such files when
this method is called, because they are not allowed within a compound
file.

On success the segment's file set is replaced by the compound file and
its entries file, and the names of the files that were copied into the
compound file are returned in sorted order. On failure both compound
files are removed again (best effort) and the error is returned.
*/
func createCompoundFile(infoStream util.InfoStream,
	directory store.Directory,
	checkAbort CheckAbort,
	info *SegmentInfo,
	context store.IOContext) (names []string, err error) {

	filename := util.SegmentFileName(info.Name, "", store.COMPOUND_FILE_EXTENSION)
	if infoStream.IsEnabled("IW") {
		infoStream.Message("IW", "create compound file %v", filename)
	}
	// Now merge all added files
	files := info.Files()
	// Copy in sorted order so the compound file layout (and the returned
	// list) does not depend on map iteration order.
	var sortedFiles []string
	for file := range files {
		sortedFiles = append(sortedFiles, file)
	}
	sort.Strings(sortedFiles) // maintain order

	var cfsDir *store.CompoundFileDirectory
	cfsDir, err = store.NewCompoundFileDirectory(directory, filename, context, true)
	if err != nil {
		return nil, err
	}
	func() {
		var success = false
		defer func() {
			if success {
				err = util.Close(cfsDir)
				return
			}
			util.CloseWhileSuppressingError(cfsDir)
			// Best-effort cleanup of BOTH halves of the compound file:
			// the data file and its entries table. Errors are ignored on
			// purpose; the original failure is what gets reported.
			directory.DeleteFile(filename)
			directory.DeleteFile(util.SegmentFileName(info.Name, "", store.COMPOUND_FILE_ENTRIES_EXTENSION))
		}()

		var length int64
		for _, file := range sortedFiles {
			if err = directory.Copy(cfsDir, file, file, context); err != nil {
				return
			}
			if length, err = directory.FileLength(file); err != nil {
				return
			}
			if err = checkAbort.work(float64(length)); err != nil {
				return
			}
		}
		success = true
	}()
	if err != nil {
		return nil, err
	}

	// Replace all previous files with the CFS/CFE files:
	siFiles := make(map[string]bool)
	siFiles[filename] = true
	siFiles[util.SegmentFileName(info.Name, "", store.COMPOUND_FILE_ENTRIES_EXTENSION)] = true
	info.SetFiles(siFiles)

	return sortedFiles, nil
}

// Tries to delete the given files if unreferenced.
func (w *IndexWriter) deleteNewFiles(files []string) error {
	w.Lock() // synchronized
	defer w.Unlock()
	panic("not implemented yet")
}

/* Cleans up residuals from a segment that could not be entirely flushed due to an error */
func (w *IndexWriter) flushFailed(info *SegmentInfo) error {
	w.Lock()
	defer w.Unlock()
	return w.deleter.refresh(info.Name)
}

// purge forwards to the document writer's purgeBuffer and returns its
// result unchanged.
func (w *IndexWriter) purge(forced bool) (n int, err error) {
	return w.docWriter.purgeBuffer(w, forced)
}

// doAfterSegmentFlushed purges the document writer's buffer and then,
// when triggerMerge is set, asks the merge policy for merges. The
// merge step runs even if the purge failed; both errors are combined
// via mergeError.
func (w *IndexWriter) doAfterSegmentFlushed(triggerMerge bool, forcePurge bool) (err error) {
	defer func() {
		if triggerMerge {
			err = mergeError(err, w.maybeMerge(w.config.MergePolicy(), MERGE_TRIGGER_SEGMENT_FLUSH, UNBOUNDED_MAX_MERGE_SEGMENTS))
		}
	}()
	_, err = w.purge(forcePurge)
	return err
}

// slowFileExists reports whether filename can be opened in dir. A
// not-exist error from OpenInput yields (false, nil); any other error
// is returned to the caller.
func (w *IndexWriter) slowFileExists(dir store.Directory, filename string) (bool, error) {
	o, err := dir.OpenInput(filename, store.IO_CONTEXT_DEFAULT)
	if os.IsNotExist(err) {
		return false, nil
	}
	if err != nil {
		return false, err
	}
	defer o.Close()
	return true, nil
}

/*
If openDirectoryReader() has been called (ie, this writer is in near
real-time mode), then after a merge completes, this class can be
invoked to warm the reader on the newly merged segment, before the
merge commits. This is not required for near real-time search, but
will reduce search latency on opening a new near real-time reader
after a merge completes.

NOTE: warm is called before any deletes have been carried over to the
merged segment.
*/
type IndexReaderWarmer interface {
	// Invoked on the AtomicReader for the newly merged segment, before
	// that segment is made visible to near-real-time readers.
	warm(reader AtomicReader) error
}