github.com/balzaczyy/golucene@v0.0.0-20151210033525-d0be9ee89713/core/index/writer.go (about)

     1  package index
     2  
     3  import (
     4  	"container/list"
     5  	"errors"
     6  	"fmt"
     7  	"github.com/balzaczyy/golucene/core/analysis"
     8  	. "github.com/balzaczyy/golucene/core/codec/spi"
     9  	. "github.com/balzaczyy/golucene/core/index/model"
    10  	"github.com/balzaczyy/golucene/core/store"
    11  	"github.com/balzaczyy/golucene/core/util"
    12  	"log"
    13  	"math"
    14  	"os"
    15  	"runtime"
    16  	"sort"
    17  	"strconv"
    18  	"sync"
    19  	"sync/atomic"
    20  	"time"
    21  )
    22  
    23  // index/IndexCommit.java
    24  
    25  /*
    26  Expert: represents a single commit into an index as seen by the
    27  IndexDeletionPolicy or IndexReader.
    28  
    29  Changes to the content of an index are made visible only after the
    30  writer who made that change commits by writing a new segments file
    31  (segments_N). This point in time, when the action of writing of a new
    32  segments file to the directory is completed, is an index commit.
    33  
    34  Each index commit point has a unique segments file associated with it.
    35  The segments file associated with a later index commit point would
    36  have a larger N.
    37  */
type IndexCommit interface {
	// SegmentsFileName returns the segments file (segments_N)
	// associated with the commit point.
	SegmentsFileName() string
	// FileNames returns all index files referenced by this commit point.
	FileNames() []string
	// Directory returns the Directory for the index.
	Directory() store.Directory
	/*
		Delete this commit point. This only applies when using the commit
		point in the context of IndexWriter's IndexDeletionPolicy.

		Upon calling this, the writer is notified that this commit point
		should be deleted.

		The decision that a commit point should be deleted is taken by
		the IndexDeletionPolicy in effect, and therefore this should only
		be called by its onInit() or onCommit() methods.
	*/
	Delete()
	// IsDeleted returns true if this commit should be deleted; this is
	// only used by IndexWriter after invoking the IndexDeletionPolicy.
	IsDeleted() bool
	// SegmentCount returns the number of segments referenced by this commit.
	SegmentCount() int
	// Generation returns the generation (the _N in segments_N) for
	// this IndexCommit.
	Generation() int64
	// UserData returns the userData previously passed to
	// SetCommitData(map) for this commit.
	UserData() map[string]string
}
    67  
    68  type IndexCommits []IndexCommit
    69  
    70  func (s IndexCommits) Len() int      { return len(s) }
    71  func (s IndexCommits) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
    72  func (s IndexCommits) Less(i, j int) bool {
    73  	if s[i].Directory() != s[j].Directory() {
    74  		panic("cannot compare IndexCommits from different Directory instances")
    75  	}
    76  	return s[i].Generation() < s[j].Generation()
    77  }
    78  
// DefaultSimilarity is a factory hook that yields the default
// Similarity. Per the original note it is assigned by the search
// package; nothing in this part of the file sets it, so it stays nil
// until that assignment happens.
var DefaultSimilarity func() Similarity
    81  
    82  // index/IndexWriter.java
    83  
    84  // Use a seprate goroutine to protect closing control
    85  type ClosingControl struct {
    86  	_closed  bool // volatile
    87  	_closing bool // volatile
    88  	closer   chan func() (bool, error)
    89  	done     chan error
    90  }
    91  
    92  func newClosingControl() *ClosingControl {
    93  	ans := &ClosingControl{
    94  		closer: make(chan func() (bool, error)),
    95  		done:   make(chan error),
    96  	}
    97  	go ans.daemon()
    98  	return ans
    99  }
   100  
   101  func (cc *ClosingControl) daemon() {
   102  	var err error
   103  	for !cc._closed {
   104  		err = nil
   105  		select {
   106  		case f := <-cc.closer:
   107  			log.Println("...closing...")
   108  			if !cc._closed {
   109  				cc._closing = true
   110  				cc._closed, err = f()
   111  				cc._closing = false
   112  			}
   113  			cc.done <- err
   114  		}
   115  	}
   116  	log.Println("IW CC daemon is stopped.")
   117  }
   118  
   119  // Used internally to throw an AlreadyClosedError if this IndexWriter
   120  // has been closed or is in the process of closing.
   121  func (cc *ClosingControl) ensureOpen(failIfClosing bool) {
   122  	assert2(!cc._closed && (!failIfClosing || !cc._closing), "this IndexWriter is closed")
   123  }
   124  
   125  func (cc *ClosingControl) close(f func() (ok bool, err error)) error {
   126  	if cc._closed {
   127  		return nil // already closed
   128  	}
   129  	cc.closer <- f
   130  	log.Println("Closing IW...")
   131  	return <-cc.done
   132  }
   133  
/*
Hard limit on the maximum number of documents that may be added to the
index. If you try to add more than this, you'll hit a panic.
*/
const MAX_DOCS = math.MaxInt32 - 128

/* Test only: starts out equal to MAX_DOCS. */
var actualMaxDocs = MAX_DOCS

// Sentinel meaning "no limit on the number of segments"; compared
// against maxNumSegments in updatePendingMerges.
const UNBOUNDED_MAX_MERGE_SEGMENTS = -1

/* Name of the write lock in the index. */
const WRITE_LOCK_NAME = "write.lock"

/* Source of a segment which results from a flush. */
const SOURCE_FLUSH = "flush"

/*
Absolute hard maximum length for a term, in bytes once encoded as
UTF8. If a term arrives from the analyzer longer than this length,
it panics and a message is printed to infoStream, if set (see
SetInfoStream()).
*/
const MAX_TERM_LENGTH = MAX_TERM_LENGTH_UTF8
   158  
   159  /*
   160  An IndexWriter creates and maintains an index.
   161  
   162  The OpenMode option on IndexWriterConfig.SetOpenMode() determines
   163  whether a new index is created, or whether an existing index is
   164  opened. Note that you can open an index with OPEN_MODE_CREATE even
   165  while readers are using the index. The old readers will continue to
   166  search the "point in time" snapshot they had opened, and won't see
   167  the newly created index until they re-open. If OPEN_MODE_CREATE_OR_APPEND
   168  is used, IndexWriter will create a new index if there is not already
   169  an index at the provided path and otherwise open the existing index.
   170  
   171  In either case, documents are added with AddDocument() and removed
   172  with DeleteDocumentsByTerm() or DeleteDocumentsByQuery(). A document
   173  can be updated with UpdateDocuments() (which just deletes and then
   174  adds the entire document). When finished adding, deleting and
   175  updating documents, Close() should be called.
   176  
   177  ...
   178  */
type IndexWriter struct {
	// The writer's own lock; methods marked "synchronized" take it.
	sync.Locker
	// Closed/closing state and the serialized close path.
	*ClosingControl
	// Merge bookkeeping (e.g. pendingMerges, runningMerges,
	// mergingSegments and stopMerges, which methods in this file use).
	*MergeControl

	// when unrecoverable disaster strikes, we populate this with the
	// reason that we had to close IndexWriter
	tragedy error // volatile

	directory store.Directory   // where this index resides
	analyzer  analysis.Analyzer // how to analyze text

	changeCount           int64 // volatile, increments every time a change is completed
	lastCommitChangeCount int64 // volatile, last changeCount that was committed

	rollbackSegments []*SegmentCommitInfo // list of segmentInfo we will fall back to if the commit fails

	pendingCommit            *SegmentInfos // set when a commit is pending (after prepareCommit() & before commit())
	pendingCommitChangeCount int64         // volatile

	filesToCommit []string

	segmentInfos         *SegmentInfos // the segments
	globalFieldNumberMap *FieldNumbers

	docWriter  *DocumentsWriter
	eventQueue *list.List // set to docWriter.events by NewIndexWriter
	deleter    *IndexFileDeleter

	// used by forceMerge to note those needing merging
	segmentsToMerge map[*SegmentCommitInfo]bool

	// Obtained in NewIndexWriter under the name WRITE_LOCK_NAME.
	writeLock store.Lock

	mergeScheduler  MergeScheduler
	mergeExceptions []*OneMerge
	didMessageState bool // set once messageState() has logged

	flushCount        int32 // atomic
	flushDeletesCount int32 // atomic

	readerPool            *ReaderPool
	bufferedUpdatesStream *BufferedUpdatesStream

	bufferedUpdatesStreamLock sync.Locker

	// This is a "write once" variable (like the organic dye on a DVD-R
	// that may or may not be heated by a laser and then cooled to
	// permanently record the event): it's false, until Reader() is
	// called for the first time, at which point it's switched to true
	// and never changes back to false. Once this is true, we hold open
	// and reuse SegmentReader instances internally for applying
	// deletes, doing merges, and reopening near real-time readers.
	// NOTE(review): NewIndexWriter initializes this from
	// conf.readerPooling, so it can already be true before Reader() is
	// called.
	poolReaders bool

	// The instance that we passed to the constructor. It is saved only
	// in order to allow users to query an IndexWriter's settings.
	config LiveIndexWriterConfig

	// time.Now() when commits started; used to write an infoStream
	// message about how long commit took.
	startCommitTime time.Time

	// How many documents are in the index, or are in the process of
	// being added (reserved). E.g., operations like addIndexes will
	// first reserve the right to add N docs, before they actually
	// charge the index, much like how hotels place an "authorization
	// hold" on your credit card to make sure they can later charge you
	// when you check out.
	pendingNumDocs int64

	codec Codec // for writing new segments

	// If non-nil, information about merges will be printed to this.
	infoStream util.InfoStream

	// A hook to execute operations after pending and deleted documents
	// have been flushed to the Directory but before the change is
	// committed (new segments_N file written). NewIndexWriter installs
	// a no-op.
	doAfterFlush func() error
	// A hook to execute operations before pending added and deleted
	// documents are flushed to the Directory. NewIndexWriter installs
	// a no-op.
	doBeforeFlush func() error

	// Used only by commit and prepareCommit, below; lock order is
	// commitLock -> IW
	commitLock sync.Locker

	// Ensures only one flush() is actually flushing segments at a time:
	fullFlushLock sync.Locker

	keepFullyDeletedSegments bool // test only
}
   272  
   273  /*
   274  Used internally to throw an AlreadyClosedError if this IndexWriter
   275  has been closed or is in the process of closing.
   276  
   277  Calls ensureOpen(true).
   278  */
   279  func (w *IndexWriter) ensureOpen() {
   280  	w.ClosingControl.ensureOpen(true)
   281  }
   282  
   283  /*
   284  Constructs a new IndexWriter per the settings given in conf. If you want to
   285  make "live" changes to this writer instance, use Config().
   286  
   287  NOTE: after this writer is created, the given configuration instance cannot be
   288  passed to another writer. If you intend to do so, you should clone it
   289  beforehand.
   290  */
   291  func NewIndexWriter(d store.Directory, conf *IndexWriterConfig) (w *IndexWriter, err error) {
   292  	ans := &IndexWriter{
   293  		Locker:         &sync.Mutex{},
   294  		ClosingControl: newClosingControl(),
   295  
   296  		segmentsToMerge: make(map[*SegmentCommitInfo]bool),
   297  		mergeExceptions: make([]*OneMerge, 0),
   298  		doAfterFlush:    func() error { return nil },
   299  		doBeforeFlush:   func() error { return nil },
   300  		commitLock:      &sync.Mutex{},
   301  		fullFlushLock:   &sync.Mutex{},
   302  
   303  		config:         conf,
   304  		directory:      d,
   305  		analyzer:       conf.analyzer,
   306  		infoStream:     conf.infoStream,
   307  		mergeScheduler: conf.mergeScheduler,
   308  		codec:          conf.codec,
   309  
   310  		bufferedUpdatesStream: newBufferedUpdatesStream(conf.infoStream),
   311  		poolReaders:           conf.readerPooling,
   312  
   313  		bufferedUpdatesStreamLock: &sync.Mutex{},
   314  
   315  		writeLock: d.MakeLock(WRITE_LOCK_NAME),
   316  	}
   317  	ans.readerPool = newReaderPool(ans)
   318  	ans.MergeControl = newMergeControl(conf.infoStream, ans.readerPool)
   319  
   320  	conf.setIndexWriter(ans)
   321  
   322  	// obtain write lock
   323  	if ok, err := ans.writeLock.ObtainWithin(conf.writeLockTimeout); !ok || err != nil {
   324  		if err != nil {
   325  			return nil, err
   326  		}
   327  		return nil, errors.New(fmt.Sprintf("Index locked for write: %v", ans.writeLock))
   328  	}
   329  
   330  	var success bool = false
   331  	defer func() {
   332  		if !success {
   333  			if ans.infoStream.IsEnabled("IW") {
   334  				ans.infoStream.Message("IW", "init: hit exception on init; releasing write lock")
   335  			}
   336  			util.CloseWhileSuppressingError(ans.writeLock) // don't mask the original exception
   337  			ans.writeLock = nil
   338  		}
   339  	}()
   340  
   341  	var create bool
   342  	switch conf.openMode {
   343  	case OPEN_MODE_CREATE:
   344  		create = true
   345  	case OPEN_MODE_APPEND:
   346  		create = false
   347  	default:
   348  		// CREATE_OR_APPEND - create only if an index does not exist
   349  		ok, err := IsIndexExists(d)
   350  		if err != nil {
   351  			return nil, err
   352  		}
   353  		create = !ok
   354  	}
   355  
   356  	// If index is too old, reading the segments will return
   357  	// IndexFormatTooOldError
   358  	ans.segmentInfos = &SegmentInfos{}
   359  
   360  	var initialIndexExists bool = true
   361  
   362  	if create {
   363  		// Try to read first. This is to allow create against an index
   364  		// that's currently open for searching. In this case we write the
   365  		// next segments_N file with no segments:
   366  		err = ans.segmentInfos.ReadAll(d)
   367  		if err == nil {
   368  			ans.segmentInfos.Clear()
   369  		} else {
   370  			// Likely this means it's a fresh directory
   371  			initialIndexExists = false
   372  			err = nil
   373  		}
   374  
   375  		// Record that we have a change (zero out all segments) pending:
   376  		ans.changed()
   377  	} else {
   378  		err = ans.segmentInfos.ReadAll(d)
   379  		if err != nil {
   380  			return
   381  		}
   382  
   383  		if commit := conf.commit; commit != nil {
   384  			// Swap out all segments, but, keep metadta in SegmentInfos,
   385  			// like version & generation, to preserve write-once. This is
   386  			// important if readers are open against the future commit
   387  			// points.
   388  			assert2(commit.Directory() == d,
   389  				"IndexCommit's directory doesn't match my directory")
   390  			oldInfos := &SegmentInfos{}
   391  			ans.segmentInfos.replace(oldInfos)
   392  			ans.changed()
   393  			ans.infoStream.Message("IW", "init: loaded commit '%v'",
   394  				commit.SegmentsFileName())
   395  		}
   396  	}
   397  
   398  	ans.rollbackSegments = ans.segmentInfos.createBackupSegmentInfos()
   399  
   400  	// start with previous field numbers, but new FieldInfos
   401  	ans.globalFieldNumberMap, err = ans.fieldNumberMap()
   402  	if err != nil {
   403  		return
   404  	}
   405  	ans.config.flushPolicy().init(ans.config)
   406  	ans.docWriter = newDocumentsWriter(ans, ans.config, d)
   407  	ans.eventQueue = ans.docWriter.events
   408  
   409  	// Default deleter (for backwards compatibility) is
   410  	// KeepOnlyLastCommitDeleter:
   411  	ans.deleter, err = newIndexFileDeleter(d, conf.delPolicy,
   412  		ans.segmentInfos, ans.infoStream, ans, initialIndexExists)
   413  	if err != nil {
   414  		return
   415  	}
   416  
   417  	if ans.deleter.startingCommitDeleted {
   418  		// Deletion policy deleted the "head" commit point. We have to
   419  		// mark outsef as changed so that if we are closed w/o any
   420  		// further changes we write a new segments_N file.
   421  		ans.changed()
   422  	}
   423  
   424  	if ans.infoStream.IsEnabled("IW") {
   425  		ans.infoStream.Message("IW", "init: create=%v", create)
   426  		ans.messageState()
   427  	}
   428  
   429  	success = true
   430  	return ans, nil
   431  }
   432  
   433  // func (w *IndexWriter) fieldInfos(info *SegmentInfo) (infos FieldInfos, err error) {
   434  // 	var cfsDir store.Directory
   435  // 	if info.IsCompoundFile() {
   436  // 		cfsDir, err = store.NewCompoundFileDirectory(
   437  // 			info.Dir,
   438  // 			util.SegmentFileName(info.Name, "", store.COMPOUND_FILE_EXTENSION),
   439  // 			store.IO_CONTEXT_READONCE,
   440  // 			false,
   441  // 		)
   442  // 		if err != nil {
   443  // 			return
   444  // 		}
   445  // 		assert(cfsDir != nil)
   446  // 		defer func() {
   447  // 			err = mergeError(err, cfsDir.Close())
   448  // 		}()
   449  // 	} else {
   450  // 		cfsDir = info.Dir
   451  // 	}
   452  // 	return info.Codec().(Codec).FieldInfosFormat().FieldInfosReader()(
   453  // 		cfsDir, info.Name, store.IO_CONTEXT_READONCE)
   454  // }
   455  
   456  /*
   457  Loads or returns the alread loaded the global field number map for
   458  this SegmentInfos. If this SegmentInfos has no global field number
   459  map the returned instance is empty.
   460  */
   461  func (w *IndexWriter) fieldNumberMap() (m *FieldNumbers, err error) {
   462  	m = NewFieldNumbers()
   463  	for _, info := range w.segmentInfos.Segments {
   464  		fis, err := ReadFieldInfos(info)
   465  		if err != nil {
   466  			return nil, err
   467  		}
   468  		for _, fi := range fis.Values {
   469  			m.AddOrGet(fi)
   470  		}
   471  	}
   472  	return m, nil
   473  }
   474  
   475  func (w *IndexWriter) messageState() {
   476  	if w.infoStream.IsEnabled("IW") && !w.didMessageState {
   477  		w.didMessageState = true
   478  		w.infoStream.Message("IW", "\ndir=%v\nindex=%v\nversion=%v\n%v",
   479  			w.directory, w.segString(), util.VERSION_LATEST, w.config)
   480  	}
   481  }
   482  
   483  /*
   484  Commits all changes to an index, wait for pending merges to complete,
   485  and closes all associate files.
   486  
   487  Note that:
   488  	1. If you called prepare Commit but failed to call commit, this
   489  	method will panic and the IndexWriter will not be closed.
   490  	2. If this method throws any other exception, the IndexWriter will
   491  	be closed, but changes may have been lost.
   492  
   493  Note that this may be a costly operation, so, try to re-use a single
   494  writer instead of closing and opening a new one. See commit() for
   495  caveats about write caching done by some IO devices.
   496  
   497  NOTE: You must ensure no other threads are still making changes at
   498  the same time that this method is invoked.
   499  */
   500  func (w *IndexWriter) Close() error {
   501  	assert2(w.pendingCommit == nil,
   502  		"cannot close: prepareCommit was already called with no corresponding call to commit")
   503  	// Ensure that only one goroutine actaully gets to do the closing
   504  	w.commitLock.Lock()
   505  	defer w.commitLock.Unlock()
   506  	return w.close(func() (ok bool, err error) {
   507  		defer func() {
   508  			if !ok { // be certain to close the index on any error
   509  				defer recover() // suppress so we keep returning original error
   510  				w.rollbackInternal()
   511  			}
   512  		}()
   513  		if w.infoStream.IsEnabled("IW") {
   514  			w.infoStream.Message("IW", "now flush at close")
   515  		}
   516  		if err = w.flush(true, true); err != nil {
   517  			return
   518  		}
   519  		w.waitForMerges()
   520  		if err = w.commitInternal(w.config.MergePolicy()); err != nil {
   521  			return
   522  		}
   523  		return w.rollbackInternal() // ie close, since we just committed
   524  	})
   525  }
   526  
// Directory returns the Directory used by this index.
func (w *IndexWriter) Directory() store.Directory {
	return w.directory
}
   531  
   532  // L1201
   533  /*
   534  Adds a document to this index.
   535  
   536  Note that if an Error is hit (for example disk full) then the index
   537  will be consistent, but this document may not have been added.
   538  Furthermore, it's possible the index will have one segment in
   539  non-compound format even when using compound files (when a merge has
   540  partially succeeded).
   541  
   542  This method periodically flushes pending documents to the Directory
   543  (see flush()), and also periodically triggers segment merges in the
   544  index according to the MergePolicy in use.
   545  
   546  Merges temporarily consume space in the directory. The amount of
   547  space required is up to 1X the size of all segments being merged,
   548  when no readers/searchers are open against the index, and up to 2X
   549  the size of all segments being merged when readers/searchers are open
   550  against the index (see forceMerge() for details). The sequence of
   551  primitive merge operations performed is governed by the merge policy.
   552  
   553  Note that each term in the document can be no longer than
   554  MAX_TERM_LENGTH in bytes, otherwise error will be returned.
   555  
   556  Note that it's possible to create an invalid Unicode string in Java if
   557  a UTF16 surrogate pair is malformed. In this case, the invalid
   558  characters are silently replaced with the Unicode replacement
   559  character U+FFFD.
   560  */
   561  func (w *IndexWriter) AddDocument(doc []IndexableField) error {
   562  	return w.AddDocumentWithAnalyzer(doc, w.analyzer)
   563  }
   564  
   565  /*
   566  Adds a document to this index, using the provided analyzer instead of
   567  the value of Analyzer().
   568  
   569  See AddDocument() for details on index and IndexWriter state after an
   570  error, and flushing/merging temporary free space requirements.
   571  
   572  NOTE: if this method hits a memory issue, you should immediately
   573  close the writer. See above for details.
   574  */
   575  func (w *IndexWriter) AddDocumentWithAnalyzer(doc []IndexableField, analyzer analysis.Analyzer) error {
   576  	return w.UpdateDocument(nil, doc, analyzer)
   577  }
   578  
   579  // L1545
   580  /*
   581  Updates a document by first deleting the document(s) containing term
   582  and then adding the new document. The delete and then add are atomic
   583  as seen by a reader on the same index (flush may happen only after
   584  the add).
   585  */
   586  func (w *IndexWriter) UpdateDocument(term *Term, doc []IndexableField, analyzer analysis.Analyzer) error {
   587  	w.ensureOpen()
   588  	var success = false
   589  	defer func() {
   590  		if !success {
   591  			if w.infoStream.IsEnabled("IW") {
   592  				w.infoStream.Message("IW", "hit error updating document")
   593  			}
   594  		}
   595  	}()
   596  
   597  	ok, err := w.docWriter.updateDocument(doc, analyzer, term)
   598  	if err != nil {
   599  		return err
   600  	}
   601  	if ok {
   602  		_, err = w.docWriter.processEvents(w, true, false)
   603  		if err != nil {
   604  			return err
   605  		}
   606  	}
   607  	success = true
   608  	return nil
   609  }
   610  
   611  func (w *IndexWriter) newSegmentName() string {
   612  	// Cannot synchronize on IndexWriter because that causes deadlook
   613  	// Ian: but why?
   614  	w.Lock()
   615  	defer w.Unlock()
   616  	// Important to increment changeCount so that the segmentInfos is
   617  	// written on close. Otherwise we could close, re-open and
   618  	// re-return the same segment name that was previously returned
   619  	// which can cause problems at least with ConcurrentMergeScheculer.
   620  	w.changeCount++
   621  	w.segmentInfos.changed()
   622  	defer func() { w.segmentInfos.counter++ }()
   623  	return fmt.Sprintf("_%v", strconv.FormatInt(int64(w.segmentInfos.counter), 36))
   624  }
   625  
   626  /*
   627  Forces merge policy to merge segments until there are <=
   628  maxNumSegments. The actual merge to be executed are determined by the
   629  MergePolicy.
   630  
   631  This is a horribly costly operation, especially when you pass a small
   632  maxNumSegments; usually you should only call this if the index is
   633  static (will no longer be changed).
   634  
   635  Note that this requires up to 2X the index size free space in your
   636  Directory (3X if you're using compound file format). For example, if
   637  your index size is 10 MB, then you need up to 20 MB free for this to
   638  complete (30 MB if you're using compound file format). Also, it's
   639  best to call commit() afterwards, to allow IndexWriter to free up
   640  disk space.
   641  
   642  If some but not all readers re-open while merging is underway, this
   643  will cause > 2X temporary space to be consumed as those new readers
   644  will then hold open the temporary segments at that time. it is best
   645  not to re-open readers while merging is running.
   646  
   647  The actual temporary usage could be much less than these figures (it
   648  depends on many factors).
   649  
   650  In general, once this completes, the total size of the index will be
   651  less than the size of the starting index. It could be quite a bit
   652  smaller (if there were many pending deletes) or just slightly smaller.
   653  
   654  If an error is hit, for example, due to disk full, the index will not
   655  be corrupted and no documents will be lost. However, it may have been
   656  partially merged (some segments were merged but not all), and it's
   657  possible that one of the segments in the index will be in
   658  non-compound format even when using compound file format. This will
   659  occur when the error is hit during conversion of the segment into
   660  compound format.
   661  
   662  This call will merge those segments present in the index when call
   663  started. If other routines are still adding documents and flushing
   664  segments, those newly created segments will not be merged unless you
   665  call forceMerge again.
   666  
   667  NOTE: if you call CloseAndWait() with false, which aborts all running
   668  merges, then any routine still running this method might hit a
   669  MergeAbortedError.
   670  */
   671  func (w *IndexWriter) forceMerge(maxNumSegments int) error {
   672  	return w.forceMergeAndWait(maxNumSegments, true)
   673  }
   674  
/*
Just like forceMerge(), except you can specify whether the call
should block until all merging completes. This is only meaningful
with a MergeScheduler that is able to run merges in background
routines.

Not implemented yet: every call panics, so forceMerge() panics too.
*/
func (w *IndexWriter) forceMergeAndWait(maxNumSegments int, doWait bool) error {
	panic("not implemented yet")
}
   684  
// Intended to return true if any merges in pendingMerges or
// runningMerges are maxNumSegments merges.
//
// Not implemented yet: the call takes the writer's lock and then
// panics; the deferred Unlock releases the lock while the panic
// propagates.
func (w *IndexWriter) maxNumSegmentsMergePending() bool {
	w.Lock() // synchronized
	defer w.Unlock()

	panic("not implemented yet")
}
   693  
   694  func (w *IndexWriter) maybeMerge(mergePolicy MergePolicy,
   695  	trigger MergeTrigger, maxNumSegments int) error {
   696  
   697  	w.ClosingControl.ensureOpen(false)
   698  	newMergesFound, err := w.updatePendingMerges(mergePolicy, trigger, maxNumSegments)
   699  	if err == nil {
   700  		err = w.mergeScheduler.Merge(w, trigger, newMergesFound)
   701  	}
   702  	return err
   703  }
   704  
   705  func (w *IndexWriter) updatePendingMerges(mergePolicy MergePolicy,
   706  	trigger MergeTrigger, maxNumSegments int) (found bool, err error) {
   707  
   708  	w.Lock() // synchronized
   709  	defer w.Unlock()
   710  
   711  	// in case infoStream was disabled on init, but then enabled at some
   712  	// point, try again to log the config here:
   713  	w.messageState()
   714  
   715  	assert(maxNumSegments == -1 || maxNumSegments > 0)
   716  	if w.stopMerges {
   717  		return false, nil
   718  	}
   719  
   720  	// Do not start new merges if disaster struck
   721  	if w.tragedy != nil {
   722  		return false, nil
   723  	}
   724  
   725  	var spec MergeSpecification
   726  	if maxNumSegments != UNBOUNDED_MAX_MERGE_SEGMENTS {
   727  		assertn(trigger == MERGE_TRIGGER_EXPLICIT || trigger == MERGE_FINISHED,
   728  			"Expected EXPLIT or MEGE_FINISHED as trigger even with maxNumSegments set but was: %v",
   729  			MergeTriggerName(trigger))
   730  		if spec, err = mergePolicy.FindForcedMerges(
   731  			w.segmentInfos,
   732  			maxNumSegments,
   733  			w.segmentsToMerge, w); err != nil {
   734  			return false, err
   735  		}
   736  		if found = spec != nil; found {
   737  			for _, merge := range spec {
   738  				merge.maxNumSegments = maxNumSegments
   739  			}
   740  		}
   741  	} else {
   742  		if spec, err = mergePolicy.FindMerges(trigger, w.segmentInfos, w); err != nil {
   743  			return false, err
   744  		}
   745  	}
   746  
   747  	if found = spec != nil; found {
   748  		for _, merge := range spec {
   749  			if _, err = w.registerMerge(merge); err != nil {
   750  				return false, err
   751  			}
   752  		}
   753  	}
   754  	return true, nil
   755  }
   756  
/*
Experts: to be used by a MergePolicy to avoid selecting merges for
segments already being merged. The returned map is the writer's own
mergingSegments map, not a copy, and thus is only safe to access if
you hold IndexWriter's lock (which you do when IndexWriter invokes the
MergePolicy).
*/
func (w *IndexWriter) MergingSegments() map[*SegmentCommitInfo]bool {
	// Returned without taking the writer's lock; the original author
	// noted it is not needed here but arguably should be.
	return w.mergingSegments
}
   767  
   768  /*
   769  Expert: the MergeScheduler calls this method to retrieve the next
   770  merge requested by the MergePolicy.
   771  */
   772  func (w *IndexWriter) nextMerge() *OneMerge {
   773  	w.Lock() // synchronized
   774  	defer w.Unlock()
   775  
   776  	if w.pendingMerges.Len() == 0 {
   777  		return nil
   778  	}
   779  	// Advance the merge from pending to running
   780  	merge := w.pendingMerges.Front().Value.(*OneMerge)
   781  	w.pendingMerges.Remove(w.pendingMerges.Front())
   782  	w.runningMerges[merge] = true
   783  	return merge
   784  }
   785  
   786  // Expert: returns true if there are merges waiting to be scheduled.
   787  func (w *IndexWriter) hasPendingMerges() bool {
   788  	return w.pendingMerges.Len() > 0
   789  }
   790  
/*
Close the IndexWriter without committing any changes that have
occurred since the last commit (or since it was opened, if commit
hasn't been called). This removes any temporary files that had been
created, after which the state of the index will be the same as it
was when commit() was last called or when this writer was first
opened. This also clears a previous call to prepareCommit().

The work is done by rollbackInternal, run through
ClosingControl.close. Unlike Close(), this does not take commitLock.
*/
func (w *IndexWriter) Rollback() error {
	// don't call ensureOpen here; this acts like close() in closeable

	return w.close(w.rollbackInternal)
}
   804  
   805  func (w *IndexWriter) rollbackInternal() (ok bool, err error) {
   806  	if w.infoStream.IsEnabled("IW") {
   807  		w.infoStream.Message("IW", "rollback")
   808  	}
   809  
   810  	err = func() error {
   811  		var success = false
   812  		defer func() {
   813  			if !success {
   814  				// Must not hold IW's lock while closing mergeScheduler: this could lead to deadlock
   815  				util.CloseWhileSuppressingError(w.mergeScheduler)
   816  			}
   817  			w.Lock()
   818  			defer w.Unlock()
   819  
   820  			if !success {
   821  				func() {
   822  					defer recover() // ignore any error
   823  					// we tried to be nice about it: do the minimum
   824  					// don't leak a segments_N file if there is a pending commit
   825  					if w.pendingCommit != nil {
   826  						w.pendingCommit.rollbackCommit(w.directory)
   827  						w.deleter.decRefInfos(w.pendingCommit)
   828  					}
   829  					w.pendingCommit = nil
   830  				}()
   831  
   832  				// close all the closeables we can (but important is readerPool and writeLock to prevent leaks)
   833  				util.CloseWhileSuppressingError(w.readerPool, w.deleter, w.writeLock)
   834  				w.writeLock = nil
   835  			}
   836  		}()
   837  
   838  		func() {
   839  			w.Lock()
   840  			defer w.Unlock()
   841  
   842  			w.abortAllMerges()
   843  			w.stopMerges = true
   844  		}()
   845  
   846  		if w.infoStream.IsEnabled("IW") {
   847  			w.infoStream.Message("IW", "rollback: done finish merges")
   848  		}
   849  
   850  		// Must pre-close in case it increments changeCount so that we
   851  		// then set it to false before calling closeInternal
   852  		if err = w.mergeScheduler.Close(); err != nil {
   853  			return err
   854  		}
   855  
   856  		w.bufferedUpdatesStream.clear()
   857  		w.docWriter.close()  // mark it as closed first to prevent subsequent indexing actions/flushes
   858  		w.docWriter.abort(w) // don't sync on IW here
   859  
   860  		if err = func() error {
   861  			w.Lock()
   862  			defer w.Unlock()
   863  
   864  			if w.pendingCommit != nil {
   865  				w.pendingCommit.rollbackCommit(w.directory)
   866  				w.deleter.decRefInfos(w.pendingCommit)
   867  				w.pendingCommit = nil
   868  			}
   869  
   870  			// Don't bother saving any changes in our segmentInfos
   871  			if err = w.readerPool.dropAll(false); err != nil {
   872  				return err
   873  			}
   874  
   875  			// Keep the same segmentInfos instance but replace all of its
   876  			// SegmentInfo instances. This is so the next attempt to commit
   877  			// using this instance of IndexWriter will always write to a
   878  			// new generation ("write once").
   879  			w.segmentInfos.rollbackSegmentInfos(w.rollbackSegments)
   880  			if w.infoStream.IsEnabled("IW") {
   881  				w.infoStream.Message("IW", "rollback: infos=%v", w.readerPool.segmentsToString(w.segmentInfos.Segments))
   882  			}
   883  
   884  			w.testPoint("rollback before checkpoint")
   885  
   886  			// Ask deleter to locate unreferenced files & remove them:
   887  			if err = w.deleter.checkpoint(w.segmentInfos, false); err == nil {
   888  				if err = w.deleter.refreshList(); err == nil {
   889  
   890  					w.lastCommitChangeCount = w.changeCount
   891  
   892  					if err = w.deleter.refreshList(); err == nil {
   893  						if err = w.deleter.Close(); err == nil {
   894  
   895  							// Must set closed while inside same sync block where
   896  							// we call deleter.refresh, else concurrent routines
   897  							// may try to sneak a flush in, after we leave this
   898  							// sync block and before we enter the sync block in the
   899  							// finally clause below that sets closed:
   900  							// w._closed = true
   901  							// For GoLucene, it may not be necessary.
   902  
   903  							if err = util.Close(w.writeLock); err == nil { // release write lock
   904  								w.writeLock = nil
   905  							}
   906  						}
   907  					}
   908  				}
   909  			}
   910  
   911  			success = err != nil
   912  			return err
   913  		}(); err != nil {
   914  			return err
   915  		}
   916  
   917  		success = true
   918  		return nil
   919  	}()
   920  
   921  	return err != nil, err
   922  }
   923  
/*
Called whenever the SegmentInfos has been updated and the index files
referenced exist (correctly) in the index directory.

Acquires the IndexWriter lock and delegates to _checkpoint.
*/
func (w *IndexWriter) checkpoint() error {
	w.Lock() // synchronized
	defer w.Unlock()
	return w._checkpoint()
}
   933  
/*
Unlocked body of checkpoint: increments changeCount, marks
segmentInfos as changed and checkpoints the deleter with the current
segmentInfos (not as a commit). It takes no lock itself; checkpoint()
wraps it with the IndexWriter lock.
*/
func (w *IndexWriter) _checkpoint() error {
	w.changeCount++
	w.segmentInfos.changed()
	return w.deleter.checkpoint(w.segmentInfos, false)
}
   939  
/*
Checkpoints with IndexFileDeleter, so it's aware of new files, and
increments changeCount, so on close/commit we will write a new
segments file, but does NOT bump segmentInfos.version.

Runs under the IndexWriter lock and returns the deleter's checkpoint
error, if any.
*/
func (w *IndexWriter) checkpointNoSIS() (err error) {
	w.Lock() // synchronized
	defer w.Unlock()
	w.changeCount++
	return w.deleter.checkpoint(w.segmentInfos, false)
}
   951  
/*
Called internally if any index state has changed. Under the
IndexWriter lock, increments changeCount and marks segmentInfos as
changed; unlike checkpoint, the deleter is not involved.
*/
func (w *IndexWriter) changed() {
	w.Lock()
	defer w.Unlock()
	w.changeCount++
	w.segmentInfos.changed()
}
   959  
/*
Pushes the given frozen update packet onto the buffered updates
stream. The packet must be non-nil and must contain at least one
update (asserted). Locks are acquired in the order IndexWriter, then
bufferedUpdatesStreamLock.
*/
func (w *IndexWriter) publishFrozenUpdates(packet *FrozenBufferedUpdates) {
	w.Lock()
	defer w.Unlock()
	assert(packet != nil && packet.any())
	w.bufferedUpdatesStreamLock.Lock()
	defer w.bufferedUpdatesStreamLock.Unlock()
	w.bufferedUpdatesStream.push(packet)
}
   968  
   969  /*
   970  Atomically adds the segment private delete packet and publishes the
   971  flushed segments SegmentInfo to the index writer.
   972  */
   973  func (w *IndexWriter) publishFlushedSegment(newSegment *SegmentCommitInfo,
   974  	packet *FrozenBufferedUpdates, globalPacket *FrozenBufferedUpdates) (err error) {
   975  	defer func() {
   976  		atomic.AddInt32(&w.flushCount, 1)
   977  		err = mergeError(err, w.doAfterFlush())
   978  	}()
   979  
   980  	// Lock order IW -> BDS
   981  	w.Lock()
   982  	defer w.Unlock()
   983  	w.ClosingControl.ensureOpen(false)
   984  	w.bufferedUpdatesStreamLock.Lock()
   985  	defer w.bufferedUpdatesStreamLock.Unlock()
   986  
   987  	if w.infoStream.IsEnabled("IW") {
   988  		w.infoStream.Message("IW", "publishFlushedSegment")
   989  	}
   990  
   991  	if globalPacket != nil && globalPacket.any() {
   992  		w.bufferedUpdatesStream.push(globalPacket)
   993  	}
   994  	// Publishing the segment must be synched on IW -> BDS to make sure
   995  	// that no merge prunes away the seg. private delete packet
   996  	var nextGen int64
   997  	if packet != nil && packet.any() {
   998  		nextGen = w.bufferedUpdatesStream.push(packet)
   999  	} else {
  1000  		// Since we don't have a delete packet to apply we can get a new
  1001  		// generation right away
  1002  		nextGen = w.bufferedUpdatesStream.nextGen
  1003  	}
  1004  	if w.infoStream.IsEnabled("IW") {
  1005  		w.infoStream.Message("IW", "publish sets newSegment delGen=%v seg=%v", nextGen, w.readerPool.segmentToString(newSegment))
  1006  	}
  1007  	newSegment.SetBufferedUpdatesGen(nextGen)
  1008  	w.segmentInfos.Segments = append(w.segmentInfos.Segments, newSegment)
  1009  	return w._checkpoint()
  1010  }
  1011  
// Not implemented yet: takes the IndexWriter lock and then always
// panics (the deferred Unlock releases the lock while unwinding).
func (w *IndexWriter) resetMergeExceptions() {
	w.Lock() // synchronized
	defer w.Unlock()
	panic("not implemented yet")
}
  1017  
  1018  /*
  1019  Requires commitLock
  1020  */
  1021  func (w *IndexWriter) prepareCommitInternal(mergePolicy MergePolicy) error {
  1022  	w.startCommitTime = time.Now()
  1023  	w.ClosingControl.ensureOpen(false)
  1024  	if w.infoStream.IsEnabled("IW") {
  1025  		w.infoStream.Message("IW", "prepareCommit: flush")
  1026  		w.infoStream.Message("IW", "  index before flush %v", w.segString())
  1027  	}
  1028  
  1029  	assert2(w.tragedy == nil, "this writer hit an unrecoverable error; cannot commit\n%v", w.tragedy)
  1030  	assert2(w.pendingCommit == nil, "prepareCommit was already called with no corresponding call to commit")
  1031  
  1032  	err := w.doBeforeFlush()
  1033  	if err != nil {
  1034  		return err
  1035  	}
  1036  	w.testPoint("startDoFlush")
  1037  
  1038  	// This is copied from doFLush, except it's modified to clone &
  1039  	// incRef the flushed SegmentInfos inside the sync block:
  1040  
  1041  	toCommit, anySegmentsFlushed, err := func() (toCommit *SegmentInfos, anySegmentsFlushed bool, err error) {
  1042  		w.fullFlushLock.Lock()
  1043  		defer w.fullFlushLock.Unlock()
  1044  
  1045  		var flushSuccess = false
  1046  		var success = false
  1047  		defer func() {
  1048  			if !success {
  1049  				if w.infoStream.IsEnabled("IW") {
  1050  					w.infoStream.Message("IW", "hit error during prepareCommit")
  1051  				}
  1052  			}
  1053  			// Done: finish the full flush!
  1054  			w.docWriter.finishFullFlush(flushSuccess)
  1055  			err2 := w.doAfterFlush()
  1056  			if err2 != nil {
  1057  				log.Printf("Error in doAfterFlush: %v", err2)
  1058  			}
  1059  		}()
  1060  
  1061  		anySegmentsFlushed, err = w.docWriter.flushAllThreads(w)
  1062  		if err != nil {
  1063  			return
  1064  		}
  1065  		if !anySegmentsFlushed {
  1066  			// prevent double increment since docWriter.doFlush increments
  1067  			// the flushCount if we flushed anything.
  1068  			atomic.AddInt32(&w.flushCount, -1)
  1069  		}
  1070  		w.docWriter.processEvents(w, false, true)
  1071  		flushSuccess = true
  1072  
  1073  		err = func() (err error) {
  1074  			w.Lock()
  1075  			defer w.Unlock()
  1076  
  1077  			err = w._maybeApplyDeletes(true)
  1078  			if err != nil {
  1079  				return
  1080  			}
  1081  
  1082  			err = w.readerPool.commit(w.segmentInfos)
  1083  			if err != nil {
  1084  				return
  1085  			}
  1086  
  1087  			// Must clone the segmentInfos while we still
  1088  			// hold fullFlushLock and while sync'd so that
  1089  			// no partial changes (eg a delete w/o
  1090  			// corresponding add from an updateDocument) can
  1091  			// sneak into the commit point:
  1092  			toCommit = w.segmentInfos.Clone()
  1093  
  1094  			w.pendingCommitChangeCount = w.changeCount
  1095  
  1096  			// This protects the segmentInfos we are now going
  1097  			// to commit.  This is important in case, eg, while
  1098  			// we are trying to sync all referenced files, a
  1099  			// merge completes which would otherwise have
  1100  			// removed the files we are now syncing.
  1101  			w.filesToCommit = toCommit.files(w.directory, false)
  1102  			w.deleter.incRefFiles(w.filesToCommit)
  1103  			return
  1104  		}()
  1105  		if err != nil {
  1106  			return
  1107  		}
  1108  		success = true
  1109  		return
  1110  	}()
  1111  
  1112  	var success = false
  1113  	defer func() {
  1114  		if !success {
  1115  			func() {
  1116  				w.Lock()
  1117  				defer w.Unlock()
  1118  				if w.filesToCommit != nil {
  1119  					w.deleter.decRefFiles(w.filesToCommit)
  1120  					w.filesToCommit = nil
  1121  				}
  1122  			}()
  1123  		}
  1124  	}()
  1125  	if anySegmentsFlushed {
  1126  		err := w.maybeMerge(mergePolicy, MERGE_TRIGGER_FULL_FLUSH, UNBOUNDED_MAX_MERGE_SEGMENTS)
  1127  		if err != nil {
  1128  			return err
  1129  		}
  1130  	}
  1131  	if err := w.startCommit(toCommit); err != nil {
  1132  		return err
  1133  	}
  1134  	success = true
  1135  	return nil
  1136  }
  1137  
  1138  /*
  1139  Commits all pending changes (added & deleted documents, segment
  1140  merges, added indexes, etc.) to the index, and syncs all referenced
  1141  index files, such that a reader will see the changes and the index
  1142  updates will survive an OS or machine crash or power loss. Note that
  1143  this does not wait for any running background merges to finish. This
  1144  may be a costly operation, so you should test the cost in your
  1145  application and do it only when really necessary.
  1146  
  1147  Note that this operation calls Directory.sync on the index files.
  1148  That call  should not return until the file contents & metadata are
  1149  on stable storage. For FSDirectory, this calls the OS's fsync. But,
  1150  beware: some hardware devices may in fact cache writes even during
  1151  fsync, and return before the bits are actually on stable storage, to
  1152  give the appearance of faster performance. If you have such a device,
  1153  and it does not hav a battery backup (for example) then on power loss
  1154  it may still lose data. Lucene cannot guarantee consistency on such
  1155  devices.
  1156  */
  1157  func (w *IndexWriter) Commit() error {
  1158  	w.ensureOpen()
  1159  	w.commitLock.Lock()
  1160  	defer w.commitLock.Unlock()
  1161  	return w.commitInternal(w.config.MergePolicy())
  1162  }
  1163  
  1164  /*
  1165  Assume commitLock is locked.
  1166  */
  1167  func (w *IndexWriter) commitInternal(mergePolicy MergePolicy) error {
  1168  	if w.infoStream.IsEnabled("IW") {
  1169  		w.infoStream.Message("IW", "commit: start")
  1170  	}
  1171  
  1172  	w.ClosingControl.ensureOpen(false)
  1173  
  1174  	if w.infoStream.IsEnabled("IW") {
  1175  		w.infoStream.Message("IW", "commit: enter lock")
  1176  	}
  1177  
  1178  	if w.pendingCommit == nil {
  1179  		if w.infoStream.IsEnabled("IW") {
  1180  			w.infoStream.Message("IW", "commit: now prepare")
  1181  		}
  1182  		err := w.prepareCommitInternal(mergePolicy)
  1183  		if err != nil {
  1184  			return err
  1185  		}
  1186  	} else {
  1187  		if w.infoStream.IsEnabled("IW") {
  1188  			w.infoStream.Message("IW", "commit: already prepared")
  1189  		}
  1190  	}
  1191  	return w.finishCommit()
  1192  }
  1193  
  1194  func (w *IndexWriter) finishCommit() (err error) {
  1195  	var commitCompleted bool
  1196  	var finished bool
  1197  	var committedSegmentsFileName string
  1198  
  1199  	defer func() {
  1200  		if err != nil {
  1201  			if w.infoStream.IsEnabled("IW") {
  1202  				w.infoStream.Message("IW", "hit error during finishCommit: %v", err)
  1203  			}
  1204  			if commitCompleted {
  1205  				w.tragicEvent(err, "finishComit")
  1206  				err = nil
  1207  			}
  1208  		}
  1209  	}()
  1210  
  1211  	w.Lock() // synchronized
  1212  	defer w.Unlock()
  1213  
  1214  	if w.pendingCommit == nil {
  1215  		assert(w.filesToCommit == nil)
  1216  		if w.infoStream.IsEnabled("IW") {
  1217  			w.infoStream.Message("IW", "commit: pendingCommit == nil; skip")
  1218  		}
  1219  		return nil
  1220  	}
  1221  
  1222  	defer func() {
  1223  		defer func() {
  1224  			w.filesToCommit = nil
  1225  			w.pendingCommit = nil
  1226  		}()
  1227  
  1228  		if finished { // all is good
  1229  			w.deleter.decRefFiles(w.filesToCommit)
  1230  		} else if !commitCompleted { // error happened in finishCommit: not a tragedy
  1231  			w.deleter.decRefFilesWhileSuppressingError(w.filesToCommit)
  1232  		}
  1233  		// TODO check if any wait()
  1234  	}()
  1235  
  1236  	if w.infoStream.IsEnabled("IW") {
  1237  		w.infoStream.Message("IW", "commit: pendingCommit != nil")
  1238  	}
  1239  	if committedSegmentsFileName, err = w.pendingCommit.finishCommit(w.directory); err != nil {
  1240  		return
  1241  	}
  1242  
  1243  	// we committed, if anything goes wrong after this, we are screwed and it's a tragedy
  1244  	commitCompleted = true
  1245  
  1246  	// NOTE: don't use this.checkpoint() here, because
  1247  	// we do not want to increment changeCount:
  1248  	if err = w.deleter.checkpoint(w.pendingCommit, true); err != nil {
  1249  		return
  1250  	}
  1251  
  1252  	w.lastCommitChangeCount = w.pendingCommitChangeCount
  1253  	w.rollbackSegments = w.pendingCommit.createBackupSegmentInfos()
  1254  
  1255  	finished = true
  1256  
  1257  	if w.infoStream.IsEnabled("IW") {
  1258  		w.infoStream.Message("IW", "commit: wrote segments file '%v'", committedSegmentsFileName)
  1259  		w.infoStream.Message("IW", fmt.Sprintf("commit: took %v", time.Now().Sub(w.startCommitTime)))
  1260  		w.infoStream.Message("IW", "commit: done")
  1261  	}
  1262  	return nil
  1263  }
  1264  
  1265  /*
  1266  Flush all in-memory buffered updates (adds and deletes) to the
  1267  Directory.
  1268  */
  1269  func (w *IndexWriter) flush(triggerMerge bool, applyAllDeletes bool) error {
  1270  	// NOTE: this method cannot be sync'd because
  1271  	// maybeMerge() in turn calls mergeScheduler.merge which
  1272  	// in turn can take a long time to run and we don't want
  1273  	// to hold the lock for that.  In the case of
  1274  	// ConcurrentMergeScheduler this can lead to deadlock
  1275  	// when it stalls due to too many running merges.
  1276  
  1277  	// We can be called during close, when closing==true, so we must pass false to ensureOpen:
  1278  	w.ClosingControl.ensureOpen(false)
  1279  	ok, err := w.doFlush(applyAllDeletes)
  1280  	if err != nil {
  1281  		return err
  1282  	}
  1283  	if ok && triggerMerge {
  1284  		return w.maybeMerge(w.config.MergePolicy(), MERGE_TRIGGER_FULL_FLUSH, UNBOUNDED_MAX_MERGE_SEGMENTS)
  1285  	}
  1286  	return nil
  1287  }
  1288  
  1289  func (w *IndexWriter) doFlush(applyAllDeletes bool) (bool, error) {
  1290  	assert2(w.tragedy == nil, "this writer hit an unrecoverable error; cannot flush\n%v", w.tragedy)
  1291  
  1292  	err := w.doBeforeFlush()
  1293  	if err != nil {
  1294  		return false, err
  1295  	}
  1296  	if w.infoStream.IsEnabled("TP") {
  1297  		w.infoStream.Message("TP", "startDoFlush")
  1298  	}
  1299  
  1300  	success := false
  1301  	defer func() {
  1302  		if !success && w.infoStream.IsEnabled("IW") {
  1303  			w.infoStream.Message("IW", "hit error during flush")
  1304  		}
  1305  	}()
  1306  
  1307  	if w.infoStream.IsEnabled("IW") {
  1308  		w.infoStream.Message("IW", "  start flush: applyAllDeletes=%v", applyAllDeletes)
  1309  		w.infoStream.Message("IW", "  index before flush %v", w.segString())
  1310  	}
  1311  
  1312  	anySegmentFlushed, err := func() (ok bool, err error) {
  1313  		w.fullFlushLock.Lock()
  1314  		defer w.fullFlushLock.Unlock()
  1315  
  1316  		flushSuccess := false
  1317  		defer func() {
  1318  			w.docWriter.finishFullFlush(flushSuccess)
  1319  			w.docWriter.processEvents(w, false, true)
  1320  		}()
  1321  
  1322  		if ok, err = w.docWriter.flushAllThreads(w); err == nil {
  1323  			flushSuccess = true
  1324  		}
  1325  		return
  1326  	}()
  1327  	if err != nil {
  1328  		return false, err
  1329  	}
  1330  
  1331  	err = func() error {
  1332  		w.Lock()
  1333  		defer w.Unlock()
  1334  		err := w._maybeApplyDeletes(applyAllDeletes)
  1335  		if err != nil {
  1336  			return err
  1337  		}
  1338  		err = w.doAfterFlush()
  1339  		if err != nil {
  1340  			return err
  1341  		}
  1342  		if !anySegmentFlushed {
  1343  			//flushCount is incremented in flushAllThreads
  1344  			atomic.AddInt32(&w.flushCount, 1)
  1345  		}
  1346  		return nil
  1347  	}()
  1348  	if err != nil {
  1349  		return false, err
  1350  	}
  1351  
  1352  	success = true
  1353  	return anySegmentFlushed, nil
  1354  }
  1355  
  1356  func (w *IndexWriter) _maybeApplyDeletes(applyAllDeletes bool) error {
  1357  	if applyAllDeletes {
  1358  		if w.infoStream.IsEnabled("IW") {
  1359  			w.infoStream.Message("IW", "apply all deletes during flush")
  1360  		}
  1361  		return w._applyAllDeletesAndUpdates()
  1362  	} else if w.infoStream.IsEnabled("IW") {
  1363  		w.infoStream.Message("IW", "don't apply deletes now delTermCount=%v bytesUsed=%v",
  1364  			atomic.LoadInt32(&w.bufferedUpdatesStream.numTerms),
  1365  			atomic.LoadInt64(&w.bufferedUpdatesStream.bytesUsed))
  1366  	}
  1367  	return nil
  1368  }
  1369  
// Applies all buffered deletes and updates: acquires the IndexWriter
// lock and delegates to _applyAllDeletesAndUpdates.
func (w *IndexWriter) applyAllDeletesAndUpdates() error {
	w.Lock() // synchronized
	defer w.Unlock()
	return w._applyAllDeletesAndUpdates()
}
  1375  
  1376  func (w *IndexWriter) _applyAllDeletesAndUpdates() error {
  1377  	atomic.AddInt32(&w.flushDeletesCount, 1)
  1378  	result, err := w.bufferedUpdatesStream.applyDeletesAndUpdates(w.readerPool, w.segmentInfos.Segments)
  1379  	if err != nil {
  1380  		return err
  1381  	}
  1382  	if result.anyDeletes {
  1383  		err = w.checkpoint()
  1384  		if err != nil {
  1385  			return err
  1386  		}
  1387  	}
  1388  	if !w.keepFullyDeletedSegments && result.allDeleted != nil {
  1389  		if w.infoStream.IsEnabled("IW") {
  1390  			w.infoStream.Message("IW", "drop 100%% deleted segments: %v",
  1391  				w.readerPool.segmentsToString(result.allDeleted))
  1392  		}
  1393  		for _, info := range result.allDeleted {
  1394  			// If a merge has already registered for this segment, we leave
  1395  			// it in the readerPool; the merge will skip merging it and
  1396  			// will then drop it once it's done:
  1397  			if _, ok := w.mergingSegments[info]; !ok {
  1398  				w.segmentInfos.remove(info)
  1399  				atomic.AddInt64(&w.pendingNumDocs, -int64(info.Info.DocCount()))
  1400  				err = w.readerPool.drop(info)
  1401  				if err != nil {
  1402  					return err
  1403  				}
  1404  			}
  1405  		}
  1406  		err = w.checkpoint()
  1407  		if err != nil {
  1408  			return err
  1409  		}
  1410  	}
  1411  	w.bufferedUpdatesStream.prune(w.segmentInfos)
  1412  	return nil
  1413  }
  1414  
  1415  // L3440
/*
Merges the indicated segments, replacing them in the stack with a
single segment.

Not implemented yet: the current body always panics.
*/
func (w *IndexWriter) merge(merge *OneMerge) error {
	panic("not implemented yet")
}
  1423  
/*
Checks whether this merge involves any segments already participating
in a merge. If not, this merge is "registered", meaning we record
that its segments are now participating in a merge, and true is
returned. Else (the merge conflicts) false is returned.

Not implemented yet: the current body always panics.
*/
func (w *IndexWriter) registerMerge(merge *OneMerge) (bool, error) {
	panic("not implemented yet")
}
  1433  
// Records the standard diagnostics for the given source on info,
// without any extra details (see setDiagnosticsAndDetails).
func setDiagnostics(info *SegmentInfo, source string) {
	setDiagnosticsAndDetails(info, source, nil)
}
  1437  
  1438  func setDiagnosticsAndDetails(info *SegmentInfo, source string, details map[string]string) {
  1439  	ans := map[string]string{
  1440  		"source":         source,
  1441  		"lucene.version": util.VERSION_LATEST.String(),
  1442  		"os":             runtime.GOOS,
  1443  		"os.arch":        runtime.GOARCH,
  1444  		"go.version":     runtime.Version(),
  1445  		"timestamp":      fmt.Sprintf("%v", time.Now().Unix()),
  1446  	}
  1447  	if details != nil {
  1448  		for k, v := range details {
  1449  			ans[k] = v
  1450  		}
  1451  	}
  1452  	info.SetDiagnostics(ans)
  1453  }
  1454  
// Returns a string description of all segments, for debugging.
// Reads segmentInfos.Segments without taking the IndexWriter lock.
func (w *IndexWriter) segString() string {
	// TODO synchronized
	return w.readerPool.segmentsToString(w.segmentInfos.Segments)
}
  1460  
  1461  // called only from assert
  1462  func (w *IndexWriter) assertFilesExist(toSync *SegmentInfos) error {
  1463  	files := toSync.files(w.directory, false)
  1464  	for _, filename := range files {
  1465  		allFiles, err := w.directory.ListAll()
  1466  		if err != nil {
  1467  			return err
  1468  		}
  1469  		ok, err := w.slowFileExists(w.directory, filename)
  1470  		if err != nil {
  1471  			return err
  1472  		}
  1473  		assert2(ok, "file %v does not exist; files=%v", filename, allFiles)
  1474  		// If this trips it means we are missing a call to checkpoint
  1475  		// somewhere, because by the time we are called, deleter should
  1476  		// know about every file referenced by the current head
  1477  		// segmentInfos:
  1478  		assert2(w.deleter.exists(filename), "IndexFileDeleter doesn't know about file %v", filename)
  1479  	}
  1480  	return nil
  1481  }
  1482  
/*
For infoStream output. Acquires the IndexWriter lock and delegates to
_toLiveInfos.
*/
func (w *IndexWriter) toLiveInfos(sis *SegmentInfos) *SegmentInfos {
	w.Lock() // synchronized
	defer w.Unlock()
	return w._toLiveInfos(sis)
}
  1489  
  1490  func (w *IndexWriter) _toLiveInfos(sis *SegmentInfos) *SegmentInfos {
  1491  	newSIS := new(SegmentInfos)
  1492  	// liveSIS := make(map[*SegmentCommitInfo]bool)
  1493  	// for _, info := range w.segmentInfos.Segments {
  1494  	// 	liveSIS[info] = true
  1495  	// }
  1496  	for _, info := range sis.Segments {
  1497  		// if _, ok :=  liveSIS[info] ; ok {
  1498  		newSIS.Segments = append(newSIS.Segments, info)
  1499  		// }
  1500  	}
  1501  	return newSIS
  1502  }
  1503  
  1504  /*
  1505  Walk through all files referenced by the current segmentInfos and ask
  1506  the  Directory to sync each file, if it wans't already. If that
  1507  succeeds, then we prepare a new segments_N file but do not fully
  1508  commit it.
  1509  */
  1510  func (w *IndexWriter) startCommit(toSync *SegmentInfos) error {
  1511  	w.testPoint("startStartCommit")
  1512  	assert(w.pendingCommit == nil)
  1513  	assert2(w.tragedy == nil, "this writer hit an unrecoverable error; cannot commit\n%v", w.tragedy)
  1514  
  1515  	if w.infoStream.IsEnabled("IW") {
  1516  		w.infoStream.Message("IW", "startCommit(): start")
  1517  	}
  1518  
  1519  	if err := func() error {
  1520  		w.Lock()
  1521  		defer w.Unlock()
  1522  
  1523  		assertn(w.lastCommitChangeCount <= w.changeCount,
  1524  			"lastCommitChangeCount=%v changeCount=%v", w.lastCommitChangeCount, w.changeCount)
  1525  		if w.pendingCommitChangeCount == w.lastCommitChangeCount {
  1526  			if w.infoStream.IsEnabled("IW") {
  1527  				w.infoStream.Message("IW", "  skip startCommit(): no changes pending")
  1528  			}
  1529  			w.deleter.decRefFiles(w.filesToCommit)
  1530  			w.filesToCommit = nil
  1531  			return nil
  1532  		}
  1533  
  1534  		if w.infoStream.IsEnabled("IW") {
  1535  			w.infoStream.Message("IW", "startCommit index=%v changeCount=%v",
  1536  				w.readerPool.segmentsToString(toSync.Segments), w.changeCount)
  1537  		}
  1538  
  1539  		return w.assertFilesExist(toSync)
  1540  	}(); err != nil {
  1541  		return err
  1542  	}
  1543  
  1544  	w.testPoint("midStartCommit")
  1545  
  1546  	var pendingCommitSet = false
  1547  	defer func() {
  1548  		w.Lock()
  1549  		defer w.Unlock()
  1550  
  1551  		// Have out master segmentInfos record the generations we just
  1552  		// prepared. We do this on error or success so we don't
  1553  		// double-write a segments_N file.
  1554  		w.segmentInfos.updateGeneration(toSync)
  1555  
  1556  		if !pendingCommitSet {
  1557  			if w.infoStream.IsEnabled("IW") {
  1558  				w.infoStream.Message("IW", "hit error committing segments file")
  1559  			}
  1560  
  1561  			// Hit error
  1562  			w.deleter.decRefFiles(w.filesToCommit)
  1563  			w.filesToCommit = nil
  1564  		}
  1565  	}()
  1566  
  1567  	w.testPoint("midStartCommit2")
  1568  	err := func() (err error) {
  1569  		w.Lock()
  1570  		defer w.Unlock()
  1571  
  1572  		assert(w.pendingCommit == nil)
  1573  		assert(w.segmentInfos.generation == toSync.generation)
  1574  
  1575  		// Eror here means nothing is prepared (this method unwinds
  1576  		// everything it did on an error)
  1577  		err = toSync.prepareCommit(w.directory)
  1578  		if err != nil {
  1579  			return err
  1580  		}
  1581  		fmt.Println("DONE prepareCommit")
  1582  
  1583  		pendingCommitSet = true
  1584  		w.pendingCommit = toSync
  1585  		return nil
  1586  	}()
  1587  	if err != nil {
  1588  		return err
  1589  	}
  1590  
  1591  	// This call can take a long time -- 10s of seconds or more. We do
  1592  	// it without syncing on this:
  1593  	var success = false
  1594  	var filesToSync []string
  1595  	defer func() {
  1596  		if !success {
  1597  			pendingCommitSet = false
  1598  			w.pendingCommit = nil
  1599  			toSync.rollbackCommit(w.directory)
  1600  		}
  1601  	}()
  1602  
  1603  	filesToSync = toSync.files(w.directory, false)
  1604  	err = w.directory.Sync(filesToSync)
  1605  	if err != nil {
  1606  		return err
  1607  	}
  1608  	success = true
  1609  
  1610  	if w.infoStream.IsEnabled("IW") {
  1611  		w.infoStream.Message("IW", "done all syncs: %v", filesToSync)
  1612  	}
  1613  
  1614  	w.testPoint("midStartCommitSuccess")
  1615  	w.testPoint("finishStartCommit")
  1616  	return nil
  1617  }
  1618  
// Hook for reporting an unrecoverable error (tragedy) together with
// the location where it was detected. Not implemented yet: the
// current body always panics.
func (w *IndexWriter) tragicEvent(tragedy error, location string) {
	panic("niy")
}
  1622  
  1623  /*
  1624  Used only  by assert for testing. Current points:
  1625  - startDoFlush
  1626  - startCommitMerge
  1627  - startStartCommit
  1628  - midStartCommit
  1629  - midStartCommit2
  1630  - midStartCommitSuccess
  1631  - finishStartCommit
  1632  - startCommitMergeDeletes
  1633  - startMergeInit
  1634  - DocumentsWriter.ThreadState.init start
  1635  */
  1636  func (w *IndexWriter) testPoint(message string) {
  1637  	if w.infoStream.IsEnabled("TP") {
  1638  		w.infoStream.Message("TP", message)
  1639  	}
  1640  }
  1641  
  1642  // L4356
  1643  
/*
Called by DirectoryReader.doClose(). Delegates to the deleter's
deletePendingFiles; no lock is taken here.
*/
func (w *IndexWriter) deletePendingFiles() {
	w.deleter.deletePendingFiles()
}
  1648  
  1649  /*
  1650  NOTE: this method creates a compound file for all files returned by
  1651  info.files(). While, generally, this may include separate norms and
  1652  deleteion files, this SegmentInfos must not reference such files when
  1653  this method is called, because they are not allowed within a compound
  1654  file.
  1655  */
  1656  func createCompoundFile(infoStream util.InfoStream,
  1657  	directory store.Directory,
  1658  	checkAbort CheckAbort,
  1659  	info *SegmentInfo,
  1660  	context store.IOContext) (names []string, err error) {
  1661  
  1662  	filename := util.SegmentFileName(info.Name, "", store.COMPOUND_FILE_EXTENSION)
  1663  	if infoStream.IsEnabled("IW") {
  1664  		infoStream.Message("IW", "create compound file %v", filename)
  1665  	}
  1666  	// Now merge all added files
  1667  	files := info.Files()
  1668  	var cfsDir *store.CompoundFileDirectory
  1669  	cfsDir, err = store.NewCompoundFileDirectory(directory, filename, context, true)
  1670  	if err != nil {
  1671  		return
  1672  	}
  1673  	func() {
  1674  		var success = false
  1675  		defer func() {
  1676  			if success {
  1677  				err = util.Close(cfsDir)
  1678  			} else {
  1679  				util.CloseWhileSuppressingError(cfsDir)
  1680  				directory.DeleteFile(filename) // ignore error
  1681  				directory.DeleteFile(util.SegmentFileName(info.Name, "", store.COMPOUND_FILE_EXTENSION))
  1682  			}
  1683  		}()
  1684  
  1685  		var length int64
  1686  		var sortedFiles []string
  1687  		for file, _ := range files {
  1688  			sortedFiles = append(sortedFiles, file)
  1689  		}
  1690  		sort.Strings(sortedFiles) // maintain order
  1691  		for _, file := range sortedFiles {
  1692  			if err = directory.Copy(cfsDir, file, file, context); err != nil {
  1693  				return
  1694  			}
  1695  			if length, err = directory.FileLength(file); err != nil {
  1696  				return
  1697  			}
  1698  			if err = checkAbort.work(float64(length)); err != nil {
  1699  				return
  1700  			}
  1701  		}
  1702  		success = true
  1703  	}()
  1704  	if err != nil {
  1705  		return
  1706  	}
  1707  
  1708  	// Replace all previous files with the CFS/CFE files:
  1709  	siFiles := make(map[string]bool)
  1710  	siFiles[filename] = true
  1711  	siFiles[util.SegmentFileName(info.Name, "", store.COMPOUND_FILE_ENTRIES_EXTENSION)] = true
  1712  	info.SetFiles(siFiles)
  1713  
  1714  	for file, _ := range files {
  1715  		names = append(names, file)
  1716  	}
  1717  	return
  1718  }
  1719  
// Tries to delete the given files if unreferenced.
//
// NOTE: this is currently an unimplemented stub. It acquires the writer
// lock and then unconditionally panics with "not implemented yet"; the
// files argument is never read and no error value is ever returned.
func (w *IndexWriter) deleteNewFiles(files []string) error {
	w.Lock() // synchronized
	defer w.Unlock()
	panic("not implemented yet")
}
  1726  
  1727  /* Cleans up residuals from a segment that could not be entirely flushed due to an error */
  1728  func (w *IndexWriter) flushFailed(info *SegmentInfo) error {
  1729  	w.Lock()
  1730  	defer w.Unlock()
  1731  	return w.deleter.refresh(info.Name)
  1732  }
  1733  
  1734  func (w *IndexWriter) purge(forced bool) (n int, err error) {
  1735  	return w.docWriter.purgeBuffer(w, forced)
  1736  }
  1737  
  1738  func (w *IndexWriter) doAfterSegmentFlushed(triggerMerge bool, forcePurge bool) (err error) {
  1739  	defer func() {
  1740  		if triggerMerge {
  1741  			err = mergeError(err, w.maybeMerge(w.config.MergePolicy(), MERGE_TRIGGER_SEGMENT_FLUSH, UNBOUNDED_MAX_MERGE_SEGMENTS))
  1742  		}
  1743  	}()
  1744  	_, err = w.purge(forcePurge)
  1745  	return err
  1746  }
  1747  
  1748  func (w *IndexWriter) slowFileExists(dir store.Directory, filename string) (bool, error) {
  1749  	o, err := dir.OpenInput(filename, store.IO_CONTEXT_DEFAULT)
  1750  	if os.IsNotExist(err) {
  1751  		return false, nil
  1752  	}
  1753  	if err != nil {
  1754  		return false, err
  1755  	}
  1756  	defer o.Close()
  1757  	return true, nil
  1758  }
  1759  
/*
IndexReaderWarmer: if openDirectoryReader() has been called (i.e., this
writer is in near real-time mode), then after a merge completes, this
class can be invoked to warm the reader on the newly merged segment,
before the merge commits. This is not required for near real-time
search, but will reduce search latency on opening a new near real-time
reader after a merge completes.

NOTE: warm is called before any deletes have been carried over to the
merged segment.
*/
type IndexReaderWarmer interface {
	// warm is invoked on the AtomicReader for the newly merged segment,
	// before that segment is made visible to near-real-time readers.
	warm(reader AtomicReader) error
}