github.com/balzaczyy/golucene@v0.0.0-20151210033525-d0be9ee89713/core/index/flush.go (about)

     1  package index
     2  
     3  import (
     4  	"github.com/balzaczyy/golucene/core/util"
     5  	"sync"
     6  )
     7  
     8  /*
     9  FlushPlicy controls when segments are flushed from a RAM resident
    10  internal data-structure to the IndexWriter's Directory.
    11  
    12  Segments are traditionally flushed by:
    13  1. RAM consumption - configured via IndexWriterConfig.SetRAMBufferSizeMB()
    14  2. Number of RAM resident documents - configured via IndexWriterConfig.SetMaxBufferedDocs()
    15  
    16  The policy also applies pending delete operations (by term and/or
    17  query), given the threshold set in IndexcWriterConfig.SetMaxBufferedDeleteTerms().
    18  
    19  IndexWriter consults the provided FlushPolicy to control the flushing
    20  process. The policy is informed for each added or updated document as
    21  well as for each delete term. Based on the FlushPolicy, the
    22  information provided via ThreadState and DocumentsWriterFlushControl,
    23  the FlushPolicy decides if a DocumentsWriterPerThread needs flushing
    24  and mark it as flush-pending via DocumentsWriterFlushControl.SetFLushingPending(),
    25  or if deletes need to be applied.
    26  */
    27  type FlushPolicy interface {
    28  	// Called for each delete term. If this is a delte triggered due to
    29  	// an update the given ThreadState is non-nil.
    30  	//
    31  	// Note: this method is called synchronized on the given
    32  	// DocumentsWriterFlushControl and it is guaranteed that the
    33  	// calling goroutine holds the lock on the given ThreadState
    34  	onDelete(*DocumentsWriterFlushControl, *ThreadState)
    35  	// Called for each document update on the given ThreadState's DWPT
    36  	//
    37  	// Note: this method is called synchronized on the given DWFC and
    38  	// it is guaranteed that the calling thread holds the lock on the
    39  	// given ThreadState
    40  	onUpdate(*DocumentsWriterFlushControl, *ThreadState)
    41  	// Called for each document addition on the given ThreadState's DWPT.
    42  	//
    43  	// Note: this method is synchronized by the given DWFC and it is
    44  	// guaranteed that the calling thread holds the lock on the given
    45  	// ThreadState
    46  	onInsert(*DocumentsWriterFlushControl, *ThreadState)
    47  	// Called by DocumentsWriter to initialize the FlushPolicy
    48  	init(indexWriterConfig LiveIndexWriterConfig)
    49  }
    50  
    51  type FlushPolicyImplSPI interface {
    52  	onInsert(*DocumentsWriterFlushControl, *ThreadState)
    53  	onDelete(*DocumentsWriterFlushControl, *ThreadState)
    54  }
    55  
    56  type FlushPolicyImpl struct {
    57  	sync.Locker
    58  	spi               FlushPolicyImplSPI
    59  	indexWriterConfig LiveIndexWriterConfig
    60  	infoStream        util.InfoStream
    61  }
    62  
    63  func newFlushPolicyImpl(spi FlushPolicyImplSPI) *FlushPolicyImpl {
    64  	return &FlushPolicyImpl{
    65  		Locker: &sync.Mutex{},
    66  		spi:    spi,
    67  	}
    68  }
    69  
    70  func (fp *FlushPolicyImpl) onUpdate(control *DocumentsWriterFlushControl, state *ThreadState) {
    71  	fp.spi.onInsert(control, state)
    72  	fp.spi.onDelete(control, state)
    73  }
    74  
    75  func (fp *FlushPolicyImpl) init(indexWriterConfig LiveIndexWriterConfig) {
    76  	fp.Lock() // synchronized
    77  	defer fp.Unlock()
    78  	fp.indexWriterConfig = indexWriterConfig
    79  	fp.infoStream = indexWriterConfig.InfoStream()
    80  }
    81  
    82  /*
    83  Returns the current most RAM consuming non-pending ThreadState with
    84  at least one indexed document.
    85  
    86  This method will never return nil
    87  */
    88  func (p *FlushPolicyImpl) findLargestNonPendingWriter(control *DocumentsWriterFlushControl,
    89  	perThreadState *ThreadState) *ThreadState {
    90  	assert(perThreadState.dwpt.numDocsInRAM > 0)
    91  	maxRamSoFar := perThreadState.bytesUsed
    92  	// the dwpt which needs to be flushed eventually
    93  	maxRamUsingThreadState := perThreadState
    94  	assert2(!perThreadState.flushPending, "DWPT should have flushed")
    95  	count := 0
    96  	control.perThreadPool.foreach(func(next *ThreadState) {
    97  		if !next.flushPending {
    98  			if nextRam := next.bytesUsed; nextRam > 0 && next.dwpt.numDocsInRAM > 0 {
    99  				if p.infoStream.IsEnabled("FP") {
   100  					p.infoStream.Message("FP", "thread state has %v bytes; docInRAM=%v",
   101  						nextRam, next.dwpt.numDocsInRAM)
   102  				}
   103  				count++
   104  				if nextRam > maxRamSoFar {
   105  					maxRamSoFar = nextRam
   106  					maxRamUsingThreadState = next
   107  				}
   108  			}
   109  		}
   110  	})
   111  	if p.infoStream.IsEnabled("FP") {
   112  		p.infoStream.Message("FP", "%v in-use non-flusing threads states", count)
   113  	}
   114  	p.message("set largest ram consuming thread pending on lower watermark")
   115  	return maxRamUsingThreadState
   116  }
   117  
   118  func (p *FlushPolicyImpl) message(s string) {
   119  	if p.infoStream.IsEnabled("FP") {
   120  		p.infoStream.Message("FP", s)
   121  	}
   122  }
   123  
   124  // index/FlushByRamOrCountsPolicy.java
   125  
   126  /*
   127  Default FlushPolicy implementation that flushes new segments based on
   128  RAM used and document count depending on the IndexWriter's
   129  IndexWriterConfig. It also applies pending deletes based on the
   130  number of buffered delete terms.
   131  
   132  1. onDelete() - applies pending delete operations based on the global
   133  number of buffered delete terms iff MaxBufferedDeleteTerms() is
   134  enabled
   135  2. onInsert() - flushes either on the number of documents per
   136  DocumentsWriterPerThread (NumDocsInRAM()) or on the global active
   137  memory consumption in the current indexing session iff
   138  MaxBufferedDocs() or RAMBufferSizeMB() is enabled respectively
   139  3. onUpdate() - calls onInsert() and onDelete() in order
   140  
   141  All IndexWriterConfig settings are used to mark DocumentsWriterPerThread
   142  as flush pending during indexing with respect to their live updates.
   143  
   144  If SetRAMBufferSizeMB() is enabled, the largest ram consuming
   145  DocumentsWriterPerThread will be marked as pending iff the global
   146  active RAM consumption is >= the configured max RAM buffer.
   147  */
   148  type FlushByRamOrCountsPolicy struct {
   149  	*FlushPolicyImpl
   150  }
   151  
   152  func newFlushByRamOrCountsPolicy() *FlushByRamOrCountsPolicy {
   153  	ans := new(FlushByRamOrCountsPolicy)
   154  	ans.FlushPolicyImpl = newFlushPolicyImpl(ans)
   155  	return ans
   156  }
   157  
   158  func (p *FlushByRamOrCountsPolicy) onDelete(control *DocumentsWriterFlushControl, state *ThreadState) {
   159  	panic("not implemented yet")
   160  }
   161  
   162  func (p *FlushByRamOrCountsPolicy) onInsert(control *DocumentsWriterFlushControl, state *ThreadState) {
   163  	if p.flushOnDocCount() && state.dwpt.numDocsInRAM >= p.indexWriterConfig.MaxBufferedDocs() {
   164  		// flush this state by num docs
   165  		control.setFlushPending(state)
   166  	} else if p.flushOnRAM() { // flush by RAM
   167  		limit := int64(p.indexWriterConfig.RAMBufferSizeMB() * 1024 * 1024)
   168  		totalRam := control._activeBytes + control.deleteBytesUsed() // safe w/o sync
   169  		if totalRam >= limit {
   170  			if p.infoStream.IsEnabled("FP") {
   171  				p.infoStream.Message("FP",
   172  					"trigger flush: activeBytes=%v deleteBytes=%v vs limit=%v",
   173  					control._activeBytes, control.deleteBytesUsed(), limit)
   174  			}
   175  			p.markLargestWriterPending(control, state, totalRam)
   176  		}
   177  	}
   178  }
   179  
   180  /* Marks the mos tram consuming active DWPT flush pending */
   181  func (p *FlushByRamOrCountsPolicy) markLargestWriterPending(control *DocumentsWriterFlushControl,
   182  	perThreadState *ThreadState, currentBytesPerThread int64) {
   183  	control.setFlushPending(p.findLargestNonPendingWriter(control, perThreadState))
   184  }
   185  
   186  /* Returns true if this FLushPolicy flushes on IndexWriterConfig.MaxBufferedDocs(), otherwise false */
   187  func (p *FlushByRamOrCountsPolicy) flushOnDocCount() bool {
   188  	return p.indexWriterConfig.MaxBufferedDocs() != DISABLE_AUTO_FLUSH
   189  }
   190  
   191  /* Returns true if this FlushPolicy flushes on IndexWriterConfig.RAMBufferSizeMB(), otherwise false */
   192  func (p *FlushByRamOrCountsPolicy) flushOnRAM() bool {
   193  	return p.indexWriterConfig.RAMBufferSizeMB() != DISABLE_AUTO_FLUSH
   194  }